def run(self):
    """Read this worker's input, then append it to per-key temporary files.

    Steps, in order:
      1. Call ``self.reader()`` (not shown here; presumably it fills
         ``self.outData`` -- confirm against the class).
      2. For every key in ``self.outData``: put the key on ``self.queue``
         and append that key's records to the file ``<path>/<key>``.

    Each record is written as one line: ``item[0]``, the key, then the
    remaining elements of the record, separated by single spaces.
    ``path`` is a name defined outside this method.
    """
    def announce(stage):
        # Progress line tagged with this worker's name.
        print("Process: " + self.name + ": " + stage)

    announce("reading file...")
    self.reader()
    announce("begin to write temporary data to file...")
    for key, records in self.outData.items():
        # Report the key through the queue (blocking put, no timeout).
        self.queue.put(key, block=True, timeout=None)
        with open(path + "/" + key, 'a') as out:
            # Exclusive advisory lock on the target file (presumably because
            # other workers may append to the same key file). There is no
            # explicit unlock: the lock goes away when the file is closed
            # at the end of the ``with`` block.
            fcntl.flock(out.fileno(), fcntl.LOCK_EX)
            out.writelines(
                rec[0] + " " + key + " " + " ".join(rec[1:]) + "\n"
                for rec in records
            )
    announce("completed write...")
3.排序进程
排序进程要处理的文件名,由主进程从队列中获取。
def sortFile(self):
    """Regroup one temporary file by its sixth field and write ``<file>_sorted``.

    Reads ``<path>/<self.fileName>``, splits every line on single spaces
    and collects fields 0, 2, 3, 4, 6 and 7 into ``self.outData`` under
    the key taken from field 5. Afterwards the groups are written, in
    sorted key order, to ``<path>/<self.fileName>_sorted``; within a group
    the records keep their input order.

    Each output line is: field 0, ``self.fileName``, fields 2-4, the key,
    field 6, field 7. No newline is appended on write; the code relies on
    field 7 still carrying the input line's trailing newline (true when
    field 7 is the last field of the line).

    ``path`` is a name defined outside this method.
    """
    label = self.fileName
    source_path = path + "/" + label
    with open(source_path, 'r') as src:
        for raw in src:
            fields = raw.split(" ")
            # Upstream record layout noted by the original author:
            # [tmpDict[line[3]], line[1], line[2], '0',
            #  tmpDict[line[6]], line[4], line[5], '0']
            # Every record in this file belongs to the same chromosome,
            # paired with others (Chr1-Chr2, Chr1-Chr3, ...).
            record = [fields[0], fields[2], fields[3], fields[4],
                      fields[6], fields[7]]
            self.outData.setdefault(fields[5], []).append(record)
    with open(source_path + "_sorted", 'w') as dst:
        for key in sorted(self.outData):
            for item in self.outData[key]:
                pieces = [item[0], label, " ".join(item[1:4]),
                          key, item[-2], item[-1]]
                dst.write(" ".join(pieces))
4.封装后的函数
主进程通过队列的方式从子进程中获取染色体编号
# Driver: split the input among reader processes, collect the chromosome
# names they report through a queue, then run one sort process per
# distinct chromosome and wait for all of them.
from queue import Empty  # raised by Queue.get() when the timeout expires

path = 'tmp'+str(int(time.time()))  # per-run temp directory named by epoch seconds
mkdir(path)
workQueue = Queue()  # receives data (chromosome names) from the child processes
read_jobs = []
sort_jobs = []
chrosomes = []
# File-pointer coordinates for every chunk; elements [0] and [1] of each
# entry are handed to readProcess (presumably start/end offsets -- confirm
# against PartitionFile).
pos_list = PartitionFile(fileName, ProcessNum).partion()
for i in range(ProcessNum):
    position = pos_list[i]
    myprocess = readProcess(
        str(i), fileName, workQueue, position[0], position[1], processFunction)
    myprocess.start()
    read_jobs.append(myprocess)

# Drain the queue BEFORE joining the readers. A child that has put items on
# a multiprocessing queue may not exit until they have been consumed, so
# joining first can deadlock. Liveness is sampled before each get(): if no
# reader was alive at that point, everything they sent is already queued,
# and a subsequent timeout means the queue is fully drained.
while True:
    readers_running = any(job.is_alive() for job in read_jobs)
    try:
        chrosomes.append(workQueue.get(block=True, timeout=1))
    except Empty:  # only a timeout ends the wait; other errors propagate
        if not readers_running:
            break
for job in read_jobs:
    job.join()

# One sort process per distinct chromosome name.
for name in set(chrosomes):
    myprocess = sortProcess(str(name), name)
    myprocess.start()
    sort_jobs.append(myprocess)
for job in sort_jobs:
    job.join()
5.性能测试
单进程
Process: 0: reading file...
Process: 0: begin to write temporary data to file...
Process: 0: completed write...
sorting chrosome: Gbar_A01...
chrosome: Gbar_A01ok...
merge chrosomes to a single file...
completed!
there are some temporary file in directory: <./tmp1593916264>
if you can remove it by yourself!
Cost Time is 46.96
四个进程
Process: 1: reading file...
Process: 0: reading file...
Process: 3: reading file...
Process: 2: reading file...
Process: 3: begin to write temporary data to file...
Process: 1: begin to write temporary data to file...
Process: 3: completed write...
Process: 2: begin to write temporary data to file...
Process: 0: begin to write temporary data to file...
Process: 1: completed write...
Process: 2: completed write...
Process: 0: completed write...
sorting chrosome: Gbar_A01...
chrosome: Gbar_A01 ok...
merge chrosomes to a single file...
completed!
there are some temporary file in directory: <./tmp1593916319>
if you can remove it by yourself!
Cost Time is 18.70