207 lines
5.9 KiB
Python
207 lines
5.9 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: UTF-8 -*-
|
|||
|
|
|
|||
|
|
"""
|
|||
|
|
Created on: 2023-02-17
|
|||
|
|
@author: cjs
|
|||
|
|
# 用途:批量查找项目的bam文件,批量进行chemo_panel2.py脚本测试
|
|||
|
|
# 版本:0.0.1
|
|||
|
|
# 最后编辑日期: 2023-02-17
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
from cjs_test.cjs_logger import Logger
|
|||
|
|
from multiprocessing import Process, Queue
|
|||
|
|
from glob import glob
|
|||
|
|
import subprocess
|
|||
|
|
import datetime
|
|||
|
|
import json
|
|||
|
|
import traceback
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
|
|||
|
|
# 全局参数
|
|||
|
|
Exe_Bin = ''
|
|||
|
|
Exe_Path = ''
|
|||
|
|
GLog = None
|
|||
|
|
Start_Time = None
|
|||
|
|
Bam_Dd = {} # 记录需要处理的bam路径
|
|||
|
|
# 命令参数
|
|||
|
|
Pro_Out = r''
|
|||
|
|
# 固定参数
|
|||
|
|
PS_MAX = 40 # 最大的进程数
|
|||
|
|
|
|||
|
|
|
|||
|
|
def Exit_Print(pline=''):
|
|||
|
|
"""显示错误的信息,退出脚本."""
|
|||
|
|
print('%s -O Pro_Out' % Exe_Bin)
|
|||
|
|
if len(pline) > 0:
|
|||
|
|
print(pline)
|
|||
|
|
if GLog:
|
|||
|
|
GLog.info('exit')
|
|||
|
|
GLog.close()
|
|||
|
|
sys.exit(0)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# 处理运行参数
|
|||
|
|
def Get_Opts():
|
|||
|
|
"""获取运行的环境变量."""
|
|||
|
|
global Exe_Bin
|
|||
|
|
global Exe_Path
|
|||
|
|
global GLog
|
|||
|
|
global Start_Time
|
|||
|
|
global Pro_Out
|
|||
|
|
|
|||
|
|
file_real = os.path.realpath(sys.argv[0])
|
|||
|
|
Exe_Path = os.path.dirname(file_real)
|
|||
|
|
Exe_Bin = os.path.basename(file_real)
|
|||
|
|
|
|||
|
|
Start_Time = datetime.datetime.now()
|
|||
|
|
ymd = Start_Time.__format__('%Y%m%d_%H%M%S')
|
|||
|
|
|
|||
|
|
# 运行参数数量检查
|
|||
|
|
argvs = sys.argv[1:]
|
|||
|
|
argv_lens = len(argvs)
|
|||
|
|
if argv_lens < 2:
|
|||
|
|
err_line = '缺少运行的必要参数'
|
|||
|
|
Exit_Print(pline=err_line)
|
|||
|
|
|
|||
|
|
# 获取参数
|
|||
|
|
Pro_Out = ''
|
|||
|
|
argv_index = 0
|
|||
|
|
for argv in argvs:
|
|||
|
|
argv_index += 1
|
|||
|
|
if argv.startswith('-'):
|
|||
|
|
if argv.upper() == '-O':
|
|||
|
|
if argv_index < argv_lens:
|
|||
|
|
Pro_Out = argvs[argv_index]
|
|||
|
|
else:
|
|||
|
|
err_line = '%s,参数的值无法获取' % argv
|
|||
|
|
Exit_Print(pline=err_line)
|
|||
|
|
else:
|
|||
|
|
err_line = '无法识别的参数:%s' % argv
|
|||
|
|
Exit_Print(pline=err_line)
|
|||
|
|
|
|||
|
|
if len(Pro_Out) == 0:
|
|||
|
|
err_line = '未能获取到正确的芯片号'
|
|||
|
|
Exit_Print(pline=err_line)
|
|||
|
|
|
|||
|
|
# 开启日志
|
|||
|
|
log_path = os.path.join(Exe_Path, 'logs', Exe_Bin)
|
|||
|
|
if not os.path.exists(log_path):
|
|||
|
|
os.makedirs(log_path)
|
|||
|
|
log_base = '%s_%s.log' % (Exe_Bin, ymd)
|
|||
|
|
log_full = os.path.join(log_path, log_base)
|
|||
|
|
GLog = Logger(log_full, mode='w')
|
|||
|
|
GLog.info('start')
|
|||
|
|
|
|||
|
|
return 0
|
|||
|
|
|
|||
|
|
|
|||
|
|
def Get_Bams():
|
|||
|
|
global Bam_Dd
|
|||
|
|
for bam in glob('%s/**/*.rmdup.bam' % Pro_Out, recursive=True):
|
|||
|
|
bam_nor = ""
|
|||
|
|
bam_base = os.path.basename(bam)
|
|||
|
|
bam_dir = os.path.dirname(bam)
|
|||
|
|
pro_dir = os.path.dirname(bam_dir)
|
|||
|
|
|
|||
|
|
# 寻找json
|
|||
|
|
for in_json in glob('%s/*.json' % pro_dir):
|
|||
|
|
with open(in_json, 'r', encoding='utf8') as ff:
|
|||
|
|
load_dict = json.load(ff)
|
|||
|
|
for dkey in load_dict:
|
|||
|
|
if dkey.upper().find("NORMAL") > -1:
|
|||
|
|
bam_nor = load_dict[dkey]
|
|||
|
|
break
|
|||
|
|
if bam_nor != "":
|
|||
|
|
if bam_base.find(bam_nor) > -1: # 只选取normal的bam
|
|||
|
|
if bam_nor not in Bam_Dd:
|
|||
|
|
Bam_Dd[bam_nor] = bam
|
|||
|
|
else:
|
|||
|
|
# 已经有的项目,选取bam文件大的处理
|
|||
|
|
old_bam = Bam_Dd[bam_nor]
|
|||
|
|
# print("%s, 项目有重复的bam[%s, %s]" % (bam_nor, old_bam, bam))
|
|||
|
|
old_size = os.path.getsize(old_bam)
|
|||
|
|
new_size = os.path.getsize(bam)
|
|||
|
|
if new_size > old_size:
|
|||
|
|
Bam_Dd[bam_nor] = bam
|
|||
|
|
|
|||
|
|
|
|||
|
|
def py_chemo(df_q):
|
|||
|
|
py_full = os.path.join(Exe_Path, "chemo_panel.py")
|
|||
|
|
while (1):
|
|||
|
|
py_ls = df_q.get()
|
|||
|
|
if py_ls is None:
|
|||
|
|
break
|
|||
|
|
py_normal = py_ls[0]
|
|||
|
|
py_bam = py_ls[1]
|
|||
|
|
py_ourdir = os.path.join(Exe_Path, "cheom_out", py_normal)
|
|||
|
|
if not os.path.exists(py_ourdir):
|
|||
|
|
os.makedirs(py_ourdir)
|
|||
|
|
py_cmd = "%s -o %s -p %s --n %s" % (py_full, py_ourdir, "650gene",
|
|||
|
|
py_normal)
|
|||
|
|
py_cmd += ' --b %s' % py_bam
|
|||
|
|
ps = subprocess.Popen(py_cmd,
|
|||
|
|
stdout=subprocess.PIPE,
|
|||
|
|
stderr=subprocess.STDOUT,
|
|||
|
|
shell=True)
|
|||
|
|
while ps.poll() is None:
|
|||
|
|
std_outs = ps.stdout.readline().decode().strip()
|
|||
|
|
if len(std_outs) > 0:
|
|||
|
|
GLog.info("%s->%s" % (py_normal, std_outs))
|
|||
|
|
std_outs = ps.stdout.readline().decode().strip()
|
|||
|
|
if len(std_outs) > 0:
|
|||
|
|
GLog.info("%s->%s" % (py_normal, std_outs))
|
|||
|
|
|
|||
|
|
|
|||
|
|
def Process_Chemo():
|
|||
|
|
"""多进程跑分析."""
|
|||
|
|
ps_num = PS_MAX
|
|||
|
|
bam_num = len(Bam_Dd)
|
|||
|
|
if PS_MAX > bam_num:
|
|||
|
|
ps_num = bam_num
|
|||
|
|
# 安排好队列
|
|||
|
|
qd = Queue()
|
|||
|
|
for bam_nor in Bam_Dd:
|
|||
|
|
bam = Bam_Dd[bam_nor]
|
|||
|
|
qd.put([bam_nor, bam])
|
|||
|
|
# 新建输出文件夹
|
|||
|
|
cheom_out = os.path.join(Exe_Path, "cheom_out")
|
|||
|
|
if not os.path.exists(cheom_out):
|
|||
|
|
os.makedirs(cheom_out)
|
|||
|
|
ps_ls = []
|
|||
|
|
for i in range(ps_num):
|
|||
|
|
qd.put(None)
|
|||
|
|
chemo_ps = Process(target=py_chemo, args=(qd,), daemon=True)
|
|||
|
|
chemo_ps.start()
|
|||
|
|
ps_ls.append(chemo_ps)
|
|||
|
|
# 检测在运行的进程
|
|||
|
|
temp_num = ps_num
|
|||
|
|
while (1):
|
|||
|
|
ps_runs = 0
|
|||
|
|
for ps in ps_ls:
|
|||
|
|
if ps.is_alive():
|
|||
|
|
ps_runs += 1
|
|||
|
|
if ps_runs < temp_num:
|
|||
|
|
temp_num = ps_runs
|
|||
|
|
print('\r在运行的进程数:%02d' % temp_num, end='')
|
|||
|
|
if ps_runs == 0:
|
|||
|
|
break
|
|||
|
|
print('\n所有分析进程结束')
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == '__main__':
|
|||
|
|
Get_Opts()
|
|||
|
|
Get_Bams()
|
|||
|
|
print(len(Bam_Dd))
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
Process_Chemo()
|
|||
|
|
except BaseException:
|
|||
|
|
GLog.error(traceback.format_exc())
|
|||
|
|
print(traceback.format_exc())
|
|||
|
|
endtime = datetime.datetime.now()
|
|||
|
|
GLog.info('end')
|
|||
|
|
GLog.info('run time:%s seconds' % ((endtime - Start_Time).seconds))
|
|||
|
|
GLog.close()
|