207 lines
5.9 KiB
Python
Executable File
207 lines
5.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
# -*- coding: UTF-8 -*-
|
||
|
||
"""
|
||
Created on: 2023-02-17
|
||
@author: cjs
|
||
# 用途:批量查找项目的bam文件,批量进行chemo_panel2.py脚本测试
|
||
# 版本:0.0.1
|
||
# 最后编辑日期: 2023-02-17
|
||
"""
|
||
|
||
from cjs_test.cjs_logger import Logger
|
||
from multiprocessing import Process, Queue
|
||
from glob import glob
|
||
import subprocess
|
||
import datetime
|
||
import json
|
||
import traceback
|
||
import os
|
||
import sys
|
||
|
||
# 全局参数
|
||
Exe_Bin = ''
|
||
Exe_Path = ''
|
||
GLog = None
|
||
Start_Time = None
|
||
Bam_Dd = {} # 记录需要处理的bam路径
|
||
# 命令参数
|
||
Pro_Out = r''
|
||
# 固定参数
|
||
PS_MAX = 40 # 最大的进程数
|
||
|
||
|
||
def Exit_Print(pline=''):
|
||
"""显示错误的信息,退出脚本."""
|
||
print('%s -O Pro_Out' % Exe_Bin)
|
||
if len(pline) > 0:
|
||
print(pline)
|
||
if GLog:
|
||
GLog.info('exit')
|
||
GLog.close()
|
||
sys.exit(0)
|
||
|
||
|
||
# 处理运行参数
|
||
def Get_Opts():
|
||
"""获取运行的环境变量."""
|
||
global Exe_Bin
|
||
global Exe_Path
|
||
global GLog
|
||
global Start_Time
|
||
global Pro_Out
|
||
|
||
file_real = os.path.realpath(sys.argv[0])
|
||
Exe_Path = os.path.dirname(file_real)
|
||
Exe_Bin = os.path.basename(file_real)
|
||
|
||
Start_Time = datetime.datetime.now()
|
||
ymd = Start_Time.__format__('%Y%m%d_%H%M%S')
|
||
|
||
# 运行参数数量检查
|
||
argvs = sys.argv[1:]
|
||
argv_lens = len(argvs)
|
||
if argv_lens < 2:
|
||
err_line = '缺少运行的必要参数'
|
||
Exit_Print(pline=err_line)
|
||
|
||
# 获取参数
|
||
Pro_Out = ''
|
||
argv_index = 0
|
||
for argv in argvs:
|
||
argv_index += 1
|
||
if argv.startswith('-'):
|
||
if argv.upper() == '-O':
|
||
if argv_index < argv_lens:
|
||
Pro_Out = argvs[argv_index]
|
||
else:
|
||
err_line = '%s,参数的值无法获取' % argv
|
||
Exit_Print(pline=err_line)
|
||
else:
|
||
err_line = '无法识别的参数:%s' % argv
|
||
Exit_Print(pline=err_line)
|
||
|
||
if len(Pro_Out) == 0:
|
||
err_line = '未能获取到正确的芯片号'
|
||
Exit_Print(pline=err_line)
|
||
|
||
# 开启日志
|
||
log_path = os.path.join(Exe_Path, 'logs', Exe_Bin)
|
||
if not os.path.exists(log_path):
|
||
os.makedirs(log_path)
|
||
log_base = '%s_%s.log' % (Exe_Bin, ymd)
|
||
log_full = os.path.join(log_path, log_base)
|
||
GLog = Logger(log_full, mode='w')
|
||
GLog.info('start')
|
||
|
||
return 0
|
||
|
||
|
||
def Get_Bams():
|
||
global Bam_Dd
|
||
for bam in glob('%s/**/*.rmdup.bam' % Pro_Out, recursive=True):
|
||
bam_nor = ""
|
||
bam_base = os.path.basename(bam)
|
||
bam_dir = os.path.dirname(bam)
|
||
pro_dir = os.path.dirname(bam_dir)
|
||
|
||
# 寻找json
|
||
for in_json in glob('%s/*.json' % pro_dir):
|
||
with open(in_json, 'r', encoding='utf8') as ff:
|
||
load_dict = json.load(ff)
|
||
for dkey in load_dict:
|
||
if dkey.upper().find("NORMAL") > -1:
|
||
bam_nor = load_dict[dkey]
|
||
break
|
||
if bam_nor != "":
|
||
if bam_base.find(bam_nor) > -1: # 只选取normal的bam
|
||
if bam_nor not in Bam_Dd:
|
||
Bam_Dd[bam_nor] = bam
|
||
else:
|
||
# 已经有的项目,选取bam文件大的处理
|
||
old_bam = Bam_Dd[bam_nor]
|
||
# print("%s, 项目有重复的bam[%s, %s]" % (bam_nor, old_bam, bam))
|
||
old_size = os.path.getsize(old_bam)
|
||
new_size = os.path.getsize(bam)
|
||
if new_size > old_size:
|
||
Bam_Dd[bam_nor] = bam
|
||
|
||
|
||
def py_chemo(df_q):
|
||
py_full = os.path.join(Exe_Path, "chemo_panel.py")
|
||
while (1):
|
||
py_ls = df_q.get()
|
||
if py_ls is None:
|
||
break
|
||
py_normal = py_ls[0]
|
||
py_bam = py_ls[1]
|
||
py_ourdir = os.path.join(Exe_Path, "cheom_out", py_normal)
|
||
if not os.path.exists(py_ourdir):
|
||
os.makedirs(py_ourdir)
|
||
py_cmd = "%s -o %s -p %s --n %s" % (py_full, py_ourdir, "650gene",
|
||
py_normal)
|
||
py_cmd += ' --b %s' % py_bam
|
||
ps = subprocess.Popen(py_cmd,
|
||
stdout=subprocess.PIPE,
|
||
stderr=subprocess.STDOUT,
|
||
shell=True)
|
||
while ps.poll() is None:
|
||
std_outs = ps.stdout.readline().decode().strip()
|
||
if len(std_outs) > 0:
|
||
GLog.info("%s->%s" % (py_normal, std_outs))
|
||
std_outs = ps.stdout.readline().decode().strip()
|
||
if len(std_outs) > 0:
|
||
GLog.info("%s->%s" % (py_normal, std_outs))
|
||
|
||
|
||
def Process_Chemo():
|
||
"""多进程跑分析."""
|
||
ps_num = PS_MAX
|
||
bam_num = len(Bam_Dd)
|
||
if PS_MAX > bam_num:
|
||
ps_num = bam_num
|
||
# 安排好队列
|
||
qd = Queue()
|
||
for bam_nor in Bam_Dd:
|
||
bam = Bam_Dd[bam_nor]
|
||
qd.put([bam_nor, bam])
|
||
# 新建输出文件夹
|
||
cheom_out = os.path.join(Exe_Path, "cheom_out")
|
||
if not os.path.exists(cheom_out):
|
||
os.makedirs(cheom_out)
|
||
ps_ls = []
|
||
for i in range(ps_num):
|
||
qd.put(None)
|
||
chemo_ps = Process(target=py_chemo, args=(qd,), daemon=True)
|
||
chemo_ps.start()
|
||
ps_ls.append(chemo_ps)
|
||
# 检测在运行的进程
|
||
temp_num = ps_num
|
||
while (1):
|
||
ps_runs = 0
|
||
for ps in ps_ls:
|
||
if ps.is_alive():
|
||
ps_runs += 1
|
||
if ps_runs < temp_num:
|
||
temp_num = ps_runs
|
||
print('\r在运行的进程数:%02d' % temp_num, end='')
|
||
if ps_runs == 0:
|
||
break
|
||
print('\n所有分析进程结束')
|
||
|
||
|
||
if __name__ == '__main__':
|
||
Get_Opts()
|
||
Get_Bams()
|
||
print(len(Bam_Dd))
|
||
|
||
try:
|
||
Process_Chemo()
|
||
except BaseException:
|
||
GLog.error(traceback.format_exc())
|
||
print(traceback.format_exc())
|
||
endtime = datetime.datetime.now()
|
||
GLog.info('end')
|
||
GLog.info('run time:%s seconds' % ((endtime - Start_Time).seconds))
|
||
GLog.close()
|