pipeline/script/chemo/test_chemo.py

207 lines
5.9 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Created on: 2023-02-17
@author: cjs
# 用途批量查找项目的bam文件批量进行chemo_panel2.py脚本测试
# 版本0.0.1
# 最后编辑日期: 2023-02-17
"""
from cjs_test.cjs_logger import Logger
from multiprocessing import Process, Queue
from glob import glob
import subprocess
import datetime
import json
import traceback
import os
import sys
# 全局参数
Exe_Bin = ''
Exe_Path = ''
GLog = None
Start_Time = None
Bam_Dd = {} # 记录需要处理的bam路径
# 命令参数
Pro_Out = r''
# 固定参数
PS_MAX = 40 # 最大的进程数
def Exit_Print(pline=''):
"""显示错误的信息,退出脚本."""
print('%s -O Pro_Out' % Exe_Bin)
if len(pline) > 0:
print(pline)
if GLog:
GLog.info('exit')
GLog.close()
sys.exit(0)
# 处理运行参数
def Get_Opts():
"""获取运行的环境变量."""
global Exe_Bin
global Exe_Path
global GLog
global Start_Time
global Pro_Out
file_real = os.path.realpath(sys.argv[0])
Exe_Path = os.path.dirname(file_real)
Exe_Bin = os.path.basename(file_real)
Start_Time = datetime.datetime.now()
ymd = Start_Time.__format__('%Y%m%d_%H%M%S')
# 运行参数数量检查
argvs = sys.argv[1:]
argv_lens = len(argvs)
if argv_lens < 2:
err_line = '缺少运行的必要参数'
Exit_Print(pline=err_line)
# 获取参数
Pro_Out = ''
argv_index = 0
for argv in argvs:
argv_index += 1
if argv.startswith('-'):
if argv.upper() == '-O':
if argv_index < argv_lens:
Pro_Out = argvs[argv_index]
else:
err_line = '%s,参数的值无法获取' % argv
Exit_Print(pline=err_line)
else:
err_line = '无法识别的参数:%s' % argv
Exit_Print(pline=err_line)
if len(Pro_Out) == 0:
err_line = '未能获取到正确的芯片号'
Exit_Print(pline=err_line)
# 开启日志
log_path = os.path.join(Exe_Path, 'logs', Exe_Bin)
if not os.path.exists(log_path):
os.makedirs(log_path)
log_base = '%s_%s.log' % (Exe_Bin, ymd)
log_full = os.path.join(log_path, log_base)
GLog = Logger(log_full, mode='w')
GLog.info('start')
return 0
def Get_Bams():
global Bam_Dd
for bam in glob('%s/**/*.rmdup.bam' % Pro_Out, recursive=True):
bam_nor = ""
bam_base = os.path.basename(bam)
bam_dir = os.path.dirname(bam)
pro_dir = os.path.dirname(bam_dir)
# 寻找json
for in_json in glob('%s/*.json' % pro_dir):
with open(in_json, 'r', encoding='utf8') as ff:
load_dict = json.load(ff)
for dkey in load_dict:
if dkey.upper().find("NORMAL") > -1:
bam_nor = load_dict[dkey]
break
if bam_nor != "":
if bam_base.find(bam_nor) > -1: # 只选取normal的bam
if bam_nor not in Bam_Dd:
Bam_Dd[bam_nor] = bam
else:
# 已经有的项目选取bam文件大的处理
old_bam = Bam_Dd[bam_nor]
# print("%s, 项目有重复的bam[%s, %s]" % (bam_nor, old_bam, bam))
old_size = os.path.getsize(old_bam)
new_size = os.path.getsize(bam)
if new_size > old_size:
Bam_Dd[bam_nor] = bam
def py_chemo(df_q):
py_full = os.path.join(Exe_Path, "chemo_panel.py")
while (1):
py_ls = df_q.get()
if py_ls is None:
break
py_normal = py_ls[0]
py_bam = py_ls[1]
py_ourdir = os.path.join(Exe_Path, "cheom_out", py_normal)
if not os.path.exists(py_ourdir):
os.makedirs(py_ourdir)
py_cmd = "%s -o %s -p %s --n %s" % (py_full, py_ourdir, "650gene",
py_normal)
py_cmd += ' --b %s' % py_bam
ps = subprocess.Popen(py_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
shell=True)
while ps.poll() is None:
std_outs = ps.stdout.readline().decode().strip()
if len(std_outs) > 0:
GLog.info("%s->%s" % (py_normal, std_outs))
std_outs = ps.stdout.readline().decode().strip()
if len(std_outs) > 0:
GLog.info("%s->%s" % (py_normal, std_outs))
def Process_Chemo():
"""多进程跑分析."""
ps_num = PS_MAX
bam_num = len(Bam_Dd)
if PS_MAX > bam_num:
ps_num = bam_num
# 安排好队列
qd = Queue()
for bam_nor in Bam_Dd:
bam = Bam_Dd[bam_nor]
qd.put([bam_nor, bam])
# 新建输出文件夹
cheom_out = os.path.join(Exe_Path, "cheom_out")
if not os.path.exists(cheom_out):
os.makedirs(cheom_out)
ps_ls = []
for i in range(ps_num):
qd.put(None)
chemo_ps = Process(target=py_chemo, args=(qd,), daemon=True)
chemo_ps.start()
ps_ls.append(chemo_ps)
# 检测在运行的进程
temp_num = ps_num
while (1):
ps_runs = 0
for ps in ps_ls:
if ps.is_alive():
ps_runs += 1
if ps_runs < temp_num:
temp_num = ps_runs
print('\r在运行的进程数:%02d' % temp_num, end='')
if ps_runs == 0:
break
print('\n所有分析进程结束')
if __name__ == '__main__':
Get_Opts()
Get_Bams()
print(len(Bam_Dd))
try:
Process_Chemo()
except BaseException:
GLog.error(traceback.format_exc())
print(traceback.format_exc())
endtime = datetime.datetime.now()
GLog.info('end')
GLog.info('run time:%s seconds' % ((endtime - Start_Time).seconds))
GLog.close()