#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Created on: 2023-02-17
@author: cjs
# Purpose: tally the chemo/radiotherapy results and check whether every
#          genotype of each rs has been detected
# Version: 0.0.1
# Last edited: 2023-02-17
"""
from cjs_test.cjs_logger import Logger
from openpyxl import load_workbook
from glob import glob
import datetime
import traceback
import os
import sys

# Global state, filled in by Get_Opts() / Get_Rs()
Exe_Bin = ''
Exe_Path = ''
GLog = None
Start_Time = None
Drug_Dd = {}  # rs id -> list of genotypes read from the workbook
# Command parameters
HEAD_S = '基因名称'  # header text of the first column to process
HEAD_E = '证据等级'  # header text of the last column to process


def Exit_Print(pline=''):
    """Print the usage line and an optional message, then exit the script.

    Args:
        pline: extra message printed after the usage line when non-empty.

    The global logger, if one has been opened, is closed before exiting.
    NOTE(review): the process exits with status 0 even though this is
    described as the error path - confirm that callers rely on that.
    """
    print('%s -O Pro_Out' % Exe_Bin)
    if pline:
        print(pline)
    if GLog:
        GLog.info('exit')
        GLog.close()
    sys.exit(0)


# Handle the runtime parameters
def Get_Opts():
    """Initialise the script location, the start time and the log file.

    Sets the globals Exe_Path / Exe_Bin from the resolved path of
    sys.argv[0], records Start_Time, and opens a Logger on
    <Exe_Path>/logs/<Exe_Bin>/<Exe_Bin>_<YYYYmmdd_HHMMSS>.log.

    Returns:
        0, always.
    """
    global Exe_Bin
    global Exe_Path
    global GLog
    global Start_Time
    file_real = os.path.realpath(sys.argv[0])
    Exe_Path = os.path.dirname(file_real)
    Exe_Bin = os.path.basename(file_real)
    Start_Time = datetime.datetime.now()
    ymd = Start_Time.strftime('%Y%m%d_%H%M%S')
    # Start logging
    log_path = os.path.join(Exe_Path, 'logs', Exe_Bin)
    os.makedirs(log_path, exist_ok=True)
    log_full = os.path.join(log_path, '%s_%s.log' % (Exe_Bin, ymd))
    GLog = Logger(log_full, mode='w')
    GLog.info('start')
    return 0


def Get_Rs():
    """Load every genotype of each rs from the annotation workbook.

    Reads the first sheet of the workbook located next to the script and
    fills the global Drug_Dd. Only the columns from HEAD_S to HEAD_E
    (inclusive) are read; within that window the second value is used as
    the rs id and the third as the genotype.

    Reading stops at the first row that contains an empty cell inside the
    window. If that row still holds an rs id and a genotype it is recorded
    before stopping; otherwise it is dropped.

    Raises:
        ValueError: if HEAD_S or HEAD_E is not found in the header row
            (this includes an empty sheet).
    """
    file_name = '650.panel化疗位点注释及指导说明.庞杰.20230403.xlsx'
    pro_xlsx = os.path.join(Exe_Path, file_name)
    wb = load_workbook(pro_xlsx, read_only=True, data_only=False)
    try:
        ws1 = wb[wb.sheetnames[0]]
        rows = iter(ws1.rows)
        # Header handling
        head_lines = [str(cell.value) for cell in next(rows, ())]
        head_start = head_lines.index(HEAD_S)
        head_end = head_lines.index(HEAD_E)
        for row in rows:
            row_lines = []
            hit_blank = False
            for col_index in range(head_start, head_end + 1):
                col_str = str(row[col_index].value)
                if col_str == 'None':
                    # An empty cell marks the end of the data area.
                    hit_blank = True
                    break
                row_lines.append(col_str)
            if hit_blank and len(row_lines) < 3:
                # The terminating row has no rs id / genotype to record;
                # indexing it would raise IndexError.
                break
            # Build the rs -> genotypes dictionary
            Drug_Dd.setdefault(row_lines[1], []).append(row_lines[2])
            if hit_blank:
                break
    finally:
        # Release the workbook even when parsing fails.
        wb.close()


def Get_Chemo_Txt():
    """Collect every chemo '*.drug.infos.txt' file.

    Returns:
        A list of paths found recursively under <Exe_Path>/cheom_out.
    """
    cheom_out = os.path.join(Exe_Path, "cheom_out")
    return glob('%s/**/*.drug.infos.txt' % cheom_out, recursive=True)


def Check_Txts(txts):
    """Count in how many result files each known rs genotype shows up.

    Args:
        txts: iterable of paths to tab-separated result files. The first
            line of each file is skipped as a header; the third field of
            a line is read as the rs id and the fourth as the genotype.

    For every file, each distinct (rs, genotype) pair is counted once. A
    two-letter genotype is also accepted in reversed letter order. Pairs
    whose rs id or genotype is not present in Drug_Dd are printed and
    skipped. Lines with fewer than four fields are ignored.

    The counts are written to 'rs.chemo.res.txt' in the current working
    directory as 'rs<TAB>genotype<TAB>count' lines, and every rs whose
    genotypes all have a zero count is printed.
    """
    # Build the counter dictionary: rs -> {genotype: number of files}
    rs_dd = {rs: dict.fromkeys(rs_types, 0)
             for rs, rs_types in Drug_Dd.items()}

    for txt in txts:
        seen = set()  # make sure each (rs, genotype) counts once per txt
        with open(txt, 'r') as ff:
            next(ff, None)  # skip the header line
            for line in ff:
                # Drop the line break so a genotype in the last column
                # can still match.
                lns = line.rstrip('\n').split('\t')
                if len(lns) < 4:
                    continue
                rs, rs_type = lns[2], lns[3]
                if (rs, rs_type) in seen:
                    continue
                seen.add((rs, rs_type))

                known = rs_dd.get(rs)
                if known is None:
                    # rs id that the workbook does not list.
                    print('txt:%s, rs:%s, rs_type:%s' % (txt, rs, rs_type))
                    continue
                swapped = rs_type[::-1]
                if rs_type in known:
                    known[rs_type] += 1
                elif len(rs_type) == 2 and swapped in known:
                    # Same two alleles written in the opposite order.
                    known[swapped] += 1
                else:
                    print('txt:%s, rs:%s, rs_type:%s' % (txt, rs, rs_type))

    txt_res = []
    for rs, counts in rs_dd.items():
        for rs_type, type_num in counts.items():
            txt_res.append('%s\t%s\t%s\n' % (rs, rs_type, type_num))
        if not any(counts.values()):
            print("rs:%s, 一直没有被检出" % rs)
    with open('rs.chemo.res.txt', 'w') as ff:
        ff.writelines(txt_res)


if __name__ == '__main__':
    Get_Opts()
    exit_code = 0
    try:
        Get_Rs()
        txts = Get_Chemo_Txt()
        print(len(txts))
        Check_Txts(txts)
    except Exception:
        # Record the failure in the log as well as on the console.
        err_text = traceback.format_exc()
        GLog.error(err_text)
        print(err_text)
        exit_code = 1
    endtime = datetime.datetime.now()
    GLog.info('end')
    # total_seconds() keeps whole days, unlike timedelta.seconds.
    GLog.info('run time:%s seconds'
              % int((endtime - Start_Time).total_seconds()))
    GLog.close()
    if exit_code:
        sys.exit(exit_code)