初始化
commit
260d86d3f1
|
|
@ -0,0 +1,215 @@
|
|||
# ---> Python
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
# ---> Perl
|
||||
!Build/
|
||||
.last_cover_stats
|
||||
/META.yml
|
||||
/META.json
|
||||
/MYMETA.*
|
||||
*.o
|
||||
*.pm.tdy
|
||||
*.bs
|
||||
|
||||
# Devel::Cover
|
||||
cover_db/
|
||||
|
||||
# Devel::NYTProf
|
||||
nytprof.out
|
||||
|
||||
# Dist::Zilla
|
||||
/.build/
|
||||
|
||||
# Module::Build
|
||||
_build/
|
||||
Build
|
||||
Build.bat
|
||||
|
||||
# Module::Install
|
||||
inc/
|
||||
|
||||
# ExtUtils::MakeMaker
|
||||
/blib/
|
||||
/_eumm/
|
||||
/*.gz
|
||||
/Makefile
|
||||
/Makefile.old
|
||||
/MANIFEST.bak
|
||||
/pm_to_blib
|
||||
/*.zip
|
||||
|
||||
# ---> Perl6
|
||||
# Gitignore for Perl 6 (http://www.perl6.org)
|
||||
# As part of https://github.com/github/gitignore
|
||||
|
||||
# precompiled files
|
||||
.precomp
|
||||
lib/.precomp
|
||||
|
||||
nohup.out
|
||||
|
||||
log/*
|
||||
!log/readme.md
|
||||
example/*
|
||||
!example/readme.md
|
||||
|
||||
/.report/
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
癌种 用药方案 方案缩写 source
|
||||
非小细胞肺癌 顺铂+紫杉醇 TP lung85gene
|
||||
非小细胞肺癌 卡铂+紫杉醇 TP lung85gene
|
||||
非小细胞肺癌 顺铂+紫杉醇脂质体 LP lung85gene
|
||||
非小细胞肺癌 卡铂+紫杉醇脂质体 LP lung85gene
|
||||
非小细胞肺癌 顺铂+白蛋白紫杉醇 nab-TP lung85gene
|
||||
非小细胞肺癌 卡铂+白蛋白紫杉醇 nab-TP lung85gene
|
||||
非小细胞肺癌 顺铂+多西他赛 DP lung85gene
|
||||
非小细胞肺癌 卡铂+多西他赛 DP lung85gene
|
||||
非小细胞肺癌 奈达铂+多西他赛 DP lung85gene
|
||||
非小细胞肺癌 顺铂+吉西他滨 GP lung85gene
|
||||
非小细胞肺癌 卡铂+吉西他滨 GP lung85gene
|
||||
非小细胞肺癌 顺铂+培美曲塞 PP lung85gene
|
||||
非小细胞肺癌 卡铂+培美曲塞 PP lung85gene
|
||||
非小细胞肺癌 顺铂+长春瑞滨 NP lung85gene
|
||||
非小细胞肺癌 顺铂+依托泊苷 EP lung85gene
|
||||
小细胞肺癌 顺铂+依托泊苷 EP lung85gene
|
||||
小细胞肺癌 卡铂+依托泊苷 EC lung85gene
|
||||
小细胞肺癌 洛铂+依托泊苷 EL lung85gene
|
||||
小细胞肺癌 顺铂+伊立替康 IP lung85gene
|
||||
小细胞肺癌 卡铂+伊立替康 IC lung85gene
|
||||
结直肠癌 奥沙利铂+亚叶酸钙+氟尿嘧啶 FOLFOX crc88gene
|
||||
结直肠癌 伊立替康+亚叶酸钙+氟尿嘧啶 FOLFIRI crc88gene
|
||||
结直肠癌 奥沙利铂+卡培他滨 CAPEOX(又称Xelox) crc88gene
|
||||
结直肠癌 伊立替康+奥沙利铂+亚叶酸钙+氟尿嘧啶 FOLFOXIRI crc88gene
|
||||
结直肠癌 伊立替康+卡培他滨 CapIRI或XELIRI crc88gene
|
||||
结直肠癌 奥沙利铂+雷替曲塞 / crc88gene
|
||||
结直肠癌 伊立替康+雷替曲塞 / crc88gene
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
|
||||
from docxtpl import DocxTemplate
|
||||
|
||||
from tools.parsexlsx import run
|
||||
|
||||
|
||||
def main(path):
    """Render the Word report for one input and return the output path.

    ``run(path)`` (imported from ``tools.parsexlsx``) must return a JSON
    string. The decoded object is used as the rendering context for
    ``template/nreport.docx`` located next to this file, and the rendered
    document is saved as ``result/<barcode>.docx`` next to this file.

    Args:
        path: Input path, forwarded unchanged to ``tools.parsexlsx.run``.

    Returns:
        The path of the generated ``.docx`` file.

    Raises:
        KeyError: If the decoded context has no ``['c']['barcode']`` entry.
    """
    context = json.loads(run(path))
    barcode = context['c']['barcode']
    base_dir = os.path.dirname(__file__)

    template = DocxTemplate(os.path.join(base_dir, 'template', 'nreport.docx'))
    template.render(context)

    # Make sure the output directory exists before saving; otherwise the
    # very first run on a fresh checkout fails when opening the target file.
    result_dir = os.path.join(base_dir, 'result')
    os.makedirs(result_dir, exist_ok=True)

    # Use a dedicated name instead of rebinding the ``path`` parameter.
    report_path = os.path.join(result_dir, f'{barcode}.docx')
    template.save(report_path)
    return report_path
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The first positional argument is the input path handed to main().
    # Extra arguments are still tolerated (and ignored), as before.
    if len(sys.argv) < 2:
        # sys.exit(str) prints the message to stderr and exits with status 1.
        sys.exit(f'usage: python {sys.argv[0]} input_path')
    main(sys.argv[1])
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,486 @@
|
|||
#!/usr/bin/python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import numpy as np
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import glob
|
||||
import openpyxl
|
||||
from openpyxl import Workbook, load_workbook
|
||||
from openpyxl.drawing.image import Image
|
||||
|
||||
# The script needs exactly two arguments (output_dir and sample name).
# The check runs at module level, before any definition below is used.
if len(sys.argv) != 3:
    print(" ".join(['usage:python', sys.argv[0], 'output_dir', 'name']))
    # Exit with a non-zero status so a calling pipeline can detect the usage error.
    sys.exit(1)
|
||||
|
||||
|
||||
# Population-frequency columns whose per-row maximum becomes ``freq_high``.
_SNV_FREQ_COLUMNS = ['1000g2015aug_all', '1000g2015aug_eas', 'esp6500siv2_all', 'ExAC_nontcga_ALL',
                     'ExAC_nontcga_EAS', 'gnomAD_genome_ALL', 'gnomAD_genome_EAS']
# Predictor columns; the number of 'D' calls per row becomes ``Deleterious``.
_SNV_PRED_COLUMNS = ['MutationTaster_pred', 'FATHMM_pred', 'MetaLR_pred']
# Per-evidence columns that are '|'-joined into one cell per variant, in output order.
_JOINED_COLUMNS = ['证据等级', 'AMP_mut_level', '疾病中文名', '药物中文名', 'Response_Type_C',
                   'Evidence_Source_C', 'EfficacyEvidence', 'Drug_Detail']


def _read_gene_functions(path):
    """Return ``{GENE_UPPER: function_text}`` from a tab-separated file (header row skipped)."""
    table = {}
    with open(path, 'r', encoding='utf-8') as handle:
        lines = handle.readlines()
    for line in lines[1:]:
        fields = line.strip().split("\t")
        if len(fields) < 2:
            # Blank or truncated line: nothing to record.
            continue
        table[fields[0].upper()] = fields[1]
    # Lets a '.' gene symbol (the fill value used for missing cells) resolve to '.'.
    table['.'] = '.'
    return table


def _read_drug_mechanisms(path):
    """Return ``{DRUG_UPPER: 'disease\\\\mechanism'}`` from the target-drug table (header row skipped)."""
    table = {}
    with open(path, 'r', encoding='utf-8') as handle:
        lines = handle.readlines()
    for line in lines[1:]:
        fields = line.split("\t")
        if len(fields) < 12:
            # Blank or truncated line: columns 8 and 11 are not available.
            continue
        disease = fields[8]
        mechanism = fields[11]
        # Drug names are '|'-separated in both of the first two columns.
        drugs = fields[0].split('|') + fields[1].split('|')
        if disease or mechanism:
            for drug in drugs:
                table[drug.upper()] = "\\\\".join([disease, mechanism]).strip()
    return table


def _acmg_level(clnsig):
    """Map a CLNSIG value to the ACMG level string '1', '2' or '3'."""
    text = str(clnsig)
    if re.search("Likely_pathogenic|drug", text, re.I):
        return '2'
    if re.search("pathogenic", text, re.I) and not re.search("Conflicting", text, re.I):
        return '1'
    return '3'


def _max_population_frequency(freqs):
    """Return the per-row numeric maximum of the frequency columns ('.' counts as 0)."""
    # Values are compared as numbers: comparing the raw strings would rank
    # '9e-05' above '0.0012' and fail on rows mixing str and float cells.
    numeric = freqs.apply(lambda col: pd.to_numeric(col.astype(str).replace('.', '0')))
    return numeric.max(axis=1)


def _drug_category(response, evidence, dot_passthrough):
    """Classify one evidence row as 'a', 'b', 'c', 'd' (or '.' for a '.' response)."""
    if re.search("敏感", response) and evidence == 'A':
        return 'a'
    if re.search("敏感", response) and evidence == 'C':
        return 'b'
    if re.search("耐药", response):
        return 'd'
    if dot_passthrough and response == '.':
        return '.'
    return 'c'


def _annotate_group(group, drug_mechanism, dot_passthrough):
    """Return a copy of one variant group with Drug_Detail, Drug_Category and AMP_mut_level added."""
    group = group.copy()
    details = []
    categories = []
    for _, row in group.iterrows():
        names = row['药物中文名'].replace(" + ", ",").split(",")
        detail = ''
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # generated text is identical from run to run (a set is not ordered).
        for drug in dict.fromkeys(names):
            if drug.upper() in drug_mechanism:
                detail += '[[' + drug + ']]' + drug_mechanism[drug.upper()]
        details.append(detail)
        # NOTE(review): the downgrade to 'C' for 非适应症 rows only influences
        # Drug_Category; the 证据等级 column (and AMP_mut_level derived from it)
        # keeps the value from the file, exactly as before. Confirm this is intended.
        evidence = 'C' if row['标签'] == '非适应症' else row['证据等级']
        categories.append(_drug_category(row['Response_Type_C'], evidence, dot_passthrough))
    # Assign whole columns at once so rows sharing an index label stay independent.
    group['Drug_Detail'] = details
    group['Drug_Category'] = categories
    group['AMP_mut_level'] = group['证据等级'].replace(['A', 'B', 'C', 'D'], ['I', 'I', 'II', 'II'])
    return group


def _evidence_fields(group, gene_function):
    """Return the ten evidence cells of one output row: 8 joined columns, gene function, category."""
    joined = ['|'.join(list(group[column])) for column in _JOINED_COLUMNS]
    return joined + [gene_function, '|'.join(list(group['Drug_Category']))]


def _level_table(path, level):
    """Read a vus/neg table, keep columns 9 and 17 and tag every row with ``level``."""
    if os.path.getsize(path) > 0:
        # Column positions assume the upstream file layout — TODO confirm.
        table = pd.read_table(path, sep="\t").iloc[:, [9, 17]].copy()
        table.insert(loc=2, column='AMP_mut_level', value=level)
        return table.rename(columns={'fun_change': 'OKBSIG'})
    return pd.DataFrame(columns=[])


def snv_fusion_cnv(output_dir, name,
                   gene_function_path="/dataseq/jmdna/codes/reportbase/gene_function.txt",
                   target_drug_path="/dataseq/jmdna/codes/reportbase/target_drug.txt"):
    """Build ``<output_dir>/report/<name>.check_new.xlsx`` with snvindel, fusion and cnv sheets.

    Inputs are read from the ``report``, ``mutation``, ``fusion`` and
    ``cnvkit`` sub-directories of ``output_dir``; files of size zero are
    treated as "no records". After the three sheets are written, the image
    ``cnvkit/<name>.cnv.png`` is placed below the data of the cnv sheet.

    Args:
        output_dir: Root directory of one sample's analysis results.
        name: Sample name used as the file-name prefix (and as the sample
            column name in the fusion tables).
        gene_function_path: Tab-separated gene/function reference file.
        target_drug_path: Tab-separated target-drug reference file.

    Raises:
        OSError: If any expected input file is missing.
        KeyError: If a gene symbol is absent from the gene-function table or
            an expected column is absent from an input table.
    """
    out_xlsx = f"{output_dir}/report/{name}.check_new.xlsx"

    genefunction = _read_gene_functions(gene_function_path)
    drug_mechanism = _read_drug_mechanisms(target_drug_path)

    # ------------------------------------------------------------------
    # snvindel sheet
    # ------------------------------------------------------------------
    filter_file = f"{output_dir}/report/{name}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt"
    pos_file = f"{output_dir}/mutation/{name}.snvindel.pos.dedup.txt"
    vus_file = f"{output_dir}/mutation/{name}.snvindel.vus.txt"
    neg_file = f"{output_dir}/mutation/{name}.snvindel.neg.txt"

    # Filtered variant table: drop rows flagged 可信 == 0, then add three derived columns.
    if os.path.getsize(filter_file) > 0:
        snv = pd.read_table(filter_file, sep="\t")
        snv = snv.drop(index=snv.index[snv['可信'] == 0])
        snv.insert(loc=24, column='ACMG_level', value=snv['CLNSIG'].map(_acmg_level))
        snv.insert(loc=25, column='Deleterious', value=snv[_SNV_PRED_COLUMNS].eq('D').sum(axis=1))
        snv.insert(loc=26, column='freq_high', value=_max_population_frequency(snv[_SNV_FREQ_COLUMNS]))
        # Positional selection assumes the upstream column layout — TODO confirm.
        snv_1 = snv.iloc[:, list(range(14)) + [15, 17, 18, 20, 23, 24, 25, 26, 111, 112, 113]]
    else:
        snv_1 = pd.DataFrame(columns=[])

    # Positive (actionable) evidence: one output row per gene / change / significance group.
    if os.path.getsize(pos_file) > 0:
        pos = pd.read_table(pos_file, sep="\t")
        pos = pos.iloc[:, [7, 10, 18, 23, 24, 25, 29, 30, 31, 32]]
        pos_1 = pd.DataFrame(
            columns=['AAChange.refGene', 'OKBSIG', 'AMP_evidence_level', 'AMP_mut_level', 'Indication', 'Drug',
                     'Response_Type', 'Evidence_Source', 'EfficacyEvidence', 'Drug_Detail', 'Gene_function',
                     'Drug_Category'])
        for key, group in pos.groupby(['Gene.refGene', 'AAChange.refGene', 'fun_change']):
            group = _annotate_group(group, drug_mechanism, dot_passthrough=False)
            pos_1.loc[len(pos_1)] = [key[1], key[2]] + _evidence_fields(group, genefunction[key[0].upper()])
    else:
        pos_1 = pd.DataFrame(columns=[])

    vus_1 = _level_table(vus_file, 'III')
    # NOTE(review): 'IIII' (not 'IV') is what the file has always emitted; kept byte-for-byte.
    neg_1 = _level_table(neg_file, 'IIII')

    snvindel_sheet = pd.DataFrame(
        columns=['可信', 'Chr', 'Start', 'End', 'Ref', 'Alt', 'AAChange.refGene', 'mutant_frequency', 'total_reads',
                 'mutant_reads', 'strand_bias', 'Otherinfo10', 'Func.refGene', 'Gene.refGene', 'ExonicFunc.refGene',
                 'avsnp150', 'cosmic91', 'CLNDN', 'CLNSIG', 'ACMG_level', 'Deleterious', 'freq_high', 'OKBSIG',
                 'AMP_evidence_level', 'AMP_mut_level', 'Indication', 'Drug', 'Response_Type', 'Evidence_Source',
                 'EfficacyEvidence', 'Drug_Detail', 'Gene_function', 'Drug_Category', 'Otherinfo11', 'Otherinfo12',
                 'Otherinfo13'])
    pos_vus_neg = pd.concat([pos_1, vus_1, neg_1])
    # When either side is empty it has no 'AAChange.refGene' column and a
    # merge would raise KeyError; in that case there is nothing to attach.
    if 'AAChange.refGene' in snv_1.columns and 'AAChange.refGene' in pos_vus_neg.columns:
        snv_pos_vus_neg = snv_1.merge(pos_vus_neg, how='left', on='AAChange.refGene')
    else:
        snv_pos_vus_neg = snv_1
    snvindel_sheet = pd.concat([snvindel_sheet, snv_pos_vus_neg])
    snvindel_sheet = snvindel_sheet.replace(np.nan, '.')
    snvindel_sheet.rename(columns={"可信": "Validated"}, inplace=True)

    # ------------------------------------------------------------------
    # fusion sheet
    # ------------------------------------------------------------------
    fusion_pos_file = f"{output_dir}/fusion/{name}.fusion.pos.dedup.txt"
    fusion_vus_file = f"{output_dir}/fusion/{name}.fusion.vus.txt"
    if os.path.getsize(fusion_pos_file) > 0:
        fusion_pos = pd.read_table(fusion_pos_file, sep="\t")
    else:
        fusion_pos = pd.DataFrame(columns=[])

    if os.path.getsize(fusion_vus_file) > 0:
        fusion_vus = pd.read_table(fusion_vus_file, sep="\t")
        fusion_vus.insert(loc=0, column='可信', value=1)
    else:
        fusion_vus = pd.DataFrame(columns=[])
    fusion_pos_vus = pd.concat([fusion_pos, fusion_vus])
    fusion_sheet = pd.DataFrame(
        columns=['Validated', 'CHROM1', 'POS1', 'CHROM2', 'POS2', 'GENE1', 'GENE2', 'FUSION', 'Support_reads(PE:SR)',
                 'Depth', 'FREQ1', 'FREQ2', 'OKBSIG', 'AMP_evidence_level',
                 'AMP_mut_level', 'Indication', 'Drug', 'Response_Type', 'Evidence_Source', 'Efficacy_Evidence',
                 'Drug_Detail', 'Gene_function', 'Drug_Category', 'INFO', 'FORMAT', 'Sample'])

    if not fusion_pos_vus.empty:
        fusion_pos_vus = fusion_pos_vus.replace(np.nan, '.')
        fusion_keys = ['可信', '#CHROM', 'POS', 'CHROM2', 'POS2', 'GENE1', 'GENE2', 'FUSION', 'FREQ1', 'FREQ2',
                       'fun_change', 'INFO', 'FORMAT', name, 'Gene_Symbol']
        for key, group in fusion_pos_vus.groupby(fusion_keys):
            group = _annotate_group(group, drug_mechanism, dot_passthrough=True)
            # key[13] is the sample column; its ':'-separated fields 1 and 7
            # fill the 'Support_reads(PE:SR)' and 'Depth' output columns.
            sample_fields = key[13].split(":")
            fusion_sheet.loc[len(fusion_sheet)] = (
                list(key[0:8])
                + [sample_fields[1], sample_fields[7]]
                + list(key[8:11])
                + _evidence_fields(group, genefunction[key[14].upper()])
                + list(key[11:14]))
    fusion_sheet = fusion_sheet.replace(np.nan, '.')

    # ------------------------------------------------------------------
    # cnv sheet
    # ------------------------------------------------------------------
    # Derived from output_dir/name like every other input (this used to be a
    # hard-coded path to one specific sample).
    cnv_pos_file = f"{output_dir}/cnvkit/{name}.cnv.pos.dedup.txt"
    cnv_sheet = pd.DataFrame(
        columns=['Validated', 'Chromosome', 'Start', 'End', 'Gene', 'Depth', 'Probes', 'Copy_number', 'OKBSIG',
                 'Gene_Symbol', 'AMP_evidence_level', 'AMP_mut_level',
                 'Indication', 'Drug', 'Response_Type', 'Evidence_Source', 'Efficacy_Evidence', 'Drug_Detail',
                 'Gene_Function', 'Drug_Category'])
    if os.path.getsize(cnv_pos_file) > 0:
        cnv_pos = pd.read_table(cnv_pos_file, sep="\t")
        cnv_keys = ['可信', 'chromosome', 'start', 'end', 'gene', 'depth', 'probes', 'cn', 'fun_change',
                    'Gene_Symbol']
        for key, group in cnv_pos.groupby(cnv_keys):
            group = _annotate_group(group, drug_mechanism, dot_passthrough=True)
            cnv_sheet.loc[len(cnv_sheet)] = (
                list(key[0:10]) + _evidence_fields(group, genefunction[key[9].upper()]))

    with pd.ExcelWriter(out_xlsx) as writer:
        snvindel_sheet.to_excel(writer, sheet_name="snvindel", index=False)
        fusion_sheet.to_excel(writer, sheet_name="fusion", index=False)
        cnv_sheet.to_excel(writer, sheet_name="cnv", index=False)

    # Add cnvkit/<name>.cnv.png to the cnv sheet, four rows below the last data row.
    wb = openpyxl.load_workbook(filename=out_xlsx)
    ws = wb['cnv']
    cell = 'C' + str(ws.max_row + 4)
    cnv_pic = f"{output_dir}/cnvkit/{name}.cnv.png"
    ws.add_image(Image(cnv_pic), cell)
    wb.save(out_xlsx)
|
||||
|
||||
|
||||
class PostProcess:
    """Collect per-sample result files and append them as sheets to an Excel workbook.

    ``path`` is the root directory of one sample's results; ``outpath`` is an
    existing ``.xlsx`` file that :meth:`collect` opens in append mode.
    """

    # Population-frequency columns whose per-row maximum becomes ``freq_high``.
    _FREQ_COLUMNS = ['1000g2015aug_all', '1000g2015aug_eas', 'esp6500siv2_all', 'ExAC_nontcga_ALL',
                     'ExAC_nontcga_EAS', 'gnomAD_genome_ALL', 'gnomAD_genome_EAS']
    # Predictor columns; the number of 'D' calls per row becomes ``Deleterious``.
    _PRED_COLUMNS = ['MutationTaster_pred', 'FATHMM_pred', 'MetaLR_pred']

    def __init__(self, path, outpath):
        self.path = path
        self.outpath = outpath
        # Attribute name (sic) kept unchanged for backward compatibility.
        self.neeecol = self.need_col()

    def need_col(self):
        """Read the required columns per sheet from ``columns.csv`` next to this file.

        Returns:
            A dict mapping each CSV header to the list of its non-empty cells.
        """
        path = os.path.join(os.path.dirname(__file__), 'columns.csv')
        cols = pd.read_csv(path).fillna('')
        return {sheet: [x for x in names if x] for sheet, names in cols.to_dict('list').items()}

    def _glob(self, *parts):
        """Return the files matching ``os.path.join(self.path, *parts)``."""
        return glob.glob(os.path.join(self.path, *parts))

    def _select_records(self, df, sheet_key):
        """Return ``df`` restricted to the configured columns of ``sheet_key`` as records.

        Raises:
            UserWarning: If a configured column is missing from ``df``.
        """
        try:
            # The column selection is what raises KeyError, so it has to be
            # inside the try block for the translation below to take effect.
            return df[self.neeecol.get(sheet_key, [])].to_dict('records')
        except KeyError as e:
            raise UserWarning('表头设置和配置文件不对应', e) from e

    def _annotated_records(self, path, sheet_key):
        """Read a variant table, add ``freq_high`` / ``Deleterious`` and select configured columns."""
        df = pd.read_csv(path, sep='\t')
        df = df.fillna('.')
        # '.' marks a missing frequency and counts as 0. Cells are parsed with
        # pd.to_numeric rather than eval(), which would execute file content.
        freqs = df[self._FREQ_COLUMNS].apply(
            lambda col: pd.to_numeric(col.astype(str).replace('.', '0')))
        df['freq_high'] = freqs.max(axis=1)
        df['Deleterious'] = df[self._PRED_COLUMNS].eq('D').sum(axis=1)
        return self._select_records(df, sheet_key)

    def msi(self):
        """Process the first ``MSI/*.msi`` result file.

        Returns:
            A one-element list holding a dict with ``msi_count``, ``msi_value``,
            ``msi_result`` and ``msi_predict``; the dict is empty when no file
            is found.
        """
        msi_files = self._glob('MSI', '*.msi')
        msi_res = dict()
        if msi_files:
            df = pd.read_csv(msi_files[0], sep='\t')
            res = df.to_dict('records')[0]
            msi_res['msi_count'] = res['Total_Number_of_Sites']
            msi_res['msi_value'] = res['%']
            if msi_res['msi_value'] >= 0.3:
                msi_res['msi_result'] = 'MSI-H'
                msi_res['msi_predict'] = '对免疫检查点抑制剂可能敏感'
            else:
                msi_res['msi_result'] = 'MSS'
                msi_res['msi_predict'] = '对免疫检查点抑制剂可能不敏感'
        return [msi_res]

    def chemo(self):
        """Return the chemotherapy table (``chemo/*chemo.res.txt``) as records, missing cells as '.'."""
        chemo_files = self._glob('chemo', '*chemo.res.txt')
        chemo_res = []
        if chemo_files:
            df = pd.read_csv(chemo_files[0], sep='\t')
            df = df.fillna('.')
            chemo_res = df.to_dict('records')
        return chemo_res

    def heredity(self):
        """Return hereditary (germline) variants from ``mutation/*Germline*filtered.txt``.

        Raises:
            UserWarning: If a column configured under 'HCS' is missing.
        """
        heredi_files = self._glob('mutation', '*Germline*filtered.txt')
        if heredi_files:
            return self._annotated_records(heredi_files[0], 'HCS')
        return []

    def MMR(self):
        """Return MMR variants from ``MMR/*mmr.pre.txt``.

        Raises:
            UserWarning: If a column configured under 'HCS' is missing.
        """
        mmr_files = self._glob('MMR', '*mmr.pre.txt')
        if mmr_files:
            # NOTE(review): uses the 'HCS' column configuration, as before —
            # confirm that MMR is meant to share it.
            return self._annotated_records(mmr_files[0], 'HCS')
        return []

    def hotspot(self):
        """Return hotspot variants as records ([] when no file is found)."""
        hotspot_files = self._glob(
            'mutation', 'hotspot', '*hotspot.snp.indel.filter.anno.hg19_multianno.txt')
        return self.txt_2_excel(hotspot_files[0]) if hotspot_files else []

    def splicing(self):
        """Return ``mutation/*.target.splicing.txt`` as records ([] when no file is found)."""
        splicing_files = self._glob('mutation', '*.target.splicing.txt')
        return self.txt_2_excel(splicing_files[0]) if splicing_files else []

    def indication(self):
        """Return ``mutation/*indication.txt`` as records ([] when no file is found)."""
        indication_files = self._glob('mutation', '*indication.txt')
        return self.txt_2_excel(indication_files[0]) if indication_files else []

    def longindel(self):
        """Return ``fusion/*.longindel.pos.txt`` as records ([] when no file is found)."""
        longindel_files = self._glob('fusion', '*.longindel.pos.txt')
        return self.txt_2_excel(longindel_files[0]) if longindel_files else []

    def cms(self):
        """Return sample information from the ``data`` entry of ``qc/*_post.json``.

        Raises:
            UserWarning: If a column configured under 'sample_info' is missing.
        """
        cms_files = self._glob('qc', '*_post.json')
        if not cms_files:
            return []
        with open(cms_files[0], 'r', encoding='utf-8') as handle:
            cms_info = json.load(handle)['data']
        return self._select_records(pd.DataFrame(cms_info), 'sample_info')

    def qc(self):
        """Return the QC key/value table as a single record.

        The file is parsed as a headerless two-column TSV (key, value) and
        transposed so that the keys become column names.
        """
        # NOTE(review): this used to glob '*_post.json' (the JSON consumed by
        # cms()), which cannot be parsed as a key/value TSV. '*_qc.txt' is the
        # QC file name used by the report script — confirm against the pipeline.
        qc_files = self._glob('qc', '*_qc.txt')
        qc_res = []
        if qc_files:
            df = pd.read_csv(qc_files[0], sep='\t', header=None)
            df = df.set_index(0).T
            qc_res = df.to_dict('records')
        return qc_res

    # TODO(review): an unfinished, commented-out ``snv()`` method used to live
    # here (vectorised grouping of '*snvindel.pos.txt' by gene / change /
    # fun_change). Recover it from version control if it is to be completed.

    def txt_2_excel(self, path):
        """Read a tab-separated file and return its rows as records ([] for an empty file)."""
        try:
            df = pd.read_csv(path, sep='\t')
        except pd.errors.EmptyDataError:
            return []
        return df.to_dict('records')

    def collect(self):
        """Append one sheet per result type to the workbook at ``self.outpath``."""
        # Gather everything first so that a failing collector cannot leave an
        # open writer behind.
        sheets = {
            'MSI': self.msi(),
            'chemo': self.chemo(),
            'HCS': self.heredity(),
            'sample_info': self.cms(),
            'MMR': self.MMR(),
            'hotspot': self.hotspot(),
            'MET': self.splicing(),
            'indication': self.indication(),
            'longindel': self.longindel(),
            'qc': self.qc()
        }
        # The context manager saves and closes the workbook even on error.
        with pd.ExcelWriter(self.outpath, mode='a', engine='openpyxl') as writer:
            for sheet_name, records in sheets.items():
                pd.DataFrame(records).to_excel(writer, sheet_name=sheet_name, index=False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    result_dir, sample_name = sys.argv[1], sys.argv[2]

    # Step 1: write the snvindel / fusion / cnv sheets.
    snv_fusion_cnv(result_dir, sample_name)

    # TODO: logging has not been added yet, and paths are not configurable.
    # Step 2: append the remaining sheets to the workbook written in step 1.
    out_xlsx = "".join([result_dir, '/report/', sample_name, '.check_new.xlsx'])
    postprocess = PostProcess(result_dir, out_xlsx)
    postprocess.collect()
|
||||
|
|
@ -0,0 +1,864 @@
|
|||
#!/usr/bin/python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
##https://www.pianshen.com/article/5314917437/
|
||||
##https://zhuanlan.zhihu.com/p/366902690
|
||||
##https://itpcb.com/a/277599
|
||||
import docxtpl
|
||||
from docx.shared import Mm
|
||||
from docxtpl import DocxTemplate,RichText
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
|
||||
# The script needs exactly four arguments; the check runs at module level.
if len(sys.argv) != 5:
    # 'project' was misspelled in the usage text and passed to print() outside the joined list.
    print(" ".join(['usage:python', sys.argv[0], 'output_dir', 'tumor',
                    'sample_type(t for tissue,c for cfdna)', 'project']))
    # Exit with a non-zero status so a calling pipeline can detect the usage error.
    sys.exit(1)
|
||||
|
||||
|
||||
# Command-line arguments: result root, sample name, sample type and project.
output_dir, name, Sample_type, projcet = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]

# Input/output file names are the sample name plus a fixed suffix; each file
# lives in a fixed sub-directory of output_dir.
snv_base = f"{name}.snvindel.pos.dedup.txt"
snv_file = f"{output_dir}/mutation/{snv_base}"
snv_base_vus = f"{name}.snvindel.vus.txt"
snv_file_vus = f"{output_dir}/mutation/{snv_base_vus}"
fusion_base = f"{name}.fusion.pos.dedup.txt"
fusion_file = f"{output_dir}/fusion/{fusion_base}"
cnv_base = f"{name}.cnv.pos.dedup.txt"
cnv_file = f"{output_dir}/cnvkit/{cnv_base}"
qc_base = f"{name}_qc.txt"
qc_file = f"{output_dir}/qc/{qc_base}"
report_base = f"{name}_report.docx"
report_file = f"{output_dir}/report/{report_base}"
indication_file = f"{output_dir}/mutation/indication.txt"

# Context object, pre-populated with empty containers for every key.
context = {
    'list1': [],
    'list2': {},
    'list3': {},
    'clingene1': [],
    'clingene2': [],
    'nonclingenes': [],
    'genefunc': {},
    'indication': [],
    'mmr': [],
    'chemo': [],
}
|
||||
|
||||
|
||||
# Gene function descriptions: upper-cased gene symbol -> description text.
genefunction={}
with open("/dataseq/jmdna/codes/reportbase/gene_function.txt",'r',encoding='utf-8') as gf:
    next(gf,None)  # skip the header line
    for line in gf:
        fields=line.strip().split("\t")
        if len(fields)<2:
            # blank or truncated line: nothing to record
            continue
        genefunction[fields[0].upper()]=fields[1]
|
||||
|
||||
## Targeted-drug reference tables, keyed by upper-cased drug name.
drug_disease={}
drug_mechanism={}
with open("/dataseq/jmdna/codes/reportbase/target_drug.txt",'r',encoding='utf-8') as drug_fh:
    next(drug_fh,None)  # skip the header line
    for line in drug_fh:
        # The line is deliberately not stripped: columns keep their raw text,
        # exactly as the lookups below have always stored it.
        fields=line.split("\t")
        if len(fields)<12:
            # blank or truncated line: columns 8 and 11 are not available
            continue
        disease=fields[8]
        mechanism=fields[11]
        if disease or mechanism:
            # column 0 may list several drug names separated by '|'
            for drug in fields[0].split('|'):
                drug_disease[drug.upper()]=disease
                drug_mechanism[drug.upper()]=mechanism

# Two buckets filled by the SNV / fusion / CNV sections:
# index 0 = possibly sensitive drugs, index 1 = possibly resistant drugs.
sensitive_resistant_drug=[{'type':'可能敏感药物','drug':[]},{'type':'可能耐药药物','drug':[]}]
|
||||
|
||||
## Somatic SNV/indel processing

def _record_drug_mechanisms(row):
    """Add the drugs of one annotation row to the sensitive/resistant buckets.

    Only drugs present in drug_disease are recorded. The Chinese display name
    is taken from the same position in '药物中文名' as the drug in 'Drug'.
    """
    response=row['Response_Type_C']
    if re.search(r'敏感',response):
        bucket=sensitive_resistant_drug[0]['drug']
    elif re.search(r'耐药',response):
        bucket=sensitive_resistant_drug[1]['drug']
    else:
        return
    drugs=row['Drug'].replace(" + ",",").split(",")
    drugs_chinese=row['药物中文名'].replace(" + ",",").split(",")
    for position,drug in enumerate(drugs):
        key=drug.upper()
        if key not in drug_disease:
            continue
        # Fall back to the English name when the Chinese list is shorter,
        # instead of raising IndexError.
        shown=drugs_chinese[position] if position<len(drugs_chinese) else drug
        entry={'name':shown,
               'mechanism':("\n".join([drug_disease[key],drug_mechanism[key]])).strip()}
        if entry not in bucket:
            bucket.append(entry)


def _labelled_drugs(rows,level_of):
    """Return '<drug>【<level> 级】' labels for rows, de-duplicated in first-seen order."""
    labels=[]
    for _,row in rows.iterrows():
        level=level_of(row)
        for x in row['药物中文名'].split(","):
            label=str(x) + '【' + level + ' 级】'
            if label not in labels:
                labels.append(label)
    return labels


def _resistance_level(row):
    """Evidence level for a resistance row: 'C' when tagged '非适应症', else the row's own level."""
    return 'C' if row['标签']=='非适应症' else row['证据等级']


def _drug_columns(rows):
    """Build the drug_A..drug_D report cells ('/' when empty) for one variant's rows."""
    sensitive=rows[rows['Response_Type_C'].str.contains("敏感",na=False)]
    resistant=rows[rows['Response_Type_C'].str.contains("耐药",na=False)]
    cells={
        'drug_A':_labelled_drugs(sensitive[sensitive['标签']=='适应症'],lambda row:'A'),
        'drug_B':_labelled_drugs(sensitive[sensitive['标签']=='非适应症'],lambda row:'C'),
        'drug_C':_labelled_drugs(sensitive[sensitive['标签']=='.'],lambda row:row['证据等级']),
        'drug_D':_labelled_drugs(resistant,_resistance_level),
    }
    return {key:("\n".join(labels) or '/') for key,labels in cells.items()}


def _translate_muttype(raw):
    """Map an 'ExonicFunc.refGene' value to its Chinese label, or '/' when unknown."""
    for prefix,label in (("nonsynonymous SNV",'错义突变'),
                         ("frameshift",'移码突变'),
                         ("nonframeshift",'非移码突变'),
                         ("stopgain",'提前终止')):
        if raw.startswith(prefix):
            return label
    return '/'


def _split_aachange(aachange):
    """Split 'gene:transcript:exon:nacid[:aacid]' into a dict; aacid is '/' when absent."""
    parts=aachange.split(":")
    if len(parts)==5:
        gene_name,transcript,exon,nacid,aacid=parts
    else:
        # Raises ValueError unless exactly four fields are present.
        gene_name,transcript,exon,nacid=parts
        aacid='/'
    return {'gene':gene_name,'transcript':transcript,'exon':exon,'nacid':nacid,'aacid':aacid}


snv_size = os.path.getsize(snv_file)
if snv_size>0:
    snv=pd.read_table(snv_file,sep="\t")
    # Drop calls whose '可信' flag is 0.
    snv.drop(snv.index[snv['可信']==0],inplace=True)
    genes=snv['Gene.refGene'].drop_duplicates()
    if not len(genes):
        snv_size=0
    for gene in genes:
        gene_summary=[]
        muts=snv['AAChange.refGene'][snv['Gene.refGene']==gene].drop_duplicates()
        for mut in muts:
            # All annotation rows of this variant (one row per drug/evidence entry).
            mut_rows=snv[snv['AAChange.refGene']==mut]
            first=mut_rows.iloc[0]

            details=[]
            for _,row in mut_rows.iterrows():
                details.append({'drug':row['药物中文名'],
                                'effect':row['Response_Type_C'],
                                'tumor':row['疾病中文名'],
                                'evidence':row['Evidence_Source_C'],
                                'sig':row['EfficacyEvidence']})
                _record_drug_mechanisms(row)
            context['list2'][mut]=details

            # Prefer the protein change, then the coding change; fall back to the
            # raw annotation rather than crashing when neither pattern matches.
            m=re.search(r'(p\..*)$',mut) or re.search(r'(c\..*)$',mut)
            info={'gene':first['Gene.refGene'],
                  'p':m.group(1) if m else mut,
                  'freq':first['Freq']}
            info.update(_drug_columns(mut_rows))
            context['list1'].append(info)
            gene_summary.append("".join([info['p'],'(',info['freq'],')']))

            context['genefunc'][mut]=genefunction[gene.upper()]

            record=_split_aachange(mut)
            record['freq']=first['Freq']
            record['muttype']=_translate_muttype(first['ExonicFunc.refGene'])
            # NOTE(review): as in the original (loop followed by `break`), only the
            # FIRST annotation row decides the tier; the fusion and CNV sections
            # appear to examine every row - confirm which behaviour is intended.
            if first['标签']=='适应症' or first['证据等级']=='B':
                context['clingene1'].append(record)
            else:
                context['clingene2'].append(record)
        context['list3'][gene]="\n".join(gene_summary)
|
||||
|
||||
|
||||
## Variants of uncertain significance (target and non-target)
snv_size_vus = os.path.getsize(snv_file_vus)
if snv_size_vus>0:
    snv_vus=pd.read_table(snv_file_vus,sep="\t")
    # (anchored regex, Chinese label) pairs, tested in this order.
    muttype_labels=(
        ("nonsynonymous SNV",'错义突变'),
        ("frameshift",'移码突变'),
        ("nonframeshift",'非移码突变'),
        ("stopgain",'提前终止'),
    )
    for index,row in snv_vus.iterrows():
        # Only variants at 2% frequency or above are reported.
        if not (float(row['Freq'].replace('%',''))>=2):
            continue
        parts=row['AAChange.refGene'].split(":")
        nonclingene={}
        if len(parts) == 5:
            (nonclingene['gene'],nonclingene['transcript'],nonclingene['exon'],
             nonclingene['nacid'],nonclingene['aacid'])=parts
        else:
            (nonclingene['gene'],nonclingene['transcript'],nonclingene['exon'],
             nonclingene['nacid'])=parts
            nonclingene['aacid'] = '/'
        nonclingene['freq']=row['Freq']
        raw_type=row['ExonicFunc.refGene']
        nonclingene['muttype']='/'
        for pattern,label in muttype_labels:
            if re.match(pattern,raw_type):
                nonclingene['muttype']=label
                break
        context['nonclingenes'].append(nonclingene)
|
||||
|
||||
|
||||
## Fusion processing

def _record_drug_mechanisms(row):
    """Add the drugs of one annotation row to the sensitive/resistant buckets.

    Only drugs present in drug_disease are recorded. The Chinese display name
    is taken from the same position in '药物中文名' as the drug in 'Drug'.
    """
    response=row['Response_Type_C']
    if re.search(r'敏感',response):
        bucket=sensitive_resistant_drug[0]['drug']
    elif re.search(r'耐药',response):
        bucket=sensitive_resistant_drug[1]['drug']
    else:
        return
    drugs=row['Drug'].replace(" + ",",").split(",")
    drugs_chinese=row['药物中文名'].replace(" + ",",").split(",")
    for position,drug in enumerate(drugs):
        key=drug.upper()
        if key not in drug_disease:
            continue
        # Fall back to the English name when the Chinese list is shorter,
        # instead of raising IndexError.
        shown=drugs_chinese[position] if position<len(drugs_chinese) else drug
        entry={'name':shown,
               'mechanism':("\n".join([drug_disease[key],drug_mechanism[key]])).strip()}
        if entry not in bucket:
            bucket.append(entry)


def _labelled_drugs(rows,level_of):
    """Return '<drug>【<level> 级】' labels for rows, de-duplicated in first-seen order."""
    labels=[]
    for _,row in rows.iterrows():
        level=level_of(row)
        for x in row['药物中文名'].split(","):
            label=str(x) + '【' + level + ' 级】'
            if label not in labels:
                labels.append(label)
    return labels


def _resistance_level(row):
    """Evidence level for a resistance row: 'C' when tagged '非适应症', else the row's own level."""
    return 'C' if row['标签']=='非适应症' else row['证据等级']


def _drug_columns(rows):
    """Build the drug_A..drug_D report cells ('/' when empty) for one variant's rows."""
    sensitive=rows[rows['Response_Type_C'].str.contains("敏感",na=False)]
    resistant=rows[rows['Response_Type_C'].str.contains("耐药",na=False)]
    cells={
        'drug_A':_labelled_drugs(sensitive[sensitive['标签']=='适应症'],lambda row:'A'),
        'drug_B':_labelled_drugs(sensitive[sensitive['标签']=='非适应症'],lambda row:'C'),
        'drug_C':_labelled_drugs(sensitive[sensitive['标签']=='.'],lambda row:row['证据等级']),
        'drug_D':_labelled_drugs(resistant,_resistance_level),
    }
    return {key:("\n".join(labels) or '/') for key,labels in cells.items()}


fusion_size = os.path.getsize(fusion_file)
if fusion_size>0:
    fusion=pd.read_table(fusion_file,sep="\t")
    # Drop calls whose '可信' flag is 0.
    fusion.drop(fusion.index[fusion['可信']==0],inplace=True)
    genes=fusion['Gene_Symbol'].drop_duplicates()
    if not len(genes):
        fusion_size=0
    for gene in genes:
        gene_summary=[]
        fusions=fusion['FUSION'][fusion['Gene_Symbol']==gene].drop_duplicates()
        for mut in fusions:
            # All annotation rows of this fusion (one row per drug/evidence entry).
            mut_rows=fusion[fusion['FUSION']==mut]

            details=[]
            for _,row in mut_rows.iterrows():
                details.append({'drug':row['药物中文名'],
                                'effect':row['Response_Type_C'],
                                'tumor':row['疾病中文名'],
                                'evidence':row['Evidence_Source_C'],
                                'sig':row['EfficacyEvidence']})
                _record_drug_mechanisms(row)
            context['list2'][mut]=details

            info={'gene':mut,
                  'p':'融合',
                  'freq':"".join([str(mut_rows['FREQ1'].iloc[0]),'%'])}
            info.update(_drug_columns(mut_rows))
            context['list1'].append(info)
            gene_summary.append("".join([info['gene'],'(',info['freq'],')']))

            context['genefunc'][mut]=genefunction[gene.upper()]

            # One record per tier list; every annotation row files the fusion
            # under tier 1 (on-label or level B) or tier 2. Duplicates are
            # removed later by the de-duplication step.
            clingene1={'gene':info['gene'],'freq':info['freq'],
                       'transcript':"/",'exon':"/",'nacid':"/",'aacid':"/",
                       'muttype':'融合'}
            clingene2=dict(clingene1)
            for _,row in mut_rows.iterrows():
                if row['标签']=='适应症' or row['证据等级']=='B':
                    context['clingene1'].append(clingene1)
                else:
                    context['clingene2'].append(clingene2)
        context['list3'][gene]="\n".join(gene_summary)
|
||||
|
||||
|
||||
## Copy-number variation processing

def _record_drug_mechanisms(row):
    """Add the drugs of one annotation row to the sensitive/resistant buckets.

    Only drugs present in drug_disease are recorded. The Chinese display name
    is taken from the same position in '药物中文名' as the drug in 'Drug'.
    """
    response=row['Response_Type_C']
    if re.search(r'敏感',response):
        bucket=sensitive_resistant_drug[0]['drug']
    elif re.search(r'耐药',response):
        bucket=sensitive_resistant_drug[1]['drug']
    else:
        return
    drugs=row['Drug'].replace(" + ",",").split(",")
    drugs_chinese=row['药物中文名'].replace(" + ",",").split(",")
    for position,drug in enumerate(drugs):
        key=drug.upper()
        if key not in drug_disease:
            continue
        # Fall back to the English name when the Chinese list is shorter,
        # instead of raising IndexError.
        shown=drugs_chinese[position] if position<len(drugs_chinese) else drug
        entry={'name':shown,
               'mechanism':("\n".join([drug_disease[key],drug_mechanism[key]])).strip()}
        if entry not in bucket:
            bucket.append(entry)


def _labelled_drugs(rows,level_of):
    """Return '<drug>【<level> 级】' labels for rows, de-duplicated in first-seen order."""
    labels=[]
    for _,row in rows.iterrows():
        level=level_of(row)
        for x in row['药物中文名'].split(","):
            label=str(x) + '【' + level + ' 级】'
            if label not in labels:
                labels.append(label)
    return labels


def _resistance_level(row):
    """Evidence level for a resistance row: 'C' when tagged '非适应症', else the row's own level."""
    return 'C' if row['标签']=='非适应症' else row['证据等级']


def _drug_columns(rows):
    """Build the drug_A..drug_D report cells ('/' when empty) for one variant's rows."""
    sensitive=rows[rows['Response_Type_C'].str.contains("敏感",na=False)]
    resistant=rows[rows['Response_Type_C'].str.contains("耐药",na=False)]
    cells={
        'drug_A':_labelled_drugs(sensitive[sensitive['标签']=='适应症'],lambda row:'A'),
        'drug_B':_labelled_drugs(sensitive[sensitive['标签']=='非适应症'],lambda row:'C'),
        'drug_C':_labelled_drugs(sensitive[sensitive['标签']=='.'],lambda row:row['证据等级']),
        'drug_D':_labelled_drugs(resistant,_resistance_level),
    }
    return {key:("\n".join(labels) or '/') for key,labels in cells.items()}


cnv_size = os.path.getsize(cnv_file)
if cnv_size>0:
    cnv=pd.read_table(cnv_file,sep="\t")
    # Drop calls whose '可信' flag is 0.
    cnv.drop(cnv.index[cnv['可信']==0],inplace=True)
    genes=cnv['gene'].drop_duplicates()
    if not len(genes):
        cnv_size=0
    for gene in genes:
        gene_summary=[]
        cnvs=cnv['Gene_Symbol'][cnv['gene']==gene].drop_duplicates()
        for mut in cnvs:
            # All annotation rows of this CNV (one row per drug/evidence entry).
            mut_rows=cnv[cnv['Gene_Symbol']==mut]
            copy_number=mut_rows['cn'].iloc[0]

            # More than two copies is reported as amplification, otherwise as loss.
            info={'gene':mut,'p':'扩增' if copy_number > 2 else '缺失'}
            info.update(_drug_columns(mut_rows))
            info['freq']=" ".join([str(copy_number),'拷贝'])
            context['list1'].append(info)

            details=[]
            for _,row in mut_rows.iterrows():
                details.append({'drug':row['药物中文名'],
                                'effect':row['Response_Type_C'],
                                'tumor':row['疾病中文名'],
                                'evidence':row['Evidence_Source_C'],
                                'sig':row['EfficacyEvidence']})
                _record_drug_mechanisms(row)

            gene_summary.append("".join([info['p'],'(',info['freq'],')']))
            # CNV entries are keyed by '<gene symbol> <扩增|缺失>'.
            variant_key=" ".join([mut,info['p']])
            context['list2'][variant_key]=details
            context['genefunc'][variant_key]=genefunction[mut.upper()]

            # One record per tier list; every annotation row files the CNV
            # under tier 1 (on-label or level B) or tier 2. Duplicates are
            # removed later by the de-duplication step.
            clingene1={'gene':info['gene'],'freq':info['freq'],
                       'transcript':"/",'exon':"/",'nacid':"/",'aacid':"/",
                       'muttype':info['p']}
            clingene2=dict(clingene1)
            for _,row in mut_rows.iterrows():
                if row['标签']=='适应症' or row['证据等级']=='B':
                    context['clingene1'].append(clingene1)
                else:
                    context['clingene2'].append(clingene2)
        context['list3'][gene]="\n".join(gene_summary)
|
||||
|
||||
|
||||
## Microsatellite instability (tissue samples only)
if Sample_type == 't':
    msi_file=''.join([output_dir,'/MSI/',name,'.msi'])
    # The second line of the .msi file carries the values; close the file promptly.
    with open(msi_file,'r') as msi_fh:
        msi=msi_fh.readlines()[1].split("\t")
    context['msi_count']=msi[0]
    # Column 2 is divided by 100 before being compared with the 0.3 cut-off.
    context['msi_value']=round(float(msi[2].strip())/100,2)
    if context['msi_value']>=0.3:
        context['msi_result']='MSI-H'
        context['msi_predict']='对免疫检查点抑制剂可能敏感'
    else:
        context['msi_result']='MSS'
        context['msi_predict']='对免疫检查点抑制剂可能不敏感'
|
||||
|
||||
|
||||
## Mismatch-repair (MMR) gene variants
mmr_file=''.join([output_dir,'/MMR/',name,"_mmr.txt"])
mmr_size = os.path.getsize(mmr_file)
mmr_result=0
mmr_result_summary=[]
if mmr_size>0:
    with open(mmr_file,'r',encoding='utf-8') as mmr_fh:
        mmr_lines=mmr_fh.readlines()
    for line in mmr_lines[1:]:  # first line is the header
        fields=line.strip().split("\t")
        mmr={'gene':fields[0],
             'transcript':fields[1],
             'nacid':fields[2],
             'aacid':fields[3],
             'muttype':fields[5],
             'freq':fields[4],
             'sig':fields[6]}
        # Everything except variants of uncertain significance counts as a finding.
        if mmr['sig'] != '意义未明突变':
            mmr_result+=1
            mmr_result_summary.append(mmr['gene'] + ' ' + mmr['aacid'])
        context['mmr'].append(mmr)

context['mmr_result']=mmr_result
mmr_result_summary=' | '.join(mmr_result_summary)
if mmr_result_summary:
    # Fixed: this used the misspelt name `mrr_result_summary`, which raised
    # NameError whenever a reportable MMR variant was present.
    context['mmr_result_summary']=mmr_result_summary
    context['mmr_predict']="对免疫检查点抑制剂可能敏感"
else:
    context['mmr_result_summary']="未检测到相关基因突变"
    context['mmr_predict']="对免疫检查点抑制剂可能不敏感"
|
||||
|
||||
|
||||
|
||||
## Chemotherapy drug results, laid out two drugs per report row
chemo_file=''.join([output_dir,'/chemo/',name,".drug.res.txt"])
with open(chemo_file,'r') as chemo_fh:
    chemos=chemo_fh.readlines()[1:]  # drop the header line
chemo_result=0
recommend_drug=[]
normal_drug=[]
restrict_drug=[]
# Column 3 (usage advice) -> list collecting the drug names for the summary.
advice_lists={'推荐':recommend_drug,'常规':normal_drug,'谨慎':restrict_drug}
for start in range(0,len(chemos),2):
    chemo={}
    # Slot 1 is the left-hand drug of the row, slot 2 (if present) the right-hand one.
    for slot,position in enumerate(range(start,min(start+2,len(chemos))),1):
        lines=chemos[position].strip().split("\t")
        chemo['bool%d' % slot]=position+1  # 1-based running number
        chemo['name%d' % slot]=lines[0]
        chemo['result%d' % slot]=lines[4]
        if lines[3] == '推荐':
            chemo_result+=1
        if lines[3] in advice_lists:
            advice_lists[lines[3]].append(lines[0])
    context['chemo'].append(chemo)
context['chemo_result']=chemo_result
|
||||
# Per-drug genotype details for the chemotherapy appendix.
chemo_detail_file=''.join([output_dir,'/chemo/',name,".drug.infos.txt"])
chemo_data=pd.read_table(chemo_detail_file,sep="\t")
chemo_drugs=chemo_data['药物'].drop_duplicates()

# Comma-separated drug names per usage-advice category.
context['recommend_drug']=','.join(recommend_drug)
context['normal_drug']=','.join(normal_drug)
context['restrict_drug']=','.join(restrict_drug)

chemo_detail=[]
for drug in chemo_drugs:
    drug_rows=chemo_data[chemo_data['药物'] == drug]
    site_infos=[
        {'gene':row['检测基因'],
         'site':row['检测位点'],
         'gt':row['基因型'],
         'level':row['证据等级'],
         'sig':row['用药提示']}
        for index,row in drug_rows.iterrows()
    ]
    chemo_detail.append({'drug':drug,'info':site_infos})

context['chemo_detail']=chemo_detail
context['sensitive_resistant_drug']=sensitive_resistant_drug
|
||||
|
||||
## Combination chemotherapy regimens, grouped by cancer type
chemo_comb_file=''.join([output_dir,'/chemo/',name,".chemo.comb.txt"])
chemo=pd.read_table(chemo_comb_file,sep="\t")

chemo_comb=[]
# Iterate the distinct cancer types directly; the previous version stored them
# in a module-level variable named `type` and counted with one named `bool`,
# shadowing both builtins for the rest of the script.
for tumor_kind in chemo['癌种'].drop_duplicates():
    regimens=[]
    for index,row in chemo[chemo['癌种']==tumor_kind].iterrows():
        regimens.append({'name':row['用药方案'],
                         'abbr':row['方案缩写'],
                         'sig':row['临床提示']})
    chemo_comb.append({'type':tumor_kind,'drug':regimens})
context['chemo_comb']=chemo_comb
|
||||
|
||||
## Hereditary cancer (only when the pre-check file exists)
if os.path.exists(''.join([output_dir,'/hereditary/',name,'.hereditary.pre.txt'])):
    context['hereditary_cancer_1']=[]
    context['hereditary_cancer_2']=[]
    hereditary_file1=''.join([output_dir,'/hereditary/',name,".hereditary.txt"])
    hereditary_file2=''.join([output_dir,'/hereditary/',name,".risk.txt"])

    hereditary_result=0
    hereditary_result_summary=[]
    hereditary_disease=[]
    if os.path.getsize(hereditary_file1)>0:
        with open(hereditary_file1,'r') as hereditary_file1_fh:
            variant_lines=hereditary_file1_fh.readlines()[1:]  # drop the header
        for line in variant_lines:
            lines=line.strip().split("\t")
            hereditary_cancer_1={'gene':lines[0],
                                 'syndrome':lines[1],
                                 'hereditary_type':lines[2],
                                 'type':lines[3],
                                 'result':lines[4]}
            # Column 4 may list several variants separated by ';'.
            hereditary_result+=len(lines[4].split(";"))
            hereditary_result_summary.append(hereditary_cancer_1['gene'] + ' ' + hereditary_cancer_1['result'])
            hereditary_disease.append(hereditary_cancer_1['syndrome'])
            context['hereditary_cancer_1'].append(hereditary_cancer_1)

    context['hereditary_result']=hereditary_result
    if hereditary_result_summary:
        context['hereditary_disease']=';'.join(hereditary_disease)
        context['hereditary_result_summary']=' | '.join(hereditary_result_summary)
    else:
        context['hereditary_disease']='/'
        context['hereditary_result_summary']='未检测到相关基因突变'

    hereditary_risk=[]
    with open(hereditary_file2,'r') as hereditary_file2_fh:
        heres=hereditary_file2_fh.readlines()[1:]  # drop the header
    # The risk table is rendered two cancer types per report row.
    for start in range(0,len(heres),2):
        hereditary_cancer_2={}
        for slot,line in enumerate(heres[start:start+2],1):
            lines=line.strip().split("\t")
            hereditary_cancer_2['type%d' % slot]=lines[0]
            if lines[1] == '偏高':
                # Fixed: the right-hand entry of a row was never added to the
                # elevated-risk summary, only the left-hand one.
                hereditary_risk.append(lines[0])
                hereditary_cancer_2['risk%d' % slot]=RichText('偏高', color='FF0000')
            elif lines[1] == '同一般人群':
                hereditary_cancer_2['risk%d' % slot]=RichText('同一般人群')
        context['hereditary_cancer_2'].append(hereditary_cancer_2)

    if hereditary_risk:
        context['hereditary_risk']=','.join(hereditary_risk) + '风险可能较高'
    else:
        context['hereditary_risk']='风险同一般人群'
    # Placeholder row so the table still renders when nothing was found.
    if len(context['hereditary_cancer_1'])==0:
        context['hereditary_cancer_1']=[{'gene':'/','syndrome':'/','hereditary_type':'/','type':'/','result':'/'}]
|
||||
|
||||
## Count the distinct potentially beneficial drugs (columns A, B and C)
total_drug_count=[]
for variant in context['list1']:
    for column in ('drug_A','drug_B','drug_C'):
        cell=variant[column]
        if cell == '/':
            # '/' marks an empty cell
            continue
        for label in cell.split("\n"):
            if label in total_drug_count:
                continue
            total_drug_count.append(label)

context['total_drug_count']=len(total_drug_count)
|
||||
|
||||
## De-duplicate the two clinical-tier variant lists
context['clingenes1']=[]
context['clingenes2']=[]
for record in context['clingene1']:
    if record in context['clingenes1']:
        continue
    context['clingenes1'].append(record)

# A variant already listed in tier 1 is not repeated in tier 2.
for record in context['clingene2']:
    if record in context['clingenes2'] or record in context['clingenes1']:
        continue
    context['clingenes2'].append(record)

## Number of detected variants
context['total_mut_count']=len(context['clingenes1'])+len(context['clingenes2'])
|
||||
|
||||
|
||||
## Genes whose testing is approved (FDA/NMPA/NCCN) for this cancer type
with open(indication_file,'r',encoding='utf-8') as indication_fh:
    indication_lines=indication_fh.readlines()
for line in indication_lines[1:]:  # first line is the header
    fields=line.strip().split("\t")
    indication={'gene':fields[0],'content':fields[1]}
    if indication['gene'] in context['list3']:
        # Detected variants are highlighted in red.
        indication['result']=RichText(context['list3'][indication['gene']], color='FF0000')
    else:
        indication['result']='未检出变异'
    context['indication'].append(indication)
|
||||
|
||||
## Sequencing QC
qc_file=''.join([output_dir,'/qc/',name,'_qc.txt'])
# Two-column table without header: metric name (index) and value (column 'B').
qc=pd.read_table(qc_file,sep="\t",header=None,index_col=0,names=['A','B'])

Q30=qc.loc['Q30(%)','B']
Q30_result='合格' if Q30>=85 else '警戒'

# Minimum de-duplicated mean depth per sample type (c = cfDNA, t = tissue).
min_depth={'c':1000,'t':500}
if Sample_type not in min_depth:
    # Previously depth_result stayed undefined here and a NameError followed.
    raise ValueError("unsupported sample type %r (expected 't' or 'c')" % Sample_type)
depth=qc.loc['mean_depth(dedup)','B']
depth_result='合格' if depth>=min_depth[Sample_type] else '警戒'

uniformity=qc.loc['coverage(>=0.2*meanx)','B']
uniformity_result='合格' if uniformity>=90 else '警戒'

# The overall verdict passes only when every single metric passes.
if Q30_result=='合格' and depth_result=='合格' and uniformity_result=='合格':
    context['qc_result']='合格'
else:
    context['qc_result']='警戒'

context['Q30']=Q30
context['Q30_result']=Q30_result
context['depth']=depth
context['uniformity']=uniformity
context['depth_result']=depth_result
context['uniformity_result']=uniformity_result
|
||||
|
||||
|
||||
|
||||
## Placeholders for negative results: every empty table gets one row of '/'
variant_placeholder={'gene':'/','transcript':'/','nacid':'/','aacid':'/','exon':'/','muttype':'/','freq':'/'}

## list1
if not context['list1']:
    context['list1']=[{'gene':'/','freq':'/','drug_A':'/','drug_B':'/','drug_C':'/','drug_D':'/'}]

## list2
if not context['list2']:
    context['list2']= {'/':[{'drug':'/','effect':'/','tumor':'/','evidence':'/','sig':'/'}]}
    context['genefunc']['/']='/'

## clingenes, nonclingenes
for table in ('clingenes1','clingenes2','nonclingenes'):
    if not context[table]:
        context[table]=[dict(variant_placeholder)]

## sensitive_resistant_drug
for bucket in context['sensitive_resistant_drug'][:2]:
    if not bucket['drug']:
        bucket['drug']=[{'name':'/','mechanism':'/'}]

## mmr
if not context['mmr']:
    context['mmr']=[{'gene':'/','transcript':'/','nacid':'/','aacid':'/','muttype':'/','freq':'/','sig':'/'}]
|
||||
|
||||
## Patient / sample information
post_file=''.join([output_dir,'/qc/',name,'_post.json'])
# Defaults used when no metadata file accompanies the sample.
sex='/'
age='/'
phone='/'
medical_history='/'
family_history='/'
sample_id=name
sample_type='/'
report_date=time.strftime("%Y-%m-%d", time.localtime())
arrival_date=report_date
cancer_type='/'
pathologic_diagnosis='/'

if os.path.isfile(post_file):
    # Close the metadata file as soon as it is parsed.
    with open(post_file,'r') as post_fh:
        post=json.load(post_fh)
    patient=post["data"][0]
    # From here on `name` holds the patient name instead of the sample name.
    name=patient["name"]
    sex=patient["gender"]
    age=patient["age"]
    medical_history=patient["treatHistory"]
    family_history=patient["sickFamilyHistory"]
    sample_id=patient["barcode"]
    sample_type=patient["source"]
    # Keep only the date part of 'receiveTime'.
    arrival_date=patient["receiveTime"].split(' ')[0]
    cancer_type=patient["zlType"]
    pathologic_diagnosis=patient["treatResult"]

context['info']={
    'name':name,
    'sex':sex,
    'age':age,
    'phone':phone,
    'medical_history':medical_history,
    'family_history':family_history,
    'sample_id':sample_id,
    'sample_type':sample_type,
    'report_date':report_date,
    'arrival_date':arrival_date,
    'cancer_type':cancer_type,
    'pathologic_diagnosis':pathologic_diagnosis}
context['report_time']=report_date
|
||||
|
||||
## Render the Word template
# Templates live next to this script (symlinks resolved).
Exe_Path = os.path.dirname(os.path.realpath(sys.argv[0]))

# project -> sample type -> template file name
report_template={
    'lung85gene':{'t':'lung85-tissue-oem.docx','c':'lung85-blood-oem.docx'},
    'crc88gene':{'t':'CRC88-tissue-oem.docx','c':'CRC88-blood-oem.docx'},
}
template_name=report_template[projcet][Sample_type]
doc_full = os.path.join(Exe_Path, template_name)

doc = DocxTemplate(doc_full)
doc.render(context)
doc.save(report_file)
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
import json
|
||||
import os
|
||||
import socket
|
||||
import struct
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
from main import main
|
||||
from tools.common import basedir
|
||||
|
||||
|
||||
def _recv_exact(conn, size):
    """Read exactly ``size`` bytes from ``conn``.

    ``socket.recv`` may return fewer bytes than requested, so keep reading
    until the full amount has arrived.

    :param conn: connected socket to read from
    :param size: number of bytes to read
    :return: the bytes read
    :raises ConnectionError: if the peer closes the connection first
    """
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = conn.recv(remaining)
        if not chunk:
            raise ConnectionError('connection closed with %d bytes still expected' % remaining)
        chunks.append(chunk)
        remaining -= len(chunk)
    return b''.join(chunks)


def recvdata(conn, path):
    """
    Receive one file from ``conn`` and save it inside ``path``.

    The stream is: a 4-byte native ``int`` holding the header size, a UTF-8
    JSON header with the keys ``contentlen`` and ``contentname``, then
    ``contentlen`` bytes of file content.

    :param conn: connected socket to read from
    :param path: directory in which the received file is written
    :return: path of the written file, named ``<MMDDHHMM>_<contentname>``
    :raises ConnectionError: if the peer closes the connection before the
        announced number of bytes has been received
    """
    header_size = struct.unpack('i', _recv_exact(conn, 4))[0]
    header_dic = json.loads(_recv_exact(conn, header_size).decode('utf-8'))
    content_len = header_dic['contentlen']
    # The name comes from the peer: keep only its last path component so it
    # cannot point outside ``path``.
    content_name = os.path.basename(str(header_dic['contentname']).replace('\\', '/'))
    fielpath = os.path.join(path, '%s_%s' % (datetime.now().strftime("%m%d%H%M"), content_name))
    remaining = content_len
    with open(fielpath, 'wb') as file:
        while remaining > 0:
            # Never ask for more than is still owed, so nothing sent after
            # this file is consumed by mistake.
            chunk = conn.recv(min(remaining, 1024 * 1000))
            if not chunk:
                raise ConnectionError('connection closed with %d bytes still expected' % remaining)
            file.write(chunk)
            remaining -= len(chunk)
    return fielpath
|
||||
|
||||
|
||||
def senddata(conn, path, message=None):
    """
    Send a file, or a text message, over ``conn``.

    The stream is: a 4-byte native ``int`` holding the header size, a UTF-8
    JSON header with the keys ``contentlen`` and ``contentname``, then the
    content bytes.

    :param conn: connected socket to write to
    :param path: path of the file to send; when ``message`` is truthy this
        is instead the text to send
    :param message: truthy to send ``path`` itself as a message named
        ``'message'`` rather than as a file
    :return: None
    """
    if message:
        # Encode first so that contentlen counts bytes, not characters;
        # the two differ for non-ASCII text.
        content = path.encode('utf-8')
        name = 'message'
    else:
        name = os.path.basename(os.path.realpath(path))
        with open(path, 'rb') as file:
            content = file.read()

    headerbytes = json.dumps(dict(contentlen=len(content), contentname=name)).encode('utf-8')
    # sendall keeps writing until everything is sent; send may stop early.
    conn.sendall(struct.pack('i', len(headerbytes)))
    conn.sendall(headerbytes)
    conn.sendall(content)
|
||||
|
||||
|
||||
def server():
    """
    Listen on TCP port 8190 and serve clients one at a time, forever.

    For each connection: receive a file into ``<basedir>/xlsx``, pass its
    path to ``main`` and send the file ``main`` returns back to the client.
    A failure while serving one client is printed and the loop continues
    with the next connection.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as myserver:
        myserver.bind(("", 8190))
        myserver.listen(5)
        while True:
            myclient = None
            try:
                myclient, _ = myserver.accept()
                recv_content = recvdata(myclient, os.path.join(basedir, 'xlsx'))
                outputpath = main(recv_content)
                senddata(myclient, outputpath)
                print('生成成功')
            except Exception as e:
                # Top-level boundary: report the error and keep serving.
                print(e, '有错误')
            finally:
                # Always release the client socket, on success or failure.
                if myclient is not None:
                    myclient.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Without a command-line argument run the socket server; otherwise pass
    # the first argument straight to main().
    if len(sys.argv) <= 1:
        server()
    else:
        outputpath = main(sys.argv[1])
|
||||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
import os
|
||||
|
||||
# Directory two levels above this file (symlinks resolved).
basedir = os.path.abspath(os.path.join(os.path.realpath(__file__), os.pardir, os.pardir))
|
||||
|
|
@ -0,0 +1,457 @@
|
|||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from tools.readxlsx import read
|
||||
|
||||
|
||||
def tree():
    """Return a defaultdict whose missing keys are filled with further such defaultdicts."""
    nested = defaultdict(tree)
    return nested
|
||||
|
||||
|
||||
class BaseAssignment:
    """Base class that creates the empty containers filled while a report is built."""

    def __init__(self, *args, **kwargs):
        """Pass all arguments on to the next initializer, then create the containers."""
        super().__init__(*args, **kwargs)

        # Gene variants with definite or potential clinical significance
        # Therapeutic drugs with potential clinical benefit
        self.signtb, self.signdurg = set(), set()

        self.drugs_type = {}

        # Report result
        self.result = tree()
|
||||
|
||||
|
||||
class Parse(BaseAssignment):
    """Turn the per-sheet sample data into the nested result returned by ``collect``.

    ``sampledata`` is indexed by sheet name; every value is handed to
    ``pandas.DataFrame``. Each public method reads one or more sheets and
    stores its outcome in ``self.result``.
    """

    # Columns copied into the drug table of each alteration.
    _DRUG_COLUMNS = ['DrugCn', 'Response_Type', 'Indication', 'Evidence_Source', 'Efficacy_Evidence']
    # Names given to the ':'-separated parts of 'AAChange.refGene'.
    _HGVS_COLUMNS = ['gene', 'transcript', 'exon', 'nacid', 'aacid']
    # (pattern matched at the start of 'ExonicFunc.refGene', label); applied in order.
    _MUTTYPE_PATTERNS = (
        ('nonsynonymous SNV', '错义突变'),
        ('^frameshift', '移码突变'),
        ('^nonframeshift', '非移码突变'),
        ('stopgain', '提前终止'),
    )
    # 'AMP_mut_level' values that are counted in ``signtb``.
    _SIGNIFICANT_LEVELS = ('I', 'II')

    def __init__(self, sampledata, *args, **kwargs):
        """Store ``sampledata`` and create the containers of ``BaseAssignment``."""
        super().__init__(*args, **kwargs)
        self.sampledata = sampledata

    def cms(self):
        """
        Process the sample information.

        Stores the first row of the ``sample_info`` sheet in
        ``result['c']``. Cells equal to '.', '-' or '——' become '/',
        ``receiveTime`` is cut at its first space and ``reportTime`` is set
        to today's date.

        :raises UserWarning: if the sheet has no rows
        """
        data = pd.DataFrame(self.sampledata['sample_info'])
        if data.empty:
            raise UserWarning('sample_info表为空,生成报告失败!')
        placeholders = ('.', '-', '——')
        data_dict = {
            key: '/' if str(value) in placeholders else value
            for key, value in data.to_dict('index')[0].items()
        }
        # The cell is not guaranteed to be text (it may be a timestamp),
        # so convert it before splitting.
        data_dict['receiveTime'] = str(data_dict['receiveTime']).split(' ')[0]
        data_dict['reportTime'] = time.strftime("%Y-%m-%d", time.localtime())
        self.result['c'] = data_dict

    def target(self):
        """Store one entry per validated ``snvindel`` alteration in ``result['snvindel']``.

        Rows are grouped by 'AAChange.refGene'; the first row of each group
        supplies the alteration fields, all rows of the group supply the
        drug information.
        """
        data = pd.DataFrame(self.sampledata['snvindel'])
        res = []
        if not data.empty:
            data = data[data['Validated'] == 1].reset_index()
        # Also covers the case where no row passed the 'Validated' filter.
        if data.empty:
            self.result['snvindel'] = res
            return

        exonic_func = data['ExonicFunc.refGene'].astype(str)
        data['muttype'] = '/'
        for pattern, label in self._MUTTYPE_PATTERNS:
            data.loc[exonic_func.str.match(pattern), 'muttype'] = label

        # Split the HGVS string; force exactly five columns so the
        # assignment works however many ':' parts the rows contain.
        parts = data['AAChange.refGene'].astype(str).str.split(':', expand=True)
        parts = parts.reindex(columns=range(len(self._HGVS_COLUMNS)))
        parts.columns = self._HGVS_COLUMNS
        data[self._HGVS_COLUMNS] = parts
        # Without an amino-acid change, use the nucleotide change instead
        data['aacid'] = data['aacid'].fillna(data['nacid'])

        wanted = self._HGVS_COLUMNS + ['mutant_frequency', 'AMP_mut_level', 'muttype', 'Gene_function']
        for alter, alter_data in data.groupby('AAChange.refGene'):
            alter_res = alter_data[wanted].iloc[0].to_dict()
            alter_res['drug_category'] = self._drug_category(alter_data)
            alter_res['drug_content'] = self._drug_content(alter_data)
            alter_res['alter'] = alter
            res.append(alter_res)

            # Summary
            if alter_res['AMP_mut_level'] in self._SIGNIFICANT_LEVELS:
                self.signtb.add(alter)

        self.result['snvindel'] = res

    def fusion(self):
        """Store one entry per validated ``fusion`` alteration in ``result['fusion']``."""
        data = pd.DataFrame(self.sampledata['fusion'])
        res = []
        if not data.empty:
            data = data[data['Validated'] == 1].reset_index()
            for alter, alter_data in data.groupby('FUSION'):
                alter_res = alter_data[['FUSION', 'FREQ1', 'AMP_mut_level', 'Gene_function']].iloc[0].to_dict()
                alter_res['drug_category'] = self._drug_category(alter_data)
                alter_res['drug_content'] = self._drug_content(alter_data)
                alter_res['alter'] = '%s 融合' % (alter_res['FUSION'].replace('-', ':'))
                res.append(alter_res)

                # Summary
                if alter_res['AMP_mut_level'] in self._SIGNIFICANT_LEVELS:
                    self.signtb.add(alter)

        self.result['fusion'] = res

    def cnv(self):
        """Store one entry per validated ``cnv`` gene in ``result['cnv']``.

        ``muttype`` is '扩增' when the first row's 'Copy_number' is greater
        than 2 and '缺失' otherwise.
        """
        data = pd.DataFrame(self.sampledata['cnv'])
        res = []
        if not data.empty:
            data = data[data['Validated'] == 1].reset_index()
            for alter, alter_data in data.groupby('Gene_Symbol'):
                wanted = alter_data[['Gene_Symbol', 'Copy_number', 'AMP_mut_level', 'Gene_function']]
                alter_res = wanted.reset_index().iloc[0].to_dict()
                # A non-numeric cell becomes NaN, which compares False,
                # instead of raising on the comparison.
                copy_number = pd.to_numeric(alter_res['Copy_number'], errors='coerce')
                alter_res['muttype'] = '扩增' if copy_number > 2 else '缺失'
                alter_res['drug_category'] = self._drug_category(alter_data)
                alter_res['drug_content'] = self._drug_content(alter_data)
                alter_res['alter'] = '%s %s' % (alter, alter_res['muttype'])
                res.append(alter_res)

                # Summary
                if alter_res['AMP_mut_level'] in self._SIGNIFICANT_LEVELS:
                    self.signtb.add(alter)

        self.result['cnv'] = res

    def hotspot(self):
        """Copy the rows of the ``hotspot`` sheet into ``result['hotspot']``."""
        self._to_records('hotspot')

    def met(self):
        """Copy the rows of the ``MET`` sheet into ``result['MET']``."""
        self._to_records('MET')

    def longindel(self):
        """Copy the rows of the ``longindel`` sheet into ``result['longindel']``."""
        self._to_records('longindel')

    def mmr(self):
        """Store the ``MMR`` rows in ``result['MMR']`` and a summary in ``result['sum']['mmr']``."""
        data = pd.DataFrame(self.sampledata['MMR'])
        if data.empty:
            res = []
            result_summary = '未检测到相关基因突变'
            predict = '对免疫检查点抑制剂可能不敏感'
            mmr_num = 0
        else:
            res = data.to_dict('records')
            result_summary = ' | '.join(
                '%s %s' % (gene, change) for gene, change in zip(data['gene'], data['p_change']))
            predict = '对免疫检查点抑制剂可能敏感'
            mmr_num = len(data.index)

        self.result['MMR'] = res
        self.result['sum']['mmr'] = dict(
            result_summary=result_summary,
            predict=predict,
            mmr_num=mmr_num
        )

    def msi(self):
        """Copy the first row of the ``MSI`` sheet into ``result['MSI']``."""
        self._to_dicts('MSI')

    def chemo(self):
        """Fill ``result['chemo']`` and ``result['sum']['chemo']`` from the three chemo sheets.

        ``chemo_res`` rows get a 1-based ``index`` field; ``chemo_comb`` rows
        are grouped by '癌种' and ``chemo_info`` rows by '药物'. Empty sheets
        give empty containers.
        """
        chemo_res = self._to_records('chemo_res', need=True)
        chemo_res_df = pd.DataFrame(chemo_res)
        drug_category = dict()
        if chemo_res:
            drug_category = chemo_res_df.groupby('推荐程度')['药物名称'].apply(','.join).to_dict()
        chemo_res_df.index = chemo_res_df.index + 1
        chemo_res_df = chemo_res_df.reset_index()
        self.result['chemo']['chemo_res'] = chemo_res_df.to_dict('records')
        self.result['sum']['chemo']['drug_num'] = len(chemo_res)
        self.result['sum']['chemo']['drug_category'] = drug_category

        chemo_comb = self._to_records('chemo_comb', need=True)
        self.result['chemo']['chemo_comb'] = self._records_by(chemo_comb, '癌种')

        chemo_info = self._to_records('chemo_info', need=True)
        self.result['chemo']['chemo_info'] = self._records_by(chemo_info, '药物')

    def hcs(self):
        """Copy the ``HCS`` rows into ``result['HCS']`` and store their count."""
        self._to_records('HCS')
        self.result['sum']['hcs']['num'] = len(self.result['HCS'])

    def heredity(self):
        """
        Process the hereditary results.

        Stores the ``hereditary`` rows in ``result['hereditary']`` and the
        joined result, disease and risk strings in
        ``result['sum']['hereditary']``; a string stays '/' when its sheet
        is empty.

        :return: None
        """
        hereditary = pd.DataFrame(self.sampledata['hereditary'])
        result = '/'
        disease = '/'
        risk = '/'
        if not hereditary.empty:
            result = '|'.join(
                '%s %s' % (gene, found) for gene, found in zip(hereditary['基因'], hereditary['检测结果']))
            disease = '|'.join(hereditary['遗传性肿瘤综合征'].to_list())

        hereditary_risk = pd.DataFrame(self.sampledata['hereditary_risk'])
        if not hereditary_risk.empty:
            elevated = hereditary_risk[hereditary_risk['风险值'] == '偏高']
            risk = ','.join(elevated['肿瘤类型'].to_list())

        self.result['hereditary'] = hereditary.to_dict('records')
        self.result['sum']['hereditary']['result'] = result
        self.result['sum']['hereditary']['disease'] = disease
        self.result['sum']['hereditary']['risk'] = risk

    def qc(self):
        """Store the first ``qc`` row in ``result['qc']`` with three columns renamed."""
        data = pd.DataFrame(self.sampledata['qc'])
        res = {}
        if not data.empty:
            data = data.rename(columns={
                'Q30(%)': 'q30',
                'mean_depth(dedup)': 'depth',
                'coverage(>=0.2*meanx)': 'coverage'
            })
            res = data.to_dict('index')[0]
        self.result['qc'] = res

    def drugs(self):
        """Store a ``drug_name`` -> ``drug_detail`` mapping in ``result['drugs']['drugs_detail']``.

        Rows with missing values or with a ``drug_detail`` of '.' are left out.
        """
        data = pd.DataFrame(self.sampledata['drugs'])
        res = {}
        if not data.empty:
            data = data.dropna()
            data = data[data['drug_detail'] != '.']
            res = data.set_index('drug_name')['drug_detail'].to_dict()
        self.result['drugs']['drugs_detail'] = res

    def indication(self):
        """Copy the rows of the ``indication`` sheet into ``result['indication']``."""
        self._to_records('indication')

    def _to_records(self, sheetname, need=False):
        """
        Convert a sheet with many rows into a list of row dicts.

        :param sheetname: key of the sheet in ``sampledata``
        :param need: when true, return the list instead of storing it
        :return: the list of rows (empty for an empty sheet) when ``need``
            is true; otherwise None, with the list stored in
            ``result[sheetname]``
        """
        data = pd.DataFrame(self.sampledata[sheetname])
        res = [] if data.empty else data.to_dict('records')
        if need:
            return res
        self.result[sheetname] = res

    def _to_dicts(self, sheetname):
        """
        Store the first row of a single-row sheet in ``result[sheetname]``.

        :param sheetname: key of the sheet in ``sampledata``
        :return: None; an empty sheet is stored as an empty dict
        """
        data = pd.DataFrame(self.sampledata[sheetname])
        self.result[sheetname] = {} if data.empty else data.to_dict('index')[0]

    @classmethod
    def _drug_content(cls, alter_data):
        """Return the drug columns of ``alter_data`` as row dicts, skipping rows whose 'DrugCn' is '.'."""
        drug_content = alter_data[cls._DRUG_COLUMNS]
        drug_content = drug_content[drug_content['DrugCn'] != '.']
        return drug_content.reset_index().to_dict('records')

    @staticmethod
    def _records_by(records, column):
        """Group row dicts by ``column``; the grouping column is removed from each row."""
        if not records:
            return dict()
        frame = pd.DataFrame(records)
        # Iterate the groups directly rather than via GroupBy.apply, so the
        # result does not depend on how apply treats the grouping column.
        return {
            key: group.drop(columns=column).to_dict('records')
            for key, group in frame.groupby(column)
        }

    def _drug_category(self, groupdata):
        """Summarise the drugs of one alteration.

        Side effects: drugs in categories 'a', 'b' and 'c' are added to
        ``self.signdurg``; every individual drug name is added once to the
        ``self.drugs_type`` list of its 'Response_Type'.

        :param groupdata: rows belonging to one alteration
        :return: dict mapping each 'Drug_Category' other than '.' to a
            newline-joined text with one '<DrugCn> 【<AMP_evidence_level> 级】'
            line per row
        """
        drug_category_res = dict()
        for drug_category, category_rows in groupdata.groupby('Drug_Category'):
            if drug_category == '.':
                continue
            # Count the sensitive / possibly sensitive drugs
            if drug_category in ['a', 'b', 'c']:
                self.signdurg.update(category_rows['DrugCn'].str.split(',').explode().tolist())
            # Build the lines without writing a column into the groupby slice.
            drug_category_res[drug_category] = '\n'.join(
                '%s 【%s 级】' % (drug, level)
                for drug, level in zip(category_rows['DrugCn'], category_rows['AMP_evidence_level']))

        # All drug information. regex=False: ' + ' must be taken literally,
        # as a regex it would mean "two or more spaces".
        drug_lists = groupdata['DrugCn'].str.replace(' + ', '+', regex=False).str.split(r'[+,]')
        exploded = pd.DataFrame({
            'response': groupdata['Response_Type'],
            'drug': drug_lists,
        }).explode('drug')
        exploded = exploded[(exploded['drug'] != '.') & (exploded['drug'] != '')]

        for drug_type, drug_names in exploded.groupby('response')['drug']:
            known = self.drugs_type.setdefault(drug_type, [])
            # Keep first-seen order and skip names already recorded by an
            # earlier alteration.
            known.extend([name for name in dict.fromkeys(drug_names) if name not in known])

        return drug_category_res

    @staticmethod
    def _get_drug_plan(x, drugsum):
        """Rate a '+'-separated drug combination from the ratings of its drugs.

        :param x: combination text, drug names separated by '+'
        :param drugsum: mapping of drug name to rating; unknown names are ignored
        :return: '慎用' if any rating is '慎用' or '谨慎', else '推荐' if any
            rating is '推荐', else '可选'
        """
        ratings = [drugsum[name.strip()] for name in x.split('+') if name.strip() in drugsum]
        if '慎用' in ratings or '谨慎' in ratings:
            return '慎用'
        if '推荐' in ratings:
            return '推荐'
        return '可选'

    def collect(self):
        """Run every parsing step, add the summary counts and return ``self.result``."""
        self.cms()
        self.target()
        self.fusion()
        self.cnv()
        self.hotspot()
        self.met()
        self.longindel()
        self.mmr()
        self.msi()
        self.chemo()
        self.hcs()
        self.heredity()
        self.qc()
        self.indication()
        self.drugs()

        # Summary
        self.result['sum']['signtb_num'] = len(self.signtb)
        self.result['sum']['signdrug_num'] = len(self.signdurg)
        self.result['drugs']['drugs_type'] = {key: self.drugs_type[key] for key in sorted(self.drugs_type.keys())}
        return self.result
|
||||
|
||||
|
||||
def run(path):
    """Parse the file at ``path`` and return the collected result as JSON text.

    The same text is also written to ``t.json`` in the current working
    directory.

    :param path: passed unchanged to ``read``
    :return: the result serialised with ``json.dumps`` (indent 4, non-ASCII kept)
    """
    parse = Parse(read(path))
    res = parse.collect()
    resjson = json.dumps(res, indent=4, ensure_ascii=False)
    # The text keeps non-ASCII characters, so fix the encoding instead of
    # relying on the platform default.
    with open('t.json', 'w', encoding='utf-8') as f:
        f.write(resjson)
    return resjson
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: hand the first command-line argument to run().
    input_path = sys.argv[1]
    run(input_path)
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
import pandas as pd
|
||||
import logging
|
||||
import json
|
||||
import sys
|
||||
|
||||
# Module-level logger, registered under the name 'main.sub'.
logger = logging.getLogger('main.sub')
|
||||
|
||||
|
||||
def read(merge):
    """Load every sheet of an Excel workbook into plain Python containers.

    :param merge: workbook passed to ``pandas.read_excel``
    :return: dict keyed by sheet name; an empty sheet maps to ``[]``, any
        other sheet to a dict of column name -> list of values in which
        missing cells are replaced by '.'
    :raises UserWarning: if the ``sampleSn`` column of the ``sample_info``
        sheet has no values
    """
    workbook = pd.read_excel(merge, None)
    if not workbook['sample_info']['sampleSn'].to_list():
        logger.error('sample_info表为空!读取excel信息失败!')
        raise UserWarning('sample_info表为空!读取excel信息失败!')

    samdict = {}
    for sheet_name, sheet in workbook.items():
        if not sheet.empty:
            sheet.fillna('.', inplace=True)
            samdict[sheet_name] = sheet.to_dict('list')
        else:
            samdict[sheet_name] = []
    return samdict
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: read the workbook named by the first command-line
    # argument and print the resulting dict.
    workbook_path = sys.argv[1]
    res = read(workbook_path)
    print(res)
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue