初始化
commit
260d86d3f1
|
|
@ -0,0 +1,215 @@
|
||||||
|
# ---> Python
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
||||||
|
|
||||||
|
# ---> Perl
|
||||||
|
!Build/
|
||||||
|
.last_cover_stats
|
||||||
|
/META.yml
|
||||||
|
/META.json
|
||||||
|
/MYMETA.*
|
||||||
|
*.o
|
||||||
|
*.pm.tdy
|
||||||
|
*.bs
|
||||||
|
|
||||||
|
# Devel::Cover
|
||||||
|
cover_db/
|
||||||
|
|
||||||
|
# Devel::NYTProf
|
||||||
|
nytprof.out
|
||||||
|
|
||||||
|
# Dist::Zilla
|
||||||
|
/.build/
|
||||||
|
|
||||||
|
# Module::Build
|
||||||
|
_build/
|
||||||
|
Build
|
||||||
|
Build.bat
|
||||||
|
|
||||||
|
# Module::Install
|
||||||
|
inc/
|
||||||
|
|
||||||
|
# ExtUtils::MakeMaker
|
||||||
|
/blib/
|
||||||
|
/_eumm/
|
||||||
|
/*.gz
|
||||||
|
/Makefile
|
||||||
|
/Makefile.old
|
||||||
|
/MANIFEST.bak
|
||||||
|
/pm_to_blib
|
||||||
|
/*.zip
|
||||||
|
|
||||||
|
# ---> Perl6
|
||||||
|
# Gitignore for Perl 6 (http://www.perl6.org)
|
||||||
|
# As part of https://github.com/github/gitignore
|
||||||
|
|
||||||
|
# precompiled files
|
||||||
|
.precomp
|
||||||
|
lib/.precomp
|
||||||
|
|
||||||
|
nohup.out
|
||||||
|
|
||||||
|
log/*
|
||||||
|
!log/readme.md
|
||||||
|
example/*
|
||||||
|
!example/readme.md
|
||||||
|
|
||||||
|
/.report/
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
癌种 用药方案 方案缩写 source
|
||||||
|
非小细胞肺癌 顺铂+紫杉醇 TP lung85gene
|
||||||
|
非小细胞肺癌 卡铂+紫杉醇 TP lung85gene
|
||||||
|
非小细胞肺癌 顺铂+紫杉醇脂质体 LP lung85gene
|
||||||
|
非小细胞肺癌 卡铂+紫杉醇脂质体 LP lung85gene
|
||||||
|
非小细胞肺癌 顺铂+白蛋白紫杉醇 nab-TP lung85gene
|
||||||
|
非小细胞肺癌 卡铂+白蛋白紫杉醇 nab-TP lung85gene
|
||||||
|
非小细胞肺癌 顺铂+多西他赛 DP lung85gene
|
||||||
|
非小细胞肺癌 卡铂+多西他赛 DP lung85gene
|
||||||
|
非小细胞肺癌 奈达铂+多西他赛 DP lung85gene
|
||||||
|
非小细胞肺癌 顺铂+吉西他滨 GP lung85gene
|
||||||
|
非小细胞肺癌 卡铂+吉西他滨 GP lung85gene
|
||||||
|
非小细胞肺癌 顺铂+培美曲塞 PP lung85gene
|
||||||
|
非小细胞肺癌 卡铂+培美曲塞 PP lung85gene
|
||||||
|
非小细胞肺癌 顺铂+长春瑞滨 NP lung85gene
|
||||||
|
非小细胞肺癌 顺铂+依托泊苷 EP lung85gene
|
||||||
|
小细胞肺癌 顺铂+依托泊苷 EP lung85gene
|
||||||
|
小细胞肺癌 卡铂+依托泊苷 EC lung85gene
|
||||||
|
小细胞肺癌 洛铂+依托泊苷 EL lung85gene
|
||||||
|
小细胞肺癌 顺铂+伊立替康 IP lung85gene
|
||||||
|
小细胞肺癌 卡铂+伊立替康 IC lung85gene
|
||||||
|
结直肠癌 奥沙利铂+亚叶酸钙+氟尿嘧啶 FOLFOX crc88gene
|
||||||
|
结直肠癌 伊立替康+亚叶酸钙+氟尿嘧啶 FOLFIRI crc88gene
|
||||||
|
结直肠癌 奥沙利铂+卡培他滨 CAPEOX(又称Xelox) crc88gene
|
||||||
|
结直肠癌 伊立替康+奥沙利铂+亚叶酸钙+氟尿嘧啶 FOLFOXIRI crc88gene
|
||||||
|
结直肠癌 伊立替康+卡培他滨 CapIRI或XELIRI crc88gene
|
||||||
|
结直肠癌 奥沙利铂+雷替曲塞 / crc88gene
|
||||||
|
结直肠癌 伊立替康+雷替曲塞 / crc88gene
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from docxtpl import DocxTemplate
|
||||||
|
|
||||||
|
from tools.parsexlsx import run
|
||||||
|
|
||||||
|
|
||||||
|
def main(path):
    """Render the Word report for one input and return the saved file's path.

    ``path`` is passed straight to ``tools.parsexlsx.run``; the JSON string it
    returns is decoded and used as the rendering context for the
    ``template/nreport.docx`` template that lives next to this script.

    The report is written to ``result/<barcode>.docx`` (also next to this
    script), where ``<barcode>`` is ``context['c']['barcode']``.

    Raises:
        KeyError: if the decoded context has no ``['c']['barcode']`` entry.
    """
    context = json.loads(run(path))
    barcode = context['c']['barcode']

    base_dir = os.path.dirname(__file__)
    template = DocxTemplate(os.path.join(base_dir, 'template', 'nreport.docx'))
    template.render(context)

    # The result directory is not guaranteed to exist on a fresh checkout;
    # create it so that saving does not fail with FileNotFoundError.
    result_dir = os.path.join(base_dir, 'result')
    os.makedirs(result_dir, exist_ok=True)

    report_path = os.path.join(result_dir, f'{barcode}.docx')
    template.save(report_path)
    return report_path
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Fail with a readable usage line instead of a bare IndexError when the
    # input path is missing. Extra arguments are still ignored, as before.
    if len(sys.argv) < 2:
        sys.exit(f'usage: python {sys.argv[0]} path')
    main(sys.argv[1])
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,486 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from pandas import DataFrame
|
||||||
|
import numpy as np
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import glob
|
||||||
|
import openpyxl
|
||||||
|
from openpyxl import Workbook, load_workbook
|
||||||
|
from openpyxl.drawing.image import Image
|
||||||
|
|
||||||
|
# Validate the command line only when this file is executed as a script.
# Checking unconditionally would terminate any process that merely imports
# this module (for example to use PostProcess) with a different argv length.
if __name__ == '__main__' and len(sys.argv) != 3:
    print(" ".join(['usage:python', sys.argv[0], 'output_dir', 'name']))
    # Non-zero status so that calling shells/pipelines see the usage error.
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
# Reference tables used to annotate genes and drugs.
_GENE_FUNCTION_TABLE = "/dataseq/jmdna/codes/reportbase/gene_function.txt"
_TARGET_DRUG_TABLE = "/dataseq/jmdna/codes/reportbase/target_drug.txt"

# Population-frequency columns whose row-wise maximum becomes ``freq_high``.
_FREQ_COLUMNS = ['1000g2015aug_all', '1000g2015aug_eas', 'esp6500siv2_all',
                 'ExAC_nontcga_ALL', 'ExAC_nontcga_EAS', 'gnomAD_genome_ALL',
                 'gnomAD_genome_EAS']
# Predictor columns; the number of "D" calls becomes ``Deleterious``.
_PRED_COLUMNS = ['MutationTaster_pred', 'FATHMM_pred', 'MetaLR_pred']


def _load_gene_functions(table_path):
    """Return {upper-cased gene: text} from the first two TSV columns (header skipped)."""
    functions = {}
    with open(table_path, 'r', encoding='utf-8') as handle:
        next(handle, None)  # header line
        for line in handle:
            fields = line.strip().split("\t")
            if len(fields) < 2:
                # Blank or truncated line: nothing to record.
                continue
            functions[fields[0].upper()] = fields[1]
    # '.' is the placeholder used for "no gene"; map it to itself.
    functions['.'] = '.'
    return functions


def _load_drug_mechanisms(table_path):
    """Return {upper-cased drug name: 'disease\\\\mechanism'} from the drug TSV (header skipped)."""
    mechanisms = {}
    with open(table_path, 'r', encoding='utf-8') as handle:
        next(handle, None)  # header line
        for line in handle:
            fields = line.split("\t")
            disease, mechanism = fields[8], fields[11]
            if not (disease or mechanism):
                continue
            text = "\\\\".join([disease, mechanism]).strip()
            # Columns 0 and 1 both hold '|'-separated drug names.
            for drug in fields[0].split('|') + fields[1].split('|'):
                mechanisms[drug.upper()] = text
    return mechanisms


def _acmg_level(clnsig):
    """Map a CLNSIG string to the '1'/'2'/'3' level written to ``ACMG_level``."""
    if re.search("Likely_pathogenic|drug", clnsig, re.I):
        return '2'
    if re.search("pathogenic", clnsig, re.I) and not re.search("Conflicting", clnsig, re.I):
        return '1'
    return '3'


def _drug_category(response, evidence, dot_aware):
    """Classify one evidence row as 'a', 'b', 'c', 'd' (or '.' when ``dot_aware``)."""
    if re.search("敏感", response) and evidence == 'A':
        return 'a'
    if re.search("敏感", response) and evidence == 'C':
        return 'b'
    if re.search("耐药", response):
        return 'd'
    if dot_aware and response == '.':
        return '.'
    return 'c'


def _annotate_group(group, drug_mechanism, dot_aware):
    """Return a copy of ``group`` with Drug_Detail, Drug_Category and AMP_mut_level added."""
    group = group.copy()
    details = []
    categories = []
    for _, row in group.iterrows():
        # De-duplicate drug names while keeping their order, so the generated
        # text is identical from run to run (a set has no stable order).
        names = dict.fromkeys(row['药物中文名'].replace(" + ", ",").split(","))
        details.append(''.join('[[' + drug + ']]' + drug_mechanism[drug.upper()]
                               for drug in names if drug.upper() in drug_mechanism))
        # Rows labelled '非适应症' are classified as evidence level 'C'.
        # NOTE(review): as in the original code this override only affects the
        # category; the '证据等级' column that is emitted keeps its file value.
        evidence = 'C' if row['标签'] == '非适应症' else row['证据等级']
        categories.append(_drug_category(row['Response_Type_C'], evidence, dot_aware))
    group['Drug_Detail'] = details
    group['Drug_Category'] = categories
    group['AMP_mut_level'] = group['证据等级'].replace(['A', 'B', 'C', 'D'], ['I', 'I', 'II', 'II'])
    return group


def _evidence_fields(group, gene_function):
    """Return the ten '|'-joined evidence cells shared by all three sheets."""
    def joined(column):
        return '|'.join(group[column])

    return [joined('证据等级'), joined('AMP_mut_level'), joined('疾病中文名'),
            joined('药物中文名'), joined('Response_Type_C'), joined('Evidence_Source_C'),
            joined('EfficacyEvidence'), joined('Drug_Detail'), gene_function,
            joined('Drug_Category')]


def _build_snvindel_sheet(output_dir, name, genefunction, drug_mechanism):
    """Build the 'snvindel' sheet from the filtered, pos, vus and neg tables."""
    filter_file = "".join([output_dir, '/report/', name, '.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt'])
    pos_file = "".join([output_dir, '/mutation/', name, '.snvindel.pos.dedup.txt'])
    vus_file = "".join([output_dir, '/mutation/', name, '.snvindel.vus.txt'])
    neg_file = "".join([output_dir, '/mutation/', name, '.snvindel.neg.txt'])

    # Filtered variants: drop rows flagged 0 in '可信', add three derived columns.
    if os.path.getsize(filter_file) > 0:
        snv = pd.read_table(filter_file, sep="\t")
        snv = snv[snv['可信'] != 0].copy()
        acmg = snv['CLNSIG'].map(_acmg_level)
        deleterious = (snv[_PRED_COLUMNS] == 'D').sum(axis=1)
        # Compare frequencies numerically: a string maximum would rank
        # '8e-06' above '0.5'. '.' marks a missing frequency and counts as 0.
        freq_high = (snv[_FREQ_COLUMNS].astype(object).replace('.', '0')
                     .apply(pd.to_numeric).max(axis=1))
        # The positions matter: the positional selection below relies on them.
        snv.insert(loc=24, column='ACMG_level', value=acmg)
        snv.insert(loc=25, column='Deleterious', value=deleterious)
        snv.insert(loc=26, column='freq_high', value=freq_high)
        snv_1 = snv.iloc[:, list(range(14)) + [15, 17, 18, 20, 23, 24, 25, 26, 111, 112, 113]]
    else:
        snv_1 = pd.DataFrame(columns=[])

    # Positive (drug-associated) variants: one output row per gene/change group.
    if os.path.getsize(pos_file) > 0:
        pos = pd.read_table(pos_file, sep="\t")
        pos = pos.iloc[:, [7, 10, 18, 23, 24, 25, 29, 30, 31, 32]]
        pos_1 = pd.DataFrame(
            columns=['AAChange.refGene', 'OKBSIG', 'AMP_evidence_level', 'AMP_mut_level', 'Indication', 'Drug',
                     'Response_Type', 'Evidence_Source', 'EfficacyEvidence', 'Drug_Detail', 'Gene_function',
                     'Drug_Category'])
        for key, group in pos.groupby(['Gene.refGene', 'AAChange.refGene', 'fun_change']):
            group = _annotate_group(group, drug_mechanism, dot_aware=False)
            # Genes missing from the reference table get the '.' placeholder.
            gene_function = genefunction.get(key[0].upper(), '.')
            pos_1.loc[len(pos_1)] = [key[1], key[2]] + _evidence_fields(group, gene_function)
    else:
        pos_1 = pd.DataFrame(columns=[])

    # VUS variants: fixed tier 'III'.
    if os.path.getsize(vus_file) > 0:
        vus = pd.read_table(vus_file, sep="\t")
        vus_1 = vus.iloc[:, [9, 17]].copy()
        vus_1.insert(loc=2, column='AMP_mut_level', value='III')
        vus_1 = vus_1.rename(columns={'fun_change': 'OKBSIG'})
    else:
        vus_1 = pd.DataFrame(columns=[])

    # Negative variants: fixed tier, literal kept exactly as in the original.
    if os.path.getsize(neg_file) > 0:
        neg = pd.read_table(neg_file, sep="\t")
        neg_1 = neg.iloc[:, [9, 17]].copy()
        neg_1.insert(loc=2, column='AMP_mut_level', value='IIII')
        neg_1 = neg_1.rename(columns={'fun_change': 'OKBSIG'})
    else:
        neg_1 = pd.DataFrame(columns=[])

    snvindel_sheet = pd.DataFrame(
        columns=['可信', 'Chr', 'Start', 'End', 'Ref', 'Alt', 'AAChange.refGene', 'mutant_frequency', 'total_reads',
                 'mutant_reads', 'strand_bias', 'Otherinfo10', 'Func.refGene', 'Gene.refGene', 'ExonicFunc.refGene',
                 'avsnp150', 'cosmic91', 'CLNDN', 'CLNSIG', 'ACMG_level', 'Deleterious', 'freq_high', 'OKBSIG',
                 'AMP_evidence_level', 'AMP_mut_level', 'Indication', 'Drug', 'Response_Type', 'Evidence_Source',
                 'EfficacyEvidence', 'Drug_Detail', 'Gene_function', 'Drug_Category', 'Otherinfo11', 'Otherinfo12',
                 'Otherinfo13'])
    pos_vus_neg = pd.concat([pos_1, vus_1, neg_1])
    merge_key = 'AAChange.refGene'
    if merge_key in snv_1.columns and merge_key in pos_vus_neg.columns:
        snv_pos_vus_neg = snv_1.merge(pos_vus_neg, how='left', on=merge_key)
    else:
        # One side came from an empty file and has no key column; merging
        # would raise KeyError, so keep the variant rows un-annotated.
        snv_pos_vus_neg = snv_1
    snvindel_sheet = pd.concat([snvindel_sheet, snv_pos_vus_neg])
    snvindel_sheet = snvindel_sheet.replace(np.nan, '.')
    snvindel_sheet.rename(columns={"可信": "Validated"}, inplace=True)
    return snvindel_sheet


def _build_fusion_sheet(output_dir, name, genefunction, drug_mechanism):
    """Build the 'fusion' sheet from the fusion pos and vus tables."""
    fusion_pos_file = "".join([output_dir, '/fusion/', name, '.fusion.pos.dedup.txt'])
    fusion_vus_file = "".join([output_dir, '/fusion/', name, '.fusion.vus.txt'])

    if os.path.getsize(fusion_pos_file) > 0:
        fusion_pos = pd.read_table(fusion_pos_file, sep="\t")
    else:
        fusion_pos = pd.DataFrame(columns=[])

    if os.path.getsize(fusion_vus_file) > 0:
        fusion_vus = pd.read_table(fusion_vus_file, sep="\t")
        fusion_vus.insert(loc=0, column='可信', value=1)
    else:
        fusion_vus = pd.DataFrame(columns=[])
    fusion_pos_vus = pd.concat([fusion_pos, fusion_vus])

    fusion_sheet = pd.DataFrame(
        columns=['Validated', 'CHROM1', 'POS1', 'CHROM2', 'POS2', 'GENE1', 'GENE2', 'FUSION', 'Support_reads(PE:SR)',
                 'Depth', 'FREQ1', 'FREQ2', 'OKBSIG', 'AMP_evidence_level',
                 'AMP_mut_level', 'Indication', 'Drug', 'Response_Type', 'Evidence_Source', 'Efficacy_Evidence',
                 'Drug_Detail', 'Gene_function', 'Drug_Category', 'INFO', 'FORMAT', 'Sample'])

    if not fusion_pos_vus.empty:
        fusion_pos_vus = fusion_pos_vus.replace(np.nan, '.')
        group_keys = ['可信', '#CHROM', 'POS', 'CHROM2', 'POS2', 'GENE1', 'GENE2', 'FUSION', 'FREQ1', 'FREQ2',
                      'fun_change', 'INFO', 'FORMAT', name, 'Gene_Symbol']
        for key, group in fusion_pos_vus.groupby(group_keys):
            group = _annotate_group(group, drug_mechanism, dot_aware=True)
            # key[13] is the per-sample column; its ':'-separated fields 1 and
            # 7 fill the 'Support_reads(PE:SR)' and 'Depth' cells.
            sample_fields = key[13].split(":")
            gene_function = genefunction.get(key[14].upper(), '.')
            fusion_sheet.loc[len(fusion_sheet)] = (
                list(key[0:8]) + [sample_fields[1], sample_fields[7]] + list(key[8:11])
                + _evidence_fields(group, gene_function) + list(key[11:14]))
    return fusion_sheet.replace(np.nan, '.')


def _build_cnv_sheet(output_dir, name, genefunction, drug_mechanism):
    """Build the 'cnv' sheet from the sample's cnvkit pos table."""
    # Derived from the arguments like every other input of this report.
    cnv_pos_file = "".join([output_dir, '/cnvkit/', name, '.cnv.pos.dedup.txt'])
    cnv_sheet = pd.DataFrame(
        columns=['Validated', 'Chromosome', 'Start', 'End', 'Gene', 'Depth', 'Probes', 'Copy_number', 'OKBSIG',
                 'Gene_Symbol', 'AMP_evidence_level', 'AMP_mut_level',
                 'Indication', 'Drug', 'Response_Type', 'Evidence_Source', 'Efficacy_Evidence', 'Drug_Detail',
                 'Gene_Function', 'Drug_Category'])
    if os.path.getsize(cnv_pos_file) > 0:
        cnv_pos = pd.read_table(cnv_pos_file, sep="\t")
        group_keys = ['可信', 'chromosome', 'start', 'end', 'gene', 'depth', 'probes', 'cn', 'fun_change',
                      'Gene_Symbol']
        for key, group in cnv_pos.groupby(group_keys):
            group = _annotate_group(group, drug_mechanism, dot_aware=True)
            gene_function = genefunction.get(key[9].upper(), '.')
            cnv_sheet.loc[len(cnv_sheet)] = list(key[0:10]) + _evidence_fields(group, gene_function)
    return cnv_sheet


def snv_fusion_cnv(output_dir, name):
    """Write ``<output_dir>/report/<name>.check_new.xlsx`` with three sheets.

    The workbook gets the sheets ``snvindel``, ``fusion`` and ``cnv``, built
    from the sample's tables under ``report/``, ``mutation/``, ``fusion/`` and
    ``cnvkit/``. Afterwards ``cnvkit/<name>.cnv.png`` is embedded four rows
    below the last row of the ``cnv`` sheet.

    Args:
        output_dir: root directory of one sample's pipeline output.
        name: sample name used as the file-name prefix of every input.

    Raises:
        OSError: if an expected input file or reference table is missing
            (``os.path.getsize`` / ``open``).
    """
    out_xlsx = "".join([output_dir, '/report/', name, '.check_new.xlsx'])

    genefunction = _load_gene_functions(_GENE_FUNCTION_TABLE)
    drug_mechanism = _load_drug_mechanisms(_TARGET_DRUG_TABLE)

    snvindel_sheet = _build_snvindel_sheet(output_dir, name, genefunction, drug_mechanism)
    fusion_sheet = _build_fusion_sheet(output_dir, name, genefunction, drug_mechanism)
    cnv_sheet = _build_cnv_sheet(output_dir, name, genefunction, drug_mechanism)

    with pd.ExcelWriter(out_xlsx) as writer:
        snvindel_sheet.to_excel(writer, sheet_name="snvindel", index=False)
        fusion_sheet.to_excel(writer, sheet_name="fusion", index=False)
        cnv_sheet.to_excel(writer, sheet_name="cnv", index=False)

    # Add the cnvkit plot (cnvkit/*.cnv.png) below the cnv table.
    wb = openpyxl.load_workbook(filename=out_xlsx)
    ws = wb['cnv']
    anchor = 'C' + str(ws.max_row + 4)
    cnv_pic = "".join([output_dir, '/cnvkit/', name, '.cnv.png'])
    ws.add_image(Image(cnv_pic), anchor)
    wb.save(out_xlsx)
|
||||||
|
|
||||||
|
|
||||||
|
class PostProcess:
    """Collect additional result tables and append them as sheets to a workbook.

    Each loader method looks for one result file below ``self.path`` (first
    glob match wins) and returns its content as a list of row dictionaries;
    ``collect`` writes every list to its own sheet of ``self.outpath``.
    """

    # Population-frequency columns whose row-wise maximum becomes ``freq_high``.
    _FREQ_COLUMNS = ['1000g2015aug_all', '1000g2015aug_eas', 'esp6500siv2_all',
                     'ExAC_nontcga_ALL', 'ExAC_nontcga_EAS', 'gnomAD_genome_ALL',
                     'gnomAD_genome_EAS']
    # Predictor columns; the number of 'D' calls becomes ``Deleterious``.
    _PRED_COLUMNS = ['MutationTaster_pred', 'FATHMM_pred', 'MetaLR_pred']

    def __init__(self, path, outpath):
        """Store the result directory and workbook path and load the column config.

        Args:
            path: root directory that holds the result sub-directories.
            outpath: existing workbook to which ``collect`` appends sheets.
        """
        self.path = path
        self.outpath = outpath
        # Attribute name kept as-is (including its spelling) for compatibility.
        self.neeecol = self.need_col()

    def need_col(self):
        """Read ``columns.csv`` (next to this module) as {header: [non-empty cells]}."""
        path = os.path.join(os.path.dirname(__file__), 'columns.csv')
        cols = pd.read_csv(path).fillna('')
        return {sheet: [x for x in values if x]
                for sheet, values in cols.to_dict('list').items()}

    def _first_match(self, *parts):
        """Return the first file matching the glob built from ``parts`` under ``self.path``, or None."""
        matches = glob.glob(os.path.join(self.path, *parts))
        return matches[0] if matches else None

    def _select_records(self, df, sheet_key):
        """Return the configured columns of ``df`` as records.

        Raises:
            UserWarning: if a configured column is missing from ``df``.
        """
        try:
            # The selection itself raises KeyError for unknown columns, so it
            # has to sit inside the try block for the handler to take effect.
            return df[self.neeecol.get(sheet_key, [])].to_dict('records')
        except KeyError as e:
            raise UserWarning('表头设置和配置文件不对应', e) from e

    def _annotated_records(self, file_path, sheet_key):
        """Load an annotation table, add ``freq_high``/``Deleterious`` and select columns."""
        df = pd.read_csv(file_path, sep='\t').fillna('.')
        # '.' marks a missing frequency and counts as 0. Values are parsed
        # with pd.to_numeric rather than eval(), which would execute whatever
        # text the file contains.
        freqs = (df[self._FREQ_COLUMNS].astype(object).replace('.', 0)
                 .apply(pd.to_numeric))
        df['freq_high'] = freqs.max(axis=1)
        df['Deleterious'] = (df[self._PRED_COLUMNS] == 'D').sum(axis=1)
        return self._select_records(df, sheet_key)

    def msi(self):
        """Return a one-element list with the MSI summary from ``MSI/*.msi``.

        The dictionary is empty when no file (or no data row) is found.
        Otherwise it holds ``msi_count``, ``msi_value``, ``msi_result`` and
        ``msi_predict``; a ``%`` value of at least 0.3 is reported as 'MSI-H',
        anything lower as 'MSS'.
        """
        msi_res = dict()
        msi_file = self._first_match('MSI', '*.msi')
        if msi_file:
            records = pd.read_csv(msi_file, sep='\t').to_dict('records')
            if records:  # a header-only file has no row to report
                res = records[0]
                msi_res['msi_count'] = res['Total_Number_of_Sites']
                msi_res['msi_value'] = res['%']
                if msi_res['msi_value'] >= 0.3:
                    msi_res['msi_result'] = 'MSI-H'
                    msi_res['msi_predict'] = '对免疫检查点抑制剂可能敏感'
                else:
                    msi_res['msi_result'] = 'MSS'
                    msi_res['msi_predict'] = '对免疫检查点抑制剂可能不敏感'
        return [msi_res]

    def chemo(self):
        """Return the chemotherapy table ``chemo/*chemo.res.txt`` as records ('.' for NaN)."""
        chemo_file = self._first_match('chemo', '*chemo.res.txt')
        if not chemo_file:
            return []
        return pd.read_csv(chemo_file, sep='\t').fillna('.').to_dict('records')

    def heredity(self):
        """Return germline (hereditary) variants from ``mutation/*Germline*filtered.txt``.

        Only the columns configured under 'HCS' are returned.

        Raises:
            UserWarning: if a configured column is missing from the table.
        """
        heredi_file = self._first_match('mutation', '*Germline*filtered.txt')
        if not heredi_file:
            return []
        return self._annotated_records(heredi_file, 'HCS')

    def MMR(self):
        """Return MMR variants from ``MMR/*mmr.pre.txt``.

        Raises:
            UserWarning: if a configured column is missing from the table.
        """
        mmr_file = self._first_match('MMR', '*mmr.pre.txt')
        if not mmr_file:
            return []
        # NOTE(review): the 'HCS' column list is reused here, as in the
        # original code; confirm that no dedicated 'MMR' list is intended.
        return self._annotated_records(mmr_file, 'HCS')

    def hotspot(self):
        """Return the hotspot annotation table as records ([] when absent)."""
        hotspot_file = self._first_match(
            'mutation', 'hotspot', '*hotspot.snp.indel.filter.anno.hg19_multianno.txt')
        return self.txt_2_excel(hotspot_file) if hotspot_file else []

    def splicing(self):
        """Return ``mutation/*.target.splicing.txt`` as records ([] when absent)."""
        splicing_file = self._first_match('mutation', '*.target.splicing.txt')
        return self.txt_2_excel(splicing_file) if splicing_file else []

    def indication(self):
        """Return ``mutation/*indication.txt`` as records ([] when absent)."""
        indication_file = self._first_match('mutation', '*indication.txt')
        return self.txt_2_excel(indication_file) if indication_file else []

    def longindel(self):
        """Return ``fusion/*.longindel.pos.txt`` as records ([] when absent)."""
        longindel_file = self._first_match('fusion', '*.longindel.pos.txt')
        return self.txt_2_excel(longindel_file) if longindel_file else []

    def cms(self):
        """Return the sample information stored under 'data' in ``qc/*_post.json``.

        Only the columns configured under 'sample_info' are returned.

        Raises:
            UserWarning: if a configured column is missing from the data.
        """
        cms_file = self._first_match('qc', '*_post.json')
        if not cms_file:
            return []
        with open(cms_file, 'r', encoding='utf-8') as file_read:
            cms_info = json.load(file_read)['data']
        return self._select_records(pd.DataFrame(cms_info), 'sample_info')

    def qc(self):
        """Return the QC table as a single record keyed by the first column.

        NOTE(review): this reads ``qc/*_post.json`` -- the same pattern cms()
        loads as JSON -- as a header-less TSV. That looks like a copy/paste
        slip; confirm which file actually holds the QC table.
        """
        qc_file = self._first_match('qc', '*_post.json')
        if not qc_file:
            return []
        df = pd.read_csv(qc_file, sep='\t', header=None)
        # Turn the two-column "name<TAB>value" layout into one wide row.
        return df.set_index(0).T.to_dict('records')

    # TODO: a vectorised aggregation of the snvindel pos table was drafted
    # here as a commented-out snv() method but never finished.

    def txt_2_excel(self, path):
        """Read a TSV file into records; an empty file yields []."""
        try:
            df = pd.read_csv(path, sep='\t')
        except pd.errors.EmptyDataError:
            return []
        return df.to_dict('records')

    def collect(self):
        """Append one sheet per result category to the workbook at ``self.outpath``."""
        # Gather everything first so a failing loader cannot leave the
        # workbook writer open.
        sheets = {
            'MSI': self.msi(),
            'chemo': self.chemo(),
            'HCS': self.heredity(),
            'sample_info': self.cms(),
            'MMR': self.MMR(),
            'hotspot': self.hotspot(),
            'MET': self.splicing(),
            'indication': self.indication(),
            'longindel': self.longindel(),
            'qc': self.qc()
        }
        # Leaving the with-block saves and closes the workbook.
        with pd.ExcelWriter(self.outpath, mode='a', engine='openpyxl') as writer:
            for sheet_name, records in sheets.items():
                pd.DataFrame(records).to_excel(writer, sheet_name=sheet_name, index=False)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    result_dir, sample_name = sys.argv[1], sys.argv[2]
    # First write the snvindel / fusion / cnv sheets ...
    snv_fusion_cnv(result_dir, sample_name)
    # TODO: logging has not been added and the paths are not configurable yet.
    workbook_path = "".join([result_dir, '/report/', sample_name, '.check_new.xlsx'])
    # ... then append the remaining result sheets to the same workbook.
    PostProcess(result_dir, workbook_path).collect()
|
||||||
|
|
@ -0,0 +1,864 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
|
||||||
|
##https://www.pianshen.com/article/5314917437/
|
||||||
|
##https://zhuanlan.zhihu.com/p/366902690
|
||||||
|
##https://itpcb.com/a/277599
|
||||||
|
import docxtpl
|
||||||
|
from docx.shared import Mm
|
||||||
|
from docxtpl import DocxTemplate,RichText
|
||||||
|
import pandas as pd
|
||||||
|
from pandas import DataFrame
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
|
||||||
|
if len(sys.argv) != 5:
|
||||||
|
print(" ".join(['usage:python',sys.argv[0],'output_dir','tumor','sample_type(t for tissue,c for cfdna)']),'projcet')
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
|
||||||
|
output_dir=sys.argv[1]
|
||||||
|
name=sys.argv[2]
|
||||||
|
Sample_type=sys.argv[3]
|
||||||
|
projcet=sys.argv[4]
|
||||||
|
snv_base="".join([name,'.snvindel.pos.dedup.txt'])
|
||||||
|
snv_file='/'.join([output_dir,'mutation',snv_base])
|
||||||
|
snv_base_vus="".join([name,'.snvindel.vus.txt'])
|
||||||
|
snv_file_vus='/'.join([output_dir,'mutation',snv_base_vus])
|
||||||
|
fusion_base="".join([name,'.fusion.pos.dedup.txt'])
|
||||||
|
fusion_file='/'.join([output_dir,'fusion',fusion_base])
|
||||||
|
cnv_base="".join([name,'.cnv.pos.dedup.txt'])
|
||||||
|
cnv_file='/'.join([output_dir,'cnvkit',cnv_base])
|
||||||
|
qc_base=''.join([name,'_qc.txt'])
|
||||||
|
qc_file='/'.join([output_dir,'qc',qc_base])
|
||||||
|
report_base="".join([name,'_report.docx'])
|
||||||
|
report_file='/'.join([output_dir,'report',report_base])
|
||||||
|
indication_file="".join([output_dir,'/mutation/','indication.txt'])
|
||||||
|
|
||||||
|
context = {'list1':[],'list2':{},'list3':{},'clingene1':[],'clingene2':[],'nonclingenes':[],'genefunc':{},
|
||||||
|
'indication':[],'mmr':[],'chemo':[]}
|
||||||
|
|
||||||
|
|
||||||
|
# Gene function lookup: upper-cased gene symbol (column 1) -> description (column 2).
genefunction = {}
# The context manager closes the table; the original never closed the handle.
with open("/dataseq/jmdna/codes/reportbase/gene_function.txt", 'r', encoding='utf-8') as gf_table:
    gf = gf_table.readlines()
for line in gf[1:]:  # gf[0] is the header row
    if not line.strip():
        # A blank (e.g. trailing) line used to raise IndexError.
        continue
    fields = line.strip().split("\t")
    gene = fields[0]
    func = fields[1]
    genefunction[gene.upper()] = func
|
||||||
|
|
||||||
|
## Drug lookups: upper-cased drug name -> disease text (column 9) / mechanism text (column 12).
drug_disease = {}
drug_mechanism = {}
# The context manager closes the table; the original never closed the handle.
with open("/dataseq/jmdna/codes/reportbase/target_drug.txt", 'r', encoding='utf-8') as drug_table:
    drug_fh = drug_table.readlines()
for line in drug_fh[1:]:  # drug_fh[0] is the header row
    # Split once per line. The line is deliberately NOT stripped, so empty
    # trailing columns keep their positions (as in the original).
    fields = line.split("\t")
    disease = fields[8]
    mechanism = fields[11]
    drugs = fields[0].split('|')  # column 1 may list several names separated by '|'
    if disease or mechanism:
        for drug in drugs:
            drug_disease[drug.upper()] = disease
            drug_mechanism[drug.upper()] = mechanism


# Report buckets: index 0 collects possibly-sensitive drugs, index 1 possibly-resistant drugs.
sensitive_resistant_drug = [{'type': '可能敏感药物', 'drug': []}, {'type': '可能耐药药物', 'drug': []}]
|
||||||
|
|
||||||
|
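## Somatic SNV/indel processing (NOTE(review): the fusion and CNV sections further down repeat this drug-tier / clingene logic; keep the three in sync)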
|
||||||
|
snv_size = os.path.getsize(snv_file)
|
||||||
|
if snv_size>0:
|
||||||
|
snv=pd.read_table(snv_file,sep="\t")
|
||||||
|
cols=[index for index,row in snv[snv['可信']==0].iterrows()]
|
||||||
|
snv.drop(cols,inplace=True)
|
||||||
|
genes=snv['Gene.refGene'].drop_duplicates()
|
||||||
|
if len(genes):
|
||||||
|
for gene in genes:
|
||||||
|
rt={}
|
||||||
|
rt[gene]=[]
|
||||||
|
muts=snv['AAChange.refGene'][snv['Gene.refGene']==gene].drop_duplicates()
|
||||||
|
for mut in muts:
|
||||||
|
info2={mut:[]}
|
||||||
|
for index,row in snv[snv['AAChange.refGene']==mut].iterrows():
|
||||||
|
info3={}
|
||||||
|
info3['drug']=row['药物中文名']
|
||||||
|
info3['effect']=row['Response_Type_C']
|
||||||
|
info3['tumor']=row['疾病中文名']
|
||||||
|
info3['evidence']=row['Evidence_Source_C']
|
||||||
|
info3['sig']=row['EfficacyEvidence']
|
||||||
|
info2[mut].append(info3)
|
||||||
|
##sensitive_resistant_drug
|
||||||
|
drugs=row['Drug'].replace(" + ",",")
|
||||||
|
drugs=drugs.split(",")
|
||||||
|
drugs_chinese=row['药物中文名'].replace(" + ",",")
|
||||||
|
drugs_chinese=drugs_chinese.split(",")
|
||||||
|
bool=0
|
||||||
|
for drug in drugs:
|
||||||
|
if drug.upper() in drug_disease.keys():
|
||||||
|
if re.search(r'敏感',row['Response_Type_C']):
|
||||||
|
sensitive_drug={}
|
||||||
|
sensitive_drug['name']=drugs_chinese[bool]
|
||||||
|
sensitive_drug['mechanism']=("\n".join([drug_disease[drug.upper()],drug_mechanism[drug.upper()]])).strip()
|
||||||
|
if sensitive_drug not in sensitive_resistant_drug[0]['drug']:
|
||||||
|
sensitive_resistant_drug[0]['drug'].append(sensitive_drug)
|
||||||
|
elif re.search(r'耐药',row['Response_Type_C']):
|
||||||
|
resistant_drug={}
|
||||||
|
resistant_drug['name']=drugs_chinese[bool]
|
||||||
|
resistant_drug['mechanism']=("\n".join([drug_disease[drug.upper()],drug_mechanism[drug.upper()]])).strip()
|
||||||
|
if resistant_drug not in sensitive_resistant_drug[1]['drug']:
|
||||||
|
sensitive_resistant_drug[1]['drug'].append(resistant_drug)
|
||||||
|
bool+=1
|
||||||
|
context['list2'][mut]=info2[mut]
|
||||||
|
info={}
|
||||||
|
info['gene']=snv['Gene.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
|
||||||
|
m=re.search(r'(p\..*)$',mut)
|
||||||
|
if m:
|
||||||
|
info['p']=m.group(1)
|
||||||
|
else:
|
||||||
|
m=re.search(r'(c\..*)$',mut)
|
||||||
|
info['p']=m.group(1)
|
||||||
|
info['freq']=snv['Freq'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
|
||||||
|
A=[]
|
||||||
|
B=[]
|
||||||
|
C=[]
|
||||||
|
D=[]
|
||||||
|
for index,row in snv[(snv['AAChange.refGene']==mut) & (snv['标签']=='适应症') & snv['Response_Type_C'].str.contains("敏感")].iterrows():
|
||||||
|
ds=row['药物中文名'].split(",")
|
||||||
|
evidence='A'
|
||||||
|
ds_new=[str(x) + '【' + evidence + ' 级】'for x in ds]
|
||||||
|
A.extend(ds_new)
|
||||||
|
for index,row in snv[(snv['AAChange.refGene']==mut) & (snv['标签']=='非适应症') & snv['Response_Type_C'].str.contains("敏感")].iterrows():
|
||||||
|
ds=row['药物中文名'].split(",")
|
||||||
|
evidence='C'
|
||||||
|
ds_new=[str(x) + '【' + evidence + ' 级】'for x in ds]
|
||||||
|
B.extend(ds_new)
|
||||||
|
for index,row in snv[(snv['AAChange.refGene']==mut) & (snv['标签']=='.') & snv['Response_Type_C'].str.contains("敏感")].iterrows():
|
||||||
|
ds=row['药物中文名'].split(",")
|
||||||
|
evidence=row['证据等级']
|
||||||
|
ds_new=[str(x) + '【' + evidence + ' 级】'for x in ds]
|
||||||
|
C.extend(ds_new)
|
||||||
|
for index,row in snv[(snv['AAChange.refGene']==mut) & snv['Response_Type_C'].str.contains("耐药")].iterrows():
|
||||||
|
evidence=''
|
||||||
|
if row['标签']=='非适应症':
|
||||||
|
evidence='C'
|
||||||
|
else:
|
||||||
|
evidence=row['证据等级']
|
||||||
|
ds=row['药物中文名'].split(",")
|
||||||
|
ds_new=[str(x) + '【' + evidence + ' 级】'for x in ds]
|
||||||
|
D.extend(ds_new)
|
||||||
|
A=sorted(set(A),key=A.index)
|
||||||
|
B=sorted(set(B),key=B.index)
|
||||||
|
C=sorted(set(C),key=C.index)
|
||||||
|
D=sorted(set(D),key=D.index)
|
||||||
|
info['drug_A']="\n".join(A)
|
||||||
|
info['drug_B']="\n".join(B)
|
||||||
|
info['drug_C']="\n".join(C)
|
||||||
|
info['drug_D']="\n".join(D)
|
||||||
|
if not info['drug_A']:
|
||||||
|
info['drug_A']='/'
|
||||||
|
if not info['drug_B']:
|
||||||
|
info['drug_B']='/'
|
||||||
|
if not info['drug_C']:
|
||||||
|
info['drug_C']='/'
|
||||||
|
if not info['drug_D']:
|
||||||
|
info['drug_D']='/'
|
||||||
|
context['list1'].append(info)
|
||||||
|
rt[gene].append("".join([info['p'],'(',info['freq'],')']))
|
||||||
|
##genefunc
|
||||||
|
context['genefunc'][mut]=genefunction[gene.upper()]
|
||||||
|
##clingenes
|
||||||
|
clingene1={}
|
||||||
|
clingene2={}
|
||||||
|
for index,row in snv[(snv['AAChange.refGene']==mut)].iterrows():
|
||||||
|
if row['标签']=='适应症' or row['证据等级']=='B':
|
||||||
|
clingene1['freq']=snv['Freq'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
|
||||||
|
if len((snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")) == 5:
|
||||||
|
(clingene1['gene'],clingene1['transcript'],clingene1['exon'],clingene1['nacid'],clingene1['aacid'])=(snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")
|
||||||
|
else:
|
||||||
|
(clingene1['gene'],clingene1['transcript'],clingene1['exon'],clingene1['nacid'])=(snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")
|
||||||
|
clingene1['aacid'] = '/'
|
||||||
|
clingene1['muttype']=snv['ExonicFunc.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
|
||||||
|
if re.match("nonsynonymous SNV",clingene1['muttype']):
|
||||||
|
clingene1['muttype']='错义突变'
|
||||||
|
elif re.search("^frameshift",clingene1['muttype']):
|
||||||
|
clingene1['muttype']='移码突变'
|
||||||
|
elif re.search("^nonframeshift",clingene1['muttype']):
|
||||||
|
clingene1['muttype']='非移码突变'
|
||||||
|
elif re.match("stopgain",clingene1['muttype']):
|
||||||
|
clingene1['muttype']='提前终止'
|
||||||
|
else:
|
||||||
|
clingene1['muttype']='/'
|
||||||
|
context['clingene1'].append(clingene1)
|
||||||
|
else:
|
||||||
|
clingene2['freq']=snv['Freq'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
|
||||||
|
if len((snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")) == 5:
|
||||||
|
(clingene2['gene'],clingene2['transcript'],clingene2['exon'],clingene2['nacid'],clingene2['aacid'])=(snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")
|
||||||
|
else:
|
||||||
|
(clingene2['gene'],clingene2['transcript'],clingene2['exon'],clingene2['nacid'])=(snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")
|
||||||
|
clingene2['aacid'] = '/'
|
||||||
|
clingene2['muttype']=snv['ExonicFunc.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
|
||||||
|
if re.match("nonsynonymous SNV",clingene2['muttype']):
|
||||||
|
clingene2['muttype']='错义突变'
|
||||||
|
elif re.search("^frameshift",clingene2['muttype']):
|
||||||
|
clingene2['muttype']='移码突变'
|
||||||
|
elif re.search("^nonframeshift",clingene2['muttype']):
|
||||||
|
clingene2['muttype']='非移码突变'
|
||||||
|
elif re.match("stopgain",clingene2['muttype']):
|
||||||
|
clingene2['muttype']='提前终止'
|
||||||
|
else:
|
||||||
|
clingene2['muttype']='/'
|
||||||
|
context['clingene2'].append(clingene2)
|
||||||
|
break
|
||||||
|
context['list3'][gene]="\n".join(rt[gene])
|
||||||
|
else:
|
||||||
|
snv_size=0
|
||||||
|
|
||||||
|
|
||||||
|
## VUS table (target and non-target VUS): keep rows whose Freq is at least 2%
snv_size_vus = os.path.getsize(snv_file_vus)
if snv_size_vus > 0:
    snv_vus = pd.read_table(snv_file_vus, sep="\t")
    # (pattern anchored at the start of ExonicFunc.refGene, report label), tried in order.
    vus_muttype_patterns = (
        ("nonsynonymous SNV", '错义突变'),
        ("^frameshift", '移码突变'),
        ("^nonframeshift", '非移码突变'),
        ("stopgain", '提前终止'),
    )
    for index, row in snv_vus.iterrows():
        if not float(row['Freq'].replace('%', '')) >= 2:
            continue
        nonclingene = {}
        # AAChange.refGene is gene:transcript:exon:nacid[:aacid]
        aachange_parts = row['AAChange.refGene'].split(":")
        if len(aachange_parts) == 5:
            (nonclingene['gene'], nonclingene['transcript'], nonclingene['exon'],
             nonclingene['nacid'], nonclingene['aacid']) = aachange_parts
        else:
            (nonclingene['gene'], nonclingene['transcript'], nonclingene['exon'],
             nonclingene['nacid']) = aachange_parts
            nonclingene['aacid'] = '/'
        nonclingene['freq'] = row['Freq']
        raw_muttype = row['ExonicFunc.refGene']
        for pattern, label in vus_muttype_patterns:
            if re.match(pattern, raw_muttype):
                nonclingene['muttype'] = label
                break
        else:
            # Any other functional class is shown as '/'.
            nonclingene['muttype'] = '/'
        context['nonclingenes'].append(nonclingene)
|
||||||
|
|
||||||
|
|
||||||
|
## Fusion and CNV processing
# Both sections used to carry their own copy of the same drug-tier, drug-response
# and clingene logic; the shared parts now live in the helpers below.
# NOTE(review): source indentation was lost; the nesting here (in particular the
# per-drug index advancing once per drug) is reconstructed from data flow.


def _unique_in_order(items):
    """Return items without duplicates, keeping first-seen order."""
    return list(dict.fromkeys(items))


def _record_target_drugs(row):
    """File each known drug of one annotation row under the sensitive or resistant bucket."""
    drugs = row['Drug'].replace(" + ", ",").split(",")
    drugs_chinese = row['药物中文名'].replace(" + ", ",").split(",")
    for position, drug in enumerate(drugs):
        drug_key = drug.upper()
        if drug_key not in drug_disease:
            continue
        if re.search(r'敏感', row['Response_Type_C']):
            bucket = sensitive_resistant_drug[0]['drug']
        elif re.search(r'耐药', row['Response_Type_C']):
            bucket = sensitive_resistant_drug[1]['drug']
        else:
            continue
        entry = {
            # Chinese names are matched to English names by position.
            'name': drugs_chinese[position],
            'mechanism': ("\n".join([drug_disease[drug_key], drug_mechanism[drug_key]])).strip(),
        }
        if entry not in bucket:
            bucket.append(entry)


def _drug_effect_rows(rows):
    """Build the per-row drug/effect table and update the sensitive/resistant buckets."""
    effects = []
    for _, row in rows.iterrows():
        effects.append({
            'drug': row['药物中文名'],
            'effect': row['Response_Type_C'],
            'tumor': row['疾病中文名'],
            'evidence': row['Evidence_Source_C'],
            'sig': row['EfficacyEvidence'],
        })
        _record_target_drugs(row)
    return effects


def _tiered_drug_columns(rows):
    """Return drug_A..drug_D strings (newline-joined, '/' when empty) for one alteration.

    drug_A: sensitive, label 适应症 (grade A); drug_B: sensitive, label 非适应症
    (grade C); drug_C: sensitive, label '.' (grade from 证据等级); drug_D:
    resistant (grade C for 非适应症, otherwise 证据等级).
    """
    tiers = {'drug_A': [], 'drug_B': [], 'drug_C': [], 'drug_D': []}
    for _, row in rows.iterrows():
        response = row['Response_Type_C']
        if not isinstance(response, str):
            # Missing response text matches neither 敏感 nor 耐药.
            continue
        label = row['标签']
        if '敏感' in response:
            if label == '适应症':
                tier, evidence = 'drug_A', 'A'
            elif label == '非适应症':
                tier, evidence = 'drug_B', 'C'
            elif label == '.':
                tier, evidence = 'drug_C', row['证据等级']
            else:
                tier = None
            if tier is not None:
                tiers[tier].extend(str(x) + '【' + evidence + ' 级】' for x in row['药物中文名'].split(","))
        if '耐药' in response:
            evidence = 'C' if label == '非适应症' else row['证据等级']
            tiers['drug_D'].extend(str(x) + '【' + evidence + ' 级】' for x in row['药物中文名'].split(","))
    # dict.fromkeys dedupes in first-seen order in one pass
    # (was sorted(set(x), key=x.index), which is quadratic).
    return {key: ("\n".join(_unique_in_order(values)) or '/') for key, values in tiers.items()}


def _classify_clinical(rows, entry):
    """Append a copy of entry to clingene1 or clingene2 once per annotation row.

    Rows labelled 适应症 or with evidence grade B go to clingene1, all others to
    clingene2. Duplicates are removed later by the de-duplication section.
    """
    for _, row in rows.iterrows():
        if row['标签'] == '适应症' or row['证据等级'] == 'B':
            context['clingene1'].append(dict(entry))
        else:
            context['clingene2'].append(dict(entry))


## Fusion processing
fusion_size = os.path.getsize(fusion_file)
if fusion_size > 0:
    fusion = pd.read_table(fusion_file, sep="\t")
    # Drop rows flagged 可信 == 0.
    fusion.drop(fusion.index[fusion['可信'] == 0], inplace=True)
    genes = fusion['Gene_Symbol'].drop_duplicates()
    if len(genes):
        for gene in genes:
            rt = {gene: []}
            fusions = fusion['FUSION'][fusion['Gene_Symbol'] == gene].drop_duplicates()
            for mut in fusions:
                rows = fusion[fusion['FUSION'] == mut]
                context['list2'][mut] = _drug_effect_rows(rows)
                info = {
                    'gene': mut,
                    'p': '融合',
                    'freq': "".join([str(rows['FREQ1'].iloc[0]), '%']),
                }
                info.update(_tiered_drug_columns(rows))
                context['list1'].append(info)
                rt[gene].append("".join([info['gene'], '(', info['freq'], ')']))
                ##genefunc
                context['genefunc'][mut] = genefunction[gene.upper()]
                ##clingenes
                _classify_clinical(rows, {
                    'gene': info['gene'], 'freq': info['freq'],
                    'transcript': "/", 'exon': "/", 'nacid': "/", 'aacid': "/",
                    'muttype': '融合',
                })
            context['list3'][gene] = "\n".join(rt[gene])
    else:
        fusion_size = 0


## CNV processing
cnv_size = os.path.getsize(cnv_file)
if cnv_size > 0:
    cnv = pd.read_table(cnv_file, sep="\t")
    # Drop rows flagged 可信 == 0.
    cnv.drop(cnv.index[cnv['可信'] == 0], inplace=True)
    genes = cnv['gene'].drop_duplicates()
    if len(genes):
        for gene in genes:
            rt = {gene: []}
            cnvs = cnv['Gene_Symbol'][cnv['gene'] == gene].drop_duplicates()
            for mut in cnvs:
                rows = cnv[cnv['Gene_Symbol'] == mut]
                copy_number = rows['cn'].iloc[0]
                # More than two copies is reported as amplification, otherwise as loss.
                info = {'gene': mut, 'p': '扩增' if copy_number > 2 else '缺失'}
                info.update(_tiered_drug_columns(rows))
                info['freq'] = " ".join([str(copy_number), '拷贝'])
                context['list1'].append(info)
                effects = _drug_effect_rows(rows)
                rt[gene].append("".join([info['p'], '(', info['freq'], ')']))
                # CNV entries are keyed by "<symbol> <扩增|缺失>".
                alteration = " ".join([mut, info['p']])
                context['list2'][alteration] = effects
                ##genefunc
                context['genefunc'][alteration] = genefunction[mut.upper()]
                ##clingenes
                _classify_clinical(rows, {
                    'gene': info['gene'], 'freq': info['freq'],
                    'transcript': "/", 'exon': "/", 'nacid': "/", 'aacid': "/",
                    'muttype': info['p'],
                })
            context['list3'][gene] = "\n".join(rt[gene])
    else:
        cnv_size = 0
|
||||||
|
|
||||||
|
|
||||||
|
## MSI (only evaluated for tissue samples, Sample_type == 't')
if Sample_type == 't':
    msi_file = ''.join([output_dir, '/MSI/', name, '.msi'])
    # The context manager closes the file; the original never closed the handle.
    with open(msi_file, 'r') as msi_fh:
        # The second line holds the values; the first line is skipped as a header.
        msi = msi_fh.readlines()[1].split("\t")
    context['msi_count'] = msi[0]
    # Column 3 is divided by 100 and rounded to two decimals.
    context['msi_value'] = round(float(msi[2].strip()) / 100, 2)
    if context['msi_value'] >= 0.3:
        context['msi_result'] = 'MSI-H'
        context['msi_predict'] = '对免疫检查点抑制剂可能敏感'
    else:
        context['msi_result'] = 'MSS'
        context['msi_predict'] = '对免疫检查点抑制剂可能不敏感'
|
||||||
|
|
||||||
|
|
||||||
|
## MMR processing: load the per-sample MMR variant table
mmr_file = ''.join([output_dir, '/MMR/', name, "_mmr.txt"])
mmr_size = os.path.getsize(mmr_file)
mmr_result = 0            # number of rows whose significance is not 意义未明突变
mmr_result_summary = []   # "<gene> <aacid>" for each of those rows
if mmr_size > 0:
    # The context manager closes the file; the original never closed the handle.
    with open(mmr_file, 'r', encoding='utf-8') as mmr_table:
        mmr_fh = mmr_table.readlines()
    for line in mmr_fh[1:]:  # mmr_fh[0] is the header row
        # Split once per row instead of once per column.
        fields = line.strip().split("\t")
        mmr = {
            'gene': fields[0],
            'transcript': fields[1],
            'nacid': fields[2],
            'aacid': fields[3],
            'muttype': fields[5],
            'freq': fields[4],
            'sig': fields[6],
        }
        if mmr['sig'] != '意义未明突变':
            mmr_result += 1
            mmr_result_summary.append(mmr['gene'] + ' ' + mmr['aacid'])
        # NOTE(review): source indentation was lost; every row is assumed to be
        # listed in the table regardless of significance - confirm.
        context['mmr'].append(mmr)
|
||||||
|
|
||||||
|
# Summarise the MMR findings for the report.
context['mmr_result'] = mmr_result
mmr_result_summary = ' | '.join(mmr_result_summary)
if mmr_result_summary:
    # Fixed: the original read the undefined name `mrr_result_summary` here,
    # raising NameError whenever a significant MMR variant was present.
    context['mmr_result_summary'] = mmr_result_summary
    context['mmr_predict'] = "对免疫检查点抑制剂可能敏感"
else:
    context['mmr_result_summary'] = "未检测到相关基因突变"
    context['mmr_predict'] = "对免疫检查点抑制剂可能不敏感"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Chemotherapy drugs: two-column summary table plus per-category drug lists
chemo_file = ''.join([output_dir, '/chemo/', name, ".drug.res.txt"])
# The context manager closes the file; the original never closed the handle.
with open(chemo_file, 'r') as chemo_fh:
    chemos = chemo_fh.readlines()[1:]  # skip the header row
chemo_result = 0      # number of drugs whose category is 推荐
recommend_drug = []
normal_drug = []
restrict_drug = []
# Category in column 4 -> list that collects the drug name.
chemo_buckets = {'推荐': recommend_drug, '常规': normal_drug, '谨慎': restrict_drug}
# Each template row shows two drugs side by side (keys ...1 and ...2), so the
# input is consumed in pairs; an odd final drug fills only the ...1 keys.
for pair_start in range(0, len(chemos), 2):
    chemo = {}
    for slot, position in (('1', pair_start), ('2', pair_start + 1)):
        if position >= len(chemos):
            break
        lines = chemos[position].strip().split("\t")
        chemo['bool' + slot] = position + 1  # 1-based running number
        chemo['name' + slot] = lines[0]
        chemo['result' + slot] = lines[4]
        if lines[3] == '推荐':
            chemo_result += 1
        if lines[3] in chemo_buckets:
            chemo_buckets[lines[3]].append(lines[0])
    context['chemo'].append(chemo)
context['chemo_result'] = chemo_result
|
||||||
|
# Per-drug genotype details for the chemotherapy appendix.
chemo_detail_file = ''.join([output_dir, '/chemo/', name, ".drug.infos.txt"])
chemo_data = pd.read_table(chemo_detail_file, sep="\t")
chemo_drugs = chemo_data['药物'].drop_duplicates()

# Comma-separated drug names per usage category.
context['recommend_drug'] = ','.join(recommend_drug)
context['normal_drug'] = ','.join(normal_drug)
context['restrict_drug'] = ','.join(restrict_drug)

chemo_detail = []
for drug in chemo_drugs:
    drug_rows = chemo_data[chemo_data['药物'] == drug]
    site_infos = [
        {
            'gene': row['检测基因'],
            'site': row['检测位点'],
            'gt': row['基因型'],
            'level': row['证据等级'],
            'sig': row['用药提示'],
        }
        for _, row in drug_rows.iterrows()
    ]
    chemo_detail.append({'drug': drug, 'info': site_infos})

context['chemo_detail'] = chemo_detail
context['sensitive_resistant_drug'] = sensitive_resistant_drug
|
||||||
|
|
||||||
|
## Chemotherapy combination regimens, grouped by cancer type (column 癌种)
chemo_comb_file = ''.join([output_dir, '/chemo/', name, ".chemo.comb.txt"])
chemo = pd.read_table(chemo_comb_file, sep="\t")

chemo_comb = []
# Loop names no longer shadow the builtins `type` and `bool` at module level.
for cancer_type in chemo['癌种'].drop_duplicates():
    regimens = []
    for _, row in chemo[chemo['癌种'] == cancer_type].iterrows():
        regimens.append({
            'name': row['用药方案'],
            'abbr': row['方案缩写'],
            'sig': row['临床提示'],
        })
    chemo_comb.append({'type': cancer_type, 'drug': regimens})
context['chemo_comb'] = chemo_comb
|
||||||
|
|
||||||
|
## Hereditary cancer (only when the .hereditary.pre.txt marker file exists)
if os.path.exists(''.join([output_dir, '/hereditary/', name, '.hereditary.pre.txt'])):
    context['hereditary_cancer_1'] = []
    context['hereditary_cancer_2'] = []
    hereditary_file1 = ''.join([output_dir, '/hereditary/', name, ".hereditary.txt"])
    hereditary_file2 = ''.join([output_dir, '/hereditary/', name, ".risk.txt"])

    hereditary_result = 0
    hereditary_result_summary = []
    hereditary_disease = []
    if os.path.getsize(hereditary_file1) > 0:
        # The original opened this file twice and never closed either handle.
        with open(hereditary_file1, 'r') as hereditary_file1_fh:
            hereditary_rows = hereditary_file1_fh.readlines()[1:]  # skip header
        for line in hereditary_rows:
            lines = line.strip().split("\t")
            hereditary_cancer_1 = {
                'gene': lines[0],
                'syndrome': lines[1],
                'hereditary_type': lines[2],
                'type': lines[3],
                'result': lines[4],
            }
            # Column 5 may hold several ';'-separated findings; each one counts.
            hereditary_result += len(lines[4].split(";"))
            hereditary_result_summary.append(hereditary_cancer_1['gene'] + ' ' + hereditary_cancer_1['result'])
            hereditary_disease.append(hereditary_cancer_1['syndrome'])
            context['hereditary_cancer_1'].append(hereditary_cancer_1)

    context['hereditary_result'] = hereditary_result
    if hereditary_result_summary:
        context['hereditary_disease'] = ';'.join(hereditary_disease)
        context['hereditary_result_summary'] = ' | '.join(hereditary_result_summary)
    else:
        context['hereditary_disease'] = '/'
        context['hereditary_result_summary'] = '未检测到相关基因突变'

    # Risk table: two cancer types per template row (keys ...1 and ...2).
    with open(hereditary_file2, 'r') as hereditary_file2_fh:
        heres = hereditary_file2_fh.readlines()[1:]  # skip header
    hereditary_risk = []
    for pair_start in range(0, len(heres), 2):
        hereditary_cancer_2 = {}
        for slot, entry in zip(('1', '2'), heres[pair_start:pair_start + 2]):
            lines = entry.strip().split("\t")
            hereditary_cancer_2['type' + slot] = lines[0]
            if lines[1] == '偏高':
                # Fixed: an elevated risk in the right-hand column used to be
                # left out of the hereditary_risk summary.
                hereditary_risk.append(lines[0])
                hereditary_cancer_2['risk' + slot] = RichText('偏高', color='FF0000')
            elif lines[1] == '同一般人群':
                hereditary_cancer_2['risk' + slot] = RichText('同一般人群')
        context['hereditary_cancer_2'].append(hereditary_cancer_2)

    if hereditary_risk:
        context['hereditary_risk'] = ','.join(hereditary_risk) + '风险可能较高'
    else:
        context['hereditary_risk'] = '风险同一般人群'
    # Placeholder row so the table still renders when nothing was found.
    if len(context['hereditary_cancer_1']) == 0:
        context['hereditary_cancer_1'] = [{'gene': '/', 'syndrome': '/', 'hereditary_type': '/', 'type': '/', 'result': '/'}]
|
||||||
|
|
||||||
|
## Count distinct potentially beneficial drugs (tiers A, B and C of every alteration)
total_drug_count = []
for alteration in context['list1']:
    for tier_key in ('drug_A', 'drug_B', 'drug_C'):
        if alteration[tier_key] == '/':
            # '/' marks an empty tier.
            continue
        for drug_label in alteration[tier_key].split("\n"):
            if drug_label not in total_drug_count:
                total_drug_count.append(drug_label)

context['total_drug_count'] = len(total_drug_count)
|
||||||
|
|
||||||
|
## De-duplicate the clingene lists
unique_clin1 = []
unique_clin2 = []
context['clingenes1'] = unique_clin1
context['clingenes2'] = unique_clin2

for entry in context['clingene1']:
    if entry not in unique_clin1:
        unique_clin1.append(entry)

# An entry already present in clingenes1 is not repeated in clingenes2.
for entry in context['clingene2']:
    if not (entry in unique_clin2 or entry in unique_clin1):
        unique_clin2.append(entry)

## Number of detected gene alterations
context['total_mut_count'] = len(unique_clin1) + len(unique_clin2)
|
||||||
|
|
||||||
|
|
||||||
|
## Genes with FDA/NMPA/NCCN-approved testing for this cancer type
# The context manager closes the file; the original never closed the handle.
with open(indication_file, 'r', encoding='utf-8') as indication_table:
    indication_fh = indication_table.readlines()
indication_genes = []
for line in indication_fh[1:]:  # indication_fh[0] is the header row
    # Split once per row instead of once per column.
    fields = line.strip().split("\t")
    indication = {'gene': fields[0], 'content': fields[1]}
    if indication['gene'] in context['list3']:
        # Detected alterations for this gene are rendered in red.
        indication['result'] = RichText(context['list3'][indication['gene']], color='FF0000')
    else:
        indication['result'] = '未检出变异'
    context['indication'].append(indication)
|
||||||
|
|
||||||
|
##qc处理
|
||||||
|
qc_file=''.join([output_dir,'/qc/',name,'_qc.txt'])
|
||||||
|
qc=pd.read_table(qc_file,sep="\t",header=None,index_col=0,names=['A','B'])
|
||||||
|
|
||||||
|
Q30=qc.loc['Q30(%)','B']
|
||||||
|
if Q30>=85:
|
||||||
|
Q30_result='合格'
|
||||||
|
else:
|
||||||
|
Q30_result='警戒'
|
||||||
|
|
||||||
|
depth=qc.loc['mean_depth(dedup)','B']
|
||||||
|
if Sample_type == 'c':
|
||||||
|
if depth>=1000:
|
||||||
|
depth_result='合格'
|
||||||
|
else:
|
||||||
|
depth_result='警戒'
|
||||||
|
elif Sample_type == 't':
|
||||||
|
if depth>=500:
|
||||||
|
depth_result='合格'
|
||||||
|
else:
|
||||||
|
depth_result='警戒'
|
||||||
|
|
||||||
|
uniformity=qc.loc['coverage(>=0.2*meanx)','B']
|
||||||
|
if uniformity>=90:
|
||||||
|
uniformity_result='合格'
|
||||||
|
else:
|
||||||
|
uniformity_result='警戒'
|
||||||
|
|
||||||
|
if Q30_result=='合格' and depth_result=='合格' and uniformity_result=='合格':
|
||||||
|
context['qc_result']='合格'
|
||||||
|
else:
|
||||||
|
context['qc_result']='警戒'
|
||||||
|
|
||||||
|
context['Q30']=Q30
|
||||||
|
context['Q30_result']=Q30_result
|
||||||
|
context['depth']=depth
|
||||||
|
context['uniformity']=uniformity
|
||||||
|
context['depth_result']=depth_result
|
||||||
|
context['uniformity_result']=uniformity_result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##阴性模块处理
|
||||||
|
##list1
|
||||||
|
if len(context['list1'])==0:
|
||||||
|
context['list1']=[{'gene':'/','freq':'/','drug_A':'/','drug_B':'/','drug_C':'/','drug_D':'/'}]
|
||||||
|
|
||||||
|
##list2
|
||||||
|
if len(context['list2'])==0:
|
||||||
|
context['list2']= {'/':[{'drug':'/','effect':'/','tumor':'/','evidence':'/','sig':'/'}]}
|
||||||
|
context['genefunc']['/']='/'
|
||||||
|
|
||||||
|
|
||||||
|
##clingenes,nonclingenes
|
||||||
|
if len(context['clingenes1'])==0:
|
||||||
|
context['clingenes1']=[{'gene':'/','transcript':'/','nacid':'/','aacid':'/','exon':'/','muttype':'/','freq':'/'}]
|
||||||
|
if len(context['clingenes2'])==0:
|
||||||
|
context['clingenes2']=[{'gene':'/','transcript':'/','nacid':'/','aacid':'/','exon':'/','muttype':'/','freq':'/'}]
|
||||||
|
if len(context['nonclingenes'])==0:
|
||||||
|
context['nonclingenes']=[{'gene':'/','transcript':'/','nacid':'/','aacid':'/','exon':'/','muttype':'/','freq':'/'}]
|
||||||
|
|
||||||
|
##sensitive_resistant_drug
|
||||||
|
if len(context['sensitive_resistant_drug'][0]['drug'])==0:
|
||||||
|
context['sensitive_resistant_drug'][0]['drug']=[{'name':'/','mechanism':'/'}]
|
||||||
|
if len(context['sensitive_resistant_drug'][1]['drug'])==0:
|
||||||
|
context['sensitive_resistant_drug'][1]['drug']=[{'name':'/','mechanism':'/'}]
|
||||||
|
|
||||||
|
##mmr
|
||||||
|
if len(context['mmr'])==0:
|
||||||
|
context['mmr']=[{'gene':'/','transcript':'/','nacid':'/','aacid':'/','muttype':'/','freq':'/','sig':'/'}]
|
||||||
|
|
||||||
|
##info
|
||||||
|
post_file=''.join([output_dir,'/qc/',name,'_post.json'])
|
||||||
|
sex='/'
|
||||||
|
age='/'
|
||||||
|
phone='/'
|
||||||
|
medical_history='/'
|
||||||
|
family_history='/'
|
||||||
|
sample_id=name
|
||||||
|
sample_type='/'
|
||||||
|
report_date=time.strftime("%Y-%m-%d", time.localtime())
|
||||||
|
arrival_date=report_date
|
||||||
|
cancer_type='/'
|
||||||
|
pathologic_diagnosis='/'
|
||||||
|
|
||||||
|
if os.path.isfile(post_file):
|
||||||
|
# if post_file.exists():
|
||||||
|
post_fh=open(post_file,'r')
|
||||||
|
post=json.load(post_fh)
|
||||||
|
name=post["data"][0]["name"]
|
||||||
|
sex=post["data"][0]["gender"]
|
||||||
|
age=post["data"][0]["age"]
|
||||||
|
medical_history=post["data"][0]["treatHistory"]
|
||||||
|
family_history=post["data"][0]["sickFamilyHistory"]
|
||||||
|
sample_id=post["data"][0]["barcode"]
|
||||||
|
# sample_id_control=post["data"][0]["barcode_N"]
|
||||||
|
sample_type=post["data"][0]["source"]
|
||||||
|
# sample_type_control=post["data"][0]["source_N"]
|
||||||
|
# sample_type_control = post["data"][0].get('source_N', '/')
|
||||||
|
arrival_date=post["data"][0]["receiveTime"].split(' ')[0]
|
||||||
|
cancer_type=post["data"][0]["zlType"]
|
||||||
|
pathologic_diagnosis=post["data"][0]["treatResult"]
|
||||||
|
|
||||||
|
context['info']={
|
||||||
|
'name':name,
|
||||||
|
'sex':sex,
|
||||||
|
'age':age,
|
||||||
|
'phone':phone,
|
||||||
|
'medical_history':medical_history,
|
||||||
|
'family_history':family_history,
|
||||||
|
'sample_id':sample_id,
|
||||||
|
'sample_type':sample_type,
|
||||||
|
'report_date':report_date,
|
||||||
|
'arrival_date':arrival_date,
|
||||||
|
'cancer_type':cancer_type,
|
||||||
|
'pathologic_diagnosis':pathologic_diagnosis}
|
||||||
|
context['report_time']=report_date
|
||||||
|
|
||||||
|
##模板替换
|
||||||
|
file_real = os.path.realpath(sys.argv[0])
|
||||||
|
Exe_Path = os.path.dirname(file_real)
|
||||||
|
|
||||||
|
report_template={'lung85gene':{'t':'lung85-tissue-oem.docx','c':'lung85-blood-oem.docx'},
|
||||||
|
'crc88gene':{'t':'CRC88-tissue-oem.docx','c':'CRC88-blood-oem.docx'}}
|
||||||
|
doc_full = os.path.join(Exe_Path, report_template[projcet][Sample_type])
|
||||||
|
|
||||||
|
doc = DocxTemplate(doc_full)
|
||||||
|
|
||||||
|
doc.render(context)
|
||||||
|
doc.save(report_file)
|
||||||
|
|
@ -0,0 +1,85 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import struct
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from main import main
|
||||||
|
from tools.common import basedir
|
||||||
|
|
||||||
|
|
||||||
|
def _recv_exact(conn, size):
    """Read exactly *size* bytes from *conn*, looping over short reads.

    :raises ConnectionError: if the peer closes before *size* bytes arrive.
    """
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = conn.recv(remaining)
        if not chunk:
            raise ConnectionError(
                'connection closed with %d of %d bytes missing' % (remaining, size))
        chunks.append(chunk)
        remaining -= len(chunk)
    return b''.join(chunks)


def recvdata(conn, path):
    """Receive one length-prefixed file from *conn* and save it under *path*.

    Wire format as read here: a 4-byte native int (``struct`` format ``'i'``)
    holding the header size, a UTF-8 JSON header with the keys ``contentlen``
    and ``contentname``, then ``contentlen`` bytes of payload.

    :param conn: connected socket-like object providing ``recv``
    :param path: directory the received file is written into
    :return: path of the saved file, named ``<MMDDHHMM>_<contentname>``
    :raises ConnectionError: if the peer closes before everything arrived
    """
    # recv() may legally return fewer bytes than requested, so both header
    # parts are read with an exact-length loop.
    header_size = struct.unpack('i', _recv_exact(conn, 4))[0]
    header_dic = json.loads(_recv_exact(conn, header_size).decode('utf-8'))
    content_len = header_dic['contentlen']
    # The name is supplied by the peer: keep only its last path component so
    # it cannot point outside *path*.
    content_name = os.path.basename(header_dic['contentname'])
    fielpath = os.path.join(path, '%s_%s' % (datetime.now().strftime("%m%d%H%M"), content_name))

    recv_len = 0
    with open(fielpath, 'wb') as file:
        while recv_len < content_len:
            # Never ask for more than what is left of this payload.
            chunk = conn.recv(min(1024 * 1000, content_len - recv_len))
            if not chunk:
                # An empty read means the peer is gone; without this check the
                # loop would spin forever.
                raise ConnectionError(
                    'connection closed after %d of %d payload bytes' % (recv_len, content_len))
            file.write(chunk)
            recv_len += len(chunk)
    return fielpath
|
||||||
|
|
||||||
|
|
||||||
|
def senddata(conn, path, message=None):
    """Send a file, or a short text message, using the length-prefixed protocol.

    The frame is: 4-byte native int header size, UTF-8 JSON header with
    ``contentlen`` and ``contentname``, then the payload bytes.

    :param conn: connected socket-like object providing ``sendall``
    :param path: file to send; when *message* is truthy, *path* is itself the
        text to send and ``contentname`` is ``'message'``
    :param message: truthy to send *path* as text instead of reading a file
    """
    if message:
        # contentlen must count encoded bytes, not characters, otherwise
        # non-ASCII text is advertised shorter than what is actually sent.
        content = path.encode('utf-8')
        name = 'message'
    else:
        with open(path, 'rb') as file:
            content = file.read()
        name = os.path.basename(os.path.realpath(path))

    headerdic = dict(contentlen=len(content), contentname=name)
    headerbytes = json.dumps(headerdic).encode('utf-8')
    # sendall() keeps writing until everything is out; plain send() may stop
    # after a partial write and corrupt the frame.
    conn.sendall(struct.pack('i', len(headerbytes)))
    conn.sendall(headerbytes)
    conn.sendall(content)
|
||||||
|
|
||||||
|
|
||||||
|
def server():
    """Serve report requests forever on TCP port 8190 (all interfaces).

    For each accepted connection: receive one file into ``<basedir>/xlsx``,
    pass its path to ``main``, and send the file ``main`` returns back to the
    client. Errors are printed and the loop moves on to the next connection.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as myserver:
        myserver.bind(("", 8190))
        myserver.listen(5)
        while True:
            myclient = None
            try:
                myclient, _peer = myserver.accept()
                recv_content = recvdata(myclient, os.path.join(basedir, 'xlsx'))
                outputpath = main(recv_content)
                senddata(myclient, outputpath)
                print('生成成功')
            except Exception as e:
                # Top-level boundary: report and keep serving.
                print(e, '有错误')
            finally:
                # Close every accepted connection, on success or failure, so
                # descriptors do not pile up over the server's lifetime.
                if myclient is not None:
                    myclient.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Without a command-line argument start the socket server; otherwise hand
    # the first argument straight to main().
    if len(sys.argv) <= 1:
        server()
    else:
        outputpath = main(sys.argv[1])
|
||||||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Parent of the directory that contains this file (symlinks resolved).
_this_file = os.path.realpath(__file__)
basedir = os.path.dirname(os.path.dirname(_this_file))
|
||||||
|
|
@ -0,0 +1,457 @@
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from tools.readxlsx import read
|
||||||
|
|
||||||
|
|
||||||
|
def tree():
    """Return a ``defaultdict`` whose missing keys create further such dicts."""
    nested = defaultdict(tree)
    return nested
|
||||||
|
|
||||||
|
|
||||||
|
class BaseAssignment:
    """Holds the accumulators that are filled while a report is assembled."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Report result (nested mapping created by tree()).
        self.result = tree()
        # Gene alterations with clear or potential clinical significance.
        self.signtb = set()
        # Drugs with potential clinical benefit.
        self.signdurg = set()
        # Filled by Parse._drug_category: Response_Type -> list of drug names.
        self.drugs_type = {}
|
||||||
|
|
||||||
|
|
||||||
|
class Parse(BaseAssignment):
|
||||||
|
|
||||||
|
def __init__(self, sampledata, *args, **kwargs):
    """Initialise the base accumulators and keep the raw per-sheet data.

    :param sampledata: mapping indexed by sheet name; each method reads its
        own entry and wraps it in a ``pd.DataFrame``
    """
    super().__init__(*args, **kwargs)
    self.sampledata = sampledata
|
||||||
|
|
||||||
|
def cms(self):
    """Process the sample information sheet into ``self.result['c']``.

    Cells whose text is exactly ``'.'``, ``'-'`` or ``'——'`` become ``'/'``.
    ``receiveTime`` is cut at the first space and ``reportTime`` is set to
    today's date.

    :raises UserWarning: if the ``sample_info`` sheet is empty
    """
    data = pd.DataFrame(self.sampledata['sample_info'])
    if data.empty:
        raise UserWarning('sample_info表为空,生成报告失败!')
    # DataFrame.applymap is deprecated in favour of DataFrame.map; use
    # whichever the installed pandas provides.
    elementwise = data.map if hasattr(data, 'map') else data.applymap
    data = elementwise(lambda x: '/' if str(x) in ('.', '-', '——') else x)
    data_dict = data.to_dict('index')[0]
    # The cell may hold a Timestamp instead of text, so stringify before
    # splitting.
    data_dict['receiveTime'] = str(data_dict['receiveTime']).split(' ')[0]
    data_dict['reportTime'] = time.strftime("%Y-%m-%d", time.localtime())
    self.result['c'] = data_dict
|
||||||
|
|
||||||
|
def target(self):
    """Summarise validated SNV/indel rows into ``self.result['snvindel']``.

    Rows with ``Validated == 1`` are grouped by ``AAChange.refGene``. Each
    group yields one dict built from its first row, plus ``drug_category``
    (from ``_drug_category``), ``drug_content`` (drug rows whose ``DrugCn``
    is not ``'.'``) and ``alter`` (the group key). Keys whose
    ``AMP_mut_level`` is I or II are added to ``self.signtb``.
    """
    data = pd.DataFrame(self.sampledata['snvindel'])
    res = []
    if not data.empty:
        data = data[data['Validated'] == 1].reset_index()
    # Also covers "sheet has rows but none is validated"; the column
    # operations below cannot run on an empty frame.
    if data.empty:
        self.result['snvindel'] = res
        return

    exonic = data['ExonicFunc.refGene']
    data['muttype'] = '/'
    data.loc[exonic.str.match('nonsynonymous SNV', na=False), 'muttype'] = '错义突变'
    data.loc[exonic.str.match('^frameshift', na=False), 'muttype'] = '移码突变'
    data.loc[exonic.str.match('^nonframeshift', na=False), 'muttype'] = '非移码突变'
    data.loc[exonic.str.match('stopgain', na=False), 'muttype'] = '提前终止'

    # Split the HGVS string. The split yields only as many columns as the
    # longest value has fields, so fill absent positions explicitly instead
    # of assuming five columns always exist.
    parts = data['AAChange.refGene'].str.split(':', expand=True)
    for position, column in enumerate(['gene', 'transcript', 'exon', 'nacid', 'aacid']):
        data[column] = parts[position] if position in parts.columns else None
    # Without an amino-acid change, fall back to the nucleotide change.
    data['aacid'] = data['aacid'].fillna(data['nacid'])

    summary_cols = ['gene', 'transcript', 'exon', 'nacid', 'aacid', 'mutant_frequency',
                    'AMP_mut_level', 'muttype', 'Gene_function']
    drug_cols = ['DrugCn', 'Response_Type', 'Indication', 'Evidence_Source', 'Efficacy_Evidence']
    for alter, alter_data in data.groupby('AAChange.refGene'):
        alter_res = alter_data[summary_cols].iloc[0].to_dict()
        alter_res['drug_category'] = self._drug_category(alter_data)
        drug_content = alter_data[drug_cols]
        drug_content = drug_content[drug_content['DrugCn'] != '.']
        alter_res['drug_content'] = drug_content.reset_index().to_dict('records')
        alter_res['alter'] = alter
        res.append(alter_res)

        # Summary counter of clinically significant alterations.
        if alter_res['AMP_mut_level'] in ['I', 'II']:
            self.signtb.add(alter)

    self.result['snvindel'] = res
|
||||||
|
|
||||||
|
def fusion(self):
    """Summarise validated fusion rows into ``self.result['fusion']``.

    Rows with ``Validated == 1`` are grouped by ``FUSION``; each group gives
    one dict from its first row plus ``drug_category``, ``drug_content`` and
    ``alter`` (the fusion name with ``-`` replaced by ``:``). Group keys
    whose ``AMP_mut_level`` is I or II are added to ``self.signtb``.
    """
    sheet = pd.DataFrame(self.sampledata['fusion'])
    if sheet.empty:
        self.result['fusion'] = []
        return

    summary_cols = ['FUSION', 'FREQ1', 'AMP_mut_level', 'Gene_function']
    drug_cols = ['DrugCn', 'Response_Type', 'Indication', 'Evidence_Source', 'Efficacy_Evidence']
    validated = sheet[sheet['Validated'] == 1].reset_index()
    entries = []
    for fusion_name, rows in validated.groupby('FUSION'):
        entry = rows[summary_cols].iloc[0].to_dict()
        entry['drug_category'] = self._drug_category(rows)
        drug_rows = rows[drug_cols]
        entry['drug_content'] = drug_rows[drug_rows['DrugCn'] != '.'].reset_index().to_dict('records')
        entry['alter'] = '%s 融合' % (entry['FUSION'].replace('-', ':'))
        entries.append(entry)

        # Summary counter of clinically significant alterations.
        if entry['AMP_mut_level'] in ['I', 'II']:
            self.signtb.add(fusion_name)

    self.result['fusion'] = entries
|
||||||
|
|
||||||
|
def cnv(self):
    """Summarise validated copy-number rows into ``self.result['cnv']``.

    Rows with ``Validated == 1`` are grouped by ``Gene_Symbol``. ``muttype``
    is '扩增' when ``Copy_number`` is greater than 2 and '缺失' otherwise;
    ``alter`` is "<gene> <muttype>". Genes whose ``AMP_mut_level`` is I or II
    are added to ``self.signtb``.
    """
    sheet = pd.DataFrame(self.sampledata['cnv'])
    if sheet.empty:
        self.result['cnv'] = []
        return

    summary_cols = ['Gene_Symbol', 'Copy_number', 'AMP_mut_level', 'Gene_function']
    drug_cols = ['DrugCn', 'Response_Type', 'Indication', 'Evidence_Source', 'Efficacy_Evidence']
    validated = sheet[sheet['Validated'] == 1].reset_index()
    entries = []
    for gene, rows in validated.groupby('Gene_Symbol'):
        summary = rows[summary_cols].reset_index()
        summary['muttype'] = ['扩增' if copies > 2 else '缺失' for copies in summary['Copy_number']]
        entry = summary.iloc[0].to_dict()
        entry['drug_category'] = self._drug_category(rows)
        drug_rows = rows[drug_cols]
        entry['drug_content'] = drug_rows[drug_rows['DrugCn'] != '.'].reset_index().to_dict('records')
        entry['alter'] = '%s %s' % (gene, entry['muttype'])
        entries.append(entry)

        # Summary counter of clinically significant alterations.
        if entry['AMP_mut_level'] in ['I', 'II']:
            self.signtb.add(gene)

    self.result['cnv'] = entries
|
||||||
|
|
||||||
|
def hotspot(self):
    """Hand the 'hotspot' sheet to ``_to_records``."""
    sheet = 'hotspot'
    self._to_records(sheet)
|
||||||
|
|
||||||
|
def met(self):
    """Hand the 'MET' sheet to ``_to_records``."""
    sheet = 'MET'
    self._to_records(sheet)
|
||||||
|
|
||||||
|
def longindel(self):
    """Hand the 'longindel' sheet to ``_to_records``."""
    sheet = 'longindel'
    self._to_records(sheet)
|
||||||
|
|
||||||
|
def mmr(self):
    """Store the MMR rows and a summary under ``self.result['sum']['mmr']``.

    With rows present, the summary joins "<gene> <p_change>" per row with
    ' | ', reports the row count and uses the "may be sensitive" prediction
    text; with an empty sheet the defaults below are used.
    """
    sheet = pd.DataFrame(self.sampledata['MMR'])
    if sheet.empty:
        records = []
        summary = dict(
            result_summary='未检测到相关基因突变',
            predict='对免疫检查点抑制剂可能不敏感',
            mmr_num=0,
        )
    else:
        per_row = ['%s %s' % (gene, change)
                   for gene, change in zip(sheet['gene'], sheet['p_change'])]
        records = sheet.to_dict('records')
        summary = dict(
            result_summary=' | '.join(per_row),
            predict='对免疫检查点抑制剂可能敏感',
            mmr_num=len(sheet.index),
        )

    self.result['MMR'] = records
    self.result['sum']['mmr'] = summary
|
||||||
|
|
||||||
|
def msi(self):
    """Hand the 'MSI' sheet to ``_to_dicts``."""
    sheet = 'MSI'
    self._to_dicts(sheet)
|
||||||
|
|
||||||
|
# def chemo(self):
|
||||||
|
# data = pd.DataFrame(self.sampledata['chemo'])
|
||||||
|
#
|
||||||
|
# project = data['project'].to_list()[0]
|
||||||
|
#
|
||||||
|
# # 分类汇总 同位点,药物合并 drug.infos.txt
|
||||||
|
# drugrsid = data[['drugname', 'genename', 'rsid', 'result', 'level', 'tips', 'drugsort']]
|
||||||
|
# drugrsid = drugrsid.drop_duplicates()
|
||||||
|
# resdrugrsid = drugrsid.groupby(['drugname', 'genename', 'rsid', 'result', 'level', 'drugsort'])['tips'].agg(
|
||||||
|
# ','.join).reset_index()
|
||||||
|
# resdrugrsid.rename(columns=
|
||||||
|
# {'drugname': '药物', 'genename': '检测基因', 'rsid': '检测位点', 'result': '基因型',
|
||||||
|
# 'level': '证据等级', 'tips': '用药提示'},
|
||||||
|
# inplace=True)
|
||||||
|
# resdrugrsid = resdrugrsid.sort_values(by=['drugsort', '药物', '检测基因'])
|
||||||
|
# self.result['chemo']['druginfo'] = resdrugrsid.to_dict('records')
|
||||||
|
#
|
||||||
|
# # 药物 药物疗效 推荐程度合并 drug.res.txt
|
||||||
|
# drugtypesum = data[['drugname', 'drugtype', 'rsid', 'weights']]
|
||||||
|
# drugtypesum = drugtypesum.drop_duplicates()
|
||||||
|
# drugtyperes = list()
|
||||||
|
# drugsum = dict()
|
||||||
|
# for drug, drugdata in drugtypesum.groupby('drugname'):
|
||||||
|
# tipsnum = drugdata.groupby(['drugtype']).agg({'weights': 'sum'}).to_dict('index')
|
||||||
|
# sumlist = list()
|
||||||
|
# if 'LX' in tipsnum:
|
||||||
|
# LX = tipsnum['LX']['weights']
|
||||||
|
# if LX > 0:
|
||||||
|
# lxdes = '疗效较好'
|
||||||
|
# lxnum = 1
|
||||||
|
# elif LX == 0:
|
||||||
|
# lxdes = '疗效一般'
|
||||||
|
# lxnum = 0
|
||||||
|
# else:
|
||||||
|
# lxdes = '疗效较差'
|
||||||
|
# lxnum = -1
|
||||||
|
# sumlist.append(lxdes)
|
||||||
|
# else:
|
||||||
|
# LX = 0
|
||||||
|
# lxnum = 0
|
||||||
|
# if 'DF' in tipsnum:
|
||||||
|
# DF = tipsnum['DF']['weights']
|
||||||
|
# if DF > 0:
|
||||||
|
# dfdes = '毒副较低'
|
||||||
|
# dfnum = 1
|
||||||
|
# elif DF == 0:
|
||||||
|
# dfdes = '毒副一般'
|
||||||
|
# dfnum = 0
|
||||||
|
# else:
|
||||||
|
# dfdes = '毒副较高'
|
||||||
|
# dfnum = -1
|
||||||
|
# sumlist.append(dfdes)
|
||||||
|
# else:
|
||||||
|
# DF = 0
|
||||||
|
# dfnum = 0
|
||||||
|
#
|
||||||
|
# # 评价方式 疗效 1 0 -1, 毒副 1 0 -1 ,可形成9宫格
|
||||||
|
# sumnum = lxnum + dfnum
|
||||||
|
# if sumnum > 0:
|
||||||
|
# sumdes = '推荐'
|
||||||
|
# elif sumnum == 0:
|
||||||
|
# sumdes = '常规'
|
||||||
|
# else:
|
||||||
|
# sumdes = '谨慎'
|
||||||
|
#
|
||||||
|
# # 特别药物处理
|
||||||
|
# if (drug == "氟尿嘧啶" or drug == "卡培他滨") and DF < 0:
|
||||||
|
# sumdes = '谨慎'
|
||||||
|
#
|
||||||
|
# drugtyperes.append(dict(
|
||||||
|
# 药物名称=drug,
|
||||||
|
# 疗效=LX,
|
||||||
|
# 毒副=DF,
|
||||||
|
# 推荐程度=sumdes,
|
||||||
|
# 疗效和毒副总结=','.join(sumlist)
|
||||||
|
# ))
|
||||||
|
# drugsum[drug] = sumdes
|
||||||
|
#
|
||||||
|
# # 报告中展示药物有顺序
|
||||||
|
# drugsort = data[['drugname', 'drugsort']].drop_duplicates()
|
||||||
|
# drugsort_dict = drugsort.set_index('drugname')['drugsort'].to_dict()
|
||||||
|
# drugtyperes_sort = sorted(drugtyperes, key=lambda x: (
|
||||||
|
# drugsort_dict[x['药物名称']] if x['药物名称'] in drugsort_dict else 100, x['药物名称']))
|
||||||
|
#
|
||||||
|
# drugtyperes_sort_df = pd.DataFrame(drugtyperes_sort)
|
||||||
|
# self.result['chemo']['sum'] = drugtyperes_sort_df.groupby('推荐程度')['药物名称'].apply(','.join).to_dict()
|
||||||
|
# self.result['chemo']['drugres'] = drugtyperes_sort_df.to_dict('records')
|
||||||
|
#
|
||||||
|
# # 联合用药
|
||||||
|
# drug_combine_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'database',
|
||||||
|
# 'chemo_drug_combine.csv')
|
||||||
|
# drug_combine = pd.read_csv(drug_combine_path, sep='\t')
|
||||||
|
# drug_combine.fillna('.', inplace=True)
|
||||||
|
# drug_combine_data = drug_combine[drug_combine['source'].str.contains(project)]
|
||||||
|
# drug_combine_data = drug_combine_data.reset_index()
|
||||||
|
# if not drug_combine_data.empty:
|
||||||
|
# drug_combine_data['临床提示'] = drug_combine_data['用药方案'].apply(self._get_drug_plan, args=(drugsum,))
|
||||||
|
# self.result['chemo']['combine'] = drug_combine_data.groupby('癌种').apply(
|
||||||
|
# lambda group: group.set_index('癌种').to_dict('records')).to_dict()
|
||||||
|
# else:
|
||||||
|
# self.result['chemo']['combine'] = dict()
|
||||||
|
# self.result['sum']['chemo_drug_num'] = len(drugsum.keys())
|
||||||
|
|
||||||
|
def chemo(self):
    """Fill the chemotherapy sections of the result.

    Reads three sheets through ``_to_records(..., need=True)``:

    * ``chemo_res``  -> ``result['chemo']['chemo_res']`` (rows numbered from 1
      in an ``index`` field), plus ``result['sum']['chemo']`` with the drug
      count and the drug names joined per '推荐程度' value;
    * ``chemo_comb`` -> rows grouped by '癌种';
    * ``chemo_info`` -> rows grouped by '药物'.

    An empty or missing sheet yields empty containers.
    """
    def records_by(rows, key):
        # Group row dicts by *key* and drop the key from each row. Iterating
        # the groupby directly avoids groupby.apply, whose handling of the
        # grouping column differs between pandas versions.
        if not rows:
            return {}
        frame = pd.DataFrame(rows)
        return {value: group.drop(columns=key).to_dict('records')
                for value, group in frame.groupby(key)}

    # _to_records may return None for an empty sheet; normalise so len() and
    # the grouping below cannot fail.
    chemo_res = self._to_records('chemo_res', need=True) or []
    chemo_res_df = pd.DataFrame(chemo_res)
    chemo_res_df.index = chemo_res_df.index + 1
    self.result['chemo']['chemo_res'] = chemo_res_df.reset_index().to_dict('records')
    self.result['sum']['chemo']['drug_num'] = len(chemo_res)
    drug_category = {}
    if chemo_res:
        drug_category = chemo_res_df.groupby('推荐程度')['药物名称'].apply(','.join).to_dict()
    self.result['sum']['chemo']['drug_category'] = drug_category

    chemo_comb = self._to_records('chemo_comb', need=True)
    self.result['chemo']['chemo_comb'] = records_by(chemo_comb, '癌种')

    chemo_info = self._to_records('chemo_info', need=True)
    self.result['chemo']['chemo_info'] = records_by(chemo_info, '药物')
|
||||||
|
|
||||||
|
def hcs(self):
    """Store the 'HCS' sheet and record its row count under ``sum.hcs.num``."""
    self._to_records('HCS')
    hcs_rows = self.result['HCS']
    self.result['sum']['hcs']['num'] = len(hcs_rows)
|
||||||
|
|
||||||
|
def heredity(self):
    """Process the hereditary result sheets.

    Stores the ``hereditary`` rows and three summary strings under
    ``result['sum']['hereditary']``: ``result`` ("<基因> <检测结果>" joined
    with '|'), ``disease`` ('遗传性肿瘤综合征' values joined with '|') and
    ``risk`` ('肿瘤类型' of the ``hereditary_risk`` rows whose '风险值' is
    '偏高', joined with ','). Each string stays '/' when its sheet is empty.

    :return: None
    """
    findings = pd.DataFrame(self.sampledata['hereditary'])
    risks = pd.DataFrame(self.sampledata['hereditary_risk'])

    summary = {'result': '/', 'disease': '/', 'risk': '/'}
    if not findings.empty:
        summary['result'] = '|'.join(
            '%s %s' % (gene, outcome)
            for gene, outcome in zip(findings['基因'], findings['检测结果']))
        summary['disease'] = '|'.join(findings['遗传性肿瘤综合征'].to_list())
    if not risks.empty:
        elevated = risks[risks['风险值'] == '偏高']
        summary['risk'] = ','.join(elevated['肿瘤类型'].to_list())

    self.result['hereditary'] = findings.to_dict('records')
    for field in ('result', 'disease', 'risk'):
        self.result['sum']['hereditary'][field] = summary[field]
|
||||||
|
|
||||||
|
def qc(self):
    """Store the first row of the 'qc' sheet as ``self.result['qc']``.

    Three columns are renamed to short keys (``q30``, ``depth``,
    ``coverage``); an empty sheet gives an empty dict.
    """
    short_names = {
        'Q30(%)': 'q30',
        'mean_depth(dedup)': 'depth',
        'coverage(>=0.2*meanx)': 'coverage',
    }
    sheet = pd.DataFrame(self.sampledata['qc'])
    if sheet.empty:
        first_row = {}
    else:
        first_row = sheet.rename(columns=short_names).to_dict('index')[0]
    self.result['qc'] = first_row
|
||||||
|
|
||||||
|
def drugs(self):
    """Map drug names to their descriptions in ``result['drugs']['drugs_detail']``.

    Rows with any missing value, or whose ``drug_detail`` is ``'.'``, are
    left out. An empty sheet gives an empty dict.
    """
    sheet = pd.DataFrame(self.sampledata['drugs'])
    details = {}
    if not sheet.empty:
        complete = sheet.dropna()
        described = complete[complete['drug_detail'] != '.']
        details = dict(zip(described['drug_name'], described['drug_detail']))
    self.result['drugs']['drugs_detail'] = details
|
||||||
|
|
||||||
|
def indication(self):
    """Hand the 'indication' sheet to ``_to_records``."""
    sheet = 'indication'
    self._to_records(sheet)
|
||||||
|
|
||||||
|
def _to_records(self, sheetname, need=False):
    """Convert a multi-row sheet into a list of row dicts.

    :param sheetname: key of the sheet in ``self.sampledata``
    :param need: when true, return the records instead of storing them
    :return: the list of records (``[]`` for an empty sheet) when *need* is
        true; otherwise ``None``, with the list stored in
        ``self.result[sheetname]``
    """
    data = pd.DataFrame(self.sampledata[sheetname])
    res = [] if data.empty else data.to_dict('records')
    if need:
        # Always hand back a list: returning None for an empty sheet made
        # callers fail on len().
        return res
    self.result[sheetname] = res
|
||||||
|
|
||||||
|
def _to_dicts(self, sheetname):
    """Store the first row of a single-row sheet as a dict.

    :param sheetname: key of the sheet in ``self.sampledata``; the row (or an
        empty dict for an empty sheet) lands in ``self.result[sheetname]``
    :return: None
    """
    frame = pd.DataFrame(self.sampledata[sheetname])
    self.result[sheetname] = {} if frame.empty else frame.to_dict('index')[0]
|
||||||
|
|
||||||
|
def _drug_category(self, groupdata):
    """Describe the drugs of one alteration, grouped by ``Drug_Category``.

    Side effects:

    * drugs of categories a, b and c (``DrugCn`` split on ',') are added to
      ``self.signdurg``;
    * every single drug name (``DrugCn`` split on '+' or ',') is recorded in
      ``self.drugs_type`` under its row's ``Response_Type``.

    :param groupdata: DataFrame with the rows of one alteration; it is not
        modified
    :return: dict mapping each category (except '.') to newline-joined
        "<DrugCn> 【<AMP_evidence_level> 级】" lines
    """
    drug_category_res = {}
    for drug_category, rows in groupdata.groupby('Drug_Category'):
        if drug_category == '.':
            continue
        drug_cells = rows['DrugCn'].tolist()
        # Count sensitive / possibly sensitive drugs.
        if drug_category in ('a', 'b', 'c'):
            for cell in drug_cells:
                self.signdurg.update(str(cell).split(','))
        drug_category_res[drug_category] = '\n'.join(
            '%s 【%s 级】' % (drug, level)
            for drug, level in zip(drug_cells, rows['AMP_evidence_level']))

    # All drug names per response type. Plain str/re calls keep ' + ' a
    # literal separator whatever the installed pandas treats as the regex
    # default, and leave the caller's frame untouched.
    for response_type, cell in zip(groupdata['Response_Type'], groupdata['DrugCn']):
        for drug in re.split(r'[+,]', str(cell).replace(' + ', '+')):
            if drug in ('.', ''):
                continue
            # First-seen order with no repeats, so the report is reproducible
            # and a drug shared by several alterations is listed once.
            known = self.drugs_type.setdefault(response_type, [])
            if drug not in known:
                known.append(drug)

    return drug_category_res
|
||||||
|
|
||||||
|
@staticmethod
def _get_drug_plan(x, drugsum):
    """Rate a '+'-separated drug combination from its members' ratings.

    :param x: combination text, e.g. "A + B"
    :param drugsum: mapping from drug name to its individual rating
    :return: '慎用' if any known member is rated '慎用' or '谨慎', else
        '推荐' if any is rated '推荐', else '可选'
    """
    ratings = [drugsum[part.strip()] for part in x.split('+') if part.strip() in drugsum]
    if any(flag in ratings for flag in ('慎用', '谨慎')):
        return '慎用'
    if '推荐' in ratings:
        return '推荐'
    return '可选'
|
||||||
|
|
||||||
|
def collect(self):
    """Run every sheet handler in order, add the summary counts, return the result.

    :return: ``self.result`` after all handlers have run, with
        ``sum.signtb_num``, ``sum.signdrug_num`` and ``drugs.drugs_type``
        (keys sorted) filled in
    """
    steps = (
        self.cms, self.target, self.fusion, self.cnv, self.hotspot,
        self.met, self.longindel, self.mmr, self.msi, self.chemo,
        self.hcs, self.heredity, self.qc, self.indication, self.drugs,
    )
    for step in steps:
        step()

    # Summary.
    totals = self.result['sum']
    totals['signtb_num'] = len(self.signtb)
    totals['signdrug_num'] = len(self.signdurg)
    self.result['drugs']['drugs_type'] = dict(sorted(self.drugs_type.items()))
    return self.result
|
||||||
|
|
||||||
|
|
||||||
|
def run(path):
    """Parse the workbook at *path* and return the collected result as JSON text.

    The same text is also written to ``t.json`` in the current directory.

    :param path: workbook path handed to ``read``
    :return: the result serialised with ``indent=4`` and ``ensure_ascii=False``
    """
    parse = Parse(read(path))
    res = parse.collect()
    resjson = json.dumps(res, indent=4, ensure_ascii=False)
    # ensure_ascii=False leaves non-ASCII text in the output, so the file
    # must be opened as UTF-8 instead of the platform default encoding.
    with open('t.json', 'w', encoding='utf-8') as f:
        f.write(resjson)
    return resjson
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line use: the first argument is passed to run().
    workbook_path = sys.argv[1]
    run(workbook_path)
|
||||||
|
|
@ -0,0 +1,27 @@
|
||||||
|
import pandas as pd
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Module-level logger obtained under the fixed name 'main.sub'.
logger = logging.getLogger('main.sub')
|
||||||
|
|
||||||
|
|
||||||
|
def read(merge):
    """Load every sheet of an Excel workbook into plain Python containers.

    :param merge: workbook path (or anything ``pd.read_excel`` accepts)
    :return: dict mapping sheet name to ``{column: [values...]}`` with
        missing cells replaced by ``'.'``; an empty sheet maps to ``[]``
    :raises UserWarning: if the ``sample_info`` sheet has no ``sampleSn`` rows
    """
    # sheet_name=None loads all sheets as a dict of DataFrames.
    df = pd.read_excel(merge, sheet_name=None)
    samplelist = df['sample_info']['sampleSn'].to_list()
    if not samplelist:
        logger.error('sample_info表为空!读取excel信息失败!')
        raise UserWarning('sample_info表为空!读取excel信息失败!')

    samdict = dict()
    for name, contents in df.items():
        if contents.empty:
            samdict[name] = []
            continue
        # Non-inplace fillna: putting a string into numeric columns in place
        # is a deprecated silent upcast in recent pandas.
        samdict[name] = contents.fillna('.').to_dict('list')
    return samdict
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line use: print what read() returns for the given workbook.
    workbook_path = sys.argv[1]
    res = read(workbook_path)
    print(res)
|
||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue