初始化

2023-07-31 13:49:34 +08:00 · 2023-07-31 13:49:34 +08:00 · 260d86d3f1
commit 260d86d3f1
25 changed files with 8552 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,215 @@
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/#use-with-ide
 .pdm.toml
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 # ---> Perl
 !Build/
 .last_cover_stats
 /META.yml
 /META.json
 /MYMETA.*
 *.o
 *.pm.tdy
 *.bs
 # Devel::Cover
 cover_db/
 # Devel::NYTProf
 nytprof.out
 # Dist::Zilla
 /.build/
 # Module::Build
 _build/
 Build
 Build.bat
 # Module::Install
 inc/
 # ExtUtils::MakeMaker
 /blib/
 /_eumm/
 /*.gz
 /Makefile
 /Makefile.old
 /MANIFEST.bak
 /pm_to_blib
 /*.zip
 # ---> Perl6
 # Gitignore for Perl 6 (http://www.perl6.org)
 # As part of https://github.com/github/gitignore
 # precompiled files
 .precomp
 lib/.precomp
 nohup.out
 log/*
 !log/readme.md
 example/*
 !example/readme.md
 /.report/
--- a/README.md
+++ b/README.md
--- a/database/chemo_drug_combine.csv
+++ b/database/chemo_drug_combine.csv
@ -0,0 +1,28 @@
 癌种	用药方案	方案缩写	source
 非小细胞肺癌	顺铂+紫杉醇	TP	lung85gene
 非小细胞肺癌	卡铂+紫杉醇	TP	lung85gene
 非小细胞肺癌	顺铂+紫杉醇脂质体	LP	lung85gene
 非小细胞肺癌	卡铂+紫杉醇脂质体	LP	lung85gene
 非小细胞肺癌	顺铂+白蛋白紫杉醇	nab-TP	lung85gene
 非小细胞肺癌	卡铂+白蛋白紫杉醇	nab-TP	lung85gene
 非小细胞肺癌	顺铂+多西他赛	DP	lung85gene
 非小细胞肺癌	卡铂+多西他赛	DP	lung85gene
 非小细胞肺癌	奈达铂+多西他赛	DP	lung85gene
 非小细胞肺癌	顺铂+吉西他滨	GP	lung85gene
 非小细胞肺癌	卡铂+吉西他滨	GP	lung85gene
 非小细胞肺癌	顺铂+培美曲塞	PP	lung85gene
 非小细胞肺癌	卡铂+培美曲塞	PP	lung85gene
 非小细胞肺癌	顺铂+长春瑞滨	NP	lung85gene
 非小细胞肺癌	顺铂+依托泊苷	EP	lung85gene
 小细胞肺癌	顺铂+依托泊苷	EP	lung85gene
 小细胞肺癌	卡铂+依托泊苷	EC	lung85gene
 小细胞肺癌	洛铂+依托泊苷	EL	lung85gene
 小细胞肺癌	顺铂+伊立替康	IP	lung85gene
 小细胞肺癌	卡铂+伊立替康	IC	lung85gene
 结直肠癌	奥沙利铂+亚叶酸钙+氟尿嘧啶	FOLFOX	crc88gene
 结直肠癌	伊立替康+亚叶酸钙+氟尿嘧啶	FOLFIRI	crc88gene
 结直肠癌	奥沙利铂+卡培他滨	CAPEOX（又称Xelox）	crc88gene
 结直肠癌	伊立替康+奥沙利铂+亚叶酸钙+氟尿嘧啶	FOLFOXIRI	crc88gene
 结直肠癌	伊立替康+卡培他滨	CapIRI或XELIRI	crc88gene
 结直肠癌	奥沙利铂+雷替曲塞	/	crc88gene
 结直肠癌	伊立替康+雷替曲塞	/	crc88gene
--- a/main.py
+++ b/main.py
@ -0,0 +1,24 @@
 import json
 import os
 import socket
 import sys
 from docxtpl import DocxTemplate
 from tools.parsexlsx import run
 def main(path):
    """Render the Word report for one parsed workbook and return its path.

    ``path`` is handed to ``run`` (from ``tools.parsexlsx``), whose JSON text
    result becomes the rendering context of ``template/nreport.docx``. The
    rendered document is saved as ``result/<barcode>.docx`` next to this
    script, where the barcode is read from ``context['c']['barcode']``.
    """
    here = os.path.dirname(__file__)
    context = json.loads(run(path))
    # Resolve the barcode first so a malformed context fails before rendering.
    barcode = context['c']['barcode']
    document = DocxTemplate(os.path.join(here, 'template', 'nreport.docx'))
    document.render(context)
    report_path = os.path.join(here, 'result', f'{barcode}.docx')
    document.save(report_path)
    return report_path
 # Script entry point: the first command-line argument is passed to main() as the input path.
 if __name__ == '__main__':
    main(sys.argv[1])
--- a/result/2023WSSW000709-T.docx
+++ b/result/2023WSSW000709-T.docx
--- a/result/BC-20220705.docx
+++ b/result/BC-20220705.docx
--- a/result/readme.md
+++ b/result/readme.md
--- a/scripts/BC-20220314_report.docx
+++ b/scripts/BC-20220314_report.docx
--- a/scripts/BC-20220705_report.docx
+++ b/scripts/BC-20220705_report.docx
--- a/scripts/check_report_merge_20230713.py
+++ b/scripts/check_report_merge_20230713.py
@ -0,0 +1,486 @@
 #!/usr/bin/python3
 # -*- coding: UTF-8 -*-
 import pandas as pd
 from pandas import DataFrame
 import numpy as np
 import logging
 import re
 import sys
 import os
 import json
 import glob
 import openpyxl
 from openpyxl import Workbook, load_workbook
 from openpyxl.drawing.image import Image
 # Exactly two positional arguments (output_dir, name) are required;
 # otherwise print the usage line and stop.
 if len(sys.argv) != 3:
    print('usage:python {} output_dir name'.format(sys.argv[0]))
    sys.exit()
 def snv_fusion_cnv(output_dir, name):
    """Write the ``snvindel``, ``fusion`` and ``cnv`` sheets of the check workbook.

    Reads the per-sample result tables below ``output_dir`` (``report/``,
    ``mutation/``, ``fusion/`` and ``cnvkit/``), joins them with the gene
    function and drug mechanism reference tables, writes
    ``<output_dir>/report/<name>.check_new.xlsx`` and finally embeds
    ``<output_dir>/cnvkit/<name>.cnv.png`` a few rows below the data of the
    ``cnv`` sheet.

    Args:
        output_dir: Root directory of the sample's analysis results.
        name: Sample name; it prefixes every input and output file name and
            is also the name of the sample column in the fusion table.

    Raises:
        OSError: If one of the input files is missing (``os.path.getsize``
            and ``open`` are called without guarding).
    """
    out_xlsx = "".join([output_dir, '/report/', name, '.check_new.xlsx'])
    # Gene function lookup, keyed by upper-cased gene symbol (header line skipped).
    genefunction = {}
    with open("/dataseq/jmdna/codes/reportbase/gene_function.txt", 'r', encoding='utf-8') as gf_handle:
        gf = gf_handle.readlines()
    for line in gf[1:]:
        fields = line.strip().split("\t")
        genefunction[fields[0].upper()] = fields[1]
    # '.' is the placeholder used for a missing gene symbol; map it to itself.
    genefunction['.'] = '.'
    # Drug mechanism lookup: upper-cased drug name -> "disease\\mechanism".
    drug_mechanism = {}
    with open("/dataseq/jmdna/codes/reportbase/target_drug.txt", 'r', encoding='utf-8') as drug_handle:
        drug_fh = drug_handle.readlines()
    for line in drug_fh[1:]:
        fields = line.split("\t")
        disease = fields[8]
        mechanism = fields[11]
        # Columns 0 and 1 both hold '|'-separated drug names.
        drugs = fields[0].split('|') + fields[1].split('|')
        if disease or mechanism:
            for drug in drugs:
                drug_mechanism[drug.upper()] = "\\\\".join([disease, mechanism]).strip()
    # ------------------------------------------------------------------
    # snvindel sheet
    # ------------------------------------------------------------------
    filter_file = "".join([output_dir, '/report/', name, '.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt'])
    pos_file = "".join([output_dir, '/mutation/', name, '.snvindel.pos.dedup.txt'])
    vus_file = "".join([output_dir, '/mutation/', name, '.snvindel.vus.txt'])
    neg_file = "".join([output_dir, '/mutation/', name, '.snvindel.neg.txt'])
    # Filtered annotation table: drop rows whose '可信' flag is 0, then add the derived columns.
    if os.path.getsize(filter_file) > 0:
        snv = pd.read_table(filter_file, sep="\t")
        snv.drop(snv[snv['可信'] == 0].index, inplace=True)
        snv.insert(loc=24, column='ACMG_level', value=0)
        snv.insert(loc=25, column='Deleterious', value=0)
        snv.insert(loc=26, column='freq_high', value=0)
        for index, row in snv.iterrows():
            # ACMG level from the ClinVar significance text.
            if re.search("Likely_pathogenic|drug", (row['CLNSIG']), re.I):
                snv.loc[index, 'ACMG_level'] = '2'
            elif re.search("pathogenic", (row['CLNSIG']), re.I) and not re.search("Conflicting", (row['CLNSIG']), re.I):
                snv.loc[index, 'ACMG_level'] = '1'
            else:
                snv.loc[index, 'ACMG_level'] = '3'
            # Number of the three predictors that report "D".
            snv.loc[index, "Deleterious"] = (
                snv.loc[index, ['MutationTaster_pred', 'FATHMM_pred', 'MetaLR_pred']].tolist().count("D"))
            # Largest value among the population-frequency columns ('.' counts as '0').
            snv.loc[index, "freq_high"] = ((snv.loc[
                index, ['1000g2015aug_all', '1000g2015aug_eas', 'esp6500siv2_all', 'ExAC_nontcga_ALL',
                        'ExAC_nontcga_EAS', 'gnomAD_genome_ALL', 'gnomAD_genome_EAS']]).replace('.', '0')).max()
        # Positional column selection; it relies on the upstream column layout.
        snv_1 = snv.iloc[:, list(range(14)) + [15, 17, 18, 20, 23, 24, 25, 26, 111, 112, 113]]
    else:
        snv_1 = pd.DataFrame(columns=[])
    # Positive (drug-associated) variants: one output row per gene/change/function group.
    if os.path.getsize(pos_file) > 0:
        pos = pd.read_table(pos_file, sep="\t")
        pos = pos.iloc[:, [7, 10, 18, 23, 24, 25, 29, 30, 31, 32]]
        pos_1 = pd.DataFrame(
            columns=['AAChange.refGene', 'OKBSIG', 'AMP_evidence_level', 'AMP_mut_level', 'Indication', 'Drug',
                     'Response_Type', 'Evidence_Source', 'EfficacyEvidence', 'Drug_Detail', 'Gene_function',
                     'Drug_Category'])
        pos = list(pos.groupby(['Gene.refGene', 'AAChange.refGene', 'fun_change']))
        for i in pos:
            for index, row in i[1].iterrows():
                drugs = row['药物中文名'].replace(" + ", ",")
                drugs = list(set(drugs.split(",")))
                drug_mm = ''
                for drug in drugs:
                    if drug.upper() in drug_mechanism.keys():
                        drug_mm += '[[' + drug + ']]' + drug_mechanism[drug.upper()]
                i[1].loc[index, ['Drug_Detail']] = drug_mm
                # NOTE(review): this assignment changes only the local ``row`` used by the
                # category checks below; the group's '证据等级' column keeps its value.
                if row['标签'] == '非适应症':
                    row['证据等级'] = 'C'
                if (re.search("敏感", row['Response_Type_C']) and row['证据等级'] == 'A'):
                    i[1].loc[index, ['Drug_Category']] = 'a'
                elif re.search("敏感", row['Response_Type_C']) and row['证据等级'] == 'C':
                    i[1].loc[index, ['Drug_Category']] = 'b'
                elif re.search("耐药", row['Response_Type_C']):
                    i[1].loc[index, ['Drug_Category']] = 'd'
                else:
                    i[1].loc[index, ['Drug_Category']] = 'c'
            i[1]['AMP_mut_level'] = i[1]['证据等级'].replace(['A', 'B', 'C', 'D'], ['I', 'I', 'II', 'II'])
            pos_1.loc[len(pos_1)] = [i[0][1], i[0][2], '|'.join(list(i[1]['证据等级'])),
                                     '|'.join(list(i[1]['AMP_mut_level'])), '|'.join(list(i[1]['疾病中文名'])),
                                     '|'.join(list(i[1]['药物中文名'])),
                                     '|'.join(list(i[1]['Response_Type_C'])), '|'.join(list(i[1]['Evidence_Source_C'])),
                                     '|'.join(list(i[1]['EfficacyEvidence'])), '|'.join(list(i[1]['Drug_Detail'])),
                                     genefunction[i[0][0].upper()], '|'.join(list(i[1]['Drug_Category']))]
    else:
        pos_1 = pd.DataFrame(columns=[])
    # VUS table: two positional columns plus a constant AMP level 'III'.
    if os.path.getsize(vus_file) > 0:
        vus = pd.read_table(vus_file, sep="\t")
        vus_1 = vus.iloc[:, [9, 17]]
        vus_1.insert(loc=2, column='AMP_mut_level', value='III')
        vus_1 = vus_1.rename(columns={'fun_change': 'OKBSIG'})
    else:
        vus_1 = pd.DataFrame(columns=[])
    # Negative table: same shape as the VUS table, constant AMP level 'IIII'.
    if os.path.getsize(neg_file) > 0:
        neg = pd.read_table(neg_file, sep="\t")
        neg_1 = neg.iloc[:, [9, 17]]
        neg_1.insert(loc=2, column='AMP_mut_level', value='IIII')
        neg_1 = neg_1.rename(columns={'fun_change': 'OKBSIG'})
    else:
        neg_1 = pd.DataFrame(columns=[])
    # Empty frame that fixes the column order of the final sheet.
    snvindel_sheet = pd.DataFrame(
        columns=['可信', 'Chr', 'Start', 'End', 'Ref', 'Alt', 'AAChange.refGene', 'mutant_frequency', 'total_reads',
                 'mutant_reads', 'strand_bias', 'Otherinfo10', 'Func.refGene', 'Gene.refGene', 'ExonicFunc.refGene',
                 'avsnp150', 'cosmic91', 'CLNDN', 'CLNSIG', 'ACMG_level', 'Deleterious', 'freq_high', 'OKBSIG',
                 'AMP_evidence_level', 'AMP_mut_level', 'Indication', 'Drug', 'Response_Type', 'Evidence_Source',
                 'EfficacyEvidence', 'Drug_Detail', 'Gene_function', 'Drug_Category', 'Otherinfo11', 'Otherinfo12',
                 'Otherinfo13'])
    pos_vus_neg = pd.concat([pos_1, vus_1, neg_1])
    snv_pos_vus_neg = snv_1.merge(pos_vus_neg, how='left', on='AAChange.refGene')
    snvindel_sheet = pd.concat([snvindel_sheet, snv_pos_vus_neg])
    snvindel_sheet = snvindel_sheet.replace(np.nan, '.')
    snvindel_sheet.rename(columns={"可信": "Validated"}, inplace=True)
    # ------------------------------------------------------------------
    # fusion sheet
    # ------------------------------------------------------------------
    fusion_pos_file = "".join([output_dir, '/fusion/', name, '.fusion.pos.dedup.txt'])
    fusion_vus_file = "".join([output_dir, '/fusion/', name, '.fusion.vus.txt'])
    if os.path.getsize(fusion_pos_file) > 0:
        fusion_pos = pd.read_table(fusion_pos_file, sep="\t")
    else:
        fusion_pos = pd.DataFrame(columns=[])
    if os.path.getsize(fusion_vus_file) > 0:
        fusion_vus = pd.read_table(fusion_vus_file, sep="\t")
        # The VUS table gets a '可信' column filled with 1 so it can be grouped with the positives.
        fusion_vus.insert(loc=0, column='可信', value=1)
    else:
        fusion_vus = pd.DataFrame(columns=[])
    fusion_pos_vus = pd.concat([fusion_pos, fusion_vus])
    fusion_sheet = pd.DataFrame(
        columns=['Validated', 'CHROM1', 'POS1', 'CHROM2', 'POS2', 'GENE1', 'GENE2', 'FUSION', 'Support_reads(PE:SR)',
                 'Depth', 'FREQ1', 'FREQ2', 'OKBSIG', 'AMP_evidence_level',
                 'AMP_mut_level', 'Indication', 'Drug', 'Response_Type', 'Evidence_Source', 'Efficacy_Evidence',
                 'Drug_Detail', 'Gene_function', 'Drug_Category', 'INFO', 'FORMAT', 'Sample'])
    if not fusion_pos_vus.empty:
        fusion_pos_vus = fusion_pos_vus.replace(np.nan, '.')
        fusion = list(fusion_pos_vus.groupby(
            ['可信', '#CHROM', 'POS', 'CHROM2', 'POS2', 'GENE1', 'GENE2', 'FUSION', 'FREQ1', 'FREQ2', 'fun_change',
             'INFO', 'FORMAT', name, 'Gene_Symbol']))
        for i in fusion:
            for index, row in i[1].iterrows():
                drugs = row['药物中文名'].replace(" + ", ",")
                drugs = list(set(drugs.split(",")))
                drug_mm = ''
                for drug in drugs:
                    if drug.upper() in drug_mechanism.keys():
                        drug_mm += '[[' + drug + ']]' + drug_mechanism[drug.upper()]
                i[1].loc[index, ['Drug_Detail']] = drug_mm
                if row['标签'] == '非适应症':
                    row['证据等级'] = 'C'
                if (re.search("敏感", row['Response_Type_C']) and row['证据等级'] == 'A'):
                    i[1].loc[index, ['Drug_Category']] = 'a'
                elif re.search("敏感", row['Response_Type_C']) and row['证据等级'] == 'C':
                    i[1].loc[index, ['Drug_Category']] = 'b'
                elif re.search("耐药", row['Response_Type_C']):
                    i[1].loc[index, ['Drug_Category']] = 'd'
                elif row['Response_Type_C'] == '.':
                    i[1].loc[index, ['Drug_Category']] = '.'
                else:
                    i[1].loc[index, ['Drug_Category']] = 'c'
            i[1]['AMP_mut_level'] = i[1]['证据等级'].replace(['A', 'B', 'C', 'D'], ['I', 'I', 'II', 'II'])
            # i[0][13] is the sample column; its ':'-separated fields 1 and 7 fill
            # the 'Support_reads(PE:SR)' and 'Depth' columns.
            fusion_sheet.loc[len(fusion_sheet)] = list(i[0][0:8]) + [i[0][13].split(":")[1],
                                                                     i[0][13].split(":")[7]] + list(i[0][8:11]) + [
                                                      '|'.join(list(i[1]['证据等级'])),
                                                      '|'.join(list(i[1]['AMP_mut_level'])),
                                                      '|'.join(list(i[1]['疾病中文名'])), '|'.join(list(i[1]['药物中文名'])),
                                                      '|'.join(list(i[1]['Response_Type_C'])),
                                                      '|'.join(list(i[1]['Evidence_Source_C'])),
                                                      '|'.join(list(i[1]['EfficacyEvidence'])),
                                                      '|'.join(list(i[1]['Drug_Detail'])),
                                                      genefunction[i[0][14].upper()],
                                                      '|'.join(list(i[1]['Drug_Category']))] + list(i[0][11:14])
        fusion_sheet = fusion_sheet.replace(np.nan, '.')
    # ------------------------------------------------------------------
    # cnv sheet
    # ------------------------------------------------------------------
    # Built from output_dir/name like every other input; a path hard-coded to
    # one sample would report that sample's CNVs for every run.
    cnv_pos_file = "".join([output_dir, '/cnvkit/', name, '.cnv.pos.dedup.txt'])
    cnv_sheet = pd.DataFrame(
        columns=['Validated', 'Chromosome', 'Start', 'End', 'Gene', 'Depth', 'Probes', 'Copy_number', 'OKBSIG',
                 'Gene_Symbol', 'AMP_evidence_level', 'AMP_mut_level',
                 'Indication', 'Drug', 'Response_Type', 'Evidence_Source', 'Efficacy_Evidence', 'Drug_Detail',
                 'Gene_Function', 'Drug_Category'])
    if os.path.getsize(cnv_pos_file) > 0:
        cnv_pos = pd.read_table(cnv_pos_file, sep="\t")
        cnv = list(cnv_pos.groupby(
            ['可信', 'chromosome', 'start', 'end', 'gene', 'depth', 'probes', 'cn', 'fun_change', 'Gene_Symbol']))
        for i in cnv:
            for index, row in i[1].iterrows():
                drugs = row['药物中文名'].replace(" + ", ",")
                drugs = list(set(drugs.split(",")))
                drug_mm = ''
                for drug in drugs:
                    if drug.upper() in drug_mechanism.keys():
                        drug_mm += '[[' + drug + ']]' + drug_mechanism[drug.upper()]
                i[1].loc[index, ['Drug_Detail']] = drug_mm
                if row['标签'] == '非适应症':
                    row['证据等级'] = 'C'
                if (re.search("敏感", row['Response_Type_C']) and row['证据等级'] == 'A'):
                    i[1].loc[index, ['Drug_Category']] = 'a'
                elif re.search("敏感", row['Response_Type_C']) and row['证据等级'] == 'C':
                    i[1].loc[index, ['Drug_Category']] = 'b'
                elif re.search("耐药", row['Response_Type_C']):
                    i[1].loc[index, ['Drug_Category']] = 'd'
                elif row['Response_Type_C'] == '.':
                    i[1].loc[index, ['Drug_Category']] = '.'
                else:
                    i[1].loc[index, ['Drug_Category']] = 'c'
            i[1]['AMP_mut_level'] = i[1]['证据等级'].replace(['A', 'B', 'C', 'D'], ['I', 'I', 'II', 'II'])
            cnv_sheet.loc[len(cnv_sheet)] = list(i[0][0:10]) + ['|'.join(list(i[1]['证据等级'])),
                                                                '|'.join(list(i[1]['AMP_mut_level'])),
                                                                '|'.join(list(i[1]['疾病中文名'])),
                                                                '|'.join(list(i[1]['药物中文名'])),
                                                                '|'.join(list(i[1]['Response_Type_C'])),
                                                                '|'.join(list(i[1]['Evidence_Source_C'])),
                                                                '|'.join(list(i[1]['EfficacyEvidence'])),
                                                                '|'.join(list(i[1]['Drug_Detail'])),
                                                                genefunction[i[0][9].upper()],
                                                                '|'.join(list(i[1]['Drug_Category']))]
    with pd.ExcelWriter(out_xlsx) as writer:
        snvindel_sheet.to_excel(writer, sheet_name="snvindel", index=False)
        fusion_sheet.to_excel(writer, sheet_name="fusion", index=False)
        cnv_sheet.to_excel(writer, sheet_name="cnv", index=False)
    # Embed cnvkit/<name>.cnv.png in the cnv sheet, four rows below the last used row.
    wb = openpyxl.load_workbook(filename=out_xlsx)
    ws = wb['cnv']
    mr = ws.max_row
    cell = 'C' + str(mr + 4)
    cnv_pic = "".join([output_dir, '/cnvkit/', name, '.cnv.png'])
    image = Image(cnv_pic)
    ws.add_image(image, cell)
    wb.save(out_xlsx)
 class PostProcess:
    """Collect auxiliary result files of one sample into extra Excel sheets.

    Each public method reads the first file matching a glob pattern below
    ``path`` and returns its content as a list of row dictionaries (or
    ``None`` / an empty list when no file matches). ``collect`` appends one
    sheet per method to the existing workbook at ``outpath``.
    """

    # Population-frequency columns whose row-wise maximum becomes ``freq_high``.
    _FREQ_COLUMNS = ['1000g2015aug_all', '1000g2015aug_eas', 'esp6500siv2_all', 'ExAC_nontcga_ALL',
                     'ExAC_nontcga_EAS', 'gnomAD_genome_ALL', 'gnomAD_genome_EAS']
    # Predictor columns; ``Deleterious`` counts how many of them equal "D".
    _PRED_COLUMNS = ['MutationTaster_pred', 'FATHMM_pred', 'MetaLR_pred']

    def __init__(self, path, outpath):
        """Remember the sample directory and workbook path and load the column config.

        Args:
            path: Root directory of the sample's analysis results.
            outpath: Existing ``.xlsx`` workbook the sheets are appended to.
        """
        self.path = path
        self.outpath = outpath
        self.neeecol = self.need_col()

    def need_col(self):
        """Read ``columns.csv`` next to this script.

        Returns:
            dict: CSV column header -> list of its non-empty cell values,
            i.e. the column names wanted for the sheet of that name.
        """
        path = os.path.join(os.path.dirname(__file__), 'columns.csv')
        cols = pd.read_csv(path)
        cols = cols.fillna('')
        cols_record = cols.to_dict('list')
        for sheet in cols_record:
            cols_record[sheet] = [x for x in cols_record[sheet] if x]
        return cols_record

    def _select_records(self, df, sheet_key):
        """Return the configured columns of ``df`` as row dictionaries.

        Raises:
            UserWarning: If a column configured under ``sheet_key`` is not in ``df``.
        """
        try:
            # The selection must sit inside the ``try``: it is the statement
            # that raises KeyError when the configuration and header disagree.
            return df[self.neeecol.get(sheet_key, [])].to_dict('records')
        except KeyError as e:
            raise UserWarning('表头设置和配置文件不对应', e) from e

    def _annotated_records(self, files, sheet_key):
        """Shared body of ``heredity`` and ``MMR``.

        Reads the first file of ``files`` as TSV, adds ``freq_high`` (row-wise
        maximum of the frequency columns, '.' counted as 0) and ``Deleterious``
        (number of predictor columns equal to "D") and returns the columns
        configured under ``sheet_key``. Returns ``[]`` when ``files`` is empty.
        """
        if not files:
            return []
        df = pd.read_csv(files[0], sep='\t')
        df = df.fillna('.')
        # pd.to_numeric replaces eval(): file contents are data and must never
        # be executed as Python expressions.
        freqs = df[self._FREQ_COLUMNS].replace('.', 0).apply(pd.to_numeric)
        df['freq_high'] = freqs.max(axis=1)
        df['Deleterious'] = (df[self._PRED_COLUMNS] == 'D').sum(axis=1)
        return self._select_records(df, sheet_key)

    def msi(self):
        """Summarise the first ``MSI/*.msi`` file.

        Returns:
            list: A one-element list holding a dict with ``msi_count``,
            ``msi_value``, ``msi_result`` and ``msi_predict``; the dict is
            empty when no file matches.
        """
        msi_files = glob.glob(os.path.join(self.path, 'MSI', '*.msi'))
        msi_res = dict()
        if msi_files:
            df = pd.read_csv(msi_files[0], sep='\t')
            res = df.to_dict('records')[0]
            msi_res['msi_count'] = res['Total_Number_of_Sites']
            msi_res['msi_value'] = res['%']
            # Values of 0.3 and above are reported as MSI-H, everything else as MSS.
            if msi_res['msi_value'] >= 0.3:
                msi_res['msi_result'] = 'MSI-H'
                msi_res['msi_predict'] = '对免疫检查点抑制剂可能敏感'
            else:
                msi_res['msi_result'] = 'MSS'
                msi_res['msi_predict'] = '对免疫检查点抑制剂可能不敏感'
        return [msi_res]

    def chemo(self):
        """Return the rows of the first ``chemo/*chemo.res.txt`` file (chemotherapy results)."""
        chemo_files = glob.glob(os.path.join(self.path, 'chemo', '*chemo.res.txt'))
        chemo_res = []
        if chemo_files:
            df = pd.read_csv(chemo_files[0], sep='\t')
            df = df.fillna('.')
            chemo_res = df.to_dict('records')
        return chemo_res

    def heredity(self):
        """Return the germline (hereditary) variant rows with the 'HCS' columns.

        Raises:
            UserWarning: If a configured 'HCS' column is missing from the file.
        """
        heredi_files = glob.glob(os.path.join(self.path, 'mutation', '*Germline*filtered.txt'))
        return self._annotated_records(heredi_files, 'HCS')

    def MMR(self):
        """Return the rows of the first ``MMR/*mmr.pre.txt`` file.

        NOTE(review): the columns are taken from the 'HCS' configuration entry,
        same as ``heredity`` - confirm that no dedicated 'MMR' entry is intended.

        Raises:
            UserWarning: If a configured 'HCS' column is missing from the file.
        """
        mmr_files = glob.glob(os.path.join(self.path, 'MMR', '*mmr.pre.txt'))
        return self._annotated_records(mmr_files, 'HCS')

    def hotspot(self):
        """Return the hotspot annotation rows, or ``None`` when no file matches."""
        hotspot_files = glob.glob(
            os.path.join(self.path, 'mutation', 'hotspot', '*hotspot.snp.indel.filter.anno.hg19_multianno.txt'))
        if hotspot_files:
            return self.txt_2_excel(hotspot_files[0])

    def splicing(self):
        """Return the target splicing rows, or ``None`` when no file matches."""
        splicing_files = glob.glob(
            os.path.join(self.path, 'mutation', '*.target.splicing.txt'))
        if splicing_files:
            return self.txt_2_excel(splicing_files[0])

    def indication(self):
        """Return the indication rows, or ``None`` when no file matches."""
        indication_files = glob.glob(
            os.path.join(self.path, 'mutation', '*indication.txt'))
        if indication_files:
            return self.txt_2_excel(indication_files[0])

    def longindel(self):
        """Return the long-indel rows, or ``None`` when no file matches."""
        longindel_files = glob.glob(
            os.path.join(self.path, 'fusion', '*.longindel.pos.txt'))
        if longindel_files:
            return self.txt_2_excel(longindel_files[0])

    def cms(self):
        """Return the sample information from the first ``qc/*_post.json`` file.

        The JSON document's ``data`` entry is loaded into a DataFrame and
        reduced to the columns configured under 'sample_info'.

        Raises:
            UserWarning: If a configured 'sample_info' column is missing.
        """
        cms_files = glob.glob(os.path.join(self.path, 'qc', '*_post.json'))
        cms_info_need = []
        if cms_files:
            with open(cms_files[0], 'r') as file_read:
                cms_info = json.load(file_read)['data']
            df = pd.DataFrame(cms_info)
            cms_info_need = self._select_records(df, 'sample_info')
        return cms_info_need

    def qc(self):
        """Return the QC metrics of the first ``qc/*_qc.txt`` file as one row.

        The file is read as a header-less TSV whose first column becomes the
        keys of the returned row. The pattern is ``*_qc.txt`` and not the
        ``*_post.json`` used by ``cms``: that JSON document cannot be parsed
        as a header-less TSV.
        """
        qc_files = glob.glob(os.path.join(self.path, 'qc', '*_qc.txt'))
        qc_res = []
        if qc_files:
            df = pd.read_csv(qc_files[0], sep='\t', header=None)
            df = df.set_index(0).T
            qc_res = df.to_dict('records')
        return qc_res

    # TODO: a vectorised snv() (pos-file aggregation of evidence level, drug
    # category and AMP level via groupby/agg) was drafted here but never finished.

    def txt_2_excel(self, path):
        """Read a TSV file into row dictionaries; a completely empty file gives ``[]``."""
        try:
            df = pd.read_csv(path, sep='\t')
        except pd.errors.EmptyDataError:
            return []
        return df.to_dict('records')

    def collect(self):
        """Append one sheet per result type to the workbook at ``self.outpath``."""
        # Gather everything first so a failing reader cannot leave an open writer behind.
        sheet = {
            'MSI': self.msi(),
            'chemo': self.chemo(),
            'HCS': self.heredity(),
            'sample_info': self.cms(),
            'MMR': self.MMR(),
            'hotspot': self.hotspot(),
            'MET': self.splicing(),
            'indication': self.indication(),
            'longindel': self.longindel(),
            'qc': self.qc()
        }
        # The context manager saves and closes the workbook on exit.
        with pd.ExcelWriter(self.outpath, mode='a', engine='openpyxl') as writer:
            for sheet_name, records in sheet.items():
                pd.DataFrame(records).to_excel(writer, sheet_name=sheet_name, index=False)
 if __name__ == '__main__':
    # argv[1] is the sample's output directory, argv[2] the sample name.
    output_dir, sample_name = sys.argv[1], sys.argv[2]
    # First write the snvindel/fusion/cnv workbook ...
    snv_fusion_cnv(output_dir, sample_name)
    # ... then append the remaining sheets to that same workbook.
    # No logging yet; paths are not configurable yet.
    workbook_path = "".join([output_dir, '/report/', sample_name, '.check_new.xlsx'])
    PostProcess(output_dir, workbook_path).collect()
--- a/scripts/comcancergene_report.py
+++ b/scripts/comcancergene_report.py
@ -0,0 +1,864 @@
 #!/usr/bin/python3
 # -*- coding: UTF-8 -*-
 ##https://www.pianshen.com/article/5314917437/
 ##https://zhuanlan.zhihu.com/p/366902690
 ##https://itpcb.com/a/277599
 import docxtpl
 from docx.shared import Mm
 from docxtpl import DocxTemplate,RichText
 import pandas as pd
 from pandas import DataFrame
 import re
 import sys
 import os
 import json
 import time
 # Command line: output_dir, sample name, sample type, project.
 if len(sys.argv) != 5:
    # The fourth argument used to be misspelled and printed outside the joined
    # usage string; a usage error now also exits with a non-zero status.
    print(" ".join(['usage:python',sys.argv[0],'output_dir','tumor','sample_type(t for tissue,c for cfdna)','project']))
    sys.exit(1)
 output_dir=sys.argv[1]
 name=sys.argv[2]
 Sample_type=sys.argv[3]
 # NOTE: the misspelled variable name is kept because code outside this
 # section may refer to it.
 projcet=sys.argv[4]
 # Input files, all located under output_dir and named after the sample.
 snv_base="".join([name,'.snvindel.pos.dedup.txt'])
 snv_file='/'.join([output_dir,'mutation',snv_base])
 snv_base_vus="".join([name,'.snvindel.vus.txt'])
 snv_file_vus='/'.join([output_dir,'mutation',snv_base_vus])
 fusion_base="".join([name,'.fusion.pos.dedup.txt'])
 fusion_file='/'.join([output_dir,'fusion',fusion_base])
 cnv_base="".join([name,'.cnv.pos.dedup.txt'])
 cnv_file='/'.join([output_dir,'cnvkit',cnv_base])
 qc_base=''.join([name,'_qc.txt'])
 qc_file='/'.join([output_dir,'qc',qc_base])
 # Output document.
 report_base="".join([name,'_report.docx'])
 report_file='/'.join([output_dir,'report',report_base])
 indication_file="".join([output_dir,'/mutation/','indication.txt'])
 # Accumulator for everything the sections below collect.
 context = {'list1':[],'list2':{},'list3':{},'clingene1':[],'clingene2':[],'nonclingenes':[],'genefunc':{},
 'indication':[],'mmr':[],'chemo':[]}
 #genefunction: upper-cased first column -> second column of gene_function.txt
 genefunction={}
 # Read through a context manager so the handle is closed (it used to leak).
 with open("/dataseq/jmdna/codes/reportbase/gene_function.txt",'r',encoding='utf-8') as _gf_handle:
    gf=_gf_handle.readlines()
 for line in gf[1:]:  # the first line is skipped as a header
    _gf_fields=line.strip().split("\t")
    gene=_gf_fields[0]
    func=_gf_fields[1]
    genefunction[gene.upper()]=func
 ##sensitive_resistant_drug
 # Upper-cased drug name -> column 8 / column 11 of target_drug.txt.
 drug_disease={}
 drug_mechanism={}
 with open("/dataseq/jmdna/codes/reportbase/target_drug.txt",'r',encoding='utf-8') as _drug_handle:
    drug_fh=_drug_handle.readlines()
 for line in drug_fh[1:]:
    # The line is deliberately not stripped here, as before; the joined text
    # is stripped where it is used.
    _drug_fields=line.split("\t")
    disease=_drug_fields[8]
    mechanism=_drug_fields[11]
    drugs=_drug_fields[0].split('|')  # one row may list several names separated by '|'
    if disease or mechanism:
        for drug in drugs:
            drug_disease[drug.upper()]=disease
            drug_mechanism[drug.upper()]=mechanism
 # Index 0 collects possibly-sensitive drugs, index 1 possibly-resistant drugs.
 sensitive_resistant_drug=[{'type':'可能敏感药物','drug':[]},{'type':'可能耐药药物','drug':[]}]
 ##Shared helpers for the snvindel / vus / fusion / cnv sections below.
 ##They replace four hand-copied versions of the same logic.
 def _evidence_entry(row):
    """Return the evidence-table record for one annotated drug row."""
    return {'drug':row['药物中文名'],
            'effect':row['Response_Type_C'],
            'tumor':row['疾病中文名'],
            'evidence':row['Evidence_Source_C'],
            'sig':row['EfficacyEvidence']}

 def _record_drug_mechanisms(row):
    """Add the row's drugs that appear in drug_disease to sensitive_resistant_drug.

    The English and Chinese drug lists are split the same way and matched by
    position. A drug goes to entry 0 when Response_Type_C contains '敏感', to
    entry 1 when it contains '耐药', and is skipped otherwise. Duplicates are
    not added twice.
    """
    drugs=row['Drug'].replace(" + ",",").split(",")
    drugs_chinese=row['药物中文名'].replace(" + ",",").split(",")
    for position,drug in enumerate(drugs):
        key=drug.upper()
        if key not in drug_disease:
            continue
        if re.search(r'敏感',row['Response_Type_C']):
            bucket=sensitive_resistant_drug[0]['drug']
        elif re.search(r'耐药',row['Response_Type_C']):
            bucket=sensitive_resistant_drug[1]['drug']
        else:
            continue
        entry={'name':drugs_chinese[position],
               'mechanism':("\n".join([drug_disease[key],drug_mechanism[key]])).strip()}
        if entry not in bucket:
            bucket.append(entry)

 def _graded_drug_lists(frame,key_column,mut):
    """Return the drug_A..drug_D strings for the rows of frame where key_column == mut.

    drug_A: label '适应症' and sensitive, always graded 'A'.
    drug_B: label '非适应症' and sensitive, always graded 'C'.
    drug_C: label '.' and sensitive, graded with the row's own '证据等级'.
    drug_D: resistant rows, graded 'C' for label '非适应症', else the row's own grade.
    Each value is newline-joined, de-duplicated in first-seen order, or '/'
    when no drug qualifies.
    """
    same=frame[key_column]==mut
    sensitive=frame['Response_Type_C'].str.contains("敏感")
    resistant=frame['Response_Type_C'].str.contains("耐药")
    selections=(
        ('drug_A',same & (frame['标签']=='适应症') & sensitive,lambda row: 'A'),
        ('drug_B',same & (frame['标签']=='非适应症') & sensitive,lambda row: 'C'),
        ('drug_C',same & (frame['标签']=='.') & sensitive,lambda row: row['证据等级']),
        ('drug_D',same & resistant,lambda row: 'C' if row['标签']=='非适应症' else row['证据等级']),
    )
    graded={}
    for field,mask,grade_of in selections:
        labelled=[]
        for index,row in frame[mask].iterrows():
            evidence=grade_of(row)
            labelled.extend([str(x) + '【' + evidence + ' 级】' for x in row['药物中文名'].split(",")])
        # dict.fromkeys keeps the first occurrence of each label, in order.
        graded[field]="\n".join(dict.fromkeys(labelled)) or '/'
    return graded

 # ExonicFunc.refGene prefix -> label shown in the report; tested in this order.
 _MUTTYPE_LABELS=(
    ("nonsynonymous SNV",'错义突变'),
    ("frameshift",'移码突变'),
    ("nonframeshift",'非移码突变'),
    ("stopgain",'提前终止'),
 )

 def _translate_muttype(raw):
    """Map an ExonicFunc.refGene value to its report label, or '/' if none matches."""
    for prefix,label in _MUTTYPE_LABELS:
        if re.match(prefix,raw):
            return label
    return '/'

 def _variant_record(aachange,freq,exonic_func):
    """Build one variant-table row from a colon-separated AAChange.refGene value.

    Five fields are gene:transcript:exon:nacid:aacid. Otherwise exactly four
    fields are expected and aacid is reported as '/'; any other field count
    raises ValueError, as it did before.
    """
    parts=aachange.split(":")
    if len(parts) == 5:
        gene_name,transcript,exon,nacid,aacid=parts
    else:
        gene_name,transcript,exon,nacid=parts
        aacid='/'
    return {'gene':gene_name,'transcript':transcript,'exon':exon,'nacid':nacid,'aacid':aacid,
            'freq':freq,'muttype':_translate_muttype(exonic_func)}

 def _goes_to_clingene1(row):
    """True when the row is labelled '适应症' or carries evidence grade 'B'."""
    return row['标签']=='适应症' or row['证据等级']=='B'

 def _append_fixed_clingenes(rows,gene_name,freq,muttype):
    """Append one fusion/cnv style record per row to clingene1 or clingene2.

    As in the original code the same record object is appended once per row;
    duplicates are removed by the later de-duplication section.
    """
    clingene1={'gene':gene_name,'freq':freq,'transcript':"/",'exon':"/",'nacid':"/",'aacid':"/",'muttype':muttype}
    clingene2=dict(clingene1)
    for index,row in rows.iterrows():
        if _goes_to_clingene1(row):
            context['clingene1'].append(clingene1)
        else:
            context['clingene2'].append(clingene2)

 ##somatic snvindel processing
 snv_size = os.path.getsize(snv_file)
 if snv_size>0:
    snv=pd.read_table(snv_file,sep="\t")
    # Drop the rows whose '可信' column is 0.
    cols=list(snv[snv['可信']==0].index)
    snv.drop(cols,inplace=True)
    genes=snv['Gene.refGene'].drop_duplicates()
    if len(genes):
        for gene in genes:
            rt={gene:[]}
            muts=snv['AAChange.refGene'][snv['Gene.refGene']==gene].drop_duplicates()
            for mut in muts:
                rows=snv[snv['AAChange.refGene']==mut]
                first=rows.reset_index(drop=True)
                info2={mut:[]}
                for index,row in rows.iterrows():
                    info2[mut].append(_evidence_entry(row))
                    _record_drug_mechanisms(row)
                context['list2'][mut]=info2[mut]
                info={}
                info['gene']=first['Gene.refGene'][0]
                # Prefer the protein-level change, fall back to the cDNA change.
                m=re.search(r'(p\..*)$',mut) or re.search(r'(c\..*)$',mut)
                if m is None:
                    # Previously this crashed with AttributeError on None.group().
                    raise ValueError('AAChange.refGene value has neither a p. nor a c. change: %r' % (mut,))
                info['p']=m.group(1)
                info['freq']=first['Freq'][0]
                info.update(_graded_drug_lists(snv,'AAChange.refGene',mut))
                context['list1'].append(info)
                rt[gene].append("".join([info['p'],'(',info['freq'],')']))
                ##genefunc
                context['genefunc'][mut]=genefunction[gene.upper()]
                ##clingenes: only the first row of the mutation decides the table
                row=rows.iloc[0]
                clingene=_variant_record(mut,first['Freq'][0],first['ExonicFunc.refGene'][0])
                if _goes_to_clingene1(row):
                    context['clingene1'].append(clingene)
                else:
                    context['clingene2'].append(clingene)
            context['list3'][gene]="\n".join(rt[gene])
    else:
        # NOTE(review): presumably later code treats a zero size as "no snv to report" - confirm.
        snv_size=0
 ##target vus and nontarget vus
 snv_size_vus = os.path.getsize(snv_file_vus)
 if snv_size_vus>0:
    snv_vus=pd.read_table(snv_file_vus,sep="\t")
    for index,row in snv_vus.iterrows():
        # Only variants with a frequency of at least 2 (percent sign removed) are listed.
        if float(row['Freq'].replace('%',''))>=2:
            nonclingene=_variant_record(row['AAChange.refGene'],row['Freq'],row['ExonicFunc.refGene'])
            context['nonclingenes'].append(nonclingene)
 ##fusion processing
 fusion_size = os.path.getsize(fusion_file)
 if fusion_size>0:
    fusion=pd.read_table(fusion_file,sep="\t")
    cols=list(fusion[fusion['可信']==0].index)
    fusion.drop(cols,inplace=True)
    genes=fusion['Gene_Symbol'].drop_duplicates()
    if len(genes):
        for gene in genes:
            rt={gene:[]}
            fusions=fusion['FUSION'][fusion['Gene_Symbol']==gene].drop_duplicates()
            for mut in fusions:
                rows=fusion[fusion['FUSION']==mut]
                info2={mut:[]}
                for index,row in rows.iterrows():
                    info2[mut].append(_evidence_entry(row))
                    _record_drug_mechanisms(row)
                context['list2'][mut]=info2[mut]
                info={}
                info['gene']=mut
                info['p']='融合'
                info['freq']="".join([str(rows.reset_index(drop=True)['FREQ1'][0]),'%'])
                info.update(_graded_drug_lists(fusion,'FUSION',mut))
                context['list1'].append(info)
                rt[gene].append("".join([info['gene'],'(',info['freq'],')']))
                ##genefunc
                context['genefunc'][mut]=genefunction[gene.upper()]
                ##clingenes
                _append_fixed_clingenes(rows,info['gene'],info['freq'],'融合')
            context['list3'][gene]="\n".join(rt[gene])
    else:
        fusion_size=0
 ##cnv processing
 cnv_size = os.path.getsize(cnv_file)
 if cnv_size>0:
    cnv=pd.read_table(cnv_file,sep="\t")
    cols=list(cnv[cnv['可信']==0].index)
    cnv.drop(cols,inplace=True)
    genes=cnv['gene'].drop_duplicates()
    if len(genes):
        for gene in genes:
            rt={gene:[]}
            cnvs=cnv['Gene_Symbol'][cnv['gene']==gene].drop_duplicates()
            for mut in cnvs:
                rows=cnv[cnv['Gene_Symbol']==mut]
                info={}
                info['gene']=mut
                copy=rows.reset_index(drop=True)['cn'][0]
                # 'cn' above 2 is reported as '扩增', anything else as '缺失'.
                if copy > 2:
                    info['p']='扩增'
                else:
                    info['p']='缺失'
                info.update(_graded_drug_lists(cnv,'Gene_Symbol',mut))
                info['freq']=" ".join([str(copy),'拷贝'])
                context['list1'].append(info)
                info2={mut:[]}
                for index,row in rows.iterrows():
                    info2[mut].append(_evidence_entry(row))
                    _record_drug_mechanisms(row)
                rt[gene].append("".join([info['p'],'(',info['freq'],')']))
                context['list2'][" ".join([mut,info['p']])]=info2[mut]
                ##genefunc
                context['genefunc'][" ".join([mut,info['p']])]=genefunction[mut.upper()]
                ##clingenes
                _append_fixed_clingenes(rows,info['gene'],info['freq'],info['p'])
            context['list3'][gene]="\n".join(rt[gene])
    else:
        cnv_size=0
 ##msi (read only when the sample type argument is 't')
 if Sample_type == 't':
    msi_file=''.join([output_dir,'/MSI/',name,'.msi'])
    # Only the second line of the file is used; the handle is now closed
    # after reading instead of being left open.
    with open(msi_file,'r') as _msi_handle:
        msi=(_msi_handle.readlines()[1]).split("\t")
    context['msi_count']=msi[0]
    # The third field is divided by 100 and rounded to two decimals.
    context['msi_value']=round(float(msi[2].strip())/100,2)
    if context['msi_value']>=0.3:
        context['msi_result']='MSI-H'
        context['msi_predict']='对免疫检查点抑制剂可能敏感'
    else:
        context['msi_result']='MSS'
        context['msi_predict']='对免疫检查点抑制剂可能不敏感'
 ##MMR processing
 mmr_file=''.join([output_dir,'/MMR/',name,"_mmr.txt"])
 mmr_size = os.path.getsize(mmr_file)
 mmr_result=0
 mmr_result_summary=[]
 if mmr_size>0:
    with open(mmr_file,'r',encoding='utf-8') as _mmr_handle:
        mmr_fh=_mmr_handle.readlines()
    for line in mmr_fh[1:]:  # the first line is skipped as a header
        _mmr_fields=line.strip().split("\t")
        mmr={}
        mmr['gene']=_mmr_fields[0]
        mmr['transcript']=_mmr_fields[1]
        mmr['nacid']=_mmr_fields[2]
        mmr['aacid']=_mmr_fields[3]
        # Columns 4 and 5 are frequency and mutation type, in that file order.
        mmr['muttype']=_mmr_fields[5]
        mmr['freq']=_mmr_fields[4]
        mmr['sig']=_mmr_fields[6]
        # Every variant not marked '意义未明突变' counts towards the result.
        if mmr['sig'] != '意义未明突变':
            mmr_result+=1
            mmr_result_summary.append(mmr['gene'] + ' ' + mmr['aacid'])
        context['mmr'].append(mmr)
 context['mmr_result']=mmr_result
 mmr_result_summary=' | '.join(mmr_result_summary)
 if mmr_result_summary:
    # Fixed: this line used the undefined name `mrr_result_summary`, which
    # raised NameError whenever a counted MMR variant was present.
    context['mmr_result_summary']=mmr_result_summary
    context['mmr_predict']="对免疫检查点抑制剂可能敏感"
 else:
    context['mmr_result_summary']="未检测到相关基因突变"
    context['mmr_predict']="对免疫检查点抑制剂可能不敏感"
 ##chemo
 chemo_file=''.join([output_dir,'/chemo/',name,".drug.res.txt"])
 # The handle is closed as soon as the lines are read (it used to stay open).
 with open(chemo_file,'r') as chemo_fh:
    chemos=chemo_fh.readlines()[1:]
 chemo_result=0
 recommend_drug=[]
 normal_drug=[]
 restrict_drug=[]
 # Two drugs share one table row: keys ending in 1 hold the first drug of the
 # pair and keys ending in 2 the second (absent when the count is odd).
 for _pair_start in range(0,len(chemos),2):
    chemo={}
    for _slot,_chemo_line in enumerate(chemos[_pair_start:_pair_start+2],1):
        lines=_chemo_line.strip().split("\t")
        chemo['bool%d' % _slot]=_pair_start+_slot  # 1-based running number
        chemo['name%d' % _slot]=lines[0]
        chemo['result%d' % _slot]=lines[4]
        # Column 3 sorts the drug into one of the three summary lists.
        if lines[3] == '推荐':
            chemo_result+=1
            recommend_drug.append(lines[0])
        elif lines[3] == '常规':
            normal_drug.append(lines[0])
        elif lines[3] == '谨慎':
            restrict_drug.append(lines[0])
    context['chemo'].append(chemo)
 context['chemo_result']=chemo_result
 chemo_detail=[]
 chemo_detail_file=''.join([output_dir,'/chemo/',name,".drug.infos.txt"])
 chemo_data=pd.read_table(chemo_detail_file,sep="\t")
 chemo_drugs=chemo_data['药物'].drop_duplicates()
 context['recommend_drug']=','.join(recommend_drug)
 context['normal_drug']=','.join(normal_drug)
 context['restrict_drug']=','.join(restrict_drug)
 # One detail entry per distinct drug, listing every tested site for it.
 for drug in chemo_drugs:
    c={'drug':drug,'info':[]}
    for index,row in chemo_data[chemo_data['药物'] == drug].iterrows():
        c['info'].append({'gene':row['检测基因'],
                          'site':row['检测位点'],
                          'gt':row['基因型'],
                          'level':row['证据等级'],
                          'sig':row['用药提示']})
    chemo_detail.append(c)
 context['chemo_detail']=chemo_detail
 context['sensitive_resistant_drug']=sensitive_resistant_drug
 ##Combined chemotherapy regimens, grouped by the '癌种' column
 chemo_comb_file=output_dir + '/chemo/' + name + ".chemo.comb.txt"
 chemo=pd.read_table(chemo_comb_file,sep="\t")
 # NOTE: `type` and `bool` shadow the builtins, exactly as before; the names
 # are left untouched because code further down the file may read them.
 type=chemo['癌种'].drop_duplicates()
 chemo_comb=[]
 bool=-1
 for bool,i in enumerate(type):
    # Open a new group, then fill it with every regimen listed for it.
    chemo_comb.append({'type':i,'drug':[]})
    for index,row in chemo[chemo['癌种']==i].iterrows():
        info={'name':row['用药方案'],'abbr':row['方案缩写'],'sig':row['临床提示']}
        chemo_comb[-1]['drug'].append(info)
 context['chemo_comb']=chemo_comb
 ##hereditary cancer (only when the .hereditary.pre.txt marker file exists)
 if os.path.exists(''.join([output_dir,'/hereditary/',name,'.hereditary.pre.txt'])):
    context['hereditary_cancer_1']=[]
    context['hereditary_cancer_2']=[]
    hereditary_file1=''.join([output_dir,'/hereditary/',name,".hereditary.txt"])
    hereditary_file2=''.join([output_dir,'/hereditary/',name,".risk.txt"])
    # Both files are read up front and closed again. Previously three handles
    # were opened (file 1 twice) and none was closed. An empty file 1 simply
    # yields no lines, so the former size check is no longer needed.
    with open(hereditary_file1,'r') as hereditary_file1_fh:
        _hereditary_lines=hereditary_file1_fh.readlines()[1:]
    with open(hereditary_file2,'r') as hereditary_file2_fh:
        heres=hereditary_file2_fh.readlines()[1:]
    hereditary_result=0
    hereditary_result_summary=[]
    hereditary_disease=[]
    for line in _hereditary_lines:
        lines=line.strip().split("\t")
        hereditary_cancer_1={'gene':lines[0],
                             'syndrome':lines[1],
                             'hereditary_type':lines[2],
                             'type':lines[3],
                             'result':lines[4]}
        # The result column may hold several ';'-separated entries; each counts.
        hereditary_result+=len(lines[4].split(";"))
        hereditary_result_summary.append(hereditary_cancer_1['gene'] + ' ' + hereditary_cancer_1['result'])
        hereditary_disease.append(hereditary_cancer_1['syndrome'])
        context['hereditary_cancer_1'].append(hereditary_cancer_1)
    context['hereditary_result']=hereditary_result
    if hereditary_result_summary:
        context['hereditary_disease']=';'.join(hereditary_disease)
        context['hereditary_result_summary']=' | '.join(hereditary_result_summary)
    else:
        context['hereditary_disease']='/'
        context['hereditary_result_summary']='未检测到相关基因突变'
    hereditary_risk=[]
    # Two cancer types share one table row: keys ending in 1 hold the first
    # of the pair and keys ending in 2 the second (absent for an odd count).
    for _pair_start in range(0,len(heres),2):
        hereditary_cancer_2={}
        for _slot,_risk_line in enumerate(heres[_pair_start:_pair_start+2],1):
            lines=_risk_line.strip().split("\t")
            hereditary_cancer_2['type%d' % _slot]=lines[0]
            if lines[1] == '偏高':
                # Fixed: an elevated risk in the second slot of a pair was shown
                # in red in the table but never added to the summary list.
                hereditary_risk.append(lines[0])
                hereditary_cancer_2['risk%d' % _slot]=RichText('偏高', color='FF0000')
            elif lines[1] == '同一般人群':
                hereditary_cancer_2['risk%d' % _slot]=RichText('同一般人群')
        context['hereditary_cancer_2'].append(hereditary_cancer_2)
    if hereditary_risk:
        context['hereditary_risk']=','.join(hereditary_risk) + '风险可能较高'
    else:
        context['hereditary_risk']='风险同一般人群'
    # An empty table gets a single placeholder row.
    if len(context['hereditary_cancer_1'])==0:
        context['hereditary_cancer_1']=[{'gene':'/','syndrome':'/','hereditary_type':'/','type':'/','result':'/'}]
 ## Count the distinct drugs listed in the drug_A / drug_B / drug_C columns of list1
 total_drug_count=[]
 for row in context['list1']:
    for drug_key in ('drug_A','drug_B','drug_C'):
        # a column holding '/' is skipped entirely
        if row[drug_key] == '/':
            continue
        # one drug per line inside the cell; keep first occurrences only
        for drug_name in row[drug_key].split("\n"):
            if drug_name not in total_drug_count:
                total_drug_count.append(drug_name)
 context['total_drug_count']=len(total_drug_count)
 ## De-duplicate: clingenes1 keeps first occurrences from clingene1;
 ## clingenes2 keeps entries of clingene2 not already present in either list
 context['clingenes1']=[]
 for variant in context['clingene1']:
    if variant in context['clingenes1']:
        continue
    context['clingenes1'].append(variant)
 context['clingenes2']=[]
 for variant in context['clingene2']:
    if variant in context['clingenes2'] or variant in context['clingenes1']:
        continue
    context['clingenes2'].append(variant)
 ## Number of detected variants (both de-duplicated lists together)
 context['total_mut_count']=len(context['clingenes1'])+len(context['clingenes2'])
 ## FDA/NMPA/NCCN approved gene tests for this cancer type
 # Read the table inside a context manager so the file handle is closed
 # (it used to be left open).
 with open(indication_file,'r',encoding='utf-8') as indication_handle:
    indication_fh=indication_handle.readlines()
 indication_genes=[]
 for line in indication_fh[1:]:  # the first line is skipped
    fields=line.strip().split("\t")
    if len(fields)<2:
        # blank or truncated line (e.g. a trailing empty line) - nothing to report
        continue
    indication={'gene':fields[0],'content':fields[1]}
    if indication['gene'] in context['list3']:
        # the gene has an entry in list3: show that entry in red
        indication['result']=RichText(context['list3'][indication['gene']], color='FF0000')
    else:
        indication['result']='未检出变异'
    context['indication'].append(indication)
 ## QC: read the two-column metric table and grade Q30, depth and uniformity
 qc_file=''.join([output_dir,'/qc/',name,'_qc.txt'])
 qc=pd.read_table(qc_file,sep="\t",header=None,index_col=0,names=['A','B'])
 Q30=qc.loc['Q30(%)','B']
 Q30_result='合格' if Q30>=85 else '警戒'
 depth=qc.loc['mean_depth(dedup)','B']
 # minimum mean depth per Sample_type; for any other Sample_type depth_result
 # is left unassigned, exactly as in the if/elif form this replaces
 depth_cutoff={'c':1000,'t':500}
 if Sample_type in depth_cutoff:
    depth_result='合格' if depth>=depth_cutoff[Sample_type] else '警戒'
 uniformity=qc.loc['coverage(>=0.2*meanx)','B']
 uniformity_result='合格' if uniformity>=90 else '警戒'
 # overall verdict passes only when all three metrics pass
 if Q30_result!='合格' or depth_result!='合格' or uniformity_result!='合格':
    context['qc_result']='警戒'
 else:
    context['qc_result']='合格'
 context['Q30']=Q30
 context['Q30_result']=Q30_result
 context['depth']=depth
 context['uniformity']=uniformity
 context['depth_result']=depth_result
 context['uniformity_result']=uniformity_result
 ## Negative-result handling: every empty table gets a single row of '/' placeholders
 ## list1
 if len(context['list1'])==0:
    context['list1']=[dict.fromkeys(('gene','freq','drug_A','drug_B','drug_C','drug_D'),'/')]
 ## list2
 if len(context['list2'])==0:
    context['list2']={'/':[dict.fromkeys(('drug','effect','tumor','evidence','sig'),'/')]}
    context['genefunc']['/']='/'
 ## clingenes, nonclingenes
 for variant_table in ('clingenes1','clingenes2','nonclingenes'):
    if len(context[variant_table])==0:
        context[variant_table]=[dict.fromkeys(('gene','transcript','nacid','aacid','exon','muttype','freq'),'/')]
 ## sensitive_resistant_drug (entries 0 and 1)
 for drug_group in context['sensitive_resistant_drug'][0], context['sensitive_resistant_drug'][1]:
    if len(drug_group['drug'])==0:
        drug_group['drug']=[{'name':'/','mechanism':'/'}]
 ## mmr
 if len(context['mmr'])==0:
    context['mmr']=[dict.fromkeys(('gene','transcript','nacid','aacid','muttype','freq','sig'),'/')]
 ## Patient / sample info
 post_file=''.join([output_dir,'/qc/',name,'_post.json'])
 # Defaults used when <name>_post.json does not exist
 sex='/'
 age='/'
 phone='/'
 medical_history='/'
 family_history='/'
 sample_id=name
 sample_type='/'
 report_date=time.strftime("%Y-%m-%d", time.localtime())
 arrival_date=report_date
 cancer_type='/'
 pathologic_diagnosis='/'
 if os.path.isfile(post_file):
    # Context manager closes the handle (it used to be leaked).
    with open(post_file,'r') as post_fh:
        post=json.load(post_fh)
    # Only the first entry of post["data"] is used.
    record=post["data"][0]
    name=record["name"]
    sex=record["gender"]
    age=record["age"]
    medical_history=record["treatHistory"]
    family_history=record["sickFamilyHistory"]
    sample_id=record["barcode"]
    sample_type=record["source"]
    # Control-sample fields (barcode_N / source_N) existed only as commented-out
    # code here and are not read.
    # receiveTime is "<date> <time>"; keep the date part only
    arrival_date=record["receiveTime"].split(' ')[0]
    cancer_type=record["zlType"]
    pathologic_diagnosis=record["treatResult"]
 context['info']={
 'name':name,
 'sex':sex,
 'age':age,
 'phone':phone,
 'medical_history':medical_history,
 'family_history':family_history,
 'sample_id':sample_id,
 'sample_type':sample_type,
 'report_date':report_date,
 'arrival_date':arrival_date,
 'cancer_type':cancer_type,
 'pathologic_diagnosis':pathologic_diagnosis}
 context['report_time']=report_date
 ## Template rendering: pick the .docx template by project and sample type,
 ## render it with context and save it to report_file
 file_real = os.path.realpath(sys.argv[0])
 Exe_Path = os.path.dirname(file_real)
 report_template={
    'lung85gene':{'t':'lung85-tissue-oem.docx','c':'lung85-blood-oem.docx'},
    'crc88gene':{'t':'CRC88-tissue-oem.docx','c':'CRC88-blood-oem.docx'},
 }
 # templates are looked up next to the executed script
 template_name = report_template[projcet][Sample_type]
 doc_full = os.path.join(Exe_Path, template_name)
 doc = DocxTemplate(doc_full)
 doc.render(context)
 doc.save(report_file)
--- a/server.py
+++ b/server.py
@ -0,0 +1,85 @@
 import json
 import os
 import socket
 import struct
 import sys
 from datetime import datetime
 from main import main
 from tools.common import basedir
 def _recv_exact(conn, size):
    """
    Read exactly ``size`` bytes from ``conn``.

    :param conn: connected socket
    :param size: number of bytes to read
    :return: the bytes read
    :raises ConnectionError: if the peer closes before ``size`` bytes arrive
    """
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = conn.recv(remaining)
        if not chunk:
            # recv() returning b'' means the peer closed the connection
            raise ConnectionError('connection closed with %d bytes still expected' % remaining)
        chunks.append(chunk)
        remaining -= len(chunk)
    return b''.join(chunks)

 def recvdata(conn, path):
    """
    Receive one file from ``conn`` and store it under ``path``.

    Wire format: a 4-byte native int with the header length, a UTF-8 JSON
    header holding ``contentlen`` and ``contentname``, then ``contentlen``
    bytes of file content.

    :param conn: connected socket to read from
    :param path: directory the file is written to
    :return: path of the written file, named ``<MMDDHHMM>_<contentname>``
    :raises ConnectionError: if the peer closes before everything arrived
    """
    # recv() may return fewer bytes than requested, so read both the length
    # prefix and the header with an exact-read helper.
    header_size = struct.unpack('i', _recv_exact(conn, 4))[0]
    header_bytes = _recv_exact(conn, header_size)
    header_json = header_bytes.decode('utf-8')
    header_dic = json.loads(header_json)
    content_len = header_dic['contentlen']
    # The name comes from the sender: keep only its last path component so the
    # file cannot be written outside ``path``.
    content_name = os.path.basename(str(header_dic['contentname']))
    recv_len = 0
    fielpath = os.path.join(path, '%s_%s' % (datetime.now().strftime("%m%d%H%M"), content_name))
    with open(fielpath, 'wb') as file:
        while recv_len < content_len:
            # never read past the declared content length
            correntrecv = conn.recv(min(1024 * 1000, content_len - recv_len))
            if not correntrecv:
                # peer closed early; without this check the loop never ends
                raise ConnectionError(
                    'connection closed after %d of %d content bytes' % (recv_len, content_len))
            file.write(correntrecv)
            recv_len += len(correntrecv)
    return fielpath
 def senddata(conn, path, message=None):
    """
    Send a file, or a text message, over ``conn``.

    Wire format: a 4-byte native int with the header length, a UTF-8 JSON
    header holding ``contentlen`` and ``contentname``, then the content bytes.

    :param conn: connected socket to write to
    :param path: file path to send; when ``message`` is truthy this is the
        message text itself
    :param message: truthy to send ``path`` as text under the name ``'message'``
    :return: None
    """
    if message:
        # Encode first: contentlen must count bytes, not characters, otherwise
        # non-ASCII text is announced shorter than what is actually sent.
        content = path.encode('utf-8')
        name = 'message'
    else:
        with open(path, 'rb') as file:
            content = file.read()
        name = os.path.basename(os.path.realpath(path))
    headerdic = dict(
        contentlen=len(content),
        contentname=name
    )
    headerbytes = json.dumps(headerdic).encode('utf-8')
    # sendall() keeps writing until everything is sent; send() may stop early
    conn.sendall(struct.pack('i', len(headerbytes)))
    conn.sendall(headerbytes)
    conn.sendall(content)
 def server():
    """
    Listen on port 8190 and serve report requests forever.

    For each connection: receive a file into ``<basedir>/xlsx``, pass its path
    to ``main`` and send the file ``main`` returns back to the client. Errors
    are printed and the loop continues with the next connection.
    """
    myserver = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    adrss = ("", 8190)
    myserver.bind(adrss)
    myserver.listen(5)
    while True:
        myclient = None
        try:
            myclient, adddr = myserver.accept()
            recv_content = recvdata(myclient, os.path.join(basedir, 'xlsx'))
            outputpath = main(recv_content)
            senddata(myclient, outputpath)
            print('生成成功')
        except Exception as e:
            print(e, '有错误')
        finally:
            # Close the per-client socket on success and on failure; it was
            # previously never closed.
            if myclient is not None:
                myclient.close()
 if __name__ == '__main__':
    # No command-line argument: run the socket server.
    # Otherwise: generate one report from the given file.
    if len(sys.argv) <= 1:
        server()
    else:
        outputpath = main(sys.argv[1])
--- a/t.json
+++ b/t.json
--- a/template/nreport.docx
+++ b/template/nreport.docx
--- a/template/~$report.docx
+++ b/template/~$report.docx
--- a/tools/init.py
+++ b/tools/init.py
--- a/tools/common.py
+++ b/tools/common.py
@ -0,0 +1,3 @@
 import os
 # Absolute, symlink-resolved path of the directory one level above the
 # directory that contains this module.
 basedir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
--- a/tools/parsexlsx.py
+++ b/tools/parsexlsx.py
@ -0,0 +1,457 @@
 import json
 import re
 import sys
 import time
 from collections import defaultdict
 import pandas as pd
 from tools.readxlsx import read
 def tree():
    """
    Return a dict whose missing keys are created on access as further
    ``tree()`` dicts, so nested keys can be assigned without setup.
    """
    nested = defaultdict(tree)
    return nested
 class BaseAssignment:
    """Container for the accumulators that the parsing steps fill in."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.drugs_type = {}
        self.signdurg = set()  # drugs with potential clinical benefit
        self.signtb = set()  # gene variants with clear or potential clinical significance
        self.result = tree()  # report result
 class Parse(BaseAssignment):
    def __init__(self, sampledata, *args, **kwargs):
        """
        :param sampledata: mapping of sheet name to sheet data; each method
            reads its own sheet from it
        """
        self.sampledata = sampledata
        super().__init__(*args, **kwargs)
    def cms(self):
        """
        样本信息处理
        """
        data = pd.DataFrame(self.sampledata['sample_info'])
        if data.empty:
            raise UserWarning('sample_info表为空，生成报告失败！')
        data = data.applymap(
            lambda x: str(x).replace('.', '/').replace('-', '/').replace('——', '/') if str(x) in ['.', '-', '——'] else x)
        data_dict = data.to_dict('index')[0]
        data_dict['receiveTime'] = re.split(' ', data_dict['receiveTime'])[0]
        data_dict['reportTime'] = time.strftime("%Y-%m-%d", time.localtime())
        self.result['c'] = data_dict
    def target(self):
        data = pd.DataFrame(self.sampledata['snvindel'])
        res = []
        if data.empty:
            self.result['snvindel'] = res
            return
        data = data[data['Validated'] == 1].reset_index()
        data['muttype'] = '/'
        data.loc[data['ExonicFunc.refGene'].str.match('nonsynonymous SNV'), 'muttype'] = '错义突变'
        data.loc[data['ExonicFunc.refGene'].str.match('^frameshift'), 'muttype'] = '移码突变'
        data.loc[data['ExonicFunc.refGene'].str.match('^nonframeshift'), 'muttype'] = '非移码突变'
        data.loc[data['ExonicFunc.refGene'].str.match('stopgain'), 'muttype'] = '提前终止'
        # 拆分hgvs
        data[['gene', 'transcript', 'exon', 'nacid', 'aacid']] = data['AAChange.refGene'].str.split(':', expand=True)
        # 没有氨基酸改变用核苷酸改变代替
        data['aacid'] = data['aacid'].fillna(data['nacid'])
        for alter, alter_data in data.groupby('AAChange.refGene'):
            alter_data_need = alter_data[['gene', 'transcript', 'exon', 'nacid', 'aacid', 'mutant_frequency',
                                          'AMP_mut_level', 'muttype', 'Gene_function']]
            alter_res = alter_data_need.iloc[0].to_dict()
            alter_res['drug_category'] = self._drug_category(alter_data)
            drug_content = alter_data[
                ['DrugCn', 'Response_Type', 'Indication', 'Evidence_Source', 'Efficacy_Evidence']]
            drug_content = drug_content[drug_content['DrugCn'] != '.']
            alter_res['drug_content'] = drug_content.reset_index().to_dict('records')
            alter_res['alter'] = alter
            res.append(alter_res)
            # 汇总
            if alter_res['AMP_mut_level'] in ['I', 'II']:
                self.signtb.add(alter)
        self.result['snvindel'] = res
    def fusion(self):
        data = pd.DataFrame(self.sampledata['fusion'])
        res = []
        if data.empty:
            self.result['fusion'] = res
            return
        data = data[data['Validated'] == 1].reset_index()
        for alter, alter_data in data.groupby('FUSION'):
            alter_data_need = alter_data[['FUSION', 'FREQ1', 'AMP_mut_level', 'Gene_function']]
            alter_res = alter_data_need.iloc[0].to_dict()
            alter_res['drug_category'] = self._drug_category(alter_data)
            drug_content = alter_data[
                ['DrugCn', 'Response_Type', 'Indication', 'Evidence_Source', 'Efficacy_Evidence']]
            drug_content = drug_content[drug_content['DrugCn'] != '.']
            alter_res['drug_content'] = drug_content.reset_index().to_dict('records')
            alter_res['alter'] = '%s 融合' % (alter_res['FUSION'].replace('-', ':'))
            res.append(alter_res)
            # 汇总
            if alter_res['AMP_mut_level'] in ['I', 'II']:
                self.signtb.add(alter)
        self.result['fusion'] = res
    def cnv(self):
        data = pd.DataFrame(self.sampledata['cnv'])
        res = []
        if data.empty:
            self.result['cnv'] = res
            return
        data = data[data['Validated'] == 1].reset_index()
        for alter, alter_data in data.groupby('Gene_Symbol'):
            alter_data_need = alter_data[['Gene_Symbol', 'Copy_number', 'AMP_mut_level', 'Gene_function']].reset_index()
            alter_data_need['muttype'] = '缺失'
            alter_data_need.loc[alter_data_need['Copy_number'] > 2, 'muttype'] = '扩增'
            alter_res = alter_data_need.iloc[0].to_dict()
            alter_res['drug_category'] = self._drug_category(alter_data)
            drug_content = alter_data[
                ['DrugCn', 'Response_Type', 'Indication', 'Evidence_Source', 'Efficacy_Evidence']]
            drug_content = drug_content[drug_content['DrugCn'] != '.']
            alter_res['drug_content'] = drug_content.reset_index().to_dict('records')
            alter_res['alter'] = '%s %s' % (alter, alter_res['muttype'])
            res.append(alter_res)
            # 汇总
            if alter_res['AMP_mut_level'] in ['I', 'II']:
                self.signtb.add(alter)
        self.result['cnv'] = res
    def hotspot(self):
        """Store the rows of the ``hotspot`` sheet in ``self.result['hotspot']`` as a list of row dicts."""
        self._to_records('hotspot')
    def met(self):
        """Store the rows of the ``MET`` sheet in ``self.result['MET']`` as a list of row dicts."""
        self._to_records('MET')
    def longindel(self):
        """Store the rows of the ``longindel`` sheet in ``self.result['longindel']`` as a list of row dicts."""
        self._to_records('longindel')
    def mmr(self):
        data = pd.DataFrame(self.sampledata['MMR'])
        result_summary = '未检测到相关基因突变'
        predict = '对免疫检查点抑制剂可能不敏感'
        mmr_num = 0
        res = []
        if not data.empty:
            tmdf = data[['gene', 'p_change']].reset_index()
            tmdf['result_summary'] = tmdf.apply(lambda x: '%s %s' % (x['gene'], x['p_change']), axis=1)
            result_summary = ' | '.join(tmdf['result_summary'].to_list())
            predict = '对免疫检查点抑制剂可能敏感'
            mmr_num = len(data.index)
            res = data.to_dict('records')
        self.result['MMR'] = res
        self.result['sum']['mmr'] = dict(
            result_summary=result_summary,
            predict=predict,
            mmr_num=mmr_num
        )
    def msi(self):
        """Store the first row of the ``MSI`` sheet in ``self.result['MSI']`` as a dict ({} when the sheet is empty)."""
        self._to_dicts('MSI')
    # def chemo(self):
    #     data = pd.DataFrame(self.sampledata['chemo'])
    #
    #     project = data['project'].to_list()[0]
    #
    #     # 分类汇总 同位点，药物合并 drug.infos.txt
    #     drugrsid = data[['drugname', 'genename', 'rsid', 'result', 'level', 'tips', 'drugsort']]
    #     drugrsid = drugrsid.drop_duplicates()
    #     resdrugrsid = drugrsid.groupby(['drugname', 'genename', 'rsid', 'result', 'level', 'drugsort'])['tips'].agg(
    #         ','.join).reset_index()
    #     resdrugrsid.rename(columns=
    #                        {'drugname': '药物', 'genename': '检测基因', 'rsid': '检测位点', 'result': '基因型',
    #                         'level': '证据等级', 'tips': '用药提示'},
    #                        inplace=True)
    #     resdrugrsid = resdrugrsid.sort_values(by=['drugsort', '药物', '检测基因'])
    #     self.result['chemo']['druginfo'] = resdrugrsid.to_dict('records')
    #
    #     # 药物 药物疗效 推荐程度合并 drug.res.txt
    #     drugtypesum = data[['drugname', 'drugtype', 'rsid', 'weights']]
    #     drugtypesum = drugtypesum.drop_duplicates()
    #     drugtyperes = list()
    #     drugsum = dict()
    #     for drug, drugdata in drugtypesum.groupby('drugname'):
    #         tipsnum = drugdata.groupby(['drugtype']).agg({'weights': 'sum'}).to_dict('index')
    #         sumlist = list()
    #         if 'LX' in tipsnum:
    #             LX = tipsnum['LX']['weights']
    #             if LX > 0:
    #                 lxdes = '疗效较好'
    #                 lxnum = 1
    #             elif LX == 0:
    #                 lxdes = '疗效一般'
    #                 lxnum = 0
    #             else:
    #                 lxdes = '疗效较差'
    #                 lxnum = -1
    #             sumlist.append(lxdes)
    #         else:
    #             LX = 0
    #             lxnum = 0
    #         if 'DF' in tipsnum:
    #             DF = tipsnum['DF']['weights']
    #             if DF > 0:
    #                 dfdes = '毒副较低'
    #                 dfnum = 1
    #             elif DF == 0:
    #                 dfdes = '毒副一般'
    #                 dfnum = 0
    #             else:
    #                 dfdes = '毒副较高'
    #                 dfnum = -1
    #             sumlist.append(dfdes)
    #         else:
    #             DF = 0
    #             dfnum = 0
    #
    #         # 评价方式 疗效 1 0 -1, 毒副 1 0 -1 ，可形成9宫格
    #         sumnum = lxnum + dfnum
    #         if sumnum > 0:
    #             sumdes = '推荐'
    #         elif sumnum == 0:
    #             sumdes = '常规'
    #         else:
    #             sumdes = '谨慎'
    #
    #         # 特别药物处理
    #         if (drug == "氟尿嘧啶" or drug == "卡培他滨") and DF < 0:
    #             sumdes = '谨慎'
    #
    #         drugtyperes.append(dict(
    #             药物名称=drug,
    #             疗效=LX,
    #             毒副=DF,
    #             推荐程度=sumdes,
    #             疗效和毒副总结=','.join(sumlist)
    #         ))
    #         drugsum[drug] = sumdes
    #
    #     # 报告中展示药物有顺序
    #     drugsort = data[['drugname', 'drugsort']].drop_duplicates()
    #     drugsort_dict = drugsort.set_index('drugname')['drugsort'].to_dict()
    #     drugtyperes_sort = sorted(drugtyperes, key=lambda x: (
    #         drugsort_dict[x['药物名称']] if x['药物名称'] in drugsort_dict else 100, x['药物名称']))
    #
    #     drugtyperes_sort_df = pd.DataFrame(drugtyperes_sort)
    #     self.result['chemo']['sum'] = drugtyperes_sort_df.groupby('推荐程度')['药物名称'].apply(','.join).to_dict()
    #     self.result['chemo']['drugres'] = drugtyperes_sort_df.to_dict('records')
    #
    #     # 联合用药
    #     drug_combine_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'database',
    #                                      'chemo_drug_combine.csv')
    #     drug_combine = pd.read_csv(drug_combine_path, sep='\t')
    #     drug_combine.fillna('.', inplace=True)
    #     drug_combine_data = drug_combine[drug_combine['source'].str.contains(project)]
    #     drug_combine_data = drug_combine_data.reset_index()
    #     if not drug_combine_data.empty:
    #         drug_combine_data['临床提示'] = drug_combine_data['用药方案'].apply(self._get_drug_plan, args=(drugsum,))
    #         self.result['chemo']['combine'] = drug_combine_data.groupby('癌种').apply(
    #             lambda group: group.set_index('癌种').to_dict('records')).to_dict()
    #     else:
    #         self.result['chemo']['combine'] = dict()
    #     self.result['sum']['chemo_drug_num'] = len(drugsum.keys())
    def chemo(self):
        chemo_res = self._to_records('chemo_res', need=True)
        chemo_res_df = pd.DataFrame(chemo_res)
        chemo_res_df.index = chemo_res_df.index + 1
        chemo_res_df = chemo_res_df.reset_index()
        self.result['chemo']['chemo_res'] = chemo_res_df.to_dict('records')
        self.result['sum']['chemo']['drug_num'] = len(chemo_res)
        self.result['sum']['chemo']['drug_category'] = pd.DataFrame(chemo_res).groupby('推荐程度')['药物名称'].apply(
            ','.join).to_dict()
        chemo_comb = self._to_records('chemo_comb', need=True)
        chemo_comb_res = dict()
        if chemo_comb:
            chemo_comb_res = pd.DataFrame(chemo_comb).groupby('癌种').apply(
                lambda group: group.set_index('癌种').to_dict('records')).to_dict()
        self.result['chemo']['chemo_comb'] = chemo_comb_res
        chemo_info = self._to_records('chemo_info', need=True)
        chemo_info_res = dict()
        if chemo_info:
            chemo_info_res = pd.DataFrame(chemo_info).groupby('药物').apply(
                lambda group: group.set_index('药物').to_dict('records')).to_dict()
        self.result['chemo']['chemo_info'] = chemo_info_res
    def hcs(self):
        self._to_records('HCS')
        self.result['sum']['hcs']['num'] = len(self.result['HCS'])
    def heredity(self):
        """
        遗传的结果文件
        :return:
        """
        hereditary = pd.DataFrame(self.sampledata['hereditary'])
        result = '/'
        disease = '/'
        risk = '/'
        if not hereditary.empty:
            result = '|'.join(hereditary.apply(lambda x: '%s %s' % (x['基因'], x['检测结果']), axis=1).to_list())
            disease = '|'.join(hereditary['遗传性肿瘤综合征'].to_list())
        hereditary_risk = pd.DataFrame(self.sampledata['hereditary_risk'])
        if not hereditary_risk.empty:
            risk = ','.join(hereditary_risk[hereditary_risk['风险值'] == '偏高']['肿瘤类型'].to_list())
        self.result['hereditary'] = hereditary.to_dict('records')
        self.result['sum']['hereditary']['result'] = result
        self.result['sum']['hereditary']['disease'] = disease
        self.result['sum']['hereditary']['risk'] = risk
    def qc(self):
        # self._to_dicts('qc')
        data = pd.DataFrame(self.sampledata['qc'])
        res = {}
        if not data.empty:
            data.rename(columns={
                'Q30(%)': 'q30',
                'mean_depth(dedup)': 'depth',
                'coverage(>=0.2*meanx)': 'coverage'
            }, inplace=True)
            res = data.to_dict('index')[0]
        self.result['qc'] = res
    def drugs(self):
        data = pd.DataFrame(self.sampledata['drugs'])
        res = {}
        if not data.empty:
            data = data.dropna()
            data = data[data['drug_detail'] != '.']
            res = data.set_index('drug_name')['drug_detail'].to_dict()
        self.result['drugs']['drugs_detail'] = res
    def indication(self):
        """Store the rows of the ``indication`` sheet in ``self.result['indication']`` as a list of row dicts."""
        self._to_records('indication')
    def _to_records(self, sheetname, need=False):
        """
        for many lines
        :param sheetname:
        :return:
        """
        data = pd.DataFrame(self.sampledata[sheetname])
        res = []
        if data.empty:
            self.result[sheetname] = res
            return
        res = data.to_dict('records')
        if need:
            return res
        self.result[sheetname] = res
    def _to_dicts(self, sheetname):
        """
        for single line
        :param sheetname:
        :return:
        """
        data = pd.DataFrame(self.sampledata[sheetname])
        res = {}
        if data.empty:
            self.result[sheetname] = res
            return
        res = data.to_dict('index')[0]
        self.result[sheetname] = res
    def _drug_category(self, groupdata):
        drug_category_res = dict()
        for drug_category, drug_category_alter_data in groupdata.groupby('Drug_Category'):
            if drug_category == '.':
                continue
            # 敏感，可能敏感药物统计
            if drug_category in ['a', 'b', 'c']:
                self.signdurg.update(set(drug_category_alter_data['DrugCn'].str.split(',').explode().tolist()))
            drug_category_alter_data['drugdes'] = drug_category_alter_data.apply(
                lambda x: '%s 【%s 级】' % (x['DrugCn'], x['AMP_evidence_level']), axis=1)
            drug_category_res[drug_category] = '\n'.join(drug_category_alter_data['drugdes'].to_list())
        # 所有药物信息
        groupdata['list_col'] = groupdata['DrugCn'].str.replace(' + ', '+').str.split(r'[+,]')
        exploded_df = groupdata.explode('list_col').reset_index()
        exploded_df = exploded_df[(exploded_df['list_col'] != '.') & (exploded_df['list_col'] != '')]
        exploded_dict = exploded_df.groupby('Response_Type')['list_col'].agg(lambda x: list(set(x))).to_dict()
        for drug_type in exploded_dict:
            if drug_type in self.drugs_type:
                self.drugs_type[drug_type].extend(exploded_dict[drug_type])
            else:
                self.drugs_type[drug_type] = exploded_dict[drug_type]
        # for drugall in exploded_df['Drug_Detail'].to_list():
        #     for drug in drugall.split('|'):
        #         match = re.search(r'\[\[(.*?)]](.*?)$', drug)
        #         if match:
        #             self.drugs_record['drugs'].update({match.group(1).strip(): match.group(2).strip()})
        return drug_category_res
    @staticmethod
    def _get_drug_plan(x, drugsum):
        tlist = x.split('+')
        tdeslist = list()
        for tdes in tlist:
            if tdes.strip() in drugsum:
                t1_des = drugsum[tdes.strip()]
                tdeslist.append(t1_des)
        if '慎用' in tdeslist or '谨慎' in tdeslist:
            return '慎用'
        elif '推荐' in tdeslist:
            return '推荐'
        elif '常规' in tdeslist:
            return '可选'
        else:
            return '可选'
    def collect(self):
        self.cms()
        self.target()
        self.fusion()
        self.cnv()
        self.hotspot()
        self.met()
        self.longindel()
        self.mmr()
        self.msi()
        self.chemo()
        self.hcs()
        self.heredity()
        self.qc()
        self.indication()
        self.drugs()
        # 汇总
        self.result['sum']['signtb_num'] = len(self.signtb)
        self.result['sum']['signdrug_num'] = len(self.signdurg)
        self.result['drugs']['drugs_type'] = {key: self.drugs_type[key] for key in sorted(self.drugs_type.keys())}
        return self.result
 def run(path):
    """
    Parse the workbook at ``path`` and return the result as a JSON string.

    The same JSON is also written to ``t.json`` in the current working
    directory.

    :param path: path of the workbook to parse
    :return: the result serialised as indented JSON, non-ASCII kept as-is
    """
    parse = Parse(read(path))
    res = parse.collect()
    resjson = json.dumps(res, indent=4, ensure_ascii=False)
    # ensure_ascii=False leaves non-ASCII text in the output, so write it as
    # UTF-8 explicitly instead of relying on the locale's default encoding.
    with open('t.json', 'w', encoding='utf-8') as f:
        f.write(resjson)
    return resjson
 if __name__ == '__main__':
    # Script entry: parse the workbook given as the first command-line argument.
    run(sys.argv[1])
--- a/tools/readxlsx.py
+++ b/tools/readxlsx.py
@ -0,0 +1,27 @@
 import pandas as pd
 import logging
 import json
 import sys
 logger = logging.getLogger('main.sub')
 def read(merge):
    """
    Read every sheet of an Excel workbook.

    :param merge: workbook path (anything ``pd.read_excel`` accepts)
    :return: dict mapping sheet name to ``{column: [values]}``; missing cells
        become '.', and a sheet without rows maps to ``[]``
    :raises UserWarning: if the ``sample_info`` sheet lists no ``sampleSn``
    """
    sheets = pd.read_excel(merge, sheet_name=None)
    sample_numbers = sheets['sample_info']['sampleSn'].to_list()
    if len(sample_numbers) == 0:
        logger.error('sample_info表为空！读取excel信息失败！')
        raise UserWarning('sample_info表为空！读取excel信息失败！')
    samdict = {}
    for sheet_name, frame in sheets.items():
        if frame.empty:
            samdict[sheet_name] = []
        else:
            samdict[sheet_name] = frame.fillna('.').to_dict('list')
    return samdict
 if __name__ == '__main__':
    # Script entry: read the workbook given as the first argument and print the resulting dict.
    res = read(sys.argv[1])
    print(res)
--- a/tools/t.json
+++ b/tools/t.json
--- a/xlsx/07211057_merged_file.xlsx
+++ b/xlsx/07211057_merged_file.xlsx
--- a/xlsx/07211104_merged_file.xlsx
+++ b/xlsx/07211104_merged_file.xlsx
--- a/xlsx/07211105_merged_file.xlsx
+++ b/xlsx/07211105_merged_file.xlsx
--- a/xlsx/07211115_merged_file.xlsx
+++ b/xlsx/07211115_merged_file.xlsx
--- a/xlsx/merged_file.xlsx
+++ b/xlsx/merged_file.xlsx
		`@ -0,0 +1,3 @@`
							`import os`

							`basedir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))`