diff --git a/codes/chemo.py b/codes/chemo.py index 9b7e8d5..4cbc0ad 100755 --- a/codes/chemo.py +++ b/codes/chemo.py @@ -1,4 +1,5 @@ #! /usr/bin/env python3 + import argparse import logging import os diff --git a/codes/drug_dedup.py b/codes/drug_dedup.py deleted file mode 100755 index c233b39..0000000 --- a/codes/drug_dedup.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: UTF-8 -*- - -import os -import re -import sys - -import pandas as pd - -if len(sys.argv) != 3: - print(" ".join(['usage:python3', sys.argv[0], 'output_dir', 'name'])) - sys.exit() - -output_dir = sys.argv[1] -name = sys.argv[2] -snv_file = os.path.join(output_dir, 'mutation', f'{name}.somatic.hg19_multianno.filter.sum.pos.txt') -snv_file_new = os.path.join(output_dir, 'mutation', f'{name}.somatic.hg19_multianno.filter.sum.pos.dedup.txt') - -fusion_file = os.path.join(output_dir, 'fusion', f'{name}.fusion.hg19_multianno.filter.fusion.pos.txt') -fusion_file_new = os.path.join(output_dir, 'fusion', f'{name}.fusion.hg19_multianno.filter.fusion.pos.dedup.txt') - -cnv_file = os.path.join(output_dir, 'cnv', f'{name}.rmdup.cns.filter.pos.txt') -cnv_file_new = os.path.join(output_dir, 'cnvkit', f'{name}.rmdup.cns.filter.pos.dedup.txt') - -# gm_snv_file = os.path.join(output_dir, '/mutation/', name, '.snvindel.Germline.pos.txt') -# gm_snv_file_new = os.path.join(output_dir, '/mutation/', name, '.snvindel.Germline.pos.dedup.txt') - -open(snv_file_new, "w") -open(fusion_file_new, "w") -open(cnv_file_new, "w") -# open(gm_snv_file_new, "w") -##Evidence_Source_C及标签排序 -df_mapping_1 = pd.DataFrame({ - 'Evidence_Source_C': ['FDA', 'NMPA', 'NCCN', '临床III期', '临床II期', '临床I期', '临床试验', '回顾性研究', '个案', '临床前研究'], -}) -sort_mapping_1 = df_mapping_1.reset_index().set_index('Evidence_Source_C') -df_mapping_2 = pd.DataFrame({'标签': ['适应症', '非适应症', '.']}) -sort_mapping_2 = df_mapping_2.reset_index().set_index('标签') - -##snvindel处理 -snv_size = os.path.getsize(snv_file) -if snv_size > 0: - data = pd.read_table(snv_file, sep="\t") - data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index']) - data['level2'] = data['标签'].map(sort_mapping_2['index']) - data.sort_values(by=['AAChange.refGene', 'level2', 'level1'], ascending=True, inplace=True) - data.drop(['level1', 'level2'], axis=1, inplace=True) - info = {} - for index, row in data.iterrows(): - if re.search(r'敏感', row['Response_Type_C']): - if row['标签'] == '适应症': - info[row['AAChange.refGene'] + row['Drug']] = '1' - else: - if (row['AAChange.refGene'] + row['Drug']) in info.keys(): - data.drop([index], inplace=True) - data.insert(0, '可信', 1) - data.to_csv(snv_file_new, index=False, sep='\t') - -# ##germline snv/indel处理 -# gm_snv_size = os.path.getsize(gm_snv_file) -# if gm_snv_size > 0: -# data = pd.read_table(gm_snv_file, sep="\t") -# data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index']) -# data['level2'] = data['标签'].map(sort_mapping_2['index']) -# data.sort_values(by=['AAChange.refGene', 'level2', 'level1'], ascending=True, inplace=True) -# data.drop(['level1', 'level2'], axis=1, inplace=True) -# info = {} -# for index, row in data.iterrows(): -# if re.search(r'敏感', row['Response_Type_C']): -# if row['标签'] == '适应症': -# info[row['AAChange.refGene'] + row['Drug']] = '1' -# else: -# if (row['AAChange.refGene'] + row['Drug']) in info.keys(): -# data.drop([index], inplace=True) -# data.insert(0, '可信', 1) -# data.to_csv(gm_snv_file_new, index=False, sep='\t') - -##fusion处理 -fusion_size = os.path.getsize(fusion_file) -if fusion_size > 0: - data = pd.read_table(fusion_file, sep="\t") - data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index']) - data['level2'] = data['标签'].map(sort_mapping_2['index']) - data.sort_values(by=['FUSION', 'level2', 'level1'], ascending=True, inplace=True) - data.drop(['level1', 'level2'], axis=1, inplace=True) - info = {} - for index, row in data.iterrows(): - if re.search(r'敏感', row['Response_Type_C']): - if row['标签'] == '适应症': - info[row['FUSION'] + row['Drug']] = '1' - else: - if (row['FUSION'] + row['Drug']) in info.keys(): - data.drop([index], inplace=True) - data.insert(0, '可信', 1) - data.to_csv(fusion_file_new, index=False, sep='\t') - -##cnv处理 -cnv_size = os.path.getsize(cnv_file) -if cnv_size > 0: - data = pd.read_table(cnv_file, sep="\t") - data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index']) - data['level2'] = data['标签'].map(sort_mapping_2['index']) - data.sort_values(by=['Gene_Symbol', 'level2', 'level1'], ascending=True, inplace=True) - data.drop(['level1', 'level2'], axis=1, inplace=True) - info = {} - for index, row in data.iterrows(): - if re.search(r'敏感', row['Response_Type_C']): - if row['标签'] == '适应症': - info[row['Gene_Symbol'] + row['Drug']] = '1' - else: - if (row['Gene_Symbol'] + row['Drug']) in info.keys(): - data.drop([index], inplace=True) - data.insert(0, '可信', 1) - data.to_csv(cnv_file_new, index=False, sep='\t') diff --git a/codes/filter_longindel.pl b/codes/filter_longindel.pl index 483daf0..d7c76d0 100755 --- a/codes/filter_longindel.pl +++ b/codes/filter_longindel.pl @@ -30,14 +30,15 @@ while () { } my @line = split(/\t/); $line[7] =~ /Gene.refGene=(.*?);/; - if ((grep {$1 =~ /$_/} @longindels) && ($_ =~ /SVTYPE=DEL/ || $_ =~ /SVTYPE=DUP/ || $_ =~ /SVTYPE=INS/)) { - if ($1 eq "BCL2L11") { + my $gene = $1; + if ((grep {$gene =~ /$_/} @longindels) && ($_ =~ /SVTYPE=DEL/ || $_ =~ /SVTYPE=DUP/ || $_ =~ /SVTYPE=INS/)) { + if ($gene eq "BCL2L11") { if ($line[1] == '111883194') { - print LONGINDEL join("\n", @pos) . "\n"; + print LONGINDEL $_; } } else { - print LONGINDEL join("\n", @pos) . "\n"; + print LONGINDEL $_; } } } diff --git a/codes/filter_snpindel.pl b/codes/filter_snpindel.pl index 7fd3ef3..60fd083 100755 --- a/codes/filter_snpindel.pl +++ b/codes/filter_snpindel.pl @@ -1,4 +1,5 @@ #!/usr/bin/env perl + use strict; #use warnings; use List::Util qw(sum); @@ -131,12 +132,12 @@ while () { $line[9] = join(":", ($gene, $hgvs)); } elsif ($spl =~ /\d+\-[1|2]\D+/) { - my $intron = $exon-1; + my $intron = $exon - 1; $hgvs =~ s/exon(\d+)/intron$intron;exon$exon/; $line[9] = join(":", ($gene, $hgvs)); } elsif ($gene eq "MET") { - $line[9] = join(":", ($gene, "exon14", "c.xxx")); + $line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx")); $line[8] = 'skipping' } else { diff --git a/codes/netchop.pl b/codes/netchop.pl new file mode 100755 index 0000000..5513d23 --- /dev/null +++ b/codes/netchop.pl @@ -0,0 +1,78 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use List::Util qw(min max); +#max_length:最大的epitope长度 +die "usage:perl $0 outputDir tumor_prefix" if @ARGV != 2; + +my ($outputDir, $tumor_prefix) = @ARGV; + +open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.fasta"; +my %fa; +while () { + if (/^>MT/) { + open OUT, ">$outputDir/neoantigen/MHC_Class_I/tmp.fa"; + print OUT; + $_ =~ /MT\.(\d+)\./; + my $id = $1; + my $seq = ; + print OUT $seq; + chomp $seq; + $fa{$id} = $seq; + system "predict.py -m netchop -n $outputDir/neoantigen/MHC_Class_I/tmp.fa >>$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt"; + close OUT; + } +} +unlink "$outputDir/neoantigen/MHC_Class_I/tmp.fa"; + +my %score; +open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt"; +while () { + next unless /^\d+/; + chomp; + my @line = split; + $line[3] =~ /MT\.(\d+)\./; + $score{$1}{$line[0]} = $line[2]; +} +open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.tsv"; +open OUT, ">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.txt"; +open OUT2, ">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt"; +#my %neopt; +my $head = ; +chomp $head; +print OUT "$head\tcleavage_score\n"; +while () { + chomp; + my @line = split(/\t/); + $line[44] =~ /^(\d+)\./; + my $id = $1; + my $pep = $line[18]; + if (exists $fa{$id}) { + my $index = index($fa{$id}, $pep) + length($pep); + my $cleavage_score = $score{$id}{$index}; + print OUT "$_\t$cleavage_score\n"; + if ($line[21] <= 5000 and ($line[23] eq "NA" or $line[23] >= 1)) { + print OUT2 "$_\t$cleavage_score\n"; + } + } + else { + print OUT "$_\tNA\n"; + } +} +system "sort -k 22 -n $outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt >$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt"; +unlink "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt"; +open SORT, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt"; +open OUT3, ">$outputDir/neoantigen/MHC_Class_I/neoantigen.txt"; +print OUT3 "序号\tHLA分型\t基因\t多肽\t亲和力\t剪切效率\n"; +my %pep; +my $bool = 0; +while () { + chomp; + my @line = split(/\t/); + if (not exists $pep{$line[18]}) { + $pep{$line[18]}++; + $bool += 1; + print OUT3 "$bool\t$line[14]\t$line[11]\t$line[18]\t$line[21]\t$line[53]\n"; + } +} diff --git a/codes/pollution.py b/codes/pollution.py index c88c4ba..67879c5 100755 --- a/codes/pollution.py +++ b/codes/pollution.py @@ -110,7 +110,7 @@ def process_judge_vcf(input_vcf, output_vcf): if not line.startswith("#"): fields = line.strip().split('\t') info = fields[9].split(":") - percentage = float(info[4]) + percentage = float(info[6]) if 0.1 <= percentage <= 0.9: b = 0.5 @@ -135,7 +135,10 @@ def merge_and_sort_files(matched_file, unmatched_file, output_file): return output_file # 如果 unmatched_file 不为空,继续合并和排序操作 - matched_df = pd.read_csv(matched_file, sep='\t', header=None) + if os.stat(matched_file).st_size == 0: + matched_df = pd.DataFrame() + else: + matched_df = pd.read_csv(matched_file, sep='\t', header=None) unmatched_df = pd.read_csv(unmatched_file, sep='\t', header=None) # 合并数据帧 @@ -195,7 +198,7 @@ def select_cnvkit_vcf(vcf, bed, output_file): line.split()[1] == str(position_list[i]) and line.split()[0] == str(chr_list[i]) and len( line.split()[3]) < 2 and len(line.split()[4]) < 2] for line in filtered_lines: - p_value_str = line.split()[9].split(":")[4] + p_value_str = line.split()[9].split(":")[6] p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str) if 0.1 <= p_value <= 0.9: result_data.append(line) @@ -218,7 +221,7 @@ def paired_monitoring(name, somatic_vcf, germline_vcf, ref_bed, cnvkit_ref_bed, # 处理胚系,根据bed筛选 select_position_output_file3 = os.path.join(output_dir, f'{name}_germline_matched.vcf') select_position_output_file4 = os.path.join(output_dir, f'{name}_germline_unmatched.vcf') - Germline_matched_file, Germline_unmatched_file = select_position(germline_vcf, ref_bed, + germline_matched_file, germline_unmatched_file = select_position(germline_vcf, ref_bed, select_position_output_file3, select_position_output_file4) # 处理体系,数值转换 @@ -226,14 +229,14 @@ def paired_monitoring(name, somatic_vcf, germline_vcf, ref_bed, cnvkit_ref_bed, somatic_matched_add_judge_file = process_judge_vcf(somatic_matched_file, process_judge_vcf_file1) # 处理胚系,数值转换 process_judge_vcf_file2 = os.path.join(output_dir, f'{name}_germline_matched_add_judge.vcf') - germline_matched_add_judge_file = process_judge_vcf(Germline_matched_file, process_judge_vcf_file2) + germline_matched_add_judge_file = process_judge_vcf(germline_matched_file, process_judge_vcf_file2) # 合并体系,将匹配到的和未匹配到bed的的合并 merge_and_sort_files_file1 = os.path.join(output_dir, f'{name}_somatic_merged.vcf') somatic_merged_file = merge_and_sort_files(somatic_matched_add_judge_file, somatic_unmatched_file, merge_and_sort_files_file1) # 合并胚系,将匹配到的和未匹配到bed的的合并 merge_and_sort_files_file2 = os.path.join(output_dir, f'{name}_germline__merged.vcf') - Germline_merged_file = merge_and_sort_files(germline_matched_add_judge_file, Germline_unmatched_file, + Germline_merged_file = merge_and_sort_files(germline_matched_add_judge_file, germline_unmatched_file, merge_and_sort_files_file2) # 合并胚系,体系,将体系,胚系两个合并文件再合并 result_pro_file = os.path.join(output_dir, f'{name}_result_pro.txt') diff --git a/codes/postprocess.py b/codes/postprocess.py index 9c057b8..86281ca 100755 --- a/codes/postprocess.py +++ b/codes/postprocess.py @@ -75,7 +75,7 @@ class PostProcess: def txt_2_excel(path): try: df = pd.read_csv(path, sep='\t') - except pd.errors.EmptyDataError: + except (pd.errors.EmptyDataError, FileNotFoundError): return [] return df.to_dict('records') @@ -179,6 +179,8 @@ class PostProcess: filter_neg = os.path.join(self.path, 'mutation', f'{self.sample_name}.snp_indel.somatic.hg19_multianno.filter.sum.neg.txt') + tmb_file = os.path.join(self.path, 'tmb', f'{self.sample_name}.tmb.txt') + filter_sum_pos_res = list() # 从pos_files中获取药物信息 pos_check = check_file_exist_and_empty(filter_pos) @@ -219,7 +221,7 @@ class PostProcess: neg['AMP_mut_level'] = 'IIII' neg_dict = neg.set_index(['Chr', 'Start', 'End'])['AMP_mut_level'].to_dict() - filter_sum_res = list() + filter_sum_df = pd.DataFrame() filter_sum_check = check_file_exist_and_empty(filter_sum) if not filter_sum_check: filter_sum_df = pd.read_csv(filter_sum, sep='\t') @@ -228,8 +230,33 @@ class PostProcess: level_dict.update(vus_dict) level_dict.update(neg_dict) filter_sum_df['AMP_mut_level'] = filter_sum_df.set_index(['Chr', 'Start', 'End']).index.map(level_dict) - filter_sum_df = filter_sum_df.fillna('.') - filter_sum_res = filter_sum_df.to_dict('records') + + cols = list(filter_sum_df.columns) + + tmb_file_check = check_file_exist_and_empty(tmb_file) + if not tmb_file_check: + tmb_df = pd.read_csv(tmb_file, sep='\t') + key_cols = ['Chr', 'Start', 'End'] + filter_sum_df = filter_sum_df.set_index(key_cols) + tmb_df = tmb_df.set_index(key_cols) + + # 在filter_sum_df中的process列中追加字符串";tmb",对应tmb_df中的行 并且 是非 1,2类突变 + filter_sum_df['process'] = filter_sum_df.index.map( + lambda x: filter_sum_df.at[x, 'process'] + ';tmb' if x in tmb_df.index and filter_sum_df.at[ + x, 'AMP_mut_level'] not in ['I', 'II'] else filter_sum_df.at[x, 'process']) + + # 找到tmb_df中不在filter_sum_df中的行,并将这些新的行添加到filter_sum_df中 + new_rows = tmb_df[~tmb_df.index.isin(filter_sum_df.index)] + filter_sum_df = pd.concat([filter_sum_df, new_rows]) + + # 重置索引 + filter_sum_df = filter_sum_df.reset_index() + + # 按之前列排 + filter_sum_df = filter_sum_df[cols] + + filter_sum_df = filter_sum_df.fillna('.') + filter_sum_res = filter_sum_df.to_dict('records') self.sheet['target_mut'] = filter_sum_res self.sheet['target_drug'] = filter_sum_pos_res @@ -429,11 +456,62 @@ class PostProcess: print(file_check) def longindel(self): - longindel_files = glob.glob( - os.path.join(self.path, 'fusion', '*.longindel.pos.txt')) - if longindel_files: - # return self.txt_2_excel(longindel_files[0]) - self.sheet['longindel'] = self.txt_2_excel(longindel_files[0]) + + filter_sum_pos = os.path.join(self.path, 'fusion', + f'{self.sample_name}.longindel.hg19_multianno.filter.pos.txt') + filter_sum_pos_check = check_file_exist_and_empty(filter_sum_pos) + + filter_sum_pos_res = list() + pos_dict = dict() + + if not filter_sum_pos_check: + filter_sum_pos_df = pd.read_csv(filter_sum_pos, sep='\t') + # 添加常规列 + filter_sum_pos_df = self._add_columns(filter_sum_pos_df) + # 添加基因功能 + filter_sum_pos_df = self._add_gene_function(filter_sum_pos_df, colname='ref_gene') + # 药物处理 + self.drug_parse(filter_sum_pos_df['DrugCn'].to_list()) + filter_sum_pos_df['Validated'] = 1 + filter_sum_pos_df = filter_sum_pos_df.fillna('.') + + grouped_df = filter_sum_pos_df.groupby(['#CHROM', 'POS', 'REF', 'ALT']) + # 对每个分组进行操作 + for group_name, group_data in grouped_df: + chr, pos, ref, alt = group_name + if any(group_data['AMP_mut_level'] == 'I'): + filter_condition = (filter_sum_pos_df['#CHROM'] == chr) & \ + (filter_sum_pos_df['POS'] == pos) & \ + (filter_sum_pos_df['REF'] == ref) & \ + (filter_sum_pos_df['ALT'] == alt) + filter_sum_pos_df.loc[filter_condition, 'AMP_mut_level'] = 'I' + + pos_dict = filter_sum_pos_df.set_index(['#CHROM', 'POS', 'REF', 'ALT'])['AMP_mut_level'].to_dict() + filter_sum_pos_res = filter_sum_pos_df.to_dict('records') + + filter_sum = os.path.join(self.path, 'fusion', f'{self.sample_name}.longindel.hg19_multianno.filter.txt') + filter_sum_check = check_file_exist_and_empty(filter_sum) + filter_sum_res = list() + if not filter_sum_check: + filter_sum_df = pd.read_csv(filter_sum, sep='\t') + + filter_sum_df['Validated'] = 1 + level_dict = dict() + level_dict.update(pos_dict) + filter_sum_df['AMP_mut_level'] = filter_sum_df.set_index(['#CHROM', 'POS', 'REF', 'ALT']).index.map( + level_dict) + filter_sum_df = filter_sum_df.fillna('.') + filter_sum_res = filter_sum_df.to_dict('records') + + self.sheet['longindel_mut'] = filter_sum_res + self.sheet['longindel_drug'] = filter_sum_pos_res + + def neoantigen(self): + neoantigen = os.path.join(self.path, 'neoantigen', f'MHC_Class_I', 'neoantigen.txt') + hla = os.path.join(self.path, 'neoantigen', f'hla', f'{self.normal_name}_result.tsv') + + self.sheet['neoantigen'] = self.txt_2_excel(neoantigen) + self.sheet['hla'] = self.txt_2_excel(hla) def qc(self): qc_files = glob.glob(os.path.join(self.path, 'qc', '*_qc.txt')) @@ -452,17 +530,18 @@ class PostProcess: def collect(self): writer = pd.ExcelWriter(self.outpath) self.cms() + self.qc() self.snv() self.fusion() + self.longindel() self.cnv() self.msi() self.germline() self.heredity() self.heredity_res() - self.longindel() self.chemo() self.indication() - self.qc() + self.neoantigen() self.drugs() # 遍历CSV文件列表 diff --git a/codes/run_pipeline.py b/codes/run_pipeline.py index 847645b..eaad506 100755 --- a/codes/run_pipeline.py +++ b/codes/run_pipeline.py @@ -2,6 +2,7 @@ import argparse import os +from datetime import datetime run_wdl_path = os.path.join(os.path.dirname(__file__), 'run_wdl.py') @@ -42,13 +43,15 @@ if __name__ == '__main__': if not os.path.exists(res_path): os.makedirs(res_path) + logname = datetime.now().strftime("%m%d%H%M") + cmd = f'nohup python ' \ f'{run_wdl_path} -n {args.barcode} -s {args.normal} ' \ f'{"-u " if args.umi else ""} -i {args.input_dir} ' \ f'-node {args.start_node} ' \ f'-o {res_path} -b {args.probe} -p {args.project} -c {args.cancer} -w {args.wdl} ' \ - f'> {res_path}/{args.barcode}_run.log ' \ - f'2>> {res_path}/{args.barcode}_run.log &' + f'> {res_path}/{args.barcode}_{logname}_run.log ' \ + f'2>> {res_path}/{args.barcode}_{logname}_run.log &' # with open(os.path.join(res_path, 'exec'), 'w') as execfile: # execfile.write(cmd + '\n') os.system(cmd) diff --git a/codes/run_wdl.py b/codes/run_wdl.py index e47c3a8..6289f4a 100755 --- a/codes/run_wdl.py +++ b/codes/run_wdl.py @@ -3,6 +3,7 @@ import json import os import subprocess import time +from datetime import datetime import pandas as pd @@ -80,7 +81,8 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl arg = {key: value for key, value in arg.items() if value not in (None, '', False)} # generate json - jsfile_path = os.path.join(output_dir, f'{barcode}.json') + logname = datetime.now().strftime("%m%d%H%M") + jsfile_path = os.path.join(output_dir, f'{barcode}_{logname}.json') with open(jsfile_path, 'w') as jsfile: jsfile.write(json.dumps(arg, indent=4, ensure_ascii=False)) @@ -127,7 +129,7 @@ if __name__ == '__main__': parser.add_argument('-p', '--project', help="project", required=True) parser.add_argument('-c', '--cancer', help="cancer", required=True) parser.add_argument('-b', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True) - parser.add_argument('-w', '--wdl', help="wdl", default='/home/zhangchao/project/pipeline/workflow/pipeline.wdl') + parser.add_argument('-w', '--wdl', help="wdl", default='$WORKFLOW/pipeline.wdl') parser.add_argument('-node', '--start_node', help="node begain to run; 'addQc', 'addAlignment', " "'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo'," diff --git a/codes/target_therapy_longindel.pl b/codes/target_therapy_longindel.pl index f25d764..13aae43 100755 --- a/codes/target_therapy_longindel.pl +++ b/codes/target_therapy_longindel.pl @@ -6,10 +6,10 @@ die "useage:perl $0 input output cancer_type" unless @ARGV == 3; my ($input, $output, $cancer_type) = @ARGV; my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public"; -print "Fusion药物注释使用public路径:$public_path\n"; +print "Longindel药物注释使用public路径:$public_path\n"; my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase"; -print "Fusion药物注释使用路径:$database_path\n"; +print "Longindel药物注释使用路径:$database_path\n"; open MUT, "$database_path/fusion.csv"; ; @@ -26,7 +26,8 @@ my %therapy; while () { chomp; my @line = split("\t"); - push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); + # push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); + push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); } ##药物翻译信息 @@ -106,23 +107,38 @@ while () { my @splitline = split(/\t/); my $freq = (split(/:/, $splitline[9]))[9] / (split(/:/, $splitline[9]))[7]; - my $gene; - if ($_ =~ /Gene\.refGene=([^;]+)/) { - $gene = $1; - } if (exists $therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}) { + print "$freq\n"; foreach my $entry (@{$therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}}) { + my @line = split("\t", $entry); - if (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] =~ /$cancer_type|solid tumor/i) { + if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { + # push @pos, "$_\t.\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; } - elsif (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] !~ /$cancer_type|solid tumor/i) { + elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) { + # push @pos, "$_\t.\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; } - elsif ($line[2] =~ /$cancer_type|solid tumor/i) { + elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) { + # push @pos, "$_\t.\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; } + + # my @line = split("\t", $entry); + # if (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] =~ /$cancer_type|solid tumor/i) { + # push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + # } + # elsif (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] !~ /$cancer_type|solid tumor/i) { + # push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + # } + # elsif ($line[2] =~ /$cancer_type|solid tumor/i) { + # push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + # } + # else { + # print "未匹配到" + # } } } diff --git a/database/chemo_database.xlsx b/database/chemo_database.xlsx index 5f636b8..1b8bac1 100755 Binary files a/database/chemo_database.xlsx and b/database/chemo_database.xlsx differ diff --git a/database/info.csv b/database/info.csv index da3eb78..451824f 100755 --- a/database/info.csv +++ b/database/info.csv @@ -3,8 +3,8 @@ LZ103,160gene,肿瘤160基因检测(组织版),肿瘤个体化用药(160 LZ110,160gene,肿瘤160基因检测(血液版),肿瘤个体化用药(160基因)检测,addTarget;addFusion;addCnv;addMsi;addMmr;addChemo;addHrr1;addHrr2;addHcs;addbigPanel,NA,160,AKT1/ALK/APC/ATM/BARD1/BCL2L11/BRAF/BRCA1/BRCA2/BRIP1/CCND1/CCND2/CCND3/CDK12/CDK4/CDK6/CDKN2A/CHEK1/CHEK2/CSF1R/CTNNB1/DDR2/EGFR/ERBB2/ERBB3/ERBB4/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MET/MTOR/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RB1/RET/ROS1/SMAD4/SMO/STK11/TERT/TP53/TSC1/TSC2/VHL/MDM2/MDM4/MSH2/MSH6/MLH1/PMS2/EPCAM/ATR/BAP1/BMPR1A/CDH1/FANCA/FANCM/MUTYH/NBN/ABCC2/ABCG2/ABRAXAS1/ACYP2/AR/ARID1A/ATIC/B2M/C8orf34/CBR3/CD274/CTNNA1/CYP1A1/CYP1B1/CYP2C19/CYP4B1/DHFR/DNMT3A/ERCC3/GATA3/GEN1/GREM1/HDAC2/MLH3/MRE11/MTR/MYC/NOTCH1/NT5C2/PDCD1LG2/PNPLA3/POLD1/POLE/PPM1D/RAD50/RRM1/SCG5/SDHA/SDHB/SDHC/SDHD/SEMA3C/SLC19A1/SLC28A3/SLCO1B1/SRC/STIM1/TAP1/TAP2/TOP1/TSPAN31/TUBB1/XRCC2,MET,TERT,CDK4/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/MDM2/MDM4/CDKN2A,ALK/NTRK1/NTRK2/NTRK3/RET/ROS1,BCL2L11,ABCB1/CASP7/CDA/CYP19A1/CYP2C8/CYP2D6/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/HAS3/MTHFR/NQO1/SOD2/TPMT/TYMS/UGT1A1/XPC/XRCC1,ALK/APC/ATM/BRAF/BRCA1/BRCA2/CDH1/CDK4/CDKN2A/CHEK2/DNMT3A/EGFR/EPCAM/ERCC3/FANCA/HRAS/KRAS/MAP2K1/MET/MLH1/MSH2/MSH6/MUTYH/NBN/NF1/NRAS/PALB2/PMS2/POLD1/POLE/PTEN/RAD51D/RB1/RET/SDHA/SDHB/SDHC/SDHD/SMAD4/STK11/TP53/TSC1/TSC2/VHL,MSH2/MSH6/MLH1/PMS2/EPCAM,ATM/ATR/BARD1/BRCA1/BRCA2/BRIP1/CDK12/CHEK1/CHEK2/FANCA/FANCL/MLH1/NBN/PALB2/RAD51B/RAD51C/RAD51D/RAD54L,ARID1A/FANCM/HDAC2/POLD1/PTEN/RAD50,NA LZ108,682gene,肿瘤682基因检测(组织版),肿瘤精准诊疗基因检测(682基因),addTarget;addFusion;addCnv;addMsi;addMmr;addChemo;addHrr1;addHrr2;addHcs;addbigPanel;addHpd;addNeoantigen;addTmb,NA,682,ABL1/AKT1/AKT2/AKT3/ALK/APC/ARAF/ATM/BARD1/BCL2L11/BRAF/BRCA1/BRCA2/BRIP1/BTK/CCND1/CCND2/CCND3/CDK12/CDK4/CDK6/CDKN2A/CDKN2B/CHEK1/CHEK2/CSF1R/CTNNB1/DDR2/EGFR/ERBB2/ERBB3/ERBB4/ESR1/EZH2/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FGFR4/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MAP2K2/MET/MPL/MTOR/MYCN/MYD88/MDM2/MDM4/NF1/NF2/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PTCH1/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RAF1/RB1/RET/ROS1/SMAD4/SMARCB1/SMO/STK11/TERT/TP53/TSC1/TSC2/VHL/PPP2R2A/ARID1A/ATR/ATRX/BAP1/BLM/FANCA/FANCC/FANCD2/FANCE/FANCF/FANCG/FANCI/MDC1/MRE11/NBN/RAD50/RAD51/XRCC2/MSH2/MSH6/MLH1/PMS2/EPCAM/AXIN1/B2M/KEAP1/ERCC4/POLE/ARID1B/ARID2/EPHA3/EPHA5/EPHA7/POLD1/LRP1B/NOTCH1/NOTCH2/NOTCH3/BMPR1A/CDC73/CDH1/CEBPA/ERCC3/ERCC5/FH/FLCN/GATA2/HOXB13/MAX/MEN1/MITF/MSH3/MUTYH/PHOX2B/PMS1/PRKAR1A/PTPN11/RIT1/RNF43/SBDS/SDHA/SDHAF2/SDHB/SDHC/SDHD/SLX4/SOS1/SUFU/TMEM127/FANCB/ABCB9/ABCC2/ABCC4/ABCG2/ABRAXAS1/ACE2/ACVR1/ACVR1B/ACVR2A/ACYP2/ADGRA2/AGO2/ALOX12B/AMER1/ANKRD11/AR/ARFRP1/ARID5B/ASXL1/ASXL2/ATIC/AURKA/AURKB/AXIN2/AXL/BABAM1/BBC3/BCL10/BCL2/BCL2L1/BCL2L2/BCL6/BCOR/BCORL1/BCR/BIRC3/BIRC5/BLK/BRD4/BTG1/C8orf34/CALR/CANX/CARD11/CARM1/CASP8/CBFB/CBL/CBR3/CCN6/CCNE1/CCNQ/CD19/CD200/CD22/CD274/CD276/CD38/CD3D/CD3E/CD3G/CD40/CD52/CD74/CD79A/CD79B/CDC42/CDK2/CDK8/CDK9/CDKN1A/CDKN1B/CDKN2C/CENPA/CHD4/CHST3/CIC/COL5A1/COP1/CREBBP/CRKL/CRLF2/CSDE1/CSF3R/CSK/CSNK1A1/CTCF/CTLA4/CTNNA1/CTSB/CTSL/CTSS/CUL3/CUL4A/CXCR4/CYLD/CYP17A1/CYP1A1/CYP1B1/CYP2C19/CYP2E1/CYP4B1/CYSLTR2/DAXX/DCUN1D1/DDR1/DHFR/DICER1/DIS3/DMD/DNAJB1/DNMT1/DNMT3A/DNMT3B/DOT1L/DROSHA/DSCAM/DUSP4/E2F3/EED/EGF/EGFL7/EIF1AX/EIF4A2/EIF4E/ELF3/ELOC/EML4/EMSY/EP300/EPAS1/EPHA2/EPHB1/ERAP1/ERAP2/ERF/ERG/ERRFI1/ETV1/ETV4/ETV5/ETV6/EWSR1/EZH1/EZR/FANCM/FAS/FASTKD3/FAT1/FCGR3A/FGF1/FGF10/FGF12/FGF14/FGF19/FGF2/FGF23/FGF3/FGF4/FGF5/FGF6/FGF7/FGF8/FGF9/FGR/FLT1/FLT4/FOXA1/FOXL2/FOXO1/FOXP1/FRK/FRS2/FUBP1/FYN/GABRA6/GALNT14/GATA1/GATA3/GATA4/GATA6/GEN1/GID4/GLI1/GNA13/GNAS/GPS2/GREM1/GRIN2A/GRM3/GSK3B/H1-2/H2BC5/H3-3A/H3-3B/H3-4/H3-5/H3C1/H3C10/H3C11/H3C12/H3C13/H3C14/H3C2/H3C3/H3C4/H3C6/H3C7/H3C8/HAS3/HDAC2/HGF/HLA-A/HLA-B/HNF1A/HSD3B1/HSP90AA1/ICOSLG/ID3/IDE/IFI30/IFNGR1/IGF1/IGF1R/IGF2/IKBKE/IKZF1/IL10/IL2RA/IL2RB/IL2RG/IL6/IL7R/INHA/INHBA/INPP4A/INPP4B/INPPL1/INSR/IRF2/IRF4/IRS1/IRS2/ITK/JUN/KAT6A/KCNJ5/KDM5A/KDM5C/KDM6A/KEL/KLC3/KLF4/KLHL6/KMT2A/KMT2B/KMT2C/KMT2D/KMT5A/KNSTRN/LATS1/LATS2/LCK/LGMN/LIMK1/LMO1/LNPEP/LRIG3/LRP2/LTK/LYN/MALT1/MAP2K4/MAP2K7/MAP3K1/MAP3K13/MAP3K14/MAP4K5/MAPK1/MAPK3/MAPKAP1/MCL1/MED12/MEF2B/MERTK/MGA/MLH3/MSI1/MSI2/MST1/MST1R/MTR/MTRR/MYB/MYC/MYCL/MYOD1/NCOA3/NCOR1/NEGR1/NFE2L2/NFKBIA/NKX2-1/NKX3-1/NOS3/NOTCH4/NPEPPS/NPM1/NRDC/NRG1/NSD1/NSD2/NSD3/NT5C2/NTHL1/NUF2/NUP93/NUTM1/PAK1/PAK3/PAK5/PARP1/PARP2/PAX5/PBRM1/PCBP1/PCNA/PDCD1/PDCD1LG2/PDIA3/PDK1/PDPK1/PGR/PHF6/PIK3C2B/PIK3C2G/PIK3C3/PIK3CB/PIK3CD/PIK3CG/PIK3R1/PIK3R2/PIK3R3/PIM1/PLCG2/PLK2/PMAIP1/PNPLA3/PNRC1/POLB/POLR1G/PPARG/PPM1D/PPP2R1A/PPP4R2/PPP6C/PRDM1/PRDM14/PREX2/PRKCI/PRKD1/PRKDC/PRKN/PTGS2/PTK2/PTK6/PTP4A1/PTPRD/PTPRS/PTPRT/QKI/RAB35/RAC1/RAC2/RAD21/RAD52/RANBP2/RARA/RASA1/RBM10/RECQL/RECQL4/REL/RHEB/RHOA/RICTOR/RNU6-28P/ROCK1/RPL13/RPS6KA4/RPS6KB2/RPTOR/RRAGC/RRAS/RRAS2/RRM1/RSPO2/RTEL1/RUNX1/RXRA/RYBP/SCG5/SDC4/SEMA3C/SESN1/SESN2/SESN3/SETD2/SF3B1/SGK1/SH2B3/SH2D1A/SHOC2/SHQ1/SIK1/SLAMF7/SLC19A1/SLC22A2/SLC28A3/SLC34A2/SLCO1B1/SLCO1B3/SMAD2/SMAD3/SMARCA4/SMARCD1/SMYD3/SNCAIP/SOCS1/SOX10/SOX2/SOX17/SOX9/SPEN/SPOP/SPRED1/SPRY2/SRC/SRD5A2/SRMS/SRSF2/STAG2/STAT3/STAT4/STAT5A/STAT5B/STIM1/STK19/STK40/SUZ12/SYK/TAP1/TAP2/TAPBP/TAPBPL/TBX3/TCF3/TCF7L2/TEK/TENT5C/TET1/TET2/TGFBR1/TGFBR2/TIPARP/TMPRSS2/TNF/TNFAIP3/TNFRSF14/TNFRSF8/TNFSF11/TOP1/TP53BP1/TP63/TP73/TPP2/TRAF2/TRAF7/TSHR/TSPAN31/TUBB1/TYK2/TYRO3/U2AF1/UMPS/UPF1/VEGFA/VSIR/VTCN1/WAS/WT1/WWTR1/XIAP/XPO1/YAP1/YES1/ZFHX3/ZNF217/ZNF703,MET,TERT,CDK4/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/MYCN/MDM2/MDM4/CDKN2A/CDKN2B,ALK/BRAF/FGFR1/FGFR2/FGFR3/NTRK1/NTRK2/NTRK3/RET/ROS1,BCL2L11,ABCB1/CDA/CYP19A1/CYP2B6/CYP2C8/CYP2D6/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/MTHFR/NQO1/NUDT15/SOD2/TPMT/TYMS/UGT1A1/XPC/XRCC1/CCND1,ALK/APC/ASXL1/ATM/ATRX/BAP1/BLM/BMPR1A/BRAF/BRCA1/BRCA2/BRIP1/CBL/CDC73/CDH1/CDK4/CDKN1B/CDKN2A/CEBPA/CHEK2/CREBBP/CTLA4/DICER1/DNMT3A/EGFR/EP300/EPCAM/ERCC4/ETV6/FANCA/FANCC/FANCD2/FANCE/FANCF/FANCG/FANCI/FANCL/FAS/FH/FLCN/GATA2/HOXB13/HRAS/IKZF1/KRAS/MAP2K1/MAP2K2/MAX/MEN1/MET/MITF/MLH1/MSH2/MSH3/MSH6/MUTYH/NBN/NF1/NF2/NKX2-1/NRAS/NTHL1/PALB2/PAX5/PHOX2B/PMS2/POLD1/POLE/PRKAR1A/PTCH1/PTEN/PTPN11/RAD51/RAD51C/RAD51D/RAF1/RB1/RET/RIT1/RNF43/RRAS2/RUNX1/SBDS/SDHA/SDHAF2/SDHB/SDHC/SDHD/SLX4/SMAD4/SMARCA4/SMARCB1/SOS1/STAT3/STK11/SUFU/TERT/TMEM127/TP53/TSC1/TSC2/VHL/WT1/XRCC2,MSH2/MSH6/MLH1/PMS2/EPCAM,ATM/ATR/BARD1/BRCA1/BRCA2/BRIP1/CDK12/CHEK1/CHEK2/FANCA/FANCL/MLH1/NBN/PALB2/RAD51B/RAD51C/RAD51D/RAD54L,ARID1A/FANCM/HDAC2/POLD1/PTEN/RAD50/PPP2R2A/ATR/ATRX/BAP1/BLM/FANCA/FANCC/FANCD2/FANCE/FANCF/FANCG/FANCI/MDC1/MRE11/NBN/RAD51/XRCC2/ARID1B/ARID2/EMSY/RAD52,MDM2/MDM4/EGFR/JAK1/JAK2/CTNNB1/AXIN1/APC/ALK/B2M/PTEN/STK11/KEAP1/ATM/ATR/BRCA1/BRCA2/CHEK1/CHEK2/BAP1/ERCC4/POLE/PALB2/RAD51C/RAD51D/ARID1A/ARID1B/ARID2/EPHA3/EPHA5/EPHA7/NF1/POLD1/LRP1B/NOTCH1/NOTCH2/NOTCH3 LZ113,624gene,肿瘤624基因检测(血液版),肿瘤精准诊疗基因检测(624基因),addTarget;addFusion;addCnv;addMsi;addMmr;addChemo;addHrr1;addHrr2;addHcs;addbigPanel;addHpd;addNeoantigen;addTmb,NA,624,ABL1/AKT1/AKT2/AKT3/ALK/APC/ARAF/ATM/BARD1/BCL2L11/BRAF/BRCA1/BRCA2/BRIP1/BTK/CCND1/CCND2/CCND3/CCNE1/CDK12/CDK4/CDKN2A/CDKN2B/CHEK1/CHEK2/CSF1R/CSF3R/CTNNB1/CXCR4/DDR2/EGFR/ERBB2/ERBB3/ERBB4/ESR1/EZH2/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FGFR4/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MAP2K2/MDM2/MET/MPL/MTOR/MYCN/MYD88/NF1/NF2/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PPP2R2A/PTCH1/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RAF1/RB1/RET/ROS1/SMAD4/SMARCB1/SMO/STK11/TP53/TSC1/TSC2/VHL/ARID1A/ARID1B/ARID2/ATR/ATRX/BAP1/BLM/EMSY/FANCA/FANCC/FANCD2/FANCE/FANCG/FANCM/HDAC2/MRE11/NBN/POLD1/RAD50/RAD51/RAD52/WRN/XRCC2/MLH1/MSH2/MSH6/PMS2/EPCAM/AXIN1/B2M/CD274/EPHA3/ERCC4/FGF19/FGF3/FGF4/KEAP1/MDM4/MTAP/NFE2L2/NOTCH1/NOTCH2/POLE/RNF43/SETD2/ASXL1/CBL/CDC73/CDH1/CDKN1B/CEBPA/CREBBP/DNMT3A/EP300/ERCC3/FAS/FH/FLCN/IKZF1/MAPK1/MEN1/MITF/NKX2-1/PAX5/PRKAR1A/PTPN11/SDHA/SDHB/SDHC/SDHD/SMARCA4/STAT3/SUFU/TERC/WT1/ABL2/ACSL6/ACVR1/ACVR1B/ACVR2A/AFDN/AFF1/AFF3/AKAP9/ALDH2/ALOX12B/AMER1/AR/ARFRP1/ARHGAP5/ARHGEF12/ASXL2/ATP1A1/ATP2B3/AURKA/AURKB/AXIN2/AXL/B3GALT2/BAX/BCL11A/BCL2/BCL2L1/BCL2L12/BCL2L2/BCL3/BCL6/BCL9/BCL9L/BCLAF1/BCOR/BCORL1/BCR/BIRC6/BMP5/BMPR1A/BRD3/BRD4/BTG1/BTG2/BUB1B/CALR/CAMTA1/CARD11/CASP8/CASP9/CBFB/CCDC6/CCR4/CD209/CD22/CD70/CD74/CD79A/CD79B/CDH10/CDH11/CDK8/CDKN1A/CDKN2C/CDX2/CEP43/CEP89/CHD2/CHD4/CIC/CIITA/CLIP1/CLTC/CLTCL1/CNBD1/CNTNAP2/CNTRL/COL1A1/COL2A1/CRKL/CRLF2/CRNKL1/CRTC1/CSMD3/CTCF/CTNNA1/CTNND1/CTNND2/CUL4A/CUX1/CYLD/CYP17A1/DAXX/DCBLD1/DCC/DDB2/DDR1/DDX10/DDX5/DDX6/DGCR8/DICER1/DIS3/DNM2/DNMT1/DOT1L/DROSHA/EBF1/EED/ELL/EML4/EPHA7/EPHB1/EPHB4/EPS15/ERCC5/ERG/ERRFI1/ETV1/ETV6/EVI2A/EVI2B/EXT2/EZR/FAM135B/FAM47C/FAT1/FAT3/FAT4/FBXO11/FCGR2B/FES/FGF10/FGF12/FGF14/FGF23/FGF6/FHIT/FKBP9/FLNA/FLT1/FLT4/FOXA1/FOXL2/FOXO3/FOXP1/FUBP1/GABRA6/GAS7/GATA1/GATA2/GATA3/GATA4/GATA6/GID4/GLI1/GNA13/GNAS/GRIN2A/GRM3/GSK3B/GSTTP2/H3-3A/H3C2/H4C9/HDAC1/HERPUD1/HGF/HIF1A/HIF1A-AS3/HIP1/HLA-A/HLA-B/HLA-C/HMGA1/HNF1A/HNRNPA2B1/HOXA11/HSP90AA1/HSP90AB1/ID3/IGF1R/IKBKE/IL6ST/IL7R/INPP4B/IRF2/IRF4/IRS2/IRS4/ISX/ITGAV/ITK/JUN/KAT6A/KAT6B/KAT7/KCNJ5/KDM5A/KDM5C/KDM6A/KEL/KIAA1549/KLF4/KLHL6/KMT2A/KMT2C/KMT2D/KNL1/KNSTRN/KTN1/LARP4B/LATS1/LATS2/LHFPL6/LMNA/LMO1/LMO2/LOC101928140/LOC645967/LPP/LRIG3/LRP1B/LRRC4/LSM14A/LTK/LYN/LZTR1/MACC1/MAF/MAP2K4/MAP3K13/MAX/MB21D2/MCL1/MECOM/MED12/MEF2B/MERTK/MKNK1/MLH3/MLLT3/MSI2/MSH3/MSN/MST1R/MUC1/MUC16/MUC4/MYB/MYC/MYCL/MYH11/MYOD1/N4BP2/NAB2/NACA/NBEA/NCOA2/NCOR1/NCOR2/NDRG1/NFATC2/NFKB2/NFKBIA/NFKBIE/NIN/NOTCH3/NPIPB2/NPM1/NRG1/NSD1/NSD2/NSD3/NUMA1/NUP214/NUTM2B/OLIG2/P2RY8/PABPC1/PARP1/PARP2/PARP3/PAX3/PBRM1/PCBP1/PCM1/PDCD1/PDCD1LG2/PDE4DIP/PDK1/PER1/PIK3C2B/PIK3C2G/PIK3CB/PIK3R1/PIM1/PLCG1/PMS1/POLQ/PPARG/PPFIBP1/PPM1D/PPP2R1A/PPP6C/PRCC/PRDM1/PRDM2/PREX2/PRKACA/PRKCB/PRKCI/PRKN/PTPN13/PTPN6/PTPRB/PTPRC/PTPRD/PTPRO/PTPRT/QKI/RABEP1/RAC1/RAD21/RANBP2/RARA/RBM10/RBM15/RECQL4/REL/RGPD3/RGS7/RHOA/RICTOR/RIT1/RNF213/ROBO2/RPL10/RPL22/RPL5/RPTOR/RUNX1/RUNX1T1/SALL4/SDC4/SEPTIN9/SETBP1/SETD1B/SETDB1/SF3B1/SGK1/SH2B3/SHTN1/SIRPA/SIX1/SIX2/SLC34A2/SMAD2/SMAD3/SMARCE1/SMC1A/SNCAIP/SNX29/SOCS1/SOX2/SOX9/SPECC1/SPEN/SPOP/SRC/SRSF2/SS18/SSX1/STAG1/STAG2/STAT5B/STAT6/STIL/SYK/TAL2/TAP1/TAP2/TBL1XR1/TBX3/TCEA1/TCF3/TCF7L2/TEC/TEK/TENT5C/TERT/TET1/TET2/TFE3/TFG/THRAP3/TIPARP/TMPRSS2/TNC/TNFAIP3/TNFRSF14/TNFRSF17/TP63/TPR/TRAF7/TRIM24/TRIM33/TRIP11/TRRAP/TSHR/TYRO3/U2AF1/UBR5/USP6/USP8/VEGFA/WAS/WNK2/XPO1/ZEB1/ZFHX3/ZMYM2/ZMYM3/ZNF217/ZNF479/ZNF703/ZNRF3/ZRSR2,MET,TERT,ALK/ATM/B2M/CCND1/CCND2/CCND3/CCNE1/CD274/CDK4/CDKN2A/CDKN2B/CSF1R/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MDM2/MDM4/MET/MYCN/NF2/PTEN/RB1/SMARCB1,ALK/BRAF/FGFR1/FGFR2/FGFR3/MET/NTRK1/NTRK2/NTRK3/PDGFRA/RET/ROS1,BCL2L11,ABCB1/ABCG2/ACYP2/CASP7/CDA/CEP72/CYP19A1/CYP2B6/CYP2D6/CYP3A4/DHFR/DPYD/ERCC1/ERCC2/GSTP1/HAS3/MTHFR/MUTYH/NQO1/NUDT15/PNPLA3/RRM1/SLCO1B1/SLCO1B3/SOD2/TP53/TPMT/TYMS/UGT1A1/XRCC1,ALK/APC/ASXL1/ATM/ATRX/BAP1/BLM/BMPR1A/BRAF/BRCA1/BRCA2/BRIP1/CBL/CDC73/CDH1/CDK4/CDKN1B/CDKN2A/CEBPA/CHEK2/CREBBP/DICER1/DNMT3A/EGFR/EP300/EPCAM/ERCC3/ERCC4/ETV6/FANCA/FANCC/FANCD2/FANCE/FANCG/FANCL/FANCM/FAS/FH/FLCN/GATA2/HRAS/IKZF1/KRAS/LZTR1/MAP2K1/MAP2K2/MAPK1/MEN1/MET/MITF/MLH1/MSH2/MSH3/MSH6/MUTYH/NBN/NF1/NF2/NKX2-1/NRAS/PALB2/PAX5/PMS2/POLD1/POLE/PRKAR1A/PTCH1/PTEN/PTPN11/RAD51/RAD51C/RAD51D/RAF1/RB1/RET/RNF43/RUNX1/SDHA/SDHB/SDHC/SDHD/SMAD4/SMARCA4/SMARCB1/STAT3/STK11/SUFU/TERC/TP53/TSC1/TSC2/VHL/WRN/WT1/XRCC2,MSH2/MSH6/MLH1/PMS2/EPCAM,ATM/ATR/BARD1/BRCA1/BRCA2/BRIP1/CDK12/CHEK1/CHEK2/FANCA/FANCL/MLH1/NBN/PALB2/RAD51B/RAD51C/RAD51D/RAD54L,ARID1A/FANCM/HDAC2/POLD1/PTEN/RAD50/PPP2R2A/ATR/ATRX/BAP1/BLM/FANCA/FANCC/FANCD2/FANCE/FANCG/MRE11/NBN/RAD51/XRCC2/ARID1B/ARID2/EMSY/RAD52/WRN,CCND1/ALK/APC/ARID1A/ATM/ATR/AXIN1/B2M/BAP1/BRCA1/BRCA2/BRIP1/CD274/CDK12/CDKN2A/CDKN2B/CHEK1/CHEK2/CTNNB1/EGFR/EPHA3/ERCC4/FGF19/FGF3/FGF4/JAK1/JAK2/KEAP1/MDM2/MDM4/MRE11/MTAP/NBN/NF1/NFE2L2/NOTCH1/NOTCH2/NTRK3/PALB2/POLD1/POLE/PTEN/RAD50/RAD51C/RAD51D/RNF43/SETD2/SMO/STK11 -LZ102,lung_17_gene,肺癌17基因检测(组织版),肺癌17基因检测报告(组织版),addTarget;addFusion;addCnv,ST21,17,ALK/AKT1/BRAF/EGFR/ERBB2/KRAS/MAP2K1/MET/PIK3CA/PTEN/NTRK1/NTRK2/NTRK3/NRAS,MET,NA,EGFR/ERBB2/MET,ALK/NTRK1/NTRK2/NTRK3/RET/ROS1,BCL2L11,NA,NA,NA,NA,NA,NA -LZ111,lung_17_gene,肺癌17基因检测(血液版),肺癌17基因检测报告(血液版),addTarget;addFusion;addCnv,ST21,17,ALK/AKT1/BRAF/EGFR/ERBB2/KRAS/MAP2K1/MET/PIK3CA/PTEN/NTRK1/NTRK2/NTRK3/NRAS,MET,NA,EGFR/ERBB2/MET,ALK/NTRK1/NTRK2/NTRK3/RET/ROS1,BCL2L11,NA,NA,NA,NA,NA,NA +LZ102,lung_17_gene,肺癌17基因检测(组织版),肺癌17基因检测报告(组织版),addTarget;addFusion;addCnv,ST21,17,ALK/AKT1/BRAF/EGFR/ERBB2/KRAS/MAP2K1/MET/PIK3CA/PTEN/NTRK1/NTRK2/NTRK3/NRAS/RET/ROS1/BCL2L11,MET,NA,ALK/EGFR/ERBB2/MET,ALK/NTRK1/NTRK2/NTRK3/RET/ROS1,BCL2L11,NA,NA,NA,NA,NA,NA +LZ111,lung_17_gene,肺癌17基因检测(血液版),肺癌17基因检测报告(血液版),addTarget;addFusion;addCnv,ST21,17,ALK/AKT1/BRAF/EGFR/ERBB2/KRAS/MAP2K1/MET/PIK3CA/PTEN/NTRK1/NTRK2/NTRK3/NRAS/RET/ROS1/BCL2L11,MET,NA,ALK/EGFR/ERBB2/MET,ALK/NTRK1/NTRK2/NTRK3/RET/ROS1,BCL2L11,NA,NA,NA,NA,NA,NA LZ135,lung_85_gene,肺癌85基因检测(组织版),肺癌85基因检测报告(组织版),addTarget;addFusion;addCnv;addMsi;addMmr;addChemo,ST21,160,AKT1/ALK/ATM/BARD1/BCL2L11/BRAF/BRCA1/BRCA2/BRIP1/CCND1/CCND2/CDK12/CDK4/CDKN2A/CHEK1/CHEK2/CSF1R/DDR2/EGFR/ERBB2/ERBB4/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/HRAS/IDH1/KIT/KRAS/MET/MTOR/MYC/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PIK3CA/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RET/ROS1/TP53/TSC1/TSC2/VHL/STK11/APC/ARID1A/CTNNB1/EPCAM/ERBB3/MAP2K1/MLH1/MSH2/MSH6/PMS2/RB1/SMAD4/SMO,MET,NA,ALK/CCND1/CCND2/CDK4/CDKN2A/CSF1R/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/MET/MYC/SMO,ALK/NTRK1/NTRK2/NTRK3/RET/ROS1,BCL2L11,ABCB1/CASP7/CDA/CYP2C8/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/MTHFR/NQO1/SOD2/TPMT/TYMS/UGT1A1/XRCC1/XPC/HAS3,NA,MSH2/MSH6/MLH1/PMS2/EPCAM,NA,NA,NA LZ136,lung_85_gene,肺癌85基因检测(血液版),肺癌85基因检测报告(血液版),addTarget;addFusion;addCnv;addMmr;addChemo,ST21,160,AKT1/ALK/ATM/BARD1/BCL2L11/BRAF/BRCA1/BRCA2/BRIP1/CCND1/CCND2/CDK12/CDK4/CDKN2A/CHEK1/CHEK2/CSF1R/DDR2/EGFR/ERBB2/ERBB4/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/HRAS/IDH1/KIT/KRAS/MET/MTOR/MYC/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PIK3CA/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RET/ROS1/TP53/TSC1/TSC2/VHL/STK11/APC/ARID1A/CTNNB1/EPCAM/ERBB3/MAP2K1/MLH1/MSH2/MSH6/PMS2/RB1/SMAD4/SMO,MET,NA,ALK/CCND1/CCND2/CDK4/CDKN2A/CSF1R/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/MET/MYC/SMO,ALK/NTRK1/NTRK2/NTRK3/RET/ROS1,BCL2L11,ABCB1/CASP7/CDA/CYP2C8/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/MTHFR/NQO1/SOD2/TPMT/TYMS/UGT1A1/XRCC1/XPC/HAS3,NA,MSH2/MSH6/MLH1/PMS2/EPCAM,NA,NA,NA LZ137,Colorectal_88_gene,结直肠癌88基因检测(组织版),结直肠癌88基因检测报告(组织版),addTarget;addFusion;addCnv;addMsi;addMmr;addChemo;addHcs,ST14,160,AKT1/ALK/APC/ARID1A/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/CCND2/CCND3/CDK12/CDK4/CDKN2A/CHEK1/CHEK2/CSF1R/CTNNB1/EGFR/EPCAM/ERBB2/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FLT3/IDH1/KDR/KIT/KRAS/MAP2K1/MET/MLH1/MSH2/MSH6/MTOR/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PIK3CA/PMS2/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RB1/RET/ROS1/SMAD4/SMO/TP53/TSC1/TSC2/VHL/POLD1/POLE/STK11/MUTYH/SDHA/SDHB/SDHC/SDHD,MET,NA,ALK/CCND2/CCND3/CDK4/CDKN2A/CSF1R/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/PTEN/RB1,ALK/NTRK1/NTRK2/NTRK3/RET/ROS1,NA,ABCB1/CASP7/CDA/CYP2C8/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/MTHFR/NQO1/SOD2/TPMT/TYMS/UGT1A1/XRCC1/XPC/HAS3,MLH1/MSH2/MSH6/PMS2/EPCAM/TP53/APC/MUTYH/SMAD4/STK11/PTEN,MSH2/MSH6/MLH1/PMS2/EPCAM,NA,NA,NA diff --git a/pipeline.wdl b/pipeline.wdl index 5ee794d..7f92fdc 100644 --- a/pipeline.wdl +++ b/pipeline.wdl @@ -1,3 +1,4 @@ + import "./wdl/qc.wdl" import "./wdl/alignment.wdl" import "./wdl/call_mutation.wdl" @@ -199,6 +200,7 @@ workflow pipeline { msi=call_msi.msi_txt, hereditary=call_hereditary.hereditary_txt, chemo=call_chemo.chemo_res, + neoantigen=call_neoantigen.neoantigen_txt, pollution=call_pollution.pollution_res, name=tumor, normal=normal, diff --git a/wdl/call_mutation.wdl b/wdl/call_mutation.wdl index 10ae946..2658d98 100755 --- a/wdl/call_mutation.wdl +++ b/wdl/call_mutation.wdl @@ -1,3 +1,4 @@ + task mutation_calling_umi { String name String output_dir diff --git a/wdl/chemo.wdl b/wdl/chemo.wdl index 84df779..d5598b9 100755 --- a/wdl/chemo.wdl +++ b/wdl/chemo.wdl @@ -1,3 +1,4 @@ + task run_chemo { String name String output_dir @@ -10,11 +11,12 @@ task run_chemo { if [ ! -d ${output_dir}/chemo ];then mkdir ${output_dir}/chemo fi + chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project} >>> output { - String chemo_res = "${output_dir}/chemo/${name}.drug.res.txt" + String run_chemo_res = "${output_dir}/chemo/${name}.drug.res.txt" } } diff --git a/wdl/cnv.wdl b/wdl/cnv.wdl index decb170..9e145c4 100755 --- a/wdl/cnv.wdl +++ b/wdl/cnv.wdl @@ -1,3 +1,4 @@ + task cnv_single { String name String output_dir diff --git a/wdl/hereditary.wdl b/wdl/hereditary.wdl index 77d6d76..fdc0926 100755 --- a/wdl/hereditary.wdl +++ b/wdl/hereditary.wdl @@ -1,3 +1,4 @@ + task run_hereditary { String name String output_dir @@ -8,6 +9,7 @@ task run_hereditary { if [ ! -d ${output_dir}/hereditary ];then mkdir ${output_dir}/hereditary fi + hereditary.py -d $DATABASE/hereditary_database.xlsx -p ${project} -n ${name} -f ${filter_txt} -o ${output_dir}/hereditary >>> diff --git a/wdl/msi.wdl b/wdl/msi.wdl index 161f9aa..3b31417 100755 --- a/wdl/msi.wdl +++ b/wdl/msi.wdl @@ -1,3 +1,4 @@ + task msi_single { String name String bed @@ -18,7 +19,7 @@ task msi_single { >>> output { - String msi_txt = "${output_dir}/msi/${name}.msi.txt" + String run_msi_txt = "${output_dir}/msi/${name}.msi.txt" } } @@ -44,7 +45,7 @@ task msi_paired { >>> output { - String msi_txt = "${output_dir}/msi/${name}.msi.txt" + String run_msi_txt = "${output_dir}/msi/${name}.msi.txt" } } diff --git a/wdl/neoantigen.wdl b/wdl/neoantigen.wdl index 2d86796..d8f01ef 100755 --- a/wdl/neoantigen.wdl +++ b/wdl/neoantigen.wdl @@ -1,3 +1,4 @@ + task run_neoantigen { String tumor String? normal @@ -12,7 +13,7 @@ task run_neoantigen { command <<< if [ ! -d ${output_dir}/neoantigen/hla ];then - mkdir ${output_dir}/neoantigen/hla + mkdir -p ${output_dir}/neoantigen/hla fi razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_1.bam \ @@ -145,7 +146,7 @@ task run_neoantigen { /data dos2unix ${output_dir}/neoantigen/MHC_Class_I/*.all_epitopes.tsv - # perl ${output_dir}/netchop.pl $outputDir $name $tumor $max_peptide_length + netchop.pl ${output_dir} ${tumor} >>> } @@ -178,4 +179,8 @@ workflow call_neoantigen { sample_type=if umi then 'c' else 't' } } + + output { + String neoantigen_txt = "${output_dir}neoantigen/MHC_Class_I/neoantigen.txt" + } } \ No newline at end of file diff --git a/wdl/pollution.wdl b/wdl/pollution.wdl index 5ab38a3..a52bd8f 100755 --- a/wdl/pollution.wdl +++ b/wdl/pollution.wdl @@ -1,3 +1,4 @@ + task run_pollution { String name String output_dir @@ -20,7 +21,7 @@ task run_pollution { >>> output { - String pollution_res = "${output_dir}/pollution/${name}_pollution.csv" + String run_pollution_res = "${output_dir}/pollution/${name}_pollution.csv" } } diff --git a/wdl/postprocess.wdl b/wdl/postprocess.wdl index e458b91..c19816e 100755 --- a/wdl/postprocess.wdl +++ b/wdl/postprocess.wdl @@ -1,3 +1,4 @@ + task run_post { String? mutation String? fusion @@ -5,6 +6,7 @@ task run_post { String? msi String? hereditary String? chemo + String? neoantigen String? pollution String name String? normal @@ -22,7 +24,7 @@ task run_post { >>> output { - String merged = "${output_dir}/report/${name}.merged_file.xlsx" + String run_merged = "${output_dir}/report/${name}.merged_file.xlsx" } } @@ -37,6 +39,7 @@ workflow call_postprocess { String? hereditary String? pollution String? chemo + String? neoantigen String name String? normal String output_dir @@ -52,6 +55,7 @@ workflow call_postprocess { msi=msi, hereditary=hereditary, chemo=chemo, + neoantigen=neoantigen, pollution=pollution, name=name, normal=normal, diff --git a/wdl/qc.wdl b/wdl/qc.wdl index 6f019fc..13f4465 100755 --- a/wdl/qc.wdl +++ b/wdl/qc.wdl @@ -1,3 +1,4 @@ + task runqc { String name String input_dir diff --git a/wdl/statistics.wdl b/wdl/statistics.wdl index f52dd1d..0199cd0 100755 --- a/wdl/statistics.wdl +++ b/wdl/statistics.wdl @@ -1,3 +1,4 @@ + task run_statistics { String name String output_dir diff --git a/wdl/tmb.wdl b/wdl/tmb.wdl index aa4e630..a0fe4dd 100755 --- a/wdl/tmb.wdl +++ b/wdl/tmb.wdl @@ -1,3 +1,4 @@ + task run_tmb { String name String file @@ -19,7 +20,7 @@ task run_tmb { >>> output { - String tmb_txt = "${output_dir}/tmb/${name}.tmb.txt" + String run_tmb_txt = "${output_dir}/tmb/${name}.tmb.txt" } } @@ -50,7 +51,7 @@ workflow call_tmb { name=name, file=file, project=project, - sample_type='c', + sample_type='t', output_dir=output_dir }