流程更新

master
chaopower 2023-12-25 14:06:30 +08:00
parent c9f525b2bf
commit bd66bc6612
24 changed files with 254 additions and 162 deletions

View File

@ -1,4 +1,5 @@
#! /usr/bin/env python3
import argparse
import logging
import os

View File

@ -1,114 +0,0 @@
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import os
import re
import sys
import pandas as pd
if len(sys.argv) != 3:
print(" ".join(['usage:python3', sys.argv[0], 'output_dir', 'name']))
sys.exit()
output_dir = sys.argv[1]
name = sys.argv[2]
snv_file = os.path.join(output_dir, 'mutation', f'{name}.somatic.hg19_multianno.filter.sum.pos.txt')
snv_file_new = os.path.join(output_dir, 'mutation', f'{name}.somatic.hg19_multianno.filter.sum.pos.dedup.txt')
fusion_file = os.path.join(output_dir, 'fusion', f'{name}.fusion.hg19_multianno.filter.fusion.pos.txt')
fusion_file_new = os.path.join(output_dir, 'fusion', f'{name}.fusion.hg19_multianno.filter.fusion.pos.dedup.txt')
cnv_file = os.path.join(output_dir, 'cnv', f'{name}.rmdup.cns.filter.pos.txt')
cnv_file_new = os.path.join(output_dir, 'cnvkit', f'{name}.rmdup.cns.filter.pos.dedup.txt')
# gm_snv_file = os.path.join(output_dir, '/mutation/', name, '.snvindel.Germline.pos.txt')
# gm_snv_file_new = os.path.join(output_dir, '/mutation/', name, '.snvindel.Germline.pos.dedup.txt')
open(snv_file_new, "w")
open(fusion_file_new, "w")
open(cnv_file_new, "w")
# open(gm_snv_file_new, "w")
##Evidence_Source_C及标签排序
df_mapping_1 = pd.DataFrame({
'Evidence_Source_C': ['FDA', 'NMPA', 'NCCN', '临床III期', '临床II期', '临床I期', '临床试验', '回顾性研究', '个案', '临床前研究'],
})
sort_mapping_1 = df_mapping_1.reset_index().set_index('Evidence_Source_C')
df_mapping_2 = pd.DataFrame({'标签': ['适应症', '非适应症', '.']})
sort_mapping_2 = df_mapping_2.reset_index().set_index('标签')
##snvindel处理
snv_size = os.path.getsize(snv_file)
if snv_size > 0:
data = pd.read_table(snv_file, sep="\t")
data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index'])
data['level2'] = data['标签'].map(sort_mapping_2['index'])
data.sort_values(by=['AAChange.refGene', 'level2', 'level1'], ascending=True, inplace=True)
data.drop(['level1', 'level2'], axis=1, inplace=True)
info = {}
for index, row in data.iterrows():
if re.search(r'敏感', row['Response_Type_C']):
if row['标签'] == '适应症':
info[row['AAChange.refGene'] + row['Drug']] = '1'
else:
if (row['AAChange.refGene'] + row['Drug']) in info.keys():
data.drop([index], inplace=True)
data.insert(0, '可信', 1)
data.to_csv(snv_file_new, index=False, sep='\t')
# ##germline snv/indel处理
# gm_snv_size = os.path.getsize(gm_snv_file)
# if gm_snv_size > 0:
# data = pd.read_table(gm_snv_file, sep="\t")
# data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index'])
# data['level2'] = data['标签'].map(sort_mapping_2['index'])
# data.sort_values(by=['AAChange.refGene', 'level2', 'level1'], ascending=True, inplace=True)
# data.drop(['level1', 'level2'], axis=1, inplace=True)
# info = {}
# for index, row in data.iterrows():
# if re.search(r'敏感', row['Response_Type_C']):
# if row['标签'] == '适应症':
# info[row['AAChange.refGene'] + row['Drug']] = '1'
# else:
# if (row['AAChange.refGene'] + row['Drug']) in info.keys():
# data.drop([index], inplace=True)
# data.insert(0, '可信', 1)
# data.to_csv(gm_snv_file_new, index=False, sep='\t')
##fusion处理
fusion_size = os.path.getsize(fusion_file)
if fusion_size > 0:
data = pd.read_table(fusion_file, sep="\t")
data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index'])
data['level2'] = data['标签'].map(sort_mapping_2['index'])
data.sort_values(by=['FUSION', 'level2', 'level1'], ascending=True, inplace=True)
data.drop(['level1', 'level2'], axis=1, inplace=True)
info = {}
for index, row in data.iterrows():
if re.search(r'敏感', row['Response_Type_C']):
if row['标签'] == '适应症':
info[row['FUSION'] + row['Drug']] = '1'
else:
if (row['FUSION'] + row['Drug']) in info.keys():
data.drop([index], inplace=True)
data.insert(0, '可信', 1)
data.to_csv(fusion_file_new, index=False, sep='\t')
##cnv处理
cnv_size = os.path.getsize(cnv_file)
if cnv_size > 0:
data = pd.read_table(cnv_file, sep="\t")
data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index'])
data['level2'] = data['标签'].map(sort_mapping_2['index'])
data.sort_values(by=['Gene_Symbol', 'level2', 'level1'], ascending=True, inplace=True)
data.drop(['level1', 'level2'], axis=1, inplace=True)
info = {}
for index, row in data.iterrows():
if re.search(r'敏感', row['Response_Type_C']):
if row['标签'] == '适应症':
info[row['Gene_Symbol'] + row['Drug']] = '1'
else:
if (row['Gene_Symbol'] + row['Drug']) in info.keys():
data.drop([index], inplace=True)
data.insert(0, '可信', 1)
data.to_csv(cnv_file_new, index=False, sep='\t')

View File

@ -30,14 +30,15 @@ while (<IN>) {
}
my @line = split(/\t/);
$line[7] =~ /Gene.refGene=(.*?);/;
if ((grep {$1 =~ /$_/} @longindels) && ($_ =~ /SVTYPE=DEL/ || $_ =~ /SVTYPE=DUP/ || $_ =~ /SVTYPE=INS/)) {
if ($1 eq "BCL2L11") {
my $gene = $1;
if ((grep {$gene =~ /$_/} @longindels) && ($_ =~ /SVTYPE=DEL/ || $_ =~ /SVTYPE=DUP/ || $_ =~ /SVTYPE=INS/)) {
if ($gene eq "BCL2L11") {
if ($line[1] == '111883194') {
print LONGINDEL join("\n", @pos) . "\n";
print LONGINDEL $_;
}
}
else {
print LONGINDEL join("\n", @pos) . "\n";
print LONGINDEL $_;
}
}
}

View File

@ -1,4 +1,5 @@
#!/usr/bin/env perl
use strict;
#use warnings;
use List::Util qw(sum);
@ -131,12 +132,12 @@ while (<IN>) {
$line[9] = join(":", ($gene, $hgvs));
}
elsif ($spl =~ /\d+\-[1|2]\D+/) {
my $intron = $exon-1;
my $intron = $exon - 1;
$hgvs =~ s/exon(\d+)/intron$intron;exon$exon/;
$line[9] = join(":", ($gene, $hgvs));
}
elsif ($gene eq "MET") {
$line[9] = join(":", ($gene, "exon14", "c.xxx"));
$line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx"));
$line[8] = 'skipping'
}
else {

78
codes/netchop.pl 100755
View File

@ -0,0 +1,78 @@
#!/usr/bin/env perl
use strict;
use warnings;
use List::Util qw(min max);
#max_length:最大的epitope长度
die "usage:perl $0 outputDir tumor_prefix" if @ARGV != 2;
my ($outputDir, $tumor_prefix) = @ARGV;
open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.fasta";
my %fa;
while (<IN>) {
if (/^>MT/) {
open OUT, ">$outputDir/neoantigen/MHC_Class_I/tmp.fa";
print OUT;
$_ =~ /MT\.(\d+)\./;
my $id = $1;
my $seq = <IN>;
print OUT $seq;
chomp $seq;
$fa{$id} = $seq;
system "predict.py -m netchop -n $outputDir/neoantigen/MHC_Class_I/tmp.fa >>$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt";
close OUT;
}
}
unlink "$outputDir/neoantigen/MHC_Class_I/tmp.fa";
my %score;
open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt";
while (<IN>) {
next unless /^\d+/;
chomp;
my @line = split;
$line[3] =~ /MT\.(\d+)\./;
$score{$1}{$line[0]} = $line[2];
}
open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.tsv";
open OUT, ">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.txt";
open OUT2, ">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt";
#my %neopt;
my $head = <IN>;
chomp $head;
print OUT "$head\tcleavage_score\n";
while (<IN>) {
chomp;
my @line = split(/\t/);
$line[44] =~ /^(\d+)\./;
my $id = $1;
my $pep = $line[18];
if (exists $fa{$id}) {
my $index = index($fa{$id}, $pep) + length($pep);
my $cleavage_score = $score{$id}{$index};
print OUT "$_\t$cleavage_score\n";
if ($line[21] <= 5000 and ($line[23] eq "NA" or $line[23] >= 1)) {
print OUT2 "$_\t$cleavage_score\n";
}
}
else {
print OUT "$_\tNA\n";
}
}
system "sort -k 22 -n $outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt >$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt";
unlink "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt";
open SORT, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt";
open OUT3, ">$outputDir/neoantigen/MHC_Class_I/neoantigen.txt";
print OUT3 "序号\tHLA分型\t基因\t多肽\t亲和力\t剪切效率\n";
my %pep;
my $bool = 0;
while (<SORT>) {
chomp;
my @line = split(/\t/);
if (not exists $pep{$line[18]}) {
$pep{$line[18]}++;
$bool += 1;
print OUT3 "$bool\t$line[14]\t$line[11]\t$line[18]\t$line[21]\t$line[53]\n";
}
}

View File

@ -110,7 +110,7 @@ def process_judge_vcf(input_vcf, output_vcf):
if not line.startswith("#"):
fields = line.strip().split('\t')
info = fields[9].split(":")
percentage = float(info[4])
percentage = float(info[6])
if 0.1 <= percentage <= 0.9:
b = 0.5
@ -135,6 +135,9 @@ def merge_and_sort_files(matched_file, unmatched_file, output_file):
return output_file
# 如果 unmatched_file 不为空,继续合并和排序操作
if os.stat(matched_file).st_size == 0:
matched_df = pd.DataFrame()
else:
matched_df = pd.read_csv(matched_file, sep='\t', header=None)
unmatched_df = pd.read_csv(unmatched_file, sep='\t', header=None)
@ -195,7 +198,7 @@ def select_cnvkit_vcf(vcf, bed, output_file):
line.split()[1] == str(position_list[i]) and line.split()[0] == str(chr_list[i]) and len(
line.split()[3]) < 2 and len(line.split()[4]) < 2]
for line in filtered_lines:
p_value_str = line.split()[9].split(":")[4]
p_value_str = line.split()[9].split(":")[6]
p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str)
if 0.1 <= p_value <= 0.9:
result_data.append(line)
@ -218,7 +221,7 @@ def paired_monitoring(name, somatic_vcf, germline_vcf, ref_bed, cnvkit_ref_bed,
# 处理胚系根据bed筛选
select_position_output_file3 = os.path.join(output_dir, f'{name}_germline_matched.vcf')
select_position_output_file4 = os.path.join(output_dir, f'{name}_germline_unmatched.vcf')
Germline_matched_file, Germline_unmatched_file = select_position(germline_vcf, ref_bed,
germline_matched_file, germline_unmatched_file = select_position(germline_vcf, ref_bed,
select_position_output_file3,
select_position_output_file4)
# 处理体系,数值转换
@ -226,14 +229,14 @@ def paired_monitoring(name, somatic_vcf, germline_vcf, ref_bed, cnvkit_ref_bed,
somatic_matched_add_judge_file = process_judge_vcf(somatic_matched_file, process_judge_vcf_file1)
# 处理胚系,数值转换
process_judge_vcf_file2 = os.path.join(output_dir, f'{name}_germline_matched_add_judge.vcf')
germline_matched_add_judge_file = process_judge_vcf(Germline_matched_file, process_judge_vcf_file2)
germline_matched_add_judge_file = process_judge_vcf(germline_matched_file, process_judge_vcf_file2)
# 合并体系将匹配到的和未匹配到bed的的合并
merge_and_sort_files_file1 = os.path.join(output_dir, f'{name}_somatic_merged.vcf')
somatic_merged_file = merge_and_sort_files(somatic_matched_add_judge_file, somatic_unmatched_file,
merge_and_sort_files_file1)
# 合并胚系将匹配到的和未匹配到bed的的合并
merge_and_sort_files_file2 = os.path.join(output_dir, f'{name}_germline__merged.vcf')
Germline_merged_file = merge_and_sort_files(germline_matched_add_judge_file, Germline_unmatched_file,
Germline_merged_file = merge_and_sort_files(germline_matched_add_judge_file, germline_unmatched_file,
merge_and_sort_files_file2)
# 合并胚系,体系,将体系,胚系两个合并文件再合并
result_pro_file = os.path.join(output_dir, f'{name}_result_pro.txt')

View File

@ -75,7 +75,7 @@ class PostProcess:
def txt_2_excel(path):
try:
df = pd.read_csv(path, sep='\t')
except pd.errors.EmptyDataError:
except (pd.errors.EmptyDataError, FileNotFoundError):
return []
return df.to_dict('records')
@ -179,6 +179,8 @@ class PostProcess:
filter_neg = os.path.join(self.path, 'mutation',
f'{self.sample_name}.snp_indel.somatic.hg19_multianno.filter.sum.neg.txt')
tmb_file = os.path.join(self.path, 'tmb', f'{self.sample_name}.tmb.txt')
filter_sum_pos_res = list()
# 从pos_files中获取药物信息
pos_check = check_file_exist_and_empty(filter_pos)
@ -219,7 +221,7 @@ class PostProcess:
neg['AMP_mut_level'] = 'IIII'
neg_dict = neg.set_index(['Chr', 'Start', 'End'])['AMP_mut_level'].to_dict()
filter_sum_res = list()
filter_sum_df = pd.DataFrame()
filter_sum_check = check_file_exist_and_empty(filter_sum)
if not filter_sum_check:
filter_sum_df = pd.read_csv(filter_sum, sep='\t')
@ -228,6 +230,31 @@ class PostProcess:
level_dict.update(vus_dict)
level_dict.update(neg_dict)
filter_sum_df['AMP_mut_level'] = filter_sum_df.set_index(['Chr', 'Start', 'End']).index.map(level_dict)
cols = list(filter_sum_df.columns)
tmb_file_check = check_file_exist_and_empty(tmb_file)
if not tmb_file_check:
tmb_df = pd.read_csv(tmb_file, sep='\t')
key_cols = ['Chr', 'Start', 'End']
filter_sum_df = filter_sum_df.set_index(key_cols)
tmb_df = tmb_df.set_index(key_cols)
# 在filter_sum_df中的process列中追加字符串";tmb"对应tmb_df中的行 并且 是非 12类突变
filter_sum_df['process'] = filter_sum_df.index.map(
lambda x: filter_sum_df.at[x, 'process'] + ';tmb' if x in tmb_df.index and filter_sum_df.at[
x, 'AMP_mut_level'] not in ['I', 'II'] else filter_sum_df.at[x, 'process'])
# 找到tmb_df中不在filter_sum_df中的行并将这些新的行添加到filter_sum_df中
new_rows = tmb_df[~tmb_df.index.isin(filter_sum_df.index)]
filter_sum_df = pd.concat([filter_sum_df, new_rows])
# 重置索引
filter_sum_df = filter_sum_df.reset_index()
# 按之前列排
filter_sum_df = filter_sum_df[cols]
filter_sum_df = filter_sum_df.fillna('.')
filter_sum_res = filter_sum_df.to_dict('records')
@ -429,11 +456,62 @@ class PostProcess:
print(file_check)
def longindel(self):
longindel_files = glob.glob(
os.path.join(self.path, 'fusion', '*.longindel.pos.txt'))
if longindel_files:
# return self.txt_2_excel(longindel_files[0])
self.sheet['longindel'] = self.txt_2_excel(longindel_files[0])
filter_sum_pos = os.path.join(self.path, 'fusion',
f'{self.sample_name}.longindel.hg19_multianno.filter.pos.txt')
filter_sum_pos_check = check_file_exist_and_empty(filter_sum_pos)
filter_sum_pos_res = list()
pos_dict = dict()
if not filter_sum_pos_check:
filter_sum_pos_df = pd.read_csv(filter_sum_pos, sep='\t')
# 添加常规列
filter_sum_pos_df = self._add_columns(filter_sum_pos_df)
# 添加基因功能
filter_sum_pos_df = self._add_gene_function(filter_sum_pos_df, colname='ref_gene')
# 药物处理
self.drug_parse(filter_sum_pos_df['DrugCn'].to_list())
filter_sum_pos_df['Validated'] = 1
filter_sum_pos_df = filter_sum_pos_df.fillna('.')
grouped_df = filter_sum_pos_df.groupby(['#CHROM', 'POS', 'REF', 'ALT'])
# 对每个分组进行操作
for group_name, group_data in grouped_df:
chr, pos, ref, alt = group_name
if any(group_data['AMP_mut_level'] == 'I'):
filter_condition = (filter_sum_pos_df['#CHROM'] == chr) & \
(filter_sum_pos_df['POS'] == pos) & \
(filter_sum_pos_df['REF'] == ref) & \
(filter_sum_pos_df['ALT'] == alt)
filter_sum_pos_df.loc[filter_condition, 'AMP_mut_level'] = 'I'
pos_dict = filter_sum_pos_df.set_index(['#CHROM', 'POS', 'REF', 'ALT'])['AMP_mut_level'].to_dict()
filter_sum_pos_res = filter_sum_pos_df.to_dict('records')
filter_sum = os.path.join(self.path, 'fusion', f'{self.sample_name}.longindel.hg19_multianno.filter.txt')
filter_sum_check = check_file_exist_and_empty(filter_sum)
filter_sum_res = list()
if not filter_sum_check:
filter_sum_df = pd.read_csv(filter_sum, sep='\t')
filter_sum_df['Validated'] = 1
level_dict = dict()
level_dict.update(pos_dict)
filter_sum_df['AMP_mut_level'] = filter_sum_df.set_index(['#CHROM', 'POS', 'REF', 'ALT']).index.map(
level_dict)
filter_sum_df = filter_sum_df.fillna('.')
filter_sum_res = filter_sum_df.to_dict('records')
self.sheet['longindel_mut'] = filter_sum_res
self.sheet['longindel_drug'] = filter_sum_pos_res
def neoantigen(self):
neoantigen = os.path.join(self.path, 'neoantigen', f'MHC_Class_I', 'neoantigen.txt')
hla = os.path.join(self.path, 'neoantigen', f'hla', f'{self.normal_name}_result.tsv')
self.sheet['neoantigen'] = self.txt_2_excel(neoantigen)
self.sheet['hla'] = self.txt_2_excel(hla)
def qc(self):
qc_files = glob.glob(os.path.join(self.path, 'qc', '*_qc.txt'))
@ -452,17 +530,18 @@ class PostProcess:
def collect(self):
writer = pd.ExcelWriter(self.outpath)
self.cms()
self.qc()
self.snv()
self.fusion()
self.longindel()
self.cnv()
self.msi()
self.germline()
self.heredity()
self.heredity_res()
self.longindel()
self.chemo()
self.indication()
self.qc()
self.neoantigen()
self.drugs()
# 遍历CSV文件列表

View File

@ -2,6 +2,7 @@
import argparse
import os
from datetime import datetime
run_wdl_path = os.path.join(os.path.dirname(__file__), 'run_wdl.py')
@ -42,13 +43,15 @@ if __name__ == '__main__':
if not os.path.exists(res_path):
os.makedirs(res_path)
logname = datetime.now().strftime("%m%d%H%M")
cmd = f'nohup python ' \
f'{run_wdl_path} -n {args.barcode} -s {args.normal} ' \
f'{"-u " if args.umi else ""} -i {args.input_dir} ' \
f'-node {args.start_node} ' \
f'-o {res_path} -b {args.probe} -p {args.project} -c {args.cancer} -w {args.wdl} ' \
f'> {res_path}/{args.barcode}_run.log ' \
f'2>> {res_path}/{args.barcode}_run.log &'
f'> {res_path}/{args.barcode}_{logname}_run.log ' \
f'2>> {res_path}/{args.barcode}_{logname}_run.log &'
# with open(os.path.join(res_path, 'exec'), 'w') as execfile:
# execfile.write(cmd + '\n')
os.system(cmd)

View File

@ -3,6 +3,7 @@ import json
import os
import subprocess
import time
from datetime import datetime
import pandas as pd
@ -80,7 +81,8 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl
arg = {key: value for key, value in arg.items() if value not in (None, '', False)}
# generate json
jsfile_path = os.path.join(output_dir, f'{barcode}.json')
logname = datetime.now().strftime("%m%d%H%M")
jsfile_path = os.path.join(output_dir, f'{barcode}_{logname}.json')
with open(jsfile_path, 'w') as jsfile:
jsfile.write(json.dumps(arg, indent=4, ensure_ascii=False))
@ -127,7 +129,7 @@ if __name__ == '__main__':
parser.add_argument('-p', '--project', help="project", required=True)
parser.add_argument('-c', '--cancer', help="cancer", required=True)
parser.add_argument('-b', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True)
parser.add_argument('-w', '--wdl', help="wdl", default='/home/zhangchao/project/pipeline/workflow/pipeline.wdl')
parser.add_argument('-w', '--wdl', help="wdl", default='$WORKFLOW/pipeline.wdl')
parser.add_argument('-node', '--start_node',
help="node begain to run; 'addQc', 'addAlignment', "
"'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo',"

View File

@ -6,10 +6,10 @@ die "useage:perl $0 input output cancer_type" unless @ARGV == 3;
my ($input, $output, $cancer_type) = @ARGV;
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public";
print "Fusion药物注释使用public路径$public_path\n";
print "Longindel药物注释使用public路径$public_path\n";
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "Fusion药物注释使用路径:$database_path\n";
print "Longindel药物注释使用路径:$database_path\n";
open MUT, "$database_path/fusion.csv";
<MUT>;
@ -26,7 +26,8 @@ my %therapy;
while (<THERAPY>) {
chomp;
my @line = split("\t");
push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
# push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
}
##药物翻译信息
@ -106,23 +107,38 @@ while (<IN>) {
my @splitline = split(/\t/);
my $freq = (split(/:/, $splitline[9]))[9] / (split(/:/, $splitline[9]))[7];
my $gene;
if ($_ =~ /Gene\.refGene=([^;]+)/) {
$gene = $1;
}
if (exists $therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}) {
print "$freq\n";
foreach my $entry (@{$therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}}) {
my @line = split("\t", $entry);
if (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] =~ /$cancer_type|solid tumor/i) {
if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) {
# push @pos, "$_\t.\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
}
elsif (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] !~ /$cancer_type|solid tumor/i) {
elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) {
# push @pos, "$_\t.\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
}
elsif ($line[2] =~ /$cancer_type|solid tumor/i) {
elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) {
# push @pos, "$_\t.\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
}
# my @line = split("\t", $entry);
# if (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] =~ /$cancer_type|solid tumor/i) {
# push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
# }
# elsif (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] !~ /$cancer_type|solid tumor/i) {
# push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
# }
# elsif ($line[2] =~ /$cancer_type|solid tumor/i) {
# push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
# }
# else {
# print "未匹配到"
# }
}
}

Binary file not shown.

File diff suppressed because one or more lines are too long

View File

@ -1,3 +1,4 @@
import "./wdl/qc.wdl"
import "./wdl/alignment.wdl"
import "./wdl/call_mutation.wdl"
@ -199,6 +200,7 @@ workflow pipeline {
msi=call_msi.msi_txt,
hereditary=call_hereditary.hereditary_txt,
chemo=call_chemo.chemo_res,
neoantigen=call_neoantigen.neoantigen_txt,
pollution=call_pollution.pollution_res,
name=tumor,
normal=normal,

View File

@ -1,3 +1,4 @@
task mutation_calling_umi {
String name
String output_dir

View File

@ -1,3 +1,4 @@
task run_chemo {
String name
String output_dir
@ -10,11 +11,12 @@ task run_chemo {
if [ ! -d ${output_dir}/chemo ];then
mkdir ${output_dir}/chemo
fi
chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project}
>>>
output {
String chemo_res = "${output_dir}/chemo/${name}.drug.res.txt"
String run_chemo_res = "${output_dir}/chemo/${name}.drug.res.txt"
}
}

View File

@ -1,3 +1,4 @@
task cnv_single {
String name
String output_dir

View File

@ -1,3 +1,4 @@
task run_hereditary {
String name
String output_dir
@ -8,6 +9,7 @@ task run_hereditary {
if [ ! -d ${output_dir}/hereditary ];then
mkdir ${output_dir}/hereditary
fi
hereditary.py -d $DATABASE/hereditary_database.xlsx -p ${project} -n ${name} -f ${filter_txt} -o ${output_dir}/hereditary
>>>

View File

@ -1,3 +1,4 @@
task msi_single {
String name
String bed
@ -18,7 +19,7 @@ task msi_single {
>>>
output {
String msi_txt = "${output_dir}/msi/${name}.msi.txt"
String run_msi_txt = "${output_dir}/msi/${name}.msi.txt"
}
}
@ -44,7 +45,7 @@ task msi_paired {
>>>
output {
String msi_txt = "${output_dir}/msi/${name}.msi.txt"
String run_msi_txt = "${output_dir}/msi/${name}.msi.txt"
}
}

View File

@ -1,3 +1,4 @@
task run_neoantigen {
String tumor
String? normal
@ -12,7 +13,7 @@ task run_neoantigen {
command <<<
if [ ! -d ${output_dir}/neoantigen/hla ];then
mkdir ${output_dir}/neoantigen/hla
mkdir -p ${output_dir}/neoantigen/hla
fi
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_1.bam \
@ -145,7 +146,7 @@ task run_neoantigen {
/data
dos2unix ${output_dir}/neoantigen/MHC_Class_I/*.all_epitopes.tsv
# perl ${output_dir}/netchop.pl $outputDir $name $tumor $max_peptide_length
netchop.pl ${output_dir} ${tumor}
>>>
}
@ -178,4 +179,8 @@ workflow call_neoantigen {
sample_type=if umi then 'c' else 't'
}
}
output {
String neoantigen_txt = "${output_dir}neoantigen/MHC_Class_I/neoantigen.txt"
}
}

View File

@ -1,3 +1,4 @@
task run_pollution {
String name
String output_dir
@ -20,7 +21,7 @@ task run_pollution {
>>>
output {
String pollution_res = "${output_dir}/pollution/${name}_pollution.csv"
String run_pollution_res = "${output_dir}/pollution/${name}_pollution.csv"
}
}

View File

@ -1,3 +1,4 @@
task run_post {
String? mutation
String? fusion
@ -5,6 +6,7 @@ task run_post {
String? msi
String? hereditary
String? chemo
String? neoantigen
String? pollution
String name
String? normal
@ -22,7 +24,7 @@ task run_post {
>>>
output {
String merged = "${output_dir}/report/${name}.merged_file.xlsx"
String run_merged = "${output_dir}/report/${name}.merged_file.xlsx"
}
}
@ -37,6 +39,7 @@ workflow call_postprocess {
String? hereditary
String? pollution
String? chemo
String? neoantigen
String name
String? normal
String output_dir
@ -52,6 +55,7 @@ workflow call_postprocess {
msi=msi,
hereditary=hereditary,
chemo=chemo,
neoantigen=neoantigen,
pollution=pollution,
name=name,
normal=normal,

View File

@ -1,3 +1,4 @@
task runqc {
String name
String input_dir

View File

@ -1,3 +1,4 @@
task run_statistics {
String name
String output_dir

View File

@ -1,3 +1,4 @@
task run_tmb {
String name
String file
@ -19,7 +20,7 @@ task run_tmb {
>>>
output {
String tmb_txt = "${output_dir}/tmb/${name}.tmb.txt"
String run_tmb_txt = "${output_dir}/tmb/${name}.tmb.txt"
}
}
@ -50,7 +51,7 @@ workflow call_tmb {
name=name,
file=file,
project=project,
sample_type='c',
sample_type='t',
output_dir=output_dir
}