diff --git a/codes/filter_neoantigen.pl b/codes/filter_neoantigen.pl index c5445c0..884cd33 100755 --- a/codes/filter_neoantigen.pl +++ b/codes/filter_neoantigen.pl @@ -15,8 +15,12 @@ if ($type eq "germline") { next; } elsif (/^#CHROM/) { - $_ =~ s/TUMOR/$name/; - print OUT; + $_ =~ s/(\S+)-match/NORMAL/; + print OUT "##FILTER=\n"; + print OUT "##FILTER=\n"; + print OUT "##FILTER=\n"; + print OUT "##FILTER=\n"; + print OUT $_; next; } else { @@ -40,27 +44,38 @@ if ($type eq "germline") { elsif ($type eq "somatic") { while () { if (/^##/) { - print OUT; + print OUT $_; next; } elsif (/^#CHROM/) { - $_ =~ s/TUMOR/$name/; - print OUT; + $_ =~ s/(\S+)-match/NORMAL/; + print OUT "##FILTER=\n"; + print OUT "##FILTER=\n"; + print OUT "##FILTER=\n"; + print OUT "##FILTER=\n"; + print OUT $_; next; } else { chomp; my @line = split("\t", $_); - next if $line[6] ne "PASS"; - + # next if $line[6] ne "PASS"; + my $filters = split(";", $line[6]); + # 不是pass 低频的; 或者 0.02 < 0.05 之间 个数大于2 if ($sample_type eq 'c') { my $t_af = (split(":", $line[-1]))[6]; - print OUT "$_\n" if $t_af >= 0.02; + if (!($line[6] ne 'PASS' and ($t_af < 0.02 or ($t_af >= 0.02 and $t_af < 0.05 and $filters >= 2)))) { + print OUT "$_\n" if $t_af >= 0.02; + } + } else { my $n_af = (split(":", $line[-1]))[6]; my $t_af = (split(":", $line[-2]))[6]; - print OUT "$_\n" if ($n_af < 0.02 and $t_af >= 0.05); + if (!($line[6] ne 'PASS' and ($t_af < 0.02 or ($t_af >= 0.02 and $t_af < 0.05 and $filters >= 2)))) { + print OUT "$_\n" if ($n_af < 0.02 and $t_af >= 0.05); + } + } } diff --git a/codes/run_pipeline.py b/codes/run_pipeline.py index fe6516d..847645b 100755 --- a/codes/run_pipeline.py +++ b/codes/run_pipeline.py @@ -25,7 +25,7 @@ if __name__ == '__main__': parser.add_argument('-p', '--project', help="project", required=True) parser.add_argument('-c', '--cancer', help="cancer", required=True) parser.add_argument('-b', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True) - parser.add_argument('-w', '--wdl', help="wdl", default='/home/zhangchao/project/pipeline/workflow/pipeline.wdl') + parser.add_argument('-w', '--wdl', help="wdl", default='$WORKFLOW/pipeline.wdl') parser.add_argument('-node', '--start_node', help="node begain to run; 'addQc', 'addAlignment', " "'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo'," diff --git a/codes/vcf_add_tag_msi.pl b/codes/vcf_add_tag_msi.pl index e346b68..6e4bfdf 100755 --- a/codes/vcf_add_tag_msi.pl +++ b/codes/vcf_add_tag_msi.pl @@ -9,12 +9,11 @@ my ($vcfin, $vcfout, $prob, $type) = @ARGV; my $typekey; if ($type eq 't') { - $typekey="tissue"; + $typekey = "tissue"; } elsif ($type eq 'c') { - $typekey="cfdna"; + $typekey = "cfdna"; } - else { die "参数输入错误 type 需为 t 或 c'':$!" } @@ -39,8 +38,14 @@ while () { next; } chomp; - $_ =~ /VD=(\d+);.*;VARBIAS=(\d+):(\d+);.*;MSI\=(\d+);MSILEN\=(\d+);/; - my ($vd, $vfr, $vrr, $msi_repeat_time, $msi_unit) = ($1, $2, $3, $4, $5); + my ($vd, $msi_repeat_time, $msi_unit) = (0, 0, 0, 0, 0); + + $_ =~ /;VARBIAS=(\d+):(\d+);/; + my ($vfr, $vrr) = ($1, $2); + $_ =~ /VD=(\d+);.*;MSI\=(\d+);MSILEN\=(\d+);/; + # ($vd, $vfr, $vrr, $msi_repeat_time, $msi_unit) = ($1, $2, $3, $4, $5); + ($vd, $msi_repeat_time, $msi_unit) = ($1, $2, $3); + my @line = split; my $depth = (split(":", $line[9]))[1]; next if $depth < 50; @@ -56,19 +61,23 @@ while () { } if ($msi_repeat_time >= 4) { if ($line[6] eq 'PASS') { - $line[6] = 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time; + # $line[6] = 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time; + $line[6] = 'MSI_u_x_r_x'; } else { - $line[6] = $line[6] . ";" . 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time; + $line[6] = $line[6] . ";" . 'MSI_u_x_r_x'; } } - if ($vd >= 7 and ($vfr / $vd < 0.1 or $vrr / $vd < 0.1)) { - if ($line[6] eq 'PASS') { - $line[6] = 'strandBias'; - } - else { - $line[6] = $line[6] . ";" . 'strandBias'; + if (defined($vfr) and defined($vrr)) { + if ($vd >= 7 and ($vfr / $vd < 0.1 or $vrr / $vd < 0.1)) { + if ($line[6] eq 'PASS') { + $line[6] = 'strandBias'; + } + else { + $line[6] = $line[6] . ";" . 'strandBias'; + } } } + print OUT join("\t", @line), "\n"; } diff --git a/codes/vcf_filter.py b/codes/vcf_filter.py index 1ee0dac..4cba96b 100755 --- a/codes/vcf_filter.py +++ b/codes/vcf_filter.py @@ -6,6 +6,36 @@ import sys import pysam +contig_length = """ +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +""" + + + class VCFFilter: def __init__(self, input_vcf, output_vcf, filter_expression): @@ -40,10 +70,17 @@ class VCFFilter: vcf_out = sys.stdout with pysam.VariantFile(self.input_vcf, 'r') as vcf_in: - for chrome in range(1, 23): - vcf_in.header.add_line(f'##contig=') - for chrome in ['X', 'Y', 'MT']: - vcf_in.header.add_line(f'##contig=') + # # 获取当前已存在的过滤器信息 + # existing_filters = vcf_in.header.filters + # # 添加新的过滤器 + # existing_filters.add(id='noise', number='', type='',description='noise; JM add ') + + # for chrome in range(1, 23): + # vcf_in.header.add_line(f'##contig=') + # for chrome in ['X', 'Y', 'MT']: + # vcf_in.header.add_line(f'##contig=') + for line in contig_length.strip().splitlines(): + vcf_in.header.add_line(line) header = vcf_in.header vcf_out.write(str(header)) for record in vcf_in: diff --git a/pipeline.wdl b/pipeline.wdl index 1735507..5ee794d 100644 --- a/pipeline.wdl +++ b/pipeline.wdl @@ -89,7 +89,8 @@ workflow pipeline { bed=bed, output_dir=workdir, project=project, - cancer=cancer + cancer=cancer, + probe=probe } call tmb.call_tmb as call_tmb { diff --git a/wdl/call_mutation.wdl b/wdl/call_mutation.wdl index 3006665..10ae946 100755 --- a/wdl/call_mutation.wdl +++ b/wdl/call_mutation.wdl @@ -4,6 +4,7 @@ task mutation_calling_umi { String rmdup_bam String ref String bed + String probe command <<< if [ ! -d ${output_dir}/mutation ];then @@ -53,7 +54,7 @@ task mutation_calling_umi { -o ${output_dir}/mutation/${name}.raw.snp_indel.vcf # add msi and strandbias flag - vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 c + vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} c # add malt flag grep -v ^# ${output_dir}/mutation/${name}.raw.snp_indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - > \ @@ -88,6 +89,7 @@ task mutation_calling_umi_control { String output_dir String tumor_rmdup_bam String normal_rmdup_bam + String probe command <<< if [ ! -d ${output_dir}/mutation ];then @@ -148,7 +150,7 @@ task mutation_calling_umi_control { -o ${output_dir}/mutation/${name}.raw.snp_indel.vcf # add msi and strandbias flag - vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 c + vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} c # add malt flag grep -v ^# ${output_dir}/mutation/${name}.raw.snp_indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - > \ @@ -178,6 +180,7 @@ task mutation_calling_tissue { String ref String output_dir String rmdup_bam + String probe command <<< if [ ! -d ${output_dir}/mutation ];then @@ -207,7 +210,7 @@ task mutation_calling_tissue { -o ${output_dir}/mutation/${name}.raw.snp_indel.vcf # add msi and strandbias flag - vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 t + vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t cp ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${output_dir}/mutation/${name}.snp_indel.somatic.vcf @@ -232,6 +235,7 @@ task mutation_calling_tissue_control { String output_dir String tumor_rmdup_bam String normal_rmdup_bam + String probe command <<< if [ ! -d ${output_dir}/mutation ];then @@ -258,7 +262,7 @@ task mutation_calling_tissue_control { -o ${output_dir}/mutation/${name}.raw.snp_indel.vcf # add msi and strandbias flag - vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 t + vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t vcf_filter.py -i ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf \ -o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \ @@ -463,6 +467,7 @@ workflow call_mutation { String bed String project String cancer + String probe # pipe 执行 mutation_calling => annovar => filter if (run) { @@ -475,7 +480,8 @@ workflow call_mutation { output_dir=output_dir, ref=ref, bed=bed, - rmdup_bam=tumor_rmdup_bam + rmdup_bam=tumor_rmdup_bam, + probe=probe } call hotspot as hotspot_calling_umi { input: @@ -556,7 +562,8 @@ workflow call_mutation { output_dir=output_dir, ref=ref, bed=bed, - rmdup_bam=tumor_rmdup_bam + rmdup_bam=tumor_rmdup_bam, + probe=probe } call hotspot as hotspot_calling_tissue { input: @@ -636,7 +643,8 @@ workflow call_mutation { ref=ref, bed=bed, tumor_rmdup_bam=tumor_rmdup_bam, - normal_rmdup_bam=normal_rmdup_bam + normal_rmdup_bam=normal_rmdup_bam, + probe=probe } call hotspot as hotspot_calling_umi_control { @@ -717,7 +725,8 @@ workflow call_mutation { ref=ref, bed=bed, tumor_rmdup_bam=tumor_rmdup_bam, - normal_rmdup_bam=normal_rmdup_bam + normal_rmdup_bam=normal_rmdup_bam, + probe=probe } call hotspot as hotspot_calling_tissue_control { diff --git a/wdl/neoantigen.wdl b/wdl/neoantigen.wdl index 6ae4a9c..2d86796 100755 --- a/wdl/neoantigen.wdl +++ b/wdl/neoantigen.wdl @@ -105,13 +105,9 @@ task run_neoantigen { #step2 phasing variant - java -jar $GATK MergeVcfs \ - -I ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \ - -I ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \ - -O ${output_dir}/neoantigen/${tumor}_combined.vcf - java -Xmx16g -jar $PICARD SortVcf \ - I=${output_dir}/neoantigen/${tumor}_combined.vcf \ + I=${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \ + I=${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \ O=${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \ SEQUENCE_DICTIONARY=/home/install/ref/hg19/hg19.dict