增加新抗原

master
chaopower 2023-12-21 10:22:54 +08:00
parent 90658950bd
commit c9f525b2bf
7 changed files with 109 additions and 42 deletions

View File

@ -15,8 +15,12 @@ if ($type eq "germline") {
next; next;
} }
elsif (/^#CHROM/) { elsif (/^#CHROM/) {
$_ =~ s/TUMOR/$name/; $_ =~ s/(\S+)-match/NORMAL/;
print OUT; print OUT "##FILTER=<ID=noise,Description=\"noise, JM add\">\n";
print OUT "##FILTER=<ID=MSI_u_x_r_x,Description=\"MSI_u_x_r_x, JM add\">\n";
print OUT "##FILTER=<ID=strandBias,Description=\"strandBias, JM add\">\n";
print OUT "##FILTER=<ID=multi_alt,Description=\"multi_alt, JM add\">\n";
print OUT $_;
next; next;
} }
else { else {
@ -40,30 +44,41 @@ if ($type eq "germline") {
elsif ($type eq "somatic") { elsif ($type eq "somatic") {
while (<IN>) { while (<IN>) {
if (/^##/) { if (/^##/) {
print OUT; print OUT $_;
next; next;
} }
elsif (/^#CHROM/) { elsif (/^#CHROM/) {
$_ =~ s/TUMOR/$name/; $_ =~ s/(\S+)-match/NORMAL/;
print OUT; print OUT "##FILTER=<ID=noise,Description=\"noise, JM add\">\n";
print OUT "##FILTER=<ID=MSI_u_x_r_x,Description=\"MSI_u_x_r_x, JM add\">\n";
print OUT "##FILTER=<ID=strandBias,Description=\"strandBias, JM add\">\n";
print OUT "##FILTER=<ID=multi_alt,Description=\"multi_alt, JM add\">\n";
print OUT $_;
next; next;
} }
else { else {
chomp; chomp;
my @line = split("\t", $_); my @line = split("\t", $_);
next if $line[6] ne "PASS"; # next if $line[6] ne "PASS";
my $filters = split(";", $line[6]);
# 不是pass 低频的; 或者 0.02 < 0.05 之间 个数大于2
if ($sample_type eq 'c') { if ($sample_type eq 'c') {
my $t_af = (split(":", $line[-1]))[6]; my $t_af = (split(":", $line[-1]))[6];
if (!($line[6] ne 'PASS' and ($t_af < 0.02 or ($t_af >= 0.02 and $t_af < 0.05 and $filters >= 2)))) {
print OUT "$_\n" if $t_af >= 0.02; print OUT "$_\n" if $t_af >= 0.02;
} }
}
else { else {
my $n_af = (split(":", $line[-1]))[6]; my $n_af = (split(":", $line[-1]))[6];
my $t_af = (split(":", $line[-2]))[6]; my $t_af = (split(":", $line[-2]))[6];
if (!($line[6] ne 'PASS' and ($t_af < 0.02 or ($t_af >= 0.02 and $t_af < 0.05 and $filters >= 2)))) {
print OUT "$_\n" if ($n_af < 0.02 and $t_af >= 0.05); print OUT "$_\n" if ($n_af < 0.02 and $t_af >= 0.05);
} }
} }
}
} }
} }
else { else {

View File

@ -25,7 +25,7 @@ if __name__ == '__main__':
parser.add_argument('-p', '--project', help="project", required=True) parser.add_argument('-p', '--project', help="project", required=True)
parser.add_argument('-c', '--cancer', help="cancer", required=True) parser.add_argument('-c', '--cancer', help="cancer", required=True)
parser.add_argument('-b', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True) parser.add_argument('-b', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True)
parser.add_argument('-w', '--wdl', help="wdl", default='/home/zhangchao/project/pipeline/workflow/pipeline.wdl') parser.add_argument('-w', '--wdl', help="wdl", default='$WORKFLOW/pipeline.wdl')
parser.add_argument('-node', '--start_node', parser.add_argument('-node', '--start_node',
help="node begain to run; 'addQc', 'addAlignment', " help="node begain to run; 'addQc', 'addAlignment', "
"'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo'," "'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo',"

View File

@ -14,7 +14,6 @@ if ($type eq 't') {
elsif ($type eq 'c') { elsif ($type eq 'c') {
$typekey = "cfdna"; $typekey = "cfdna";
} }
else { else {
die "参数输入错误 type 需为 t 或 c''$!" die "参数输入错误 type 需为 t 或 c''$!"
} }
@ -39,8 +38,14 @@ while (<IN>) {
next; next;
} }
chomp; chomp;
$_ =~ /VD=(\d+);.*;VARBIAS=(\d+):(\d+);.*;MSI\=(\d+);MSILEN\=(\d+);/; my ($vd, $msi_repeat_time, $msi_unit) = (0, 0, 0, 0, 0);
my ($vd, $vfr, $vrr, $msi_repeat_time, $msi_unit) = ($1, $2, $3, $4, $5);
$_ =~ /;VARBIAS=(\d+):(\d+);/;
my ($vfr, $vrr) = ($1, $2);
$_ =~ /VD=(\d+);.*;MSI\=(\d+);MSILEN\=(\d+);/;
# ($vd, $vfr, $vrr, $msi_repeat_time, $msi_unit) = ($1, $2, $3, $4, $5);
($vd, $msi_repeat_time, $msi_unit) = ($1, $2, $3);
my @line = split; my @line = split;
my $depth = (split(":", $line[9]))[1]; my $depth = (split(":", $line[9]))[1];
next if $depth < 50; next if $depth < 50;
@ -56,12 +61,14 @@ while (<IN>) {
} }
if ($msi_repeat_time >= 4) { if ($msi_repeat_time >= 4) {
if ($line[6] eq 'PASS') { if ($line[6] eq 'PASS') {
$line[6] = 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time; # $line[6] = 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time;
$line[6] = 'MSI_u_x_r_x';
} }
else { else {
$line[6] = $line[6] . ";" . 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time; $line[6] = $line[6] . ";" . 'MSI_u_x_r_x';
} }
} }
if (defined($vfr) and defined($vrr)) {
if ($vd >= 7 and ($vfr / $vd < 0.1 or $vrr / $vd < 0.1)) { if ($vd >= 7 and ($vfr / $vd < 0.1 or $vrr / $vd < 0.1)) {
if ($line[6] eq 'PASS') { if ($line[6] eq 'PASS') {
$line[6] = 'strandBias'; $line[6] = 'strandBias';
@ -70,5 +77,7 @@ while (<IN>) {
$line[6] = $line[6] . ";" . 'strandBias'; $line[6] = $line[6] . ";" . 'strandBias';
} }
} }
}
print OUT join("\t", @line), "\n"; print OUT join("\t", @line), "\n";
} }

View File

@ -6,6 +6,36 @@ import sys
import pysam import pysam
contig_length = """
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##contig=<ID=chr7,length=159138663>
##contig=<ID=chrX,length=155270560>
##contig=<ID=chr8,length=146364022>
##contig=<ID=chr9,length=141213431>
##contig=<ID=chr10,length=135534747>
##contig=<ID=chr11,length=135006516>
##contig=<ID=chr12,length=133851895>
##contig=<ID=chr13,length=115169878>
##contig=<ID=chr14,length=107349540>
##contig=<ID=chr15,length=102531392>
##contig=<ID=chr16,length=90354753>
##contig=<ID=chr17,length=81195210>
##contig=<ID=chr18,length=78077248>
##contig=<ID=chr20,length=63025520>
##contig=<ID=chrY,length=59373566>
##contig=<ID=chr19,length=59128983>
##contig=<ID=chr22,length=51304566>
##contig=<ID=chr21,length=48129895>
##contig=<ID=chrM,length=16571>
"""
class VCFFilter: class VCFFilter:
def __init__(self, input_vcf, output_vcf, filter_expression): def __init__(self, input_vcf, output_vcf, filter_expression):
@ -40,10 +70,17 @@ class VCFFilter:
vcf_out = sys.stdout vcf_out = sys.stdout
with pysam.VariantFile(self.input_vcf, 'r') as vcf_in: with pysam.VariantFile(self.input_vcf, 'r') as vcf_in:
for chrome in range(1, 23): # # 获取当前已存在的过滤器信息
vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>') # existing_filters = vcf_in.header.filters
for chrome in ['X', 'Y', 'MT']: # # 添加新的过滤器
vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>') # existing_filters.add(id='noise', number='', type='',description='noise; JM add ')
# for chrome in range(1, 23):
# vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>')
# for chrome in ['X', 'Y', 'MT']:
# vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>')
for line in contig_length.strip().splitlines():
vcf_in.header.add_line(line)
header = vcf_in.header header = vcf_in.header
vcf_out.write(str(header)) vcf_out.write(str(header))
for record in vcf_in: for record in vcf_in:

View File

@ -89,7 +89,8 @@ workflow pipeline {
bed=bed, bed=bed,
output_dir=workdir, output_dir=workdir,
project=project, project=project,
cancer=cancer cancer=cancer,
probe=probe
} }
call tmb.call_tmb as call_tmb { call tmb.call_tmb as call_tmb {

View File

@ -4,6 +4,7 @@ task mutation_calling_umi {
String rmdup_bam String rmdup_bam
String ref String ref
String bed String bed
String probe
command <<< command <<<
if [ ! -d ${output_dir}/mutation ];then if [ ! -d ${output_dir}/mutation ];then
@ -53,7 +54,7 @@ task mutation_calling_umi {
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf -o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
# add msi and strandbias flag # add msi and strandbias flag
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 c vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} c
# add malt flag # add malt flag
grep -v ^# ${output_dir}/mutation/${name}.raw.snp_indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - > \ grep -v ^# ${output_dir}/mutation/${name}.raw.snp_indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - > \
@ -88,6 +89,7 @@ task mutation_calling_umi_control {
String output_dir String output_dir
String tumor_rmdup_bam String tumor_rmdup_bam
String normal_rmdup_bam String normal_rmdup_bam
String probe
command <<< command <<<
if [ ! -d ${output_dir}/mutation ];then if [ ! -d ${output_dir}/mutation ];then
@ -148,7 +150,7 @@ task mutation_calling_umi_control {
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf -o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
# add msi and strandbias flag # add msi and strandbias flag
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 c vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} c
# add malt flag # add malt flag
grep -v ^# ${output_dir}/mutation/${name}.raw.snp_indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - > \ grep -v ^# ${output_dir}/mutation/${name}.raw.snp_indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - > \
@ -178,6 +180,7 @@ task mutation_calling_tissue {
String ref String ref
String output_dir String output_dir
String rmdup_bam String rmdup_bam
String probe
command <<< command <<<
if [ ! -d ${output_dir}/mutation ];then if [ ! -d ${output_dir}/mutation ];then
@ -207,7 +210,7 @@ task mutation_calling_tissue {
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf -o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
# add msi and strandbias flag # add msi and strandbias flag
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 t vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t
cp ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${output_dir}/mutation/${name}.snp_indel.somatic.vcf cp ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${output_dir}/mutation/${name}.snp_indel.somatic.vcf
@ -232,6 +235,7 @@ task mutation_calling_tissue_control {
String output_dir String output_dir
String tumor_rmdup_bam String tumor_rmdup_bam
String normal_rmdup_bam String normal_rmdup_bam
String probe
command <<< command <<<
if [ ! -d ${output_dir}/mutation ];then if [ ! -d ${output_dir}/mutation ];then
@ -258,7 +262,7 @@ task mutation_calling_tissue_control {
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf -o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
# add msi and strandbias flag # add msi and strandbias flag
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 t vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t
vcf_filter.py -i ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf \ vcf_filter.py -i ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf \
-o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \ -o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \
@ -463,6 +467,7 @@ workflow call_mutation {
String bed String bed
String project String project
String cancer String cancer
String probe
# pipe 执行 mutation_calling => annovar => filter # pipe 执行 mutation_calling => annovar => filter
if (run) { if (run) {
@ -475,7 +480,8 @@ workflow call_mutation {
output_dir=output_dir, output_dir=output_dir,
ref=ref, ref=ref,
bed=bed, bed=bed,
rmdup_bam=tumor_rmdup_bam rmdup_bam=tumor_rmdup_bam,
probe=probe
} }
call hotspot as hotspot_calling_umi { call hotspot as hotspot_calling_umi {
input: input:
@ -556,7 +562,8 @@ workflow call_mutation {
output_dir=output_dir, output_dir=output_dir,
ref=ref, ref=ref,
bed=bed, bed=bed,
rmdup_bam=tumor_rmdup_bam rmdup_bam=tumor_rmdup_bam,
probe=probe
} }
call hotspot as hotspot_calling_tissue { call hotspot as hotspot_calling_tissue {
input: input:
@ -636,7 +643,8 @@ workflow call_mutation {
ref=ref, ref=ref,
bed=bed, bed=bed,
tumor_rmdup_bam=tumor_rmdup_bam, tumor_rmdup_bam=tumor_rmdup_bam,
normal_rmdup_bam=normal_rmdup_bam normal_rmdup_bam=normal_rmdup_bam,
probe=probe
} }
call hotspot as hotspot_calling_umi_control { call hotspot as hotspot_calling_umi_control {
@ -717,7 +725,8 @@ workflow call_mutation {
ref=ref, ref=ref,
bed=bed, bed=bed,
tumor_rmdup_bam=tumor_rmdup_bam, tumor_rmdup_bam=tumor_rmdup_bam,
normal_rmdup_bam=normal_rmdup_bam normal_rmdup_bam=normal_rmdup_bam,
probe=probe
} }
call hotspot as hotspot_calling_tissue_control { call hotspot as hotspot_calling_tissue_control {

View File

@ -105,13 +105,9 @@ task run_neoantigen {
#step2 phasing variant #step2 phasing variant
java -jar $GATK MergeVcfs \
-I ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \
-I ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \
-O ${output_dir}/neoantigen/${tumor}_combined.vcf
java -Xmx16g -jar $PICARD SortVcf \ java -Xmx16g -jar $PICARD SortVcf \
I=${output_dir}/neoantigen/${tumor}_combined.vcf \ I=${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \
I=${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \
O=${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \ O=${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
SEQUENCE_DICTIONARY=/home/install/ref/hg19/hg19.dict SEQUENCE_DICTIONARY=/home/install/ref/hg19/hg19.dict