增加新抗原
parent
90658950bd
commit
c9f525b2bf
|
|
@ -15,8 +15,12 @@ if ($type eq "germline") {
|
|||
next;
|
||||
}
|
||||
elsif (/^#CHROM/) {
|
||||
$_ =~ s/TUMOR/$name/;
|
||||
print OUT;
|
||||
$_ =~ s/(\S+)-match/NORMAL/;
|
||||
print OUT "##FILTER=<ID=noise,Description=\"noise, JM add\">\n";
|
||||
print OUT "##FILTER=<ID=MSI_u_x_r_x,Description=\"MSI_u_x_r_x, JM add\">\n";
|
||||
print OUT "##FILTER=<ID=strandBias,Description=\"strandBias, JM add\">\n";
|
||||
print OUT "##FILTER=<ID=multi_alt,Description=\"multi_alt, JM add\">\n";
|
||||
print OUT $_;
|
||||
next;
|
||||
}
|
||||
else {
|
||||
|
|
@ -40,30 +44,41 @@ if ($type eq "germline") {
|
|||
elsif ($type eq "somatic") {
|
||||
while (<IN>) {
|
||||
if (/^##/) {
|
||||
print OUT;
|
||||
print OUT $_;
|
||||
next;
|
||||
}
|
||||
elsif (/^#CHROM/) {
|
||||
$_ =~ s/TUMOR/$name/;
|
||||
print OUT;
|
||||
$_ =~ s/(\S+)-match/NORMAL/;
|
||||
print OUT "##FILTER=<ID=noise,Description=\"noise, JM add\">\n";
|
||||
print OUT "##FILTER=<ID=MSI_u_x_r_x,Description=\"MSI_u_x_r_x, JM add\">\n";
|
||||
print OUT "##FILTER=<ID=strandBias,Description=\"strandBias, JM add\">\n";
|
||||
print OUT "##FILTER=<ID=multi_alt,Description=\"multi_alt, JM add\">\n";
|
||||
print OUT $_;
|
||||
next;
|
||||
}
|
||||
else {
|
||||
chomp;
|
||||
my @line = split("\t", $_);
|
||||
next if $line[6] ne "PASS";
|
||||
|
||||
# next if $line[6] ne "PASS";
|
||||
my $filters = split(";", $line[6]);
|
||||
# 不是pass 低频的; 或者 0.02 < 0.05 之间 个数大于2
|
||||
if ($sample_type eq 'c') {
|
||||
my $t_af = (split(":", $line[-1]))[6];
|
||||
if (!($line[6] ne 'PASS' and ($t_af < 0.02 or ($t_af >= 0.02 and $t_af < 0.05 and $filters >= 2)))) {
|
||||
print OUT "$_\n" if $t_af >= 0.02;
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
my $n_af = (split(":", $line[-1]))[6];
|
||||
my $t_af = (split(":", $line[-2]))[6];
|
||||
if (!($line[6] ne 'PASS' and ($t_af < 0.02 or ($t_af >= 0.02 and $t_af < 0.05 and $filters >= 2)))) {
|
||||
print OUT "$_\n" if ($n_af < 0.02 and $t_af >= 0.05);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ if __name__ == '__main__':
|
|||
parser.add_argument('-p', '--project', help="project", required=True)
|
||||
parser.add_argument('-c', '--cancer', help="cancer", required=True)
|
||||
parser.add_argument('-b', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True)
|
||||
parser.add_argument('-w', '--wdl', help="wdl", default='/home/zhangchao/project/pipeline/workflow/pipeline.wdl')
|
||||
parser.add_argument('-w', '--wdl', help="wdl", default='$WORKFLOW/pipeline.wdl')
|
||||
parser.add_argument('-node', '--start_node',
|
||||
help="node begain to run; 'addQc', 'addAlignment', "
|
||||
"'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo',"
|
||||
|
|
|
|||
|
|
@ -9,12 +9,11 @@ my ($vcfin, $vcfout, $prob, $type) = @ARGV;
|
|||
|
||||
my $typekey;
|
||||
if ($type eq 't') {
|
||||
$typekey="tissue";
|
||||
$typekey = "tissue";
|
||||
}
|
||||
elsif ($type eq 'c') {
|
||||
$typekey="cfdna";
|
||||
$typekey = "cfdna";
|
||||
}
|
||||
|
||||
else {
|
||||
die "参数输入错误 type 需为 t 或 c'':$!"
|
||||
}
|
||||
|
|
@ -39,8 +38,14 @@ while (<IN>) {
|
|||
next;
|
||||
}
|
||||
chomp;
|
||||
$_ =~ /VD=(\d+);.*;VARBIAS=(\d+):(\d+);.*;MSI\=(\d+);MSILEN\=(\d+);/;
|
||||
my ($vd, $vfr, $vrr, $msi_repeat_time, $msi_unit) = ($1, $2, $3, $4, $5);
|
||||
my ($vd, $msi_repeat_time, $msi_unit) = (0, 0, 0, 0, 0);
|
||||
|
||||
$_ =~ /;VARBIAS=(\d+):(\d+);/;
|
||||
my ($vfr, $vrr) = ($1, $2);
|
||||
$_ =~ /VD=(\d+);.*;MSI\=(\d+);MSILEN\=(\d+);/;
|
||||
# ($vd, $vfr, $vrr, $msi_repeat_time, $msi_unit) = ($1, $2, $3, $4, $5);
|
||||
($vd, $msi_repeat_time, $msi_unit) = ($1, $2, $3);
|
||||
|
||||
my @line = split;
|
||||
my $depth = (split(":", $line[9]))[1];
|
||||
next if $depth < 50;
|
||||
|
|
@ -56,12 +61,14 @@ while (<IN>) {
|
|||
}
|
||||
if ($msi_repeat_time >= 4) {
|
||||
if ($line[6] eq 'PASS') {
|
||||
$line[6] = 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time;
|
||||
# $line[6] = 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time;
|
||||
$line[6] = 'MSI_u_x_r_x';
|
||||
}
|
||||
else {
|
||||
$line[6] = $line[6] . ";" . 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time;
|
||||
$line[6] = $line[6] . ";" . 'MSI_u_x_r_x';
|
||||
}
|
||||
}
|
||||
if (defined($vfr) and defined($vrr)) {
|
||||
if ($vd >= 7 and ($vfr / $vd < 0.1 or $vrr / $vd < 0.1)) {
|
||||
if ($line[6] eq 'PASS') {
|
||||
$line[6] = 'strandBias';
|
||||
|
|
@ -70,5 +77,7 @@ while (<IN>) {
|
|||
$line[6] = $line[6] . ";" . 'strandBias';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
print OUT join("\t", @line), "\n";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,36 @@ import sys
|
|||
|
||||
import pysam
|
||||
|
||||
contig_length = """
|
||||
##contig=<ID=chr1,length=249250621>
|
||||
##contig=<ID=chr2,length=243199373>
|
||||
##contig=<ID=chr3,length=198022430>
|
||||
##contig=<ID=chr4,length=191154276>
|
||||
##contig=<ID=chr5,length=180915260>
|
||||
##contig=<ID=chr6,length=171115067>
|
||||
##contig=<ID=chr7,length=159138663>
|
||||
##contig=<ID=chrX,length=155270560>
|
||||
##contig=<ID=chr8,length=146364022>
|
||||
##contig=<ID=chr9,length=141213431>
|
||||
##contig=<ID=chr10,length=135534747>
|
||||
##contig=<ID=chr11,length=135006516>
|
||||
##contig=<ID=chr12,length=133851895>
|
||||
##contig=<ID=chr13,length=115169878>
|
||||
##contig=<ID=chr14,length=107349540>
|
||||
##contig=<ID=chr15,length=102531392>
|
||||
##contig=<ID=chr16,length=90354753>
|
||||
##contig=<ID=chr17,length=81195210>
|
||||
##contig=<ID=chr18,length=78077248>
|
||||
##contig=<ID=chr20,length=63025520>
|
||||
##contig=<ID=chrY,length=59373566>
|
||||
##contig=<ID=chr19,length=59128983>
|
||||
##contig=<ID=chr22,length=51304566>
|
||||
##contig=<ID=chr21,length=48129895>
|
||||
##contig=<ID=chrM,length=16571>
|
||||
"""
|
||||
|
||||
|
||||
|
||||
|
||||
class VCFFilter:
|
||||
def __init__(self, input_vcf, output_vcf, filter_expression):
|
||||
|
|
@ -40,10 +70,17 @@ class VCFFilter:
|
|||
vcf_out = sys.stdout
|
||||
|
||||
with pysam.VariantFile(self.input_vcf, 'r') as vcf_in:
|
||||
for chrome in range(1, 23):
|
||||
vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>')
|
||||
for chrome in ['X', 'Y', 'MT']:
|
||||
vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>')
|
||||
# # 获取当前已存在的过滤器信息
|
||||
# existing_filters = vcf_in.header.filters
|
||||
# # 添加新的过滤器
|
||||
# existing_filters.add(id='noise', number='', type='',description='noise; JM add ')
|
||||
|
||||
# for chrome in range(1, 23):
|
||||
# vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>')
|
||||
# for chrome in ['X', 'Y', 'MT']:
|
||||
# vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>')
|
||||
for line in contig_length.strip().splitlines():
|
||||
vcf_in.header.add_line(line)
|
||||
header = vcf_in.header
|
||||
vcf_out.write(str(header))
|
||||
for record in vcf_in:
|
||||
|
|
|
|||
|
|
@ -89,7 +89,8 @@ workflow pipeline {
|
|||
bed=bed,
|
||||
output_dir=workdir,
|
||||
project=project,
|
||||
cancer=cancer
|
||||
cancer=cancer,
|
||||
probe=probe
|
||||
}
|
||||
|
||||
call tmb.call_tmb as call_tmb {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ task mutation_calling_umi {
|
|||
String rmdup_bam
|
||||
String ref
|
||||
String bed
|
||||
String probe
|
||||
command <<<
|
||||
|
||||
if [ ! -d ${output_dir}/mutation ];then
|
||||
|
|
@ -53,7 +54,7 @@ task mutation_calling_umi {
|
|||
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
|
||||
|
||||
# add msi and strandbias flag
|
||||
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 c
|
||||
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} c
|
||||
|
||||
# add malt flag
|
||||
grep -v ^# ${output_dir}/mutation/${name}.raw.snp_indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - > \
|
||||
|
|
@ -88,6 +89,7 @@ task mutation_calling_umi_control {
|
|||
String output_dir
|
||||
String tumor_rmdup_bam
|
||||
String normal_rmdup_bam
|
||||
String probe
|
||||
|
||||
command <<<
|
||||
if [ ! -d ${output_dir}/mutation ];then
|
||||
|
|
@ -148,7 +150,7 @@ task mutation_calling_umi_control {
|
|||
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
|
||||
|
||||
# add msi and strandbias flag
|
||||
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 c
|
||||
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} c
|
||||
|
||||
# add malt flag
|
||||
grep -v ^# ${output_dir}/mutation/${name}.raw.snp_indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - > \
|
||||
|
|
@ -178,6 +180,7 @@ task mutation_calling_tissue {
|
|||
String ref
|
||||
String output_dir
|
||||
String rmdup_bam
|
||||
String probe
|
||||
|
||||
command <<<
|
||||
if [ ! -d ${output_dir}/mutation ];then
|
||||
|
|
@ -207,7 +210,7 @@ task mutation_calling_tissue {
|
|||
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
|
||||
|
||||
# add msi and strandbias flag
|
||||
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 t
|
||||
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t
|
||||
|
||||
cp ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${output_dir}/mutation/${name}.snp_indel.somatic.vcf
|
||||
|
||||
|
|
@ -232,6 +235,7 @@ task mutation_calling_tissue_control {
|
|||
String output_dir
|
||||
String tumor_rmdup_bam
|
||||
String normal_rmdup_bam
|
||||
String probe
|
||||
|
||||
command <<<
|
||||
if [ ! -d ${output_dir}/mutation ];then
|
||||
|
|
@ -258,7 +262,7 @@ task mutation_calling_tissue_control {
|
|||
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
|
||||
|
||||
# add msi and strandbias flag
|
||||
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 t
|
||||
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t
|
||||
|
||||
vcf_filter.py -i ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf \
|
||||
-o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \
|
||||
|
|
@ -463,6 +467,7 @@ workflow call_mutation {
|
|||
String bed
|
||||
String project
|
||||
String cancer
|
||||
String probe
|
||||
|
||||
# pipe 执行 mutation_calling => annovar => filter
|
||||
if (run) {
|
||||
|
|
@ -475,7 +480,8 @@ workflow call_mutation {
|
|||
output_dir=output_dir,
|
||||
ref=ref,
|
||||
bed=bed,
|
||||
rmdup_bam=tumor_rmdup_bam
|
||||
rmdup_bam=tumor_rmdup_bam,
|
||||
probe=probe
|
||||
}
|
||||
call hotspot as hotspot_calling_umi {
|
||||
input:
|
||||
|
|
@ -556,7 +562,8 @@ workflow call_mutation {
|
|||
output_dir=output_dir,
|
||||
ref=ref,
|
||||
bed=bed,
|
||||
rmdup_bam=tumor_rmdup_bam
|
||||
rmdup_bam=tumor_rmdup_bam,
|
||||
probe=probe
|
||||
}
|
||||
call hotspot as hotspot_calling_tissue {
|
||||
input:
|
||||
|
|
@ -636,7 +643,8 @@ workflow call_mutation {
|
|||
ref=ref,
|
||||
bed=bed,
|
||||
tumor_rmdup_bam=tumor_rmdup_bam,
|
||||
normal_rmdup_bam=normal_rmdup_bam
|
||||
normal_rmdup_bam=normal_rmdup_bam,
|
||||
probe=probe
|
||||
|
||||
}
|
||||
call hotspot as hotspot_calling_umi_control {
|
||||
|
|
@ -717,7 +725,8 @@ workflow call_mutation {
|
|||
ref=ref,
|
||||
bed=bed,
|
||||
tumor_rmdup_bam=tumor_rmdup_bam,
|
||||
normal_rmdup_bam=normal_rmdup_bam
|
||||
normal_rmdup_bam=normal_rmdup_bam,
|
||||
probe=probe
|
||||
|
||||
}
|
||||
call hotspot as hotspot_calling_tissue_control {
|
||||
|
|
|
|||
|
|
@ -105,13 +105,9 @@ task run_neoantigen {
|
|||
|
||||
#step2 phasing variant
|
||||
|
||||
java -jar $GATK MergeVcfs \
|
||||
-I ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \
|
||||
-I ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \
|
||||
-O ${output_dir}/neoantigen/${tumor}_combined.vcf
|
||||
|
||||
java -Xmx16g -jar $PICARD SortVcf \
|
||||
I=${output_dir}/neoantigen/${tumor}_combined.vcf \
|
||||
I=${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \
|
||||
I=${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \
|
||||
O=${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
|
||||
SEQUENCE_DICTIONARY=/home/install/ref/hg19/hg19.dict
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue