增加新抗原

master
chaopower 2023-12-21 10:22:54 +08:00
parent 90658950bd
commit c9f525b2bf
7 changed files with 109 additions and 42 deletions

View File

@ -15,8 +15,12 @@ if ($type eq "germline") {
next;
}
elsif (/^#CHROM/) {
$_ =~ s/TUMOR/$name/;
print OUT;
$_ =~ s/(\S+)-match/NORMAL/;
print OUT "##FILTER=<ID=noise,Description=\"noise, JM add\">\n";
print OUT "##FILTER=<ID=MSI_u_x_r_x,Description=\"MSI_u_x_r_x, JM add\">\n";
print OUT "##FILTER=<ID=strandBias,Description=\"strandBias, JM add\">\n";
print OUT "##FILTER=<ID=multi_alt,Description=\"multi_alt, JM add\">\n";
print OUT $_;
next;
}
else {
@ -40,27 +44,38 @@ if ($type eq "germline") {
elsif ($type eq "somatic") {
while (<IN>) {
if (/^##/) {
print OUT;
print OUT $_;
next;
}
elsif (/^#CHROM/) {
$_ =~ s/TUMOR/$name/;
print OUT;
$_ =~ s/(\S+)-match/NORMAL/;
print OUT "##FILTER=<ID=noise,Description=\"noise, JM add\">\n";
print OUT "##FILTER=<ID=MSI_u_x_r_x,Description=\"MSI_u_x_r_x, JM add\">\n";
print OUT "##FILTER=<ID=strandBias,Description=\"strandBias, JM add\">\n";
print OUT "##FILTER=<ID=multi_alt,Description=\"multi_alt, JM add\">\n";
print OUT $_;
next;
}
else {
chomp;
my @line = split("\t", $_);
next if $line[6] ne "PASS";
# next if $line[6] ne "PASS";
my $filters = split(";", $line[6]);
# 不是pass 低频的; 或者 0.02 < 0.05 之间 个数大于2
if ($sample_type eq 'c') {
my $t_af = (split(":", $line[-1]))[6];
print OUT "$_\n" if $t_af >= 0.02;
if (!($line[6] ne 'PASS' and ($t_af < 0.02 or ($t_af >= 0.02 and $t_af < 0.05 and $filters >= 2)))) {
print OUT "$_\n" if $t_af >= 0.02;
}
}
else {
my $n_af = (split(":", $line[-1]))[6];
my $t_af = (split(":", $line[-2]))[6];
print OUT "$_\n" if ($n_af < 0.02 and $t_af >= 0.05);
if (!($line[6] ne 'PASS' and ($t_af < 0.02 or ($t_af >= 0.02 and $t_af < 0.05 and $filters >= 2)))) {
print OUT "$_\n" if ($n_af < 0.02 and $t_af >= 0.05);
}
}
}

View File

@ -25,7 +25,7 @@ if __name__ == '__main__':
parser.add_argument('-p', '--project', help="project", required=True)
parser.add_argument('-c', '--cancer', help="cancer", required=True)
parser.add_argument('-b', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True)
parser.add_argument('-w', '--wdl', help="wdl", default='/home/zhangchao/project/pipeline/workflow/pipeline.wdl')
parser.add_argument('-w', '--wdl', help="wdl", default='$WORKFLOW/pipeline.wdl')
parser.add_argument('-node', '--start_node',
help="node begain to run; 'addQc', 'addAlignment', "
"'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo',"

View File

@ -9,12 +9,11 @@ my ($vcfin, $vcfout, $prob, $type) = @ARGV;
my $typekey;
if ($type eq 't') {
$typekey="tissue";
$typekey = "tissue";
}
elsif ($type eq 'c') {
$typekey="cfdna";
$typekey = "cfdna";
}
else {
die "参数输入错误 type 需为 t 或 c''$!"
}
@ -39,8 +38,14 @@ while (<IN>) {
next;
}
chomp;
$_ =~ /VD=(\d+);.*;VARBIAS=(\d+):(\d+);.*;MSI\=(\d+);MSILEN\=(\d+);/;
my ($vd, $vfr, $vrr, $msi_repeat_time, $msi_unit) = ($1, $2, $3, $4, $5);
my ($vd, $msi_repeat_time, $msi_unit) = (0, 0, 0, 0, 0);
$_ =~ /;VARBIAS=(\d+):(\d+);/;
my ($vfr, $vrr) = ($1, $2);
$_ =~ /VD=(\d+);.*;MSI\=(\d+);MSILEN\=(\d+);/;
# ($vd, $vfr, $vrr, $msi_repeat_time, $msi_unit) = ($1, $2, $3, $4, $5);
($vd, $msi_repeat_time, $msi_unit) = ($1, $2, $3);
my @line = split;
my $depth = (split(":", $line[9]))[1];
next if $depth < 50;
@ -56,19 +61,23 @@ while (<IN>) {
}
if ($msi_repeat_time >= 4) {
if ($line[6] eq 'PASS') {
$line[6] = 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time;
# $line[6] = 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time;
$line[6] = 'MSI_u_x_r_x';
}
else {
$line[6] = $line[6] . ";" . 'MSI_u' . $msi_unit . "_r" . $msi_repeat_time;
$line[6] = $line[6] . ";" . 'MSI_u_x_r_x';
}
}
if ($vd >= 7 and ($vfr / $vd < 0.1 or $vrr / $vd < 0.1)) {
if ($line[6] eq 'PASS') {
$line[6] = 'strandBias';
}
else {
$line[6] = $line[6] . ";" . 'strandBias';
if (defined($vfr) and defined($vrr)) {
if ($vd >= 7 and ($vfr / $vd < 0.1 or $vrr / $vd < 0.1)) {
if ($line[6] eq 'PASS') {
$line[6] = 'strandBias';
}
else {
$line[6] = $line[6] . ";" . 'strandBias';
}
}
}
print OUT join("\t", @line), "\n";
}

View File

@ -6,6 +6,36 @@ import sys
import pysam
contig_length = """
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##contig=<ID=chr7,length=159138663>
##contig=<ID=chrX,length=155270560>
##contig=<ID=chr8,length=146364022>
##contig=<ID=chr9,length=141213431>
##contig=<ID=chr10,length=135534747>
##contig=<ID=chr11,length=135006516>
##contig=<ID=chr12,length=133851895>
##contig=<ID=chr13,length=115169878>
##contig=<ID=chr14,length=107349540>
##contig=<ID=chr15,length=102531392>
##contig=<ID=chr16,length=90354753>
##contig=<ID=chr17,length=81195210>
##contig=<ID=chr18,length=78077248>
##contig=<ID=chr20,length=63025520>
##contig=<ID=chrY,length=59373566>
##contig=<ID=chr19,length=59128983>
##contig=<ID=chr22,length=51304566>
##contig=<ID=chr21,length=48129895>
##contig=<ID=chrM,length=16571>
"""
class VCFFilter:
def __init__(self, input_vcf, output_vcf, filter_expression):
@ -40,10 +70,17 @@ class VCFFilter:
vcf_out = sys.stdout
with pysam.VariantFile(self.input_vcf, 'r') as vcf_in:
for chrome in range(1, 23):
vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>')
for chrome in ['X', 'Y', 'MT']:
vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>')
# # 获取当前已存在的过滤器信息
# existing_filters = vcf_in.header.filters
# # 添加新的过滤器
# existing_filters.add(id='noise', number='', type='',description='noise; JM add ')
# for chrome in range(1, 23):
# vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>')
# for chrome in ['X', 'Y', 'MT']:
# vcf_in.header.add_line(f'##contig=<ID=chr{chrome}>')
for line in contig_length.strip().splitlines():
vcf_in.header.add_line(line)
header = vcf_in.header
vcf_out.write(str(header))
for record in vcf_in:

View File

@ -89,7 +89,8 @@ workflow pipeline {
bed=bed,
output_dir=workdir,
project=project,
cancer=cancer
cancer=cancer,
probe=probe
}
call tmb.call_tmb as call_tmb {

View File

@ -4,6 +4,7 @@ task mutation_calling_umi {
String rmdup_bam
String ref
String bed
String probe
command <<<
if [ ! -d ${output_dir}/mutation ];then
@ -53,7 +54,7 @@ task mutation_calling_umi {
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
# add msi and strandbias flag
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 c
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} c
# add malt flag
grep -v ^# ${output_dir}/mutation/${name}.raw.snp_indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - > \
@ -88,6 +89,7 @@ task mutation_calling_umi_control {
String output_dir
String tumor_rmdup_bam
String normal_rmdup_bam
String probe
command <<<
if [ ! -d ${output_dir}/mutation ];then
@ -148,7 +150,7 @@ task mutation_calling_umi_control {
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
# add msi and strandbias flag
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 c
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} c
# add malt flag
grep -v ^# ${output_dir}/mutation/${name}.raw.snp_indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - > \
@ -178,6 +180,7 @@ task mutation_calling_tissue {
String ref
String output_dir
String rmdup_bam
String probe
command <<<
if [ ! -d ${output_dir}/mutation ];then
@ -207,7 +210,7 @@ task mutation_calling_tissue {
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
# add msi and strandbias flag
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 t
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t
cp ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${output_dir}/mutation/${name}.snp_indel.somatic.vcf
@ -232,6 +235,7 @@ task mutation_calling_tissue_control {
String output_dir
String tumor_rmdup_bam
String normal_rmdup_bam
String probe
command <<<
if [ ! -d ${output_dir}/mutation ];then
@ -258,7 +262,7 @@ task mutation_calling_tissue_control {
-o ${output_dir}/mutation/${name}.raw.snp_indel.vcf
# add msi and strandbias flag
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf 160 t
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t
vcf_filter.py -i ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf \
-o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \
@ -463,6 +467,7 @@ workflow call_mutation {
String bed
String project
String cancer
String probe
# pipe 执行 mutation_calling => annovar => filter
if (run) {
@ -475,7 +480,8 @@ workflow call_mutation {
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=tumor_rmdup_bam
rmdup_bam=tumor_rmdup_bam,
probe=probe
}
call hotspot as hotspot_calling_umi {
input:
@ -556,7 +562,8 @@ workflow call_mutation {
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=tumor_rmdup_bam
rmdup_bam=tumor_rmdup_bam,
probe=probe
}
call hotspot as hotspot_calling_tissue {
input:
@ -636,7 +643,8 @@ workflow call_mutation {
ref=ref,
bed=bed,
tumor_rmdup_bam=tumor_rmdup_bam,
normal_rmdup_bam=normal_rmdup_bam
normal_rmdup_bam=normal_rmdup_bam,
probe=probe
}
call hotspot as hotspot_calling_umi_control {
@ -717,7 +725,8 @@ workflow call_mutation {
ref=ref,
bed=bed,
tumor_rmdup_bam=tumor_rmdup_bam,
normal_rmdup_bam=normal_rmdup_bam
normal_rmdup_bam=normal_rmdup_bam,
probe=probe
}
call hotspot as hotspot_calling_tissue_control {

View File

@ -105,13 +105,9 @@ task run_neoantigen {
#step2 phasing variant
java -jar $GATK MergeVcfs \
-I ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \
-I ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \
-O ${output_dir}/neoantigen/${tumor}_combined.vcf
java -Xmx16g -jar $PICARD SortVcf \
I=${output_dir}/neoantigen/${tumor}_combined.vcf \
I=${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \
I=${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \
O=${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
SEQUENCE_DICTIONARY=/home/install/ref/hg19/hg19.dict