pipeline/codes/filter_neoantigen.pl

87 lines
2.9 KiB
Perl
Executable File

#!/usr/bin/perl
use strict;
use warnings;

# Filter the variant records of a VCF-style file by allele frequency (AF).
#
# Usage:
#   perl <script> germline|somatic sample_type name input_file output_file
#
# Arguments:
#   type         "germline" or "somatic"; selects the filtering rules.
#   sample_type  'c' means only the last column is read for an AF value;
#                any other value means the last two columns are read.
#   name         accepted for command-line compatibility; not used here.
#   input_file   file to read.
#   output_file  file to write (created or truncated).
#
# Behaviour:
#   * Lines starting with "##" are copied unchanged.
#   * On the "#CHROM" line the first "<non-space>-match" token is replaced
#     by "NORMAL", and four extra ##FILTER declarations are written just
#     before it.
#   * Every other line is split on tabs; column 7 is the filter status and
#     the AF of a sample column is its 7th ':'-separated sub-field.
#
# NOTE(review): $n_af / $t_af presumably mean normal / tumour AF -- confirm
# against the caller that produces the input.

# Extra FILTER declarations inserted right before the #CHROM header line.
my @EXTRA_FILTER_HEADERS = (
    "##FILTER=<ID=noise,Description=\"noise, JM add\">\n",
    "##FILTER=<ID=MSI_u_x_r_x,Description=\"MSI_u_x_r_x, JM add\">\n",
    "##FILTER=<ID=strandBias,Description=\"strandBias, JM add\">\n",
    "##FILTER=<ID=multi_alt,Description=\"multi_alt, JM add\">\n",
);

die "usage:perl $0 germline/somatic sample_type name input_file output_file" unless @ARGV == 5;
my ($type, $sample_type, $name, $input, $output) = @ARGV;

# Validate the mode BEFORE touching any file, so that a bad argument can
# no longer truncate an existing output file.
die "erro type!!! somatic or germline for now"
    unless ($type eq "germline" || $type eq "somatic");

# Three-argument open with lexical handles: file names are taken literally
# and both opens are checked.
open my $in_fh,  '<', $input  or die "Cannot open file $input: $!";
open my $out_fh, '>', $output or die "Cannot open file $output: $!";

while (my $record = <$in_fh>) {
    # Meta-information lines pass through untouched.
    if ($record =~ /^##/) {
        print {$out_fh} $record;
        next;
    }

    # Column header: rename the "-match" sample and declare extra filters.
    if ($record =~ /^#CHROM/) {
        $record =~ s/(\S+)-match/NORMAL/;
        print {$out_fh} @EXTRA_FILTER_HEADERS, $record;
        next;
    }

    chomp $record;
    my @fields = split /\t/, $record;

    my $keep = $type eq "germline"
        ? keep_germline($sample_type, \@fields)
        : keep_somatic($sample_type, \@fields);

    print {$out_fh} "$record\n" if $keep;
}

close $in_fh;
# Buffered write errors (e.g. a full disk) only surface at close time.
close $out_fh or die "Cannot close file $output: $!";

# Return the 7th ':'-separated sub-field of a sample column (undef when the
# column has fewer than seven sub-fields).
sub sample_af {
    my ($sample_column) = @_;
    my $af = (split /:/, $sample_column)[6];
    return $af;
}

# Germline rule: the record must be PASS; then the AF of the last column
# (and, unless sample_type is 'c', also of the second-to-last column) must
# be at least 0.1.  Returns true when the record should be written.
sub keep_germline {
    my ($sample_type, $fields) = @_;

    return 0 if $fields->[6] ne "PASS";

    my $n_af = sample_af($fields->[-1]);
    return $n_af >= 0.1 if $sample_type eq 'c';

    my $t_af = sample_af($fields->[-2]);
    return ($n_af >= 0.1 && $t_af >= 0.1);
}

# True when a non-PASS record is rejected: either its AF is below 0.02, or
# its AF is in [0.02, 0.05) and it carries at least two filter tags.
# NOTE(review): the original (Chinese) comment said "more than 2" tags,
# while the code has always tested ">= 2" -- confirm which is intended.
sub is_low_confidence {
    my ($filter_status, $filter_count, $t_af) = @_;

    return ($filter_status ne 'PASS'
        && ($t_af < 0.02
            || ($t_af >= 0.02 && $t_af < 0.05 && $filter_count >= 2)));
}

# Somatic rule.  Returns true when the record should be written.
#   sample_type 'c': AF comes from the last column; keep when the record is
#                    not low-confidence and AF >= 0.02.
#   otherwise:       last column gives $n_af, second-to-last gives $t_af;
#                    keep when the record is not low-confidence and
#                    $n_af < 0.02 and $t_af >= 0.05.
sub keep_somatic {
    my ($sample_type, $fields) = @_;

    my $filter_status = $fields->[6];
    # Count the ';'-separated filter tags via an explicit array rather than
    # relying on split's scalar-context return value.
    my @filter_tags  = split /;/, $filter_status;
    my $filter_count = scalar @filter_tags;

    if ($sample_type eq 'c') {
        my $t_af = sample_af($fields->[-1]);
        return 0 if is_low_confidence($filter_status, $filter_count, $t_af);
        return $t_af >= 0.02;
    }

    my $n_af = sample_af($fields->[-1]);
    my $t_af = sample_af($fields->[-2]);
    return 0 if is_low_confidence($filter_status, $filter_count, $t_af);
    return ($n_af < 0.02 && $t_af >= 0.05);
}