pipeline/script/filter_promoter.pl

53 lines
1.9 KiB
Perl
Raw Normal View History

2023-11-30 15:31:35 +08:00
#!/usr/bin/env perl
use strict;
my ($name, $output_dir, $project) = @ARGV;
die "useage:perl $0 codes_dir name output_dir project" unless @ARGV == 4;
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
open INFO, "$public_path/info.csv";
<INFO>;
my @promoter;
while (<INFO>) {
chomp;
my @line = split(/\t/, $_);
if ($line[0] eq $project) {
if ($line[4] ne "NA") {
@promoter = split(/\//, $line[4]);
open PRMT, ">$output_dir/mutation/${name}.target.promoter.txt";
}
}
}
my @prmt;
open IN2, "$output_dir/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt";
<IN2>;
while (<IN2>) {
chomp;
my @line = split(/\t/, $_);
if (@promoter and grep {$line[6] eq $_} @promoter) {
if ($line[5] =~ /UTR3|upstream/ and $line[17] < 0.01
and $line[18] < 0.01 and $line[19] < 0.01 and $line[20] < 0.01 and $line[23] < 0.01 and $line[28] < 0.01 and $line[32] < 0.01) {
my @var = split(/;/, $line[9]);
my $freq = (split(/:/, $line[-2]))[5];
my $dp4 = join(",", (split(/:/, $line[-2]))[2, 4, -1]);
my $predict_benign = 0;
$predict_benign++ if ($line[50] eq "N" or $line[50] eq "P");
$predict_benign++ if $line[56] eq "T";
$predict_benign++ if $line[64] eq "T";
if ($line[6] eq 'TERT') {
push @prmt, join("\t", @line[0 .. 9], $freq, $dp4, $predict_benign, @line[10, 11, 16, 18, $#line])
if (($line[1] eq '1295228' and $line[4] eq 'A') or ($line[1] eq '1295250' and $line[4] eq 'A'));
}
}
}
}
if (@prmt) {
print PRMT join("\t", @head[0 .. 9]), "\tFreq", "\tDP-AD-DP4", "\tpredict_benign(MutationTaster/FATHMM/MetaSVM)\t", join("\t", @head[10, 11, 16, 18]), "\tSTR", "\n";
print PRMT join("\n", @prmt) . "\n";
}