pipeline/codes/filter_longindel.pl

68 lines
1.7 KiB
Perl
Raw Normal View History

2023-11-29 15:13:30 +08:00
#!/usr/bin/perl
use strict;
use warnings;
die "usage:perl $0 input out project" unless @ARGV == 3;
my ($input, $out, $project) = @ARGV;
2023-11-30 15:31:35 +08:00
# my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
#
# print "LongIndel过滤使用public路径$public_path\n";
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
2023-11-29 15:13:30 +08:00
2023-11-30 15:31:35 +08:00
print "longindel过滤使用database路径$database_path\n";
2023-11-29 15:13:30 +08:00
open IN, "$input";
open LONGINDEL, "> $out";
my ($longindel) = info();
my @longindels = @$longindel;
my @pos;
my $h2;
while (<IN>) {
chomp;
next if /^##/;
if (/^#CHROM/) {
$h2 = $_;
print LONGINDEL "$h2\n";
next;
}
my @line = split(/\t/);
$line[7] =~ /Gene.refGene=(.*?);/;
if ((grep {$1 =~ /$_/} @longindels) && ($_ =~ /SVTYPE=DEL/ || $_ =~ /SVTYPE=DUP/ || $_ =~ /SVTYPE=INS/)) {
if ($1 eq "BCL2L11") {
if ($line[1] == '111883194') {
print LONGINDEL join("\n", @pos) . "\n";
}
}
else {
print LONGINDEL join("\n", @pos) . "\n";
}
}
}
sub info {
2023-11-30 15:31:35 +08:00
open INFO, "$database_path/info.csv";
2023-11-29 15:13:30 +08:00
# 读取并解析表头
my $header = <INFO>;
chomp($header);
my @column_names = split(',', $header);
my (@fusion, @longindel);
while (<INFO>) {
chomp;
my @line = split(/,/, $_);
# 将数据与表头对应
my %record;
@record{@column_names} = @line;
if ($record{'project'} eq $project) {
if ($record{'long_indel'} ne "NA") {
@longindel = split(/\//, $line[12]);
}
}
}
return \@longindel
}