2023-11-29 15:13:30 +08:00
|
|
|
|
#!/usr/bin/perl
|
|
|
|
|
|
use strict;
|
|
|
|
|
|
use warnings;
|
|
|
|
|
|
|
|
|
|
|
|
die "usage:perl $0 input out project" unless @ARGV == 3;
|
|
|
|
|
|
my ($input, $out, $project) = @ARGV;
|
2023-11-30 15:31:35 +08:00
|
|
|
|
# my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
|
|
|
|
|
|
#
|
|
|
|
|
|
# print "LongIndel过滤使用public路径:$public_path\n";
|
|
|
|
|
|
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
|
2023-11-29 15:13:30 +08:00
|
|
|
|
|
2023-11-30 15:31:35 +08:00
|
|
|
|
print "longindel过滤使用database路径:$database_path\n";
|
2023-11-29 15:13:30 +08:00
|
|
|
|
|
|
|
|
|
|
open IN, "$input";
|
|
|
|
|
|
open LONGINDEL, "> $out";
|
|
|
|
|
|
|
|
|
|
|
|
my ($longindel) = info();
|
|
|
|
|
|
my @longindels = @$longindel;
|
|
|
|
|
|
|
|
|
|
|
|
my @pos;
|
|
|
|
|
|
my $h2;
|
|
|
|
|
|
|
|
|
|
|
|
while (<IN>) {
|
|
|
|
|
|
chomp;
|
|
|
|
|
|
next if /^##/;
|
|
|
|
|
|
if (/^#CHROM/) {
|
|
|
|
|
|
$h2 = $_;
|
|
|
|
|
|
print LONGINDEL "$h2\n";
|
|
|
|
|
|
next;
|
|
|
|
|
|
}
|
|
|
|
|
|
my @line = split(/\t/);
|
|
|
|
|
|
$line[7] =~ /Gene.refGene=(.*?);/;
|
2023-12-25 14:06:30 +08:00
|
|
|
|
my $gene = $1;
|
|
|
|
|
|
if ((grep {$gene =~ /$_/} @longindels) && ($_ =~ /SVTYPE=DEL/ || $_ =~ /SVTYPE=DUP/ || $_ =~ /SVTYPE=INS/)) {
|
|
|
|
|
|
if ($gene eq "BCL2L11") {
|
2023-11-29 15:13:30 +08:00
|
|
|
|
if ($line[1] == '111883194') {
|
2023-12-25 14:06:30 +08:00
|
|
|
|
print LONGINDEL $_;
|
2023-11-29 15:13:30 +08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
2023-12-25 14:06:30 +08:00
|
|
|
|
print LONGINDEL $_;
|
2023-11-29 15:13:30 +08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
sub info {
|
2023-11-30 15:31:35 +08:00
|
|
|
|
open INFO, "$database_path/info.csv";
|
2023-11-29 15:13:30 +08:00
|
|
|
|
# 读取并解析表头
|
|
|
|
|
|
my $header = <INFO>;
|
|
|
|
|
|
chomp($header);
|
|
|
|
|
|
my @column_names = split(',', $header);
|
|
|
|
|
|
my (@fusion, @longindel);
|
|
|
|
|
|
while (<INFO>) {
|
|
|
|
|
|
chomp;
|
|
|
|
|
|
my @line = split(/,/, $_);
|
|
|
|
|
|
# 将数据与表头对应
|
|
|
|
|
|
my %record;
|
|
|
|
|
|
@record{@column_names} = @line;
|
|
|
|
|
|
|
|
|
|
|
|
if ($record{'project'} eq $project) {
|
|
|
|
|
|
|
|
|
|
|
|
if ($record{'long_indel'} ne "NA") {
|
|
|
|
|
|
@longindel = split(/\//, $line[12]);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
return \@longindel
|
|
|
|
|
|
}
|