pipeline/codes/filter_longindel.pl

#!/usr/bin/perl
use strict;
use warnings;

die "usage:perl $0 input out project" unless @ARGV == 3;
my ($input, $out, $project) = @ARGV;
# my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
#
# print "LongIndel过滤使用public路径：$public_path\n";
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";

print "longindel过滤使用database路径：$database_path\n";

open IN, "$input";
open LONGINDEL, "> $out";

my ($longindel) = info();
my @longindels = @$longindel;

my @pos;
my $h2;

while (<IN>) {
    chomp;
    next if /^##/;
    if (/^#CHROM/) {
        $h2 = $_;
        print LONGINDEL "$h2\n";
        next;
    }
    my @line = split(/\t/);
    $line[7] =~ /Gene.refGene=(.*?);/;
    my $gene = $1;
    if ((grep {$gene =~ /$_/} @longindels) && ($_ =~ /SVTYPE=DEL/ || $_ =~ /SVTYPE=DUP/ || $_ =~ /SVTYPE=INS/)) {
        if ($gene eq "BCL2L11") {
            if ($line[1] == '111883194') {
                print LONGINDEL $_;
            }
        }
        else {
            print LONGINDEL $_;
        }
    }
}

sub info {
    open INFO, "$database_path/info.csv";
    # 读取并解析表头
    my $header = <INFO>;
    chomp($header);
    my @column_names = split(',', $header);
    my (@fusion, @longindel);
    while (<INFO>) {
        chomp;
        my @line = split(/,/, $_);
        # 将数据与表头对应
        my %record;
        @record{@column_names} = @line;

        if ($record{'project'} eq $project) {

            if ($record{'long_indel'} ne "NA") {
                @longindel = split(/\//, $line[12]);
            }
        }
    }
    return \@longindel
}