#!/usr/bin/perl
use strict;
use warnings;

die "usage:perl $0 input out project" unless @ARGV == 3;
my ($input, $out, $project) = @ARGV;
# my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
#
# print "LongIndel过滤使用public路径：$public_path\n";
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";

print "longindel过滤使用database路径：$database_path\n";

open IN, "$input";
open LONGINDEL, "> $out";

my ($longindel) = info();
my @longindels = @$longindel;

my $h2;

while (<IN>) {
    chomp;
    next if /^##/;
    if (/^#CHROM/) {
        $h2 = $_;
        print LONGINDEL "$h2\tHGVS\tfreq\n";
        next;
    }
    my @line = split(/\t/);
    $line[7] =~ /Gene.refGene=(.*?);/;
    my $gene = $1;
    if ((grep {$gene =~ /$_/} @longindels) && ($_ =~ /SVTYPE=DEL/ || $_ =~ /SVTYPE=DUP/ || $_ =~ /SVTYPE=INS/)) {

        my $freq = (split(/:/, $line[9]))[9] / (split(/:/, $line[9]))[7];
        my $hgvs = '.';
        if ($gene eq "BCL2L11") {
            if ($line[1] == '111883194') {
                $hgvs = "BCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del";
                print LONGINDEL $_ . "\t$hgvs\t$freq\n";
            }
        }
        else {
            print LONGINDEL $_ . "\t$hgvs\t$freq\n";
        }
    }
}

sub info {
    open INFO, "$database_path/info.csv";
    # 读取并解析表头
    my $header = <INFO>;
    chomp($header);
    my @column_names = split(',', $header);
    my (@fusion, @longindel);
    while (<INFO>) {
        chomp;
        my @line = split(/,/, $_);
        # 将数据与表头对应
        my %record;
        @record{@column_names} = @line;

        if ($record{'project'} eq $project) {

            if ($record{'long_indel'} ne "NA") {
                @longindel = split(/\//, $line[12]);
            }
        }
    }
    return \@longindel
}