#!/usr/bin/perl
# Filter long-indel structural-variant records out of an annotated VCF.
#
# Usage: perl <this script> input out project
#   input   - tab-separated VCF; the INFO column (8th) is searched for
#             "Gene.refGene=<gene>" and the whole line for "SVTYPE=".
#   out     - output file: the "#CHROM" header line and every kept record,
#             each extended with two extra columns, HGVS and freq.
#   project - project name looked up in "$DATABASE/info.csv" to obtain the
#             list of genes whose long indels should be reported.
#
# A record is kept when its SVTYPE is DEL, DUP or INS and its gene annotation
# contains one of the project's long-indel gene names.
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

die "usage:perl $0 input out project" unless @ARGV == 3;
my ($input, $out, $project) = @ARGV;

# NOTE: a PUBLIC-path override ($ENV{'PUBLIC'}) used to be read here; it is
# currently disabled and nothing in this script depends on it.

# Directory holding info.csv; overridable through the DATABASE environment
# variable.
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "longindel过滤使用database路径:$database_path\n";

# The only BCL2L11 event that is reported: records annotated exactly as
# BCL2L11 are kept only when they start at this position, and they receive
# this fixed HGVS description.
my $BCL2L11_DEL_POS  = 111883194;
my $BCL2L11_DEL_HGVS = 'BCL2L11:NM_001204106:intron2:c.394+1479_394+4381del';

open my $in_fh,  '<', $input or die "cannot open input '$input': $!";
open my $out_fh, '>', $out   or die "cannot open output '$out': $!";

# Drop empty names: an empty entry would match every gene.
my @longindels = grep { defined && length } @{ info() };

LINE:
while (my $record = <$in_fh>) {
    chomp $record;
    next LINE if $record =~ /^##/;    # VCF meta-information lines

    if ($record =~ /^#CHROM/) {
        # Column header: pass through with the two added column names.
        print {$out_fh} "$record\tHGVS\tfreq\n";
        next LINE;
    }

    my @field = split /\t/, $record;

    # Without a gene annotation the record cannot be matched to the gene
    # list, so skip it instead of reusing a stale capture.
    next LINE unless defined $field[7] && $field[7] =~ /Gene.refGene=([^;]*)/;
    my $gene = $1;

    next LINE unless $record =~ /SVTYPE=(?:DEL|DUP|INS)/;

    # Literal substring test: the annotation may contain more than the bare
    # gene name, and names from the CSV must not be treated as regexes.
    next LINE unless grep { index($gene, $_) >= 0 } @longindels;

    my $hgvs = '.';
    if ($gene eq 'BCL2L11') {
        next LINE
            unless looks_like_number($field[1]) && $field[1] == $BCL2L11_DEL_POS;
        $hgvs = $BCL2L11_DEL_HGVS;
    }

    # freq = sub-field 9 / sub-field 7 (0-based) of the first sample column.
    # NOTE(review): which FORMAT keys these positions correspond to depends
    # on the upstream SV caller -- confirm against its FORMAT definition.
    my @sample = defined $field[9] ? split(/:/, $field[9]) : ();
    my ($numerator, $denominator) = @sample[9, 7];
    my $freq = '.';
    if (   looks_like_number($numerator)
        && looks_like_number($denominator)
        && $denominator != 0)
    {
        $freq = $numerator / $denominator;
    }
    else {
        # Report and continue rather than aborting on a division by zero.
        warn "cannot compute freq for input line $.; writing '.'\n";
    }

    print {$out_fh} "$record\t$hgvs\t$freq\n";
}

close $in_fh;
close $out_fh or die "cannot close output '$out': $!";

# info()
#
# Reads "$database_path/info.csv" (comma-separated, first line is the column
# header) and returns a reference to the list of long-indel gene names
# configured for $project.
#
# Uses the file-level $database_path and $project; takes no arguments.
# The 'long_indel' cell is a '/'-separated gene list, or "NA" for none.
# When several rows match the project, the last row with a non-"NA" value
# wins. Returns a reference to an empty array when nothing matches.
# Dies if the file cannot be opened, is empty, or lacks a required column.
sub info {
    my $info_file = "$database_path/info.csv";
    open my $info_fh, '<', $info_file
        or die "cannot open '$info_file': $!";

    # Read and parse the header line.
    my $header = <$info_fh>;
    die "'$info_file' is empty\n" unless defined $header;
    $header =~ s/\r?\n\z//;            # tolerate CRLF line endings
    $header =~ s/\A\xEF\xBB\xBF//;     # tolerate a UTF-8 byte-order mark
    my @column_names = split /,/, $header;

    for my $required (qw(project long_indel)) {
        die "'$info_file' has no '$required' column\n"
            unless grep { $_ eq $required } @column_names;
    }

    my @longindel;
    while (my $row = <$info_fh>) {
        $row =~ s/\r?\n\z//;
        my @value = split /,/, $row;

        # Map each value to its column name.
        my %record;
        @record{@column_names} = @value;

        next unless defined $record{'project'} && $record{'project'} eq $project;

        # Use the named column that was just tested, not a fixed position.
        my $genes = $record{'long_indel'};
        next if defined $genes && $genes eq 'NA';
        @longindel = defined $genes ? split(/\//, $genes) : ();
    }
    close $info_fh;

    return \@longindel;
}