2023-10-18 15:59:11 +08:00
|
|
|
|
#!/usr/bin/perl
# Filter a tab-separated CNV table down to the genes configured for a project.
#
# Usage: perl <script> input output project
#   input   - tab-separated table whose first line is a header. Field index 3
#             holds a comma-separated gene list; field index 4 holds a number
#             from which the copy number is derived as 2 ** (1 + value)
#             (presumably a log2 ratio -- confirm with the upstream caller).
#   output  - file to create; receives the header plus one row per kept gene.
#   project - project name that info() looks up in "$public_path/info.csv".
#
# A (row, gene) pair is written when the gene is in the project's CNV gene
# list and the copy number, rounded to two decimals, is <= 1 or >= 3.5.
# Each gene is written at most once per input row.
use strict;
use warnings;

die "usage:perl $0 input output project\n" unless @ARGV == 3;

# $project is read by info(), so it must stay a file-level lexical.
my ($input_file, $output_file, $project) = @ARGV;

# Three-argument open with lexical handles: file names are taken literally
# (no mode characters are interpreted) and failures are reported immediately.
open my $in_fh, '<', $input_file
    or die "Cannot open input file '$input_file': $!\n";
open my $out_fh, '>', $output_file
    or die "Cannot open output file '$output_file': $!\n";

# $public_path is read by info(); the PUBLIC environment variable overrides
# the built-in default location.
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
print "Cnv过滤使用public路径:$public_path\n";

# Gene names configured for this project, as a set for constant-time lookup.
my %is_cnv_gene = map { $_ => 1 } @{ info() };

# Header: the two new columns are inserted after the first five input columns.
my $head = <$in_fh>;
$head = '' unless defined $head;    # empty input: no header line to chomp
chomp $head;
my @head = split /\t/, $head;
print {$out_fh} join("\t", (@head[0 .. 4], "ref_gene", "copy", @head[5 .. 9])), "\n";

while (my $row = <$in_fh>) {
    chomp $row;
    my @line = split /\t/, $row;

    # The threshold is applied to the rounded value, exactly as it is printed.
    my $cn = sprintf("%.2f", 2 ** (1 + $line[4]));
    next unless ($cn <= 1 || $cn >= 3.5);

    my $gene_field = defined $line[3] ? $line[3] : '';
    my %seen;
    for my $gene (split /,/, $gene_field) {
        next unless $is_cnv_gene{$gene};
        next if $seen{$gene}++;    # report each gene once per row
        print {$out_fh} join("\t", (@line[0 .. 4], $gene, $cn, @line[5 .. 9])), "\n";
    }
}

close $in_fh;
# Buffered write errors (e.g. a full disk) only surface when the handle closes.
close $out_fh
    or die "Cannot finish writing output file '$output_file': $!\n";
|
|
|
|
|
|
|
|
|
|
|
|
# Look up the CNV gene list configured for the current project.
#
# Reads "$public_path/info.csv": a comma-separated file whose first line names
# the columns. Rows whose 'project' column equals the file-level $project are
# considered; their 'cnv' column is a '/'-separated gene list, or "NA" for
# "no genes configured".
#
# Takes no arguments; relies on the file-level $public_path and $project.
# Returns a reference to an array of gene names. The array is empty when the
# project is not listed or only has "NA" entries. When several rows match,
# the last one whose cnv value is not "NA" wins.
# Dies when info.csv cannot be opened.
sub info {
    my $info_file = "$public_path/info.csv";
    open my $info_fh, '<', $info_file
        or die "Cannot open CNV info file '$info_file': $!\n";

    my @cnvs;

    # Read and parse the header line.
    my $header = <$info_fh>;
    if (!defined $header) {
        # Empty file: nothing is configured for any project.
        close $info_fh;
        return \@cnvs;
    }
    # Strip the line ending, including a CR left by CRLF files, so the last
    # column name (and the last gene further down) compares cleanly.
    $header =~ s/[\r\n]+\z//;
    my @column_names = split /,/, $header;

    while (my $row = <$info_fh>) {
        $row =~ s/[\r\n]+\z//;

        # Map each field to its column name from the header.
        my %record;
        @record{@column_names} = split /,/, $row;

        # Short or blank rows have no project field; skip them quietly.
        next unless defined($record{'project'}) && $record{'project'} eq $project;

        # A missing cnv field is treated as an empty list, which still
        # replaces any list taken from an earlier matching row.
        my $cnv_field = defined $record{'cnv'} ? $record{'cnv'} : '';
        next if $cnv_field eq "NA";

        @cnvs = split m{/}, $cnv_field;
    }

    close $info_fh;
    return \@cnvs;
}
|