61 lines
1.5 KiB
Perl
Executable File
61 lines
1.5 KiB
Perl
Executable File
#!/usr/bin/perl
# Filter a tab-separated CNV table down to the genes configured for a project.
#
# Usage: perl <script> input output project
#   input   - tab-separated table with one header line. Column index 3 holds a
#             comma-separated gene list; column index 4 holds the number used
#             as the exponent below (presumably a log2 ratio - TODO confirm
#             against whatever produces this file).
#   output  - file to write; gets the header plus one row per reported gene,
#             with "ref_gene" and "copy" columns inserted after column 4.
#   project - project name looked up by info() in "$PUBLIC/info.csv".
#
# A gene is reported when it is in the project's gene list and the row's
# copy value, 2 ** (1 + column 4) rounded to two decimals, is <= 1 or >= 3.5.
use strict;
use warnings;

die "usage:perl $0 input output project\n" unless @ARGV == 3;

my ($input_path, $output_path) = @ARGV[0, 1];

# $project and $public_path are deliberately file-scoped: info() reads them.
my $project = $ARGV[2];

my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";

print "Cnv过滤使用public路径:$public_path\n";

# Turn the project's gene list into a lookup table so each gene of each row
# costs one hash probe instead of a scan over the whole list.
my $cnv = info();
my %is_target_gene = map { $_ => 1 } @$cnv;

# Three-argument open with lexical handles; fail loudly instead of silently
# reading from / printing to an unopened handle.
open(my $in, '<', $input_path)
    or die "Cannot open input file '$input_path': $!\n";
open(my $out, '>', $output_path)
    or die "Cannot open output file '$output_path' for writing: $!\n";

my $head = <$in>;
if (!defined $head) {
    # Keep producing the (empty-celled) header for an empty input, but say so.
    warn "Input file '$input_path' is empty\n";
    $head = '';
}
chomp $head;
my @head = split(/\t/, $head);
print {$out} join("\t", _columns(\@head, 0 .. 4), "ref_gene", "copy", _columns(\@head, 5 .. 9)), "\n";

while (my $row = <$in>) {
    chomp $row;
    my @line = split(/\t/, $row);

    # Without column 4 the copy value would be 2 ** 1 = 2.00, which never
    # passes the threshold, so such rows (e.g. blank lines) produce nothing.
    next if @line < 5;

    # The thresholds are applied to the rounded value, exactly as printed.
    my $cn = sprintf("%.2f", 2 ** (1 + $line[4]));
    next unless ($cn <= 1 || $cn >= 3.5);

    # Report each configured gene once per row, in order of first appearance.
    my %seen;
    foreach my $gene (split(/,/, $line[3])) {
        next unless $is_target_gene{$gene};
        next if $seen{$gene}++;
        print {$out} join("\t", _columns(\@line, 0 .. 4), $gene, $cn, _columns(\@line, 5 .. 9)), "\n";
    }
}

close($in);
# Buffered write errors (disk full, etc.) only surface when the handle closes.
close($out) or die "Error while writing '$output_path': $!\n";

# Return the requested columns of the row (an array reference), substituting
# an empty string for any column the row does not have, so that short rows
# print empty cells without "uninitialized value" warnings.
sub _columns {
    my ($row, @indexes) = @_;
    return map { defined $row->[$_] ? $row->[$_] : '' } @indexes;
}
|
||
|
||
# Look up the CNV gene list configured for the current project.
#
# Reads "$public_path/info.csv" (using the file-scoped $public_path). The
# first line names the comma-separated columns. For every later row whose
# "project" column equals the file-scoped $project and whose "cnv" column is
# not "NA", the "cnv" value is split on "/" into gene names. If several rows
# match, the last one wins.
#
# Returns: a reference to an array of gene names (empty when nothing matches
#          or the file has no rows).
# Dies:    when info.csv cannot be opened.
sub info {
    my $info_file = "$public_path/info.csv";
    open(my $info_fh, '<', $info_file)
        or die "Cannot open project table '$info_file': $!\n";

    # Read and parse the header line.
    my $header = <$info_fh>;
    $header = '' unless defined $header;
    # Strip CR as well as LF so a CRLF-terminated file does not leave "\r"
    # glued to the last column name.
    $header =~ s/[\r\n]+\z//;
    my @column_names = split(',', $header);

    my @cnvs;
    while (my $row = <$info_fh>) {
        $row =~ s/[\r\n]+\z//;

        # Map each value to its column name.
        my %record;
        @record{@column_names} = split(/,/, $row);

        # split drops trailing empty fields, so a short row leaves columns
        # undefined; treat those as empty strings rather than comparing undef.
        my $row_project = defined $record{'project'} ? $record{'project'} : '';
        my $row_cnv     = defined $record{'cnv'}     ? $record{'cnv'}     : '';

        next unless $row_project eq $project;
        next if $row_cnv eq "NA";

        @cnvs = split(/\//, $row_cnv);
    }
    close($info_fh);

    return \@cnvs;
}
|