pipeline/script/netchop.pl

75 lines
2.3 KiB
Perl
Executable File

#!/usr/bin/perl
use strict;
use warnings;
use List::Util qw(min max);
#max_length:最大的epitope长度
my ($outputDir,$name,$tumor_prefix,$max_length)=@ARGV;
die "usage:perl $0 outputDir name tumor_prefix max_length" if @ARGV!=4;
open IN,"$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.fasta";
my %fa;
while(<IN>){
if (/^>MT/){
open OUT,">$outputDir/neoantigen/MHC_Class_I/tmp.fa";
print OUT;
$_=~/MT\.(\d+)\./;
my $id=$1;
my $seq=<IN>;
print OUT $seq;
chomp $seq;
$fa{$id}=$seq;
system"predict.py -m netchop -n $outputDir/neoantigen/MHC_Class_I/tmp.fa >>$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt";
close OUT;
}
}
unlink "$outputDir/neoantigen/MHC_Class_I/tmp.fa";
my %score;
open IN,"$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt";
while(<IN>){
next unless /^\d+/;
chomp;
my @line=split;
$line[3]=~/MT\.(\d+)\./;
$score{$1}{$line[0]}=$line[2];
}
open IN,"$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.tsv";
open OUT,">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.txt";
open OUT2,">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt";
#my %neopt;
my $head=<IN>;
chomp $head;
print OUT "$head\tcleavage_score\n";
while(<IN>){
chomp;
my @line=split(/\t/);
$line[44]=~/^(\d+)\./;
my $id=$1;
my $pep=$line[18];
if(exists $fa{$id}){
my $index=index($fa{$id},$pep)+length($pep);
my $cleavage_score=$score{$id}{$index};
print OUT "$_\t$cleavage_score\n";
if ($line[21]<=5000 and ($line[23] eq "NA" or $line[23]>=1)){
print OUT2 "$_\t$cleavage_score\n";
}
}else{
print OUT "$_\tNA\n";
}
}
system"sort -k 22 -n $outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt >$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt";
unlink "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt";
open SORT,"$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt";
open OUT3,">$outputDir/neoantigen/MHC_Class_I/neoantigen.txt";
print OUT3 "序号\tHLA分型\t基因\t多肽\t亲和力\t剪切效率\n";
my %pep;
my $bool=0;
while(<SORT>){
chomp;
my @line=split(/\t/);
if (not exists $pep{$line[18]}){
$pep{$line[18]}++;
$bool+=1;
print OUT3 "$bool\t$line[14]\t$line[11]\t$line[18]\t$line[21]\t$line[53]\n";
}
}