pipeline/codes/netchop.pl

79 lines
2.6 KiB
Perl
Executable File

#!/usr/bin/env perl
use strict;
use warnings;
use List::Util qw(min max);
#max_length:最大的epitope长度
die "usage:perl $0 outputDir tumor_prefix" if @ARGV != 2;
my ($outputDir, $tumor_prefix) = @ARGV;
open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.fasta";
my %fa;
while (<IN>) {
if (/^>MT/) {
open OUT, ">$outputDir/neoantigen/MHC_Class_I/tmp.fa";
print OUT;
$_ =~ /MT\.(\d+)\./;
my $id = $1;
my $seq = <IN>;
print OUT $seq;
chomp $seq;
$fa{$id} = $seq;
system "predict.py -m netchop -n $outputDir/neoantigen/MHC_Class_I/tmp.fa >>$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt";
close OUT;
}
}
unlink "$outputDir/neoantigen/MHC_Class_I/tmp.fa";
my %score;
open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt";
while (<IN>) {
next unless /^\d+/;
chomp;
my @line = split;
$line[3] =~ /MT\.(\d+)\./;
$score{$1}{$line[0]} = $line[2];
}
open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.tsv";
open OUT, ">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.txt";
open OUT2, ">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt";
#my %neopt;
my $head = <IN>;
chomp $head;
print OUT "$head\tcleavage_score\n";
while (<IN>) {
chomp;
my @line = split(/\t/);
$line[44] =~ /^(\d+)\./;
my $id = $1;
my $pep = $line[18];
if (exists $fa{$id}) {
my $index = index($fa{$id}, $pep) + length($pep);
my $cleavage_score = $score{$id}{$index};
print OUT "$_\t$cleavage_score\n";
if ($line[21] <= 5000 and ($line[23] eq "NA" or $line[23] >= 1)) {
print OUT2 "$_\t$cleavage_score\n";
}
}
else {
print OUT "$_\tNA\n";
}
}
system "sort -k 22 -n $outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt >$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt";
unlink "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt";
open SORT, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt";
open OUT3, ">$outputDir/neoantigen/MHC_Class_I/neoantigen.txt";
print OUT3 "序号\tHLA分型\t基因\t多肽\t亲和力\t剪切效率\n";
my %pep;
my $bool = 0;
while (<SORT>) {
chomp;
my @line = split(/\t/);
if (not exists $pep{$line[18]}) {
$pep{$line[18]}++;
$bool += 1;
print OUT3 "$bool\t$line[14]\t$line[11]\t$line[18]\t$line[21]\t$line[53]\n";
}
}