#!/usr/bin/perl use strict; use warnings; use List::Util qw(min max); #max_length:最大的epitope长度 my ($outputDir,$name,$tumor_prefix,$max_length)=@ARGV; die "usage:perl $0 outputDir name tumor_prefix max_length" if @ARGV!=4; open IN,"$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.fasta"; my %fa; while(){ if (/^>MT/){ open OUT,">$outputDir/neoantigen/MHC_Class_I/tmp.fa"; print OUT; $_=~/MT\.(\d+)\./; my $id=$1; my $seq=; print OUT $seq; chomp $seq; $fa{$id}=$seq; system"predict.py -m netchop -n $outputDir/neoantigen/MHC_Class_I/tmp.fa >>$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt"; close OUT; } } unlink "$outputDir/neoantigen/MHC_Class_I/tmp.fa"; my %score; open IN,"$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt"; while(){ next unless /^\d+/; chomp; my @line=split; $line[3]=~/MT\.(\d+)\./; $score{$1}{$line[0]}=$line[2]; } open IN,"$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.tsv"; open OUT,">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.txt"; open OUT2,">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt"; #my %neopt; my $head=; chomp $head; print OUT "$head\tcleavage_score\n"; while(){ chomp; my @line=split(/\t/); $line[44]=~/^(\d+)\./; my $id=$1; my $pep=$line[18]; if(exists $fa{$id}){ my $index=index($fa{$id},$pep)+length($pep); my $cleavage_score=$score{$id}{$index}; print OUT "$_\t$cleavage_score\n"; if ($line[21]<=5000 and ($line[23] eq "NA" or $line[23]>=1)){ print OUT2 "$_\t$cleavage_score\n"; } }else{ print OUT "$_\tNA\n"; } } system"sort -k 22 -n $outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt >$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt"; unlink "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt"; open SORT,"$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt"; open OUT3,">$outputDir/neoantigen/MHC_Class_I/neoantigen.txt"; print OUT3 "序号\tHLA分型\t基因\t多肽\t亲和力\t剪切效率\n"; my %pep; my $bool=0; while(){ chomp; my @line=split(/\t/); if (not exists $pep{$line[18]}){ $pep{$line[18]}++; $bool+=1; print OUT3 "$bool\t$line[14]\t$line[11]\t$line[18]\t$line[21]\t$line[53]\n"; } }