#!/usr/bin/env perl use strict; use warnings; use List::Util qw(min max); #max_length:最大的epitope长度 die "usage:perl $0 outputDir tumor_prefix" if @ARGV != 2; my ($outputDir, $tumor_prefix) = @ARGV; open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.fasta"; my %fa; while () { if (/^>MT/) { open OUT, ">$outputDir/neoantigen/MHC_Class_I/tmp.fa"; print OUT; $_ =~ /MT\.(\d+)\./; my $id = $1; my $seq = ; print OUT $seq; chomp $seq; $fa{$id} = $seq; system "predict.py -m netchop -n $outputDir/neoantigen/MHC_Class_I/tmp.fa >>$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt"; close OUT; } } unlink "$outputDir/neoantigen/MHC_Class_I/tmp.fa"; my %score; open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt"; while () { next unless /^\d+/; chomp; my @line = split; $line[3] =~ /MT\.(\d+)\./; $score{$1}{$line[0]} = $line[2]; } open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.tsv"; open OUT, ">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.txt"; open OUT2, ">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt"; #my %neopt; my $head = ; chomp $head; print OUT "$head\tcleavage_score\n"; while () { chomp; my @line = split(/\t/); $line[44] =~ /^(\d+)\./; my $id = $1; my $pep = $line[18]; if (exists $fa{$id}) { my $index = index($fa{$id}, $pep) + length($pep); my $cleavage_score = $score{$id}{$index}; print OUT "$_\t$cleavage_score\n"; if ($line[21] <= 5000 and ($line[23] eq "NA" or $line[23] >= 1)) { print OUT2 "$_\t$cleavage_score\n"; } } else { print OUT "$_\tNA\n"; } } system "sort -k 22 -n $outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt >$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt"; unlink "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt"; open SORT, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt"; open OUT3, ">$outputDir/neoantigen/MHC_Class_I/neoantigen.txt"; print OUT3 "序号\tHLA分型\t基因\t多肽\t亲和力\t剪切效率\n"; my %pep; my $bool = 0; while () { chomp; my @line = split(/\t/); if (not exists $pep{$line[18]}) { $pep{$line[18]}++; $bool += 1; print OUT3 "$bool\t$line[14]\t$line[11]\t$line[18]\t$line[21]\t$line[53]\n"; } }