# pipeline/script/predict_neoantigen.sh
#
# Bash script, executable file (repository listing: 81 lines, 3.0 KiB).

# ---- Positional arguments ---------------------------------------------------
#   $1  pipeline output directory (mutation/, alignment/ and neoantigen/ are
#       addressed beneath it)
#   $2  sample name used as the prefix of the VCF file names
#   $3  tumor sample identifier (names the BAM file and is passed to pvacseq)
#   $4  directory that holds the helper Perl scripts
outputDir="${1}"
name="${2}"
tumor="${3}"
codes_dir="${4}"

# ---- Fixed settings ---------------------------------------------------------
# Value forwarded to netchop.pl as its last argument.
max_peptide_length=11
# Working directory for every neoantigen intermediate and result.
neoDir="${outputDir}/neoantigen"
# Reference FASTA given to ReadBackedPhasing (-R).
ref='/dataseq/jmdna/database/genome/hg19/hg19.fa'
##step1: VEP annotation and variant filtering

# Annotate one VCF with VEP using the offline cache and the plugins the
# downstream tools rely on (Downstream, Wildtype, Frameshift).
#   $1  input VCF
#   $2  annotated VCF to write (an existing file is overwritten)
# Returns the exit status of vep.
# NOTE(review): the VEP documentation lists --transcript_version as a boolean
# flag; confirm whether the trailing `100` was meant for --cache_version.
# It is kept unchanged here.
annotate_with_vep() {
  vep \
    --input_file "$1" --output_file "$2" \
    --format vcf --vcf --symbol --terms SO --tsl \
    --hgvs --fasta /dataseq/jmdna/database/genome/hg19/ \
    --offline --cache --dir /dataseq/jmdna/software/.vep/ \
    --plugin Downstream --plugin Wildtype --plugin Frameshift \
    --pick \
    --transcript_version 100 \
    --force_overwrite
}

# Stop at the first failing step: filtering a missing or stale annotated VCF
# would silently feed wrong variants into phasing and epitope prediction.
annotate_with_vep \
  "${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf" \
  "${neoDir}/${name}_somatic_vepanno.vcf" ||
  { printf 'error: VEP annotation of somatic variants failed\n' >&2; exit 1; }

annotate_with_vep \
  "${outputDir}/mutation/${name}.snp.indel.Germline.vcf" \
  "${neoDir}/${name}_germline_vepanno.vcf" ||
  { printf 'error: VEP annotation of germline variants failed\n' >&2; exit 1; }

# The filter script receives: mode, tumor sample, sample name, annotated VCF,
# output directory.
perl "${codes_dir}/neoantigen.variant.filter.pl" germline "${tumor}" "${name}" \
  "${neoDir}/${name}_germline_vepanno.vcf" "${neoDir}" ||
  { printf 'error: germline variant filter failed\n' >&2; exit 1; }

perl "${codes_dir}/neoantigen.variant.filter.pl" somatic "${tumor}" "${name}" \
  "${neoDir}/${name}_somatic_vepanno.vcf" "${neoDir}" ||
  { printf 'error: somatic variant filter failed\n' >&2; exit 1; }
##step2: phase the somatic and germline variants together

# Both jar locations come from the environment. Abort with a clear message when
# one is missing; otherwise `java -jar` would take the tool name as the jar.
: "${GATK:?GATK must be set to the path of the GATK jar}"
: "${PICARD:?PICARD must be set to the path of the Picard jar}"

combined_vcf="${neoDir}/${name}_combined_somatic_plus_germline.vcf"
sorted_vcf="${neoDir}/${name}_combined_somatic_plus_germline.sorted.vcf"

# Merge the filtered somatic and germline call sets into one VCF.
java -jar "${GATK}" MergeVcfs \
  -I "${neoDir}/${name}_somatic_vepanno_filter.vcf" \
  -I "${neoDir}/${name}_germline_vepanno_filter.vcf" \
  -O "${combined_vcf}" ||
  { printf 'error: MergeVcfs failed\n' >&2; exit 1; }

# Sort the merged VCF according to the hg19 sequence dictionary.
java -Xmx16g -jar "${PICARD}" SortVcf \
  I="${combined_vcf}" \
  O="${sorted_vcf}" \
  SEQUENCE_DICTIONARY=/dataseq/jmdna/database/genome/hg19/hg19.dict ||
  { printf 'error: SortVcf failed\n' >&2; exit 1; }

# Phase the variants against the tumor BAM. -L is given the same VCF as
# --variant, so only the variant sites themselves are traversed.
java -Xmx16g -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar \
  -T ReadBackedPhasing \
  -R "${ref}" \
  -I "${outputDir}/alignment/${tumor}.rmdup.bam" \
  --variant "${sorted_vcf}" \
  -L "${sorted_vcf}" \
  --cacheWindowSize 90 \
  -o "${neoDir}/${name}.phased.vcf" ||
  { printf 'error: ReadBackedPhasing failed\n' >&2; exit 1; }
##step3: bgzip-compress and tabix-index the VCFs handed to pVACseq
# The phased VCF and the filtered somatic VCF get identical treatment, so they
# share one loop. Each .gz is written next to its source file.
for vcf in \
  "${neoDir}/${name}.phased.vcf" \
  "${neoDir}/${name}_somatic_vepanno_filter.vcf"; do
  bgzip -c "${vcf}" > "${vcf}.gz" ||
    { printf 'error: bgzip failed for %s\n' "${vcf}" >&2; exit 1; }
  # -f rebuilds the index even when one from an earlier run exists; without it
  # tabix refuses and a stale .tbi would sit next to the fresh .gz.
  tabix -f -p vcf "${vcf}.gz" ||
    { printf 'error: tabix failed for %s\n' "${vcf}.gz" >&2; exit 1; }
done
# Print the HLA allele list found in an HLA typing result directory.
#   $1  directory containing the typing result table(s) matching *tsv
# Output: one comma-separated line built from fields 2-7 of the table's second
#         line, each prefixed with "HLA-"; nothing if the table has no second
#         line.
# Returns: non-zero when no table exists or awk fails.
# Only the first matching table (in glob order) is read, so extra TSV files in
# the directory cannot shift which line is parsed.
read_hla_alleles() {
  local hla_dir=$1
  local -a tables=("${hla_dir}"/*tsv)
  if [ ! -e "${tables[0]}" ]; then
    printf 'error: no HLA typing table (*tsv) found in %s\n' "${hla_dir}" >&2
    return 1
  fi
  awk 'FNR == 2 {
         OFS = ","
         print "HLA-" $2, "HLA-" $3, "HLA-" $4, "HLA-" $5, "HLA-" $6, "HLA-" $7
         exit
       }' "${tables[0]}"
}

HLA=$(read_hla_alleles "${neoDir}/HLA")
# IEDB recommends NetMHCpan; NetMHCcons has also been reported to perform well
# (PMID: 31204427).
# Candidate epitope lengths: 8,9,10,11 (the run below requests 9,10,11).

# pvacseq takes the allele list as a positional argument. An empty value would
# silently shift the remaining arguments, so refuse to start without it.
if [ -z "${HLA}" ]; then
  printf 'error: HLA allele list is empty; check %s\n' "${neoDir}/HLA" >&2
  exit 1
fi

# Run pVACseq inside the pvactools container with ${neoDir} mounted as /data.
# Positional arguments after the options: input VCF, tumor sample name, HLA
# alleles, prediction algorithm, output directory.
# ${HLA} is quoted because allele names contain `*`, which the shell would
# otherwise try to expand as a glob.
docker run --rm -u "${UID}:1012" -v "${neoDir}:/data" griffithlab/pvactools:2.0.7 pvacseq run \
  -e1 9,10,11 \
  --iedb-install-directory /opt/iedb/ \
  -t 10 \
  -k \
  --normal-sample-name NORMAL \
  -p "/data/${name}.phased.vcf.gz" \
  "/data/${name}_somatic_vepanno_filter.vcf.gz" \
  "${tumor}" \
  "${HLA}" \
  NetMHCpan \
  /data ||
  { printf 'error: pvacseq run failed\n' >&2; exit 1; }
# Options currently disabled:
#--netmhc-stab
#--net-chop-method cterm \
#--net-chop-threshold 0 \
#--pass-only \
# Convert the pVACseq "all epitopes" table(s) to Unix line endings in place.
# dos2unix is called directly: wrapping it in backticks would run it and then
# try to execute whatever it printed on stdout as a second command.
# The glob stays outside the quotes so it still expands to the result files.
# A failure here is reported but does not stop the run.
dos2unix "${neoDir}"/MHC_Class_I/*.all_epitopes.tsv ||
  printf 'warning: dos2unix failed on %s\n' "${neoDir}/MHC_Class_I/*.all_epitopes.tsv" >&2

# Final step: hand over to the netchop helper script. Its exit status becomes
# the exit status of this script.
perl "${codes_dir}/netchop.pl" "${outputDir}" "${name}" "${tumor}" "${max_peptide_length}"