# Neoantigen prediction for one tumor sample.
#
# Steps performed by the command block, in order:
#   0. HLA typing: razers3 maps the normal sample's FASTQs against the OptiType
#      HLA reference, the hits are converted back to FASTQ, and OptiType is run.
#   1. VEP annotation of the somatic and germline VCFs, followed by filter_vep
#      and the filter_neoantigen.pl script.
#   2. The two filtered VCFs are merged/sorted with Picard SortVcf and phased
#      with GATK ReadBackedPhasing against the tumor BAM.
#   3. The phased and somatic VCFs are bgzipped and tabix-indexed, then pvacseq
#      is run (via docker) with NetMHCpan on the typed HLA alleles.
#
# Inputs:
#   tumor            tumor sample name; used in output file names and as the
#                    sample name passed to pvacseq and filter_neoantigen.pl
#   normal           normal sample name; used to glob FASTQs in input_dir and as
#                    the OptiType prefix.
#                    NOTE(review): declared optional but the HLA step cannot
#                    find FASTQs without it - confirm callers always supply it.
#   input_dir        directory globbed for *_<normal>_*1.fq.gz / *2.fq.gz
#   output_dir       base directory; everything is written under
#                    <output_dir>/neoantigen
#   ref              reference FASTA passed to GATK (-R)
#   tumor_rmdup_bam  tumor BAM passed to GATK ReadBackedPhasing (-I)
#   somatic_vcf      somatic variant calls annotated by VEP
#   germline_vcf     germline variant calls annotated by VEP
#   sample_type      forwarded verbatim to filter_neoantigen.pl
task run_neoantigen {
    String tumor
    String? normal
    String input_dir
    String output_dir
    String ref
    String tumor_rmdup_bam
    String somatic_vcf
    String germline_vcf
    String sample_type

    command <<<
        # Abort on the first failing step. Without this the task exit status is
        # that of the final dos2unix call and upstream failures go unnoticed.
        set -eo pipefail

        # -p also creates the parent neoantigen directory when it is missing and
        # is a no-op when the directory already exists.
        mkdir -p ${output_dir}/neoantigen/hla

        # step0: HLA typing on the normal sample (razers3 pre-filter + OptiType)
        razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_1.bam \
            /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${input_dir}/*_${normal}_*1.fq.gz
        samtools bam2fq ${output_dir}/neoantigen/hla/fished_1.bam > ${output_dir}/neoantigen/hla/${normal}_1_fished.fastq
        rm ${output_dir}/neoantigen/hla/fished_1.bam

        razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_2.bam \
            /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${input_dir}/*_${normal}_*2.fq.gz
        samtools bam2fq ${output_dir}/neoantigen/hla/fished_2.bam > ${output_dir}/neoantigen/hla/${normal}_2_fished.fastq
        rm ${output_dir}/neoantigen/hla/fished_2.bam

        /dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py \
            -i ${output_dir}/neoantigen/hla/${normal}_1_fished.fastq \
            ${output_dir}/neoantigen/hla/${normal}_2_fished.fastq --dna -v \
            --prefix ${normal} -o ${output_dir}/neoantigen/hla/

        # step1: VEP annotation and variant filter
        # NOTE(review): the VEP documentation lists --transcript_version as a
        # flag without a value; the trailing "100" is kept as-is - confirm
        # whether a cache version option was intended.
        vep \
            --input_file ${somatic_vcf} \
            --output_file ${output_dir}/neoantigen/${tumor}_somatic_vepanno.vcf \
            --fasta /home/install/ref/hg19/ \
            --dir /dataseq/jmdna/software/.vep/ \
            --format vcf \
            --vcf \
            --symbol \
            --terms SO \
            --tsl \
            --hgvs \
            --offline \
            --cache \
            --plugin Downstream \
            --plugin Wildtype \
            --plugin Frameshift \
            --pick \
            --transcript_version 100 \
            --force_overwrite

        vep \
            --input_file ${germline_vcf} \
            --output_file ${output_dir}/neoantigen/${tumor}_germline_vepanno.vcf \
            --fasta /home/install/ref/hg19/ \
            --offline --cache --dir /dataseq/jmdna/software/.vep/ \
            --format vcf \
            --vcf \
            --symbol \
            --terms SO \
            --tsl \
            --hgvs \
            --plugin Downstream --plugin Wildtype --plugin Frameshift \
            --pick \
            --transcript_version 100 \
            --force_overwrite

        # Drop consequence classes listed in the filter expression below.
        filter_vep \
            -i ${output_dir}/neoantigen/${tumor}_somatic_vepanno.vcf \
            -o ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter.vcf \
            --format vcf \
            --force_overwrite \
            --filter "(Consequence != synonymous_variant) \
            and (Consequence != intron_variant) \
            and (Consequence != intergenic_variant) \
            and (Consequence != 3_prime_UTR_variant) \
            and (Consequence != upstream_gene_variant) \
            and (Consequence != downstream_gene_variant) \
            and (Consequence != 5_prime_UTR_variant)"

        filter_vep \
            -i ${output_dir}/neoantigen/${tumor}_germline_vepanno.vcf \
            -o ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter.vcf \
            --format vcf \
            --force_overwrite \
            --filter "(Consequence != synonymous_variant) \
            and (Consequence != intron_variant) \
            and (Consequence != intergenic_variant) \
            and (Consequence != 3_prime_UTR_variant) \
            and (Consequence != upstream_gene_variant) \
            and (Consequence != downstream_gene_variant) \
            and (Consequence != 5_prime_UTR_variant)"

        filter_neoantigen.pl somatic \
            ${tumor} \
            ${sample_type} \
            ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter.vcf \
            ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf

        filter_neoantigen.pl germline \
            ${tumor} \
            ${sample_type} \
            ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter.vcf \
            ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf

        # step2: combine somatic + germline variants and phase them
        java -Xmx16g -jar $PICARD SortVcf \
            I=${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \
            I=${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \
            O=${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
            SEQUENCE_DICTIONARY=/home/install/ref/hg19/hg19.dict

        java -Xmx16g -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar \
            -T ReadBackedPhasing \
            -R ${ref} \
            -I ${tumor_rmdup_bam} \
            --variant ${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
            -L ${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
            --cacheWindowSize 90 \
            -o ${output_dir}/neoantigen/${tumor}.phased.vcf

        # step3: bgzip and index the VCFs
        bgzip -c ${output_dir}/neoantigen/${tumor}.phased.vcf > ${output_dir}/neoantigen/${tumor}.phased.vcf.gz
        tabix -p vcf ${output_dir}/neoantigen/${tumor}.phased.vcf.gz
        bgzip -c ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf > ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz
        tabix -p vcf ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz

        # Build the comma-separated allele list from columns 2-7 of the second
        # line of the OptiType result table, each prefixed with "HLA-".
        HLA=$(head -n 2 ${output_dir}/neoantigen/hla/*tsv | tail -n 1 | awk '{OFS=","}{print "HLA-"$2,"HLA-"$3,"HLA-"$4,"HLA-"$5,"HLA-"$6,"HLA-"$7}'|xargs echo -n)

        # IEDB recommends NetMHCpan, but NetMHCcons also performs well (PMID: 31204427)
        # epitope lengths considered: 8,9,10,11 (currently 9,10,11 are run)
        # "$HLA" is quoted because allele names contain '*', which the shell
        # would otherwise treat as a glob pattern.
        docker run --rm -u $UID:$(id -g $UID) -v ${output_dir}/neoantigen/:/data griffithlab/pvactools:2.0.7 pvacseq run \
            -e1 9,10,11 \
            --iedb-install-directory /opt/iedb/ \
            -t 10 \
            -k \
            --normal-sample-name NORMAL \
            -p /data/${tumor}.phased.vcf.gz \
            /data/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz \
            ${tumor} \
            "$HLA" \
            NetMHCpan \
            /data

        dos2unix ${output_dir}/neoantigen/MHC_Class_I/*.all_epitopes.tsv
        # perl ${output_dir}/netchop.pl $outputDir $name $tumor $max_peptide_length
    >>>
}

# Wrapper workflow: runs run_neoantigen when `run` is true.
# `umi` selects the sample_type passed to the task: 'c' when true, 't' otherwise.
workflow call_neoantigen {
    Boolean run=true
    String tumor
    String? normal
    String input_dir
    String output_dir
    String ref
    String tumor_rmdup_bam
    String somatic_vcf
    String germline_vcf
    Boolean umi

    if (run) {
        call run_neoantigen {
            input:
                tumor=tumor,
                normal=normal,
                input_dir=input_dir,
                output_dir=output_dir,
                ref=ref,
                tumor_rmdup_bam=tumor_rmdup_bam,
                somatic_vcf=somatic_vcf,
                germline_vcf=germline_vcf,
                sample_type=if umi then 'c' else 't'
        }
    }
}