pipeline/wdl/neoantigen.wdl

181 lines
6.7 KiB
Plaintext
Executable File

# Task: run_neoantigen
#
# End-to-end neoantigen prediction for one tumor sample. The command:
#   1. fishes HLA reads out of the normal sample's FASTQs with razers3 and
#      types them with OptiType,
#   2. annotates the somatic and germline VCFs with VEP, then filters them with
#      filter_vep and filter_neoantigen.pl,
#   3. merges both filtered VCFs (Picard SortVcf) and phases them against the
#      tumor BAM (GATK 3.7 ReadBackedPhasing),
#   4. bgzips/tabix-indexes the phased and somatic VCFs,
#   5. runs pVACseq (griffithlab/pvactools:2.0.7 docker image) with NetMHCpan.
#
# Inputs:
#   tumor            tumor sample name; used as file-name prefix and passed to pvacseq
#   normal           normal sample name; used to glob "<input_dir>/*_<normal>_*{1,2}.fq.gz"
#                    and as the OptiType prefix. Declared optional for interface
#                    compatibility, but the command cannot run without it.
#   input_dir        directory holding the raw FASTQ files
#   output_dir       base output directory; everything is written under <output_dir>/neoantigen
#   ref              reference FASTA handed to GATK (-R)
#   tumor_rmdup_bam  tumor BAM handed to GATK (-I)
#   somatic_vcf      somatic variant calls (VEP input)
#   germline_vcf     germline variant calls (VEP input)
#   sample_type      passed through verbatim as the third argument of filter_neoantigen.pl
#
# The task declares no outputs; results are left on disk under <output_dir>/neoantigen.
# The environment must provide $PICARD, razers3, samtools, vep, filter_vep,
# filter_neoantigen.pl, bgzip, tabix, docker and dos2unix.
#
# NOTE: the shell script below is kept flush-left on purpose so that the
# backslash-continued --filter strings do not pick up extra leading whitespace.
task run_neoantigen {
String tumor
String? normal
String input_dir
String output_dir
String ref
String tumor_rmdup_bam
String somatic_vcf
String germline_vcf
String sample_type
command <<<
# Stop at the first failing step (including failures inside pipelines) instead
# of carrying on with missing or stale intermediate files.
set -eo pipefail
# HLA typing below is driven entirely by the normal sample; fail early with a
# clear message when the optional input was not supplied.
if [ -z "${normal}" ]; then
echo "run_neoantigen: input 'normal' is required for HLA typing but was not provided" >&2
exit 1
fi
# -p also creates the parent neoantigen directory and is a no-op when it exists.
mkdir -p ${output_dir}/neoantigen/hla
#step0: HLA typing - fish HLA reads from each mate file, then run OptiType
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_1.bam \
/dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${input_dir}/*_${normal}_*1.fq.gz
samtools bam2fq ${output_dir}/neoantigen/hla/fished_1.bam > ${output_dir}/neoantigen/hla/${normal}_1_fished.fastq
rm ${output_dir}/neoantigen/hla/fished_1.bam
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_2.bam \
/dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${input_dir}/*_${normal}_*2.fq.gz
samtools bam2fq ${output_dir}/neoantigen/hla/fished_2.bam > ${output_dir}/neoantigen/hla/${normal}_2_fished.fastq
rm ${output_dir}/neoantigen/hla/fished_2.bam
/dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py \
-i ${output_dir}/neoantigen/hla/${normal}_1_fished.fastq \
${output_dir}/neoantigen/hla/${normal}_2_fished.fastq --dna -v \
--prefix ${normal} -o ${output_dir}/neoantigen/hla/
#step1:vep annotation and variant filter
# NOTE(review): --transcript_version is given a value (100) here; confirm
# against the VEP option reference whether --cache_version was intended.
# NOTE(review): --fasta points at a fixed hg19 directory rather than the 'ref'
# input - confirm the two always describe the same assembly.
vep \
--input_file ${somatic_vcf} \
--output_file ${output_dir}/neoantigen/${tumor}_somatic_vepanno.vcf \
--fasta /home/install/ref/hg19/ \
--dir /dataseq/jmdna/software/.vep/ \
--format vcf \
--vcf \
--symbol \
--terms SO \
--tsl \
--hgvs \
--offline \
--cache \
--plugin Downstream \
--plugin Wildtype \
--plugin Frameshift \
--pick \
--transcript_version 100 \
--force_overwrite
vep \
--input_file ${germline_vcf} \
--output_file ${output_dir}/neoantigen/${tumor}_germline_vepanno.vcf \
--fasta /home/install/ref/hg19/ \
--offline --cache --dir /dataseq/jmdna/software/.vep/ \
--format vcf \
--vcf \
--symbol \
--terms SO \
--tsl \
--hgvs \
--plugin Downstream --plugin Wildtype --plugin Frameshift \
--pick \
--transcript_version 100 \
--force_overwrite
# Drop the listed non-coding / synonymous consequence classes from both call sets.
filter_vep \
-i ${output_dir}/neoantigen/${tumor}_somatic_vepanno.vcf \
-o ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter.vcf \
--format vcf \
--force_overwrite \
--filter "(Consequence != synonymous_variant) \
and (Consequence != intron_variant) \
and (Consequence != intergenic_variant) \
and (Consequence != 3_prime_UTR_variant) \
and (Consequence != upstream_gene_variant) \
and (Consequence != downstream_gene_variant) \
and (Consequence != 5_prime_UTR_variant)"
filter_vep \
-i ${output_dir}/neoantigen/${tumor}_germline_vepanno.vcf \
-o ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter.vcf \
--format vcf \
--force_overwrite \
--filter "(Consequence != synonymous_variant) \
and (Consequence != intron_variant) \
and (Consequence != intergenic_variant) \
and (Consequence != 3_prime_UTR_variant) \
and (Consequence != upstream_gene_variant) \
and (Consequence != downstream_gene_variant) \
and (Consequence != 5_prime_UTR_variant)"
# Project-specific filter; arguments are: mode, tumor name, sample type, input VCF, output VCF.
filter_neoantigen.pl somatic \
${tumor} \
${sample_type} \
${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter.vcf \
${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf
filter_neoantigen.pl germline \
${tumor} \
${sample_type} \
${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter.vcf \
${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf
#step2 phasing variant
# Merge somatic + germline calls into one sorted VCF, then phase them using the tumor BAM.
java -Xmx16g -jar $PICARD SortVcf \
I=${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \
I=${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \
O=${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
SEQUENCE_DICTIONARY=/home/install/ref/hg19/hg19.dict
java -Xmx16g -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar \
-T ReadBackedPhasing \
-R ${ref} \
-I ${tumor_rmdup_bam} \
--variant ${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
-L ${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
--cacheWindowSize 90 \
-o ${output_dir}/neoantigen/${tumor}.phased.vcf
##step3:bgzip and index the VCF
bgzip -c ${output_dir}/neoantigen/${tumor}.phased.vcf > ${output_dir}/neoantigen/${tumor}.phased.vcf.gz
tabix -p vcf ${output_dir}/neoantigen/${tumor}.phased.vcf.gz
bgzip -c ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf > ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz
tabix -p vcf ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz
# Build a comma-separated allele list from columns 2-7 of the second line of
# the OptiType result table, prefixing each allele with "HLA-".
HLA=`head -n 2 ${output_dir}/neoantigen/hla/*tsv | tail -n 1 | awk '{OFS=","}{print "HLA-"$2,"HLA-"$3,"HLA-"$4,"HLA-"$5,"HLA-"$6,"HLA-"$7}'|xargs echo -n`
#IEDB recommends NetMHCpan, but NetMHCcons also performs well (PMID: 31204427)
#8,9,10,11
# NOTE(review): --normal-sample-name is fixed to NORMAL; presumably the VCF
# sample column carries that name - confirm against the upstream caller.
# The allele list is quoted because allele names contain '*', which the shell
# would otherwise treat as a glob pattern.
docker run --rm -u $UID:$(id -g $UID) -v ${output_dir}/neoantigen/:/data griffithlab/pvactools:2.0.7 pvacseq run \
-e1 9,10,11 \
--iedb-install-directory /opt/iedb/ \
-t 10 \
-k \
--normal-sample-name NORMAL \
-p /data/${tumor}.phased.vcf.gz \
/data/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz \
${tumor} \
"$HLA" \
NetMHCpan \
/data
# Normalise line endings of the pVACseq all-epitopes report.
dos2unix ${output_dir}/neoantigen/MHC_Class_I/*.all_epitopes.tsv
# perl ${output_dir}/netchop.pl $outputDir $name $tumor $max_peptide_length
>>>
}
# Workflow: call_neoantigen
#
# Thin wrapper that forwards its inputs to the run_neoantigen task.
# The call is skipped entirely when `run` is false. `umi` selects the
# sample_type code handed to the task: 'c' when true, 't' otherwise.
workflow call_neoantigen {
  # Switch for enabling/disabling the whole neoantigen step (on by default).
  Boolean run = true

  # Sample identifiers.
  String tumor
  String? normal

  # Locations of raw reads and of the result directory.
  String input_dir
  String output_dir

  # Reference genome and upstream pipeline products.
  String ref
  String tumor_rmdup_bam
  String somatic_vcf
  String germline_vcf

  # Chooses between the two sample_type codes below.
  Boolean umi

  if (run) {
    call run_neoantigen {
      input:
        sample_type = if umi then 'c' else 't',
        tumor = tumor,
        normal = normal,
        input_dir = input_dir,
        output_dir = output_dir,
        ref = ref,
        tumor_rmdup_bam = tumor_rmdup_bam,
        somatic_vcf = somatic_vcf,
        germline_vcf = germline_vcf
    }
  }
}