pipeline/wdl/neoantigen.wdl

188 lines
6.8 KiB
Plaintext
Raw Normal View History

2023-12-29 10:11:01 +08:00
# neoantigen
2023-12-25 14:06:30 +08:00
2023-12-19 13:37:52 +08:00
# Task run_neoantigen
#
# Runs the neoantigen prediction steps for one tumor sample, writing everything
# under <output_dir>/neoantigen:
#   1. HLA typing: razers3 fishes HLA reads from the normal sample's FASTQ files,
#      then OptiType is run on the fished reads.
#   2. VEP annotation and filter_vep consequence filtering of the somatic and
#      germline VCFs, followed by filter_neoantigen.pl.
#   3. Picard SortVcf combines both filtered VCFs; GATK ReadBackedPhasing phases
#      the combined VCF against the tumor BAM.
#   4. bgzip/tabix of the phased VCF and of the filtered somatic VCF.
#   5. pvacseq (docker image griffithlab/pvactools:2.0.7) with NetMHCpan, then
#      dos2unix on the all_epitopes table and netchop.pl.
#
# Inputs:
#   tumor           - tumor sample name; used as output file prefix and as the
#                     pvacseq sample name.
#   normal          - normal sample name; used to locate FASTQ files in input_dir
#                     (*_<normal>_*1.fq.gz / *_<normal>_*2.fq.gz) and as the
#                     OptiType prefix. Declared optional, but the command cannot
#                     run without it, so the script aborts when it is empty.
#   input_dir       - directory holding the FASTQ files.
#   output_dir      - base output directory.
#   ref             - reference FASTA passed to GATK (-R).
#   tumor_rmdup_bam - tumor BAM passed to GATK ReadBackedPhasing (-I).
#   somatic_vcf     - somatic variant VCF annotated with VEP.
#   germline_vcf    - germline variant VCF annotated with VEP.
#   sample_type     - passed through unchanged to filter_neoantigen.pl.
#
# The script expects $PICARD to be set in the execution environment.
task run_neoantigen {
    String tumor
    String? normal
    String input_dir
    String output_dir
    String ref
    String tumor_rmdup_bam
    String somatic_vcf
    String germline_vcf
    String sample_type

    command <<<
        # Stop at the first failing step instead of running later steps on
        # missing or partial files.
        set -eo pipefail

        # HLA typing below depends on the normal sample name.
        if [ -z "${normal}" ]; then
            echo "run_neoantigen: 'normal' sample name is required for HLA typing" >&2
            exit 1
        fi

        mkdir -p ${output_dir}/neoantigen/hla

        # HLA typing: fish HLA reads per mate with razers3, convert to FASTQ, run OptiType.
        razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_1.bam \
            /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${input_dir}/*_${normal}_*1.fq.gz
        samtools bam2fq ${output_dir}/neoantigen/hla/fished_1.bam > ${output_dir}/neoantigen/hla/${normal}_1_fished.fastq
        rm ${output_dir}/neoantigen/hla/fished_1.bam

        razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_2.bam \
            /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${input_dir}/*_${normal}_*2.fq.gz
        samtools bam2fq ${output_dir}/neoantigen/hla/fished_2.bam > ${output_dir}/neoantigen/hla/${normal}_2_fished.fastq
        rm ${output_dir}/neoantigen/hla/fished_2.bam

        /dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py \
            -i ${output_dir}/neoantigen/hla/${normal}_1_fished.fastq \
            ${output_dir}/neoantigen/hla/${normal}_2_fished.fastq --dna -v \
            --prefix ${normal} -o ${output_dir}/neoantigen/hla/

        # step1: VEP annotation and variant filtering
        # NOTE(review): --transcript_version appears to be a value-less flag in VEP;
        # confirm what the trailing "100" is meant to do (possibly a cache version).
        vep \
            --input_file ${somatic_vcf} \
            --output_file ${output_dir}/neoantigen/${tumor}_somatic_vepanno.vcf \
            --fasta /home/install/ref/hg19/ \
            --dir /dataseq/jmdna/software/.vep/ \
            --format vcf \
            --vcf \
            --symbol \
            --terms SO \
            --tsl \
            --hgvs \
            --offline \
            --cache \
            --plugin Downstream \
            --plugin Wildtype \
            --plugin Frameshift \
            --pick \
            --transcript_version 100 \
            --force_overwrite

        vep \
            --input_file ${germline_vcf} \
            --output_file ${output_dir}/neoantigen/${tumor}_germline_vepanno.vcf \
            --fasta /home/install/ref/hg19/ \
            --offline --cache --dir /dataseq/jmdna/software/.vep/ \
            --format vcf \
            --vcf \
            --symbol \
            --terms SO \
            --tsl \
            --hgvs \
            --plugin Downstream --plugin Wildtype --plugin Frameshift \
            --pick \
            --transcript_version 100 \
            --force_overwrite

        # Same consequence filter for somatic and germline calls; defined once.
        # Referenced without braces so WDL does not try to interpolate it.
        consequence_filter="(Consequence != synonymous_variant) and (Consequence != intron_variant) and (Consequence != intergenic_variant) and (Consequence != 3_prime_UTR_variant) and (Consequence != upstream_gene_variant) and (Consequence != downstream_gene_variant) and (Consequence != 5_prime_UTR_variant)"

        filter_vep \
            -i ${output_dir}/neoantigen/${tumor}_somatic_vepanno.vcf \
            -o ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter.vcf \
            --format vcf \
            --force_overwrite \
            --filter "$consequence_filter"

        filter_vep \
            -i ${output_dir}/neoantigen/${tumor}_germline_vepanno.vcf \
            -o ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter.vcf \
            --format vcf \
            --force_overwrite \
            --filter "$consequence_filter"

        filter_neoantigen.pl somatic \
            ${tumor} \
            ${sample_type} \
            ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter.vcf \
            ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf

        filter_neoantigen.pl germline \
            ${tumor} \
            ${sample_type} \
            ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter.vcf \
            ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf

        # step2: combine somatic + germline calls and phase them against the tumor BAM
        # NOTE(review): the sequence dictionary is a fixed hg19 path while the
        # reference FASTA comes from the 'ref' input; confirm they always match.
        java -Xmx16g -jar $PICARD SortVcf \
            I=${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \
            I=${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \
            O=${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
            SEQUENCE_DICTIONARY=/home/install/ref/hg19/hg19.dict

        java -Xmx16g -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar \
            -T ReadBackedPhasing \
            -R ${ref} \
            -I ${tumor_rmdup_bam} \
            --variant ${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
            -L ${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
            --cacheWindowSize 90 \
            -o ${output_dir}/neoantigen/${tumor}.phased.vcf

        # step3: bgzip and index the VCFs
        bgzip -c ${output_dir}/neoantigen/${tumor}.phased.vcf > ${output_dir}/neoantigen/${tumor}.phased.vcf.gz
        tabix -p vcf ${output_dir}/neoantigen/${tumor}.phased.vcf.gz
        bgzip -c ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf > ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz
        tabix -p vcf ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz

        # Build the comma-separated allele list from columns 2-7 of the second
        # line of the OptiType result table, each prefixed with "HLA-".
        HLA=$(head -n 2 ${output_dir}/neoantigen/hla/*tsv | tail -n 1 | awk '{OFS=","}{print "HLA-"$2,"HLA-"$3,"HLA-"$4,"HLA-"$5,"HLA-"$6,"HLA-"$7}'|xargs echo -n)

        # IEDB recommends NetMHCpan; NetMHCcons is also reported to perform well (PMID: 31204427).
        # Candidate epitope lengths: 8,9,10,11 (only 9,10,11 are passed to -e1 below).
        docker run --rm -u $UID:$(id -g $UID) -v ${output_dir}/neoantigen/:/data griffithlab/pvactools:2.0.7 pvacseq run \
            -e1 9,10,11 \
            --iedb-install-directory /opt/iedb/ \
            -t 10 \
            -k \
            --normal-sample-name NORMAL \
            -p /data/${tumor}.phased.vcf.gz \
            /data/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz \
            ${tumor} \
            $HLA \
            NetMHCpan \
            /data

        dos2unix ${output_dir}/neoantigen/MHC_Class_I/*.all_epitopes.tsv
        netchop.pl ${output_dir} ${tumor}
    >>>
}
# Workflow call_neoantigen
#
# Calls run_neoantigen when 'run' is true (the default) and exposes the path of
# the expected result table.
#
# Inputs mirror the task inputs, except:
#   run - set to false to skip the neoantigen task.
#   umi - selects the sample_type passed to the task: 'c' when true, 't' otherwise.
#
# Output:
#   neoantigen_txt - path string built from output_dir; it is not produced by a
#                    task output, so it is emitted even when 'run' is false.
#                    NOTE(review): presumably written by netchop.pl - confirm.
workflow call_neoantigen {
    Boolean run=true
    String tumor
    String? normal
    String input_dir
    String output_dir
    String ref
    String tumor_rmdup_bam
    String somatic_vcf
    String germline_vcf
    Boolean umi

    if (run) {
        call run_neoantigen {
            input:
                tumor=tumor,
                normal=normal,
                input_dir=input_dir,
                output_dir=output_dir,
                ref=ref,
                tumor_rmdup_bam=tumor_rmdup_bam,
                somatic_vcf=somatic_vcf,
                germline_vcf=germline_vcf,
                sample_type=if umi then 'c' else 't'
        }
    }

    output {
        # Path separator after output_dir matches how the task builds its paths
        # (<output_dir>/neoantigen/...).
        String neoantigen_txt = "${output_dir}/neoantigen/MHC_Class_I/neoantigen.txt"
    }
}