2023-12-29 10:11:01 +08:00
|
|
|
# neoantigen
|
2023-12-25 14:06:30 +08:00
|
|
|
|
2023-12-19 13:37:52 +08:00
|
|
|
# Neoantigen prediction for one tumor sample.
#
# Steps performed by the command block, in order:
#   0. HLA typing: razers3 aligns the normal sample's FASTQ reads against the
#      OptiType HLA reference, the hits are converted back to FASTQ, and
#      OptiTypePipeline.py is run on them.
#   1. VEP annotation of the somatic and germline VCFs, followed by filter_vep
#      and the site-local filter_neoantigen.pl script.
#   2. Picard SortVcf merges both filtered VCFs; GATK 3.7 ReadBackedPhasing
#      phases the merged calls against the tumor BAM.
#   3. bgzip + tabix of the phased VCF and of the filtered somatic VCF.
#   4. pVACseq (docker image griffithlab/pvactools:2.0.7) is run with the HLA
#      alleles read from the OptiType result table, then dos2unix and the
#      site-local netchop.pl script post-process the results.
#
# Inputs:
#   tumor            tumor sample name; used in output file names and passed to pvacseq
#   normal           normal sample name; used to locate FASTQ files and as OptiType prefix.
#                    Declared optional, but the command cannot work without it (see guard below).
#   input_dir        directory containing the *_<normal>_*1.fq.gz / *2.fq.gz files
#   output_dir       base output directory; everything is written below <output_dir>/neoantigen
#   ref              reference FASTA passed to GATK (-R)
#   tumor_rmdup_bam  tumor BAM passed to GATK (-I)
#   somatic_vcf      somatic variant calls (VEP input)
#   germline_vcf     germline variant calls (VEP input)
#   sample_type      forwarded verbatim to filter_neoantigen.pl
#
# The task declares no outputs; results are left on disk under output_dir.
task run_neoantigen {
    String tumor
    String? normal
    String input_dir
    String output_dir
    String ref
    String tumor_rmdup_bam
    String somatic_vcf
    String germline_vcf
    String sample_type

    command <<<
        # 'normal' is an optional input, but every HLA-typing step below needs it.
        # An unset optional interpolates to an empty string, which would silently
        # turn the FASTQ globs into '*__*1.fq.gz' and break all later steps.
        if [ -z "${normal}" ]; then
            echo "run_neoantigen: input 'normal' is required for HLA typing but was not provided" >&2
            exit 1
        fi

        # mkdir -p is a no-op when the directory already exists.
        mkdir -p ${output_dir}/neoantigen/hla

        # step0: HLA typing from the normal sample (read 1, then read 2)
        razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_1.bam \
            /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${input_dir}/*_${normal}_*1.fq.gz
        samtools bam2fq ${output_dir}/neoantigen/hla/fished_1.bam > ${output_dir}/neoantigen/hla/${normal}_1_fished.fastq
        rm ${output_dir}/neoantigen/hla/fished_1.bam

        razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_2.bam \
            /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${input_dir}/*_${normal}_*2.fq.gz
        samtools bam2fq ${output_dir}/neoantigen/hla/fished_2.bam > ${output_dir}/neoantigen/hla/${normal}_2_fished.fastq
        rm ${output_dir}/neoantigen/hla/fished_2.bam

        /dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py \
            -i ${output_dir}/neoantigen/hla/${normal}_1_fished.fastq \
            ${output_dir}/neoantigen/hla/${normal}_2_fished.fastq --dna -v \
            --prefix ${normal} -o ${output_dir}/neoantigen/hla/

        # step1: VEP annotation and variant filter
        # NOTE(review): '--transcript_version 100' is kept as in the original; it looks
        # like '--cache_version 100' may have been intended -- confirm against the
        # installed VEP release before changing it.
        vep \
            --input_file ${somatic_vcf} \
            --output_file ${output_dir}/neoantigen/${tumor}_somatic_vepanno.vcf \
            --fasta /home/install/ref/hg19/ \
            --dir /dataseq/jmdna/software/.vep/ \
            --format vcf \
            --vcf \
            --symbol \
            --terms SO \
            --tsl \
            --hgvs \
            --offline \
            --cache \
            --plugin Downstream \
            --plugin Wildtype \
            --plugin Frameshift \
            --pick \
            --transcript_version 100 \
            --force_overwrite

        vep \
            --input_file ${germline_vcf} \
            --output_file ${output_dir}/neoantigen/${tumor}_germline_vepanno.vcf \
            --fasta /home/install/ref/hg19/ \
            --offline --cache --dir /dataseq/jmdna/software/.vep/ \
            --format vcf \
            --vcf \
            --symbol \
            --terms SO \
            --tsl \
            --hgvs \
            --plugin Downstream --plugin Wildtype --plugin Frameshift \
            --pick \
            --transcript_version 100 \
            --force_overwrite

        # Same consequence filter for somatic and germline calls: drop synonymous,
        # intronic, intergenic, UTR and up/downstream variants.
        VEP_FILTER="(Consequence != synonymous_variant) and (Consequence != intron_variant) and (Consequence != intergenic_variant) and (Consequence != 3_prime_UTR_variant) and (Consequence != upstream_gene_variant) and (Consequence != downstream_gene_variant) and (Consequence != 5_prime_UTR_variant)"

        filter_vep \
            -i ${output_dir}/neoantigen/${tumor}_somatic_vepanno.vcf \
            -o ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter.vcf \
            --format vcf \
            --force_overwrite \
            --filter "$VEP_FILTER"

        filter_vep \
            -i ${output_dir}/neoantigen/${tumor}_germline_vepanno.vcf \
            -o ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter.vcf \
            --format vcf \
            --force_overwrite \
            --filter "$VEP_FILTER"

        filter_neoantigen.pl somatic \
            ${tumor} \
            ${sample_type} \
            ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter.vcf \
            ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf

        filter_neoantigen.pl germline \
            ${tumor} \
            ${sample_type} \
            ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter.vcf \
            ${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf

        # step2: merge somatic + germline calls and phase them against the tumor BAM
        # PICARD is a shell environment variable expected to point at the Picard jar.
        java -Xmx16g -jar $PICARD SortVcf \
            I=${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf \
            I=${output_dir}/neoantigen/${tumor}_germline_vepanno_vepfilter_filter.vcf \
            O=${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
            SEQUENCE_DICTIONARY=/home/install/ref/hg19/hg19.dict

        java -Xmx16g -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar \
            -T ReadBackedPhasing \
            -R ${ref} \
            -I ${tumor_rmdup_bam} \
            --variant ${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
            -L ${output_dir}/neoantigen/${tumor}_combined.sorted.vcf \
            --cacheWindowSize 90 \
            -o ${output_dir}/neoantigen/${tumor}.phased.vcf

        # step3: bgzip and index the VCFs handed to pvacseq
        bgzip -c ${output_dir}/neoantigen/${tumor}.phased.vcf > ${output_dir}/neoantigen/${tumor}.phased.vcf.gz
        tabix -p vcf ${output_dir}/neoantigen/${tumor}.phased.vcf.gz

        bgzip -c ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf > ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz
        tabix -p vcf ${output_dir}/neoantigen/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz

        # Build the comma-separated allele list from columns 2-7 of the second line
        # of the OptiType result table, prefixing each allele with "HLA-".
        HLA=`head -n 2 ${output_dir}/neoantigen/hla/*tsv | tail -n 1 | awk '{OFS=","}{print "HLA-"$2,"HLA-"$3,"HLA-"$4,"HLA-"$5,"HLA-"$6,"HLA-"$7}'|xargs echo -n`

        # IEDB recommends NetMHCpan; NetMHCcons also performs well (PMID: 31204427).
        # NOTE(review): the original comment listed epitope lengths 8,9,10,11 but the
        # command passes 9,10,11 -- confirm which is intended.
        # "$HLA" is quoted because allele names contain '*', which the shell would
        # otherwise treat as a glob pattern.
        docker run --rm -u $UID:$(id -g $UID) -v ${output_dir}/neoantigen/:/data griffithlab/pvactools:2.0.7 pvacseq run \
            -e1 9,10,11 \
            --iedb-install-directory /opt/iedb/ \
            -t 10 \
            -k \
            --normal-sample-name NORMAL \
            -p /data/${tumor}.phased.vcf.gz \
            /data/${tumor}_somatic_vepanno_vepfilter_filter.vcf.gz \
            ${tumor} \
            "$HLA" \
            NetMHCpan \
            /data

        dos2unix ${output_dir}/neoantigen/MHC_Class_I/*.all_epitopes.tsv
        netchop.pl ${output_dir} ${tumor}
    >>>
}
|
|
|
|
|
|
|
|
|
|
# Workflow wrapper around the run_neoantigen task.
#
# Inputs:
#   run              when false, the task call is skipped (defaults to true)
#   tumor, normal, input_dir, output_dir, ref, tumor_rmdup_bam,
#   somatic_vcf, germline_vcf
#                    forwarded unchanged to run_neoantigen
#   umi              selects the sample_type passed to the task: 'c' when true, 't' otherwise
#
# Output:
#   neoantigen_txt   path string built from output_dir. It is not derived from the
#                    task call, so it is emitted even when run is false.
#                    NOTE(review): the file is presumably written by netchop.pl -- confirm.
workflow call_neoantigen {
    Boolean run=true

    String tumor
    String? normal
    String input_dir
    String output_dir
    String ref
    String tumor_rmdup_bam
    String somatic_vcf
    String germline_vcf
    Boolean umi

    if (run) {
        call run_neoantigen {
            input:
                tumor=tumor,
                normal=normal,
                input_dir=input_dir,
                output_dir=output_dir,
                ref=ref,
                tumor_rmdup_bam=tumor_rmdup_bam,
                somatic_vcf=somatic_vcf,
                germline_vcf=germline_vcf,
                sample_type=if umi then 'c' else 't'
        }
    }

    output {
        # The '/' after output_dir matches how the task builds its paths
        # (<output_dir>/neoantigen/...); without it the path is wrong whenever
        # output_dir has no trailing slash.
        String neoantigen_txt = "${output_dir}/neoantigen/MHC_Class_I/neoantigen.txt"
    }
}
|