# pipeline/wdl/task.wdl
#
# Size: 600 lines, 19 KiB (plain text)
# Revision: 2023-08-25 10:06:31 +08:00

#create project directory
task create_dir {
    # Project root directory; this task creates its "log" sub-directory.
    String workdir

    command <<<
        # "mkdir -p" creates missing parent directories and succeeds when the
        # directory already exists, so no existence test is needed. The earlier
        # test was written without a blank before the closing bracket, which the
        # shell rejects, and it also skipped creating log/ whenever the project
        # directory was already present.
        mkdir -p ${workdir}/log
    >>>
}
task mutation_calling {
    # Sample name; prefix of every file written under <outputDir>/mutation.
    String name
    # Pileup files handed to "VarScan somatic" as its two positional inputs.
    String tumor_pileup
    String normal_pileup
    # Project output directory.
    String outputDir

    command <<<
        # -p keeps the step idempotent and tolerates a missing parent directory.
        mkdir -p ${outputDir}/mutation

        # Paired calling with VarScan. $VARSCAN is not defined in this task and
        # must be provided by the execution environment.
        # NOTE(review): the tumor pileup is passed first here. The VarScan
        # "somatic" usage text lists the normal pileup first - confirm that this
        # order is intended before relying on the somatic/germline split.
        # The option list below must stay one uninterrupted continuation:
        # stray revision-timestamp lines previously split it in two.
        java -jar $VARSCAN somatic ${tumor_pileup} ${normal_pileup} \
            --output-snp ${outputDir}/mutation/${name}.snp.vcf \
            --output-indel ${outputDir}/mutation/${name}.indel.vcf \
            --min-var-freq 0.01 \
            --min-freq-for-hom 0.9 \
            --somatic-p-value 0.05 \
            --output-vcf 1 \
            --min-avg-qual 20 \
            --min-coverage-normal 10 \
            --min-coverage-tumor 30 \
            --min-reads2 3

        # Post-process the SNP and indel call sets. The merge steps below read
        # the *.Somatic.hc.vcf, *.Germline.vcf and *.LOH.hc.vcf files that are
        # expected to appear next to each input after this step.
        java -jar $VARSCAN processSomatic \
            ${outputDir}/mutation/${name}.snp.vcf \
            --min-tumor-freq 0.01 \
            --max-normal-freq 0.01 \
            --p-value 0.05
        java -jar $VARSCAN processSomatic \
            ${outputDir}/mutation/${name}.indel.vcf \
            --min-tumor-freq 0.01 \
            --max-normal-freq 0.01 \
            --p-value 0.05

        # Merge SNP and indel calls per category. $GATK must be provided by the
        # execution environment; the sequence dictionary path is fixed to hg19.
        java -jar $GATK MergeVcfs \
            -I ${outputDir}/mutation/${name}.snp.Somatic.hc.vcf \
            -I ${outputDir}/mutation/${name}.indel.Somatic.hc.vcf \
            -O ${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf \
            -D /dataseq/jmdna/database/genome/hg19/hg19.dict
        java -jar $GATK MergeVcfs \
            -I ${outputDir}/mutation/${name}.snp.Germline.vcf \
            -I ${outputDir}/mutation/${name}.indel.Germline.vcf \
            -O ${outputDir}/mutation/${name}.snp.indel.Germline.vcf \
            -D /dataseq/jmdna/database/genome/hg19/hg19.dict
        java -jar $GATK MergeVcfs \
            -I ${outputDir}/mutation/${name}.snp.LOH.hc.vcf \
            -I ${outputDir}/mutation/${name}.indel.LOH.hc.vcf \
            -O ${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf \
            -D /dataseq/jmdna/database/genome/hg19/hg19.dict
    >>>

    output {
        # Paths of the three merged VCFs (plain strings, not localized files).
        String somatic_hc_vcf = "${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf"
        String germline_vcf = "${outputDir}/mutation/${name}.snp.indel.Germline.vcf"
        String loh_hc_vcf = "${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf"
    }
}
task annovar {
    # Sample name; prefix of every annotation file written by this task.
    String name
    # Project output directory; results are written to <outputDir>/mutation.
    String outputDir
    # Reference passed to GATK VariantAnnotator through -R.
    String ref
    # The three VCFs that are annotated with table_annovar.pl.
    String somatic_hc_vcf
    String germline_vcf
    String loh_hc_vcf
    # BAM passed to GATK VariantAnnotator through -I.
    String rmdupBam

    command <<<
        # Create the mutation directory only when it is absent.
        [ -d ${outputDir}/mutation ] || mkdir ${outputDir}/mutation

        # Somatic calls.
        table_annovar.pl ${somatic_hc_vcf} /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 \
            -nastring . \
            -vcfinput \
            -remove \
            -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            --outfile ${outputDir}/mutation/${name}.snp.indel.Somatic.anno

        # Germline calls, same protocol list.
        table_annovar.pl ${germline_vcf} /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 \
            -nastring . \
            -vcfinput \
            -remove \
            -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            --outfile ${outputDir}/mutation/${name}.snp.indel.Germline.anno

        # LOH calls, same protocol list.
        table_annovar.pl ${loh_hc_vcf} /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 \
            -nastring . \
            -vcfinput \
            -remove \
            -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            --outfile ${outputDir}/mutation/${name}.snp.indel.LOH.anno

        # Add the TandemRepeatAnnotator annotation to the somatic VCF (GATK 3.7).
        java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar \
            -T VariantAnnotator \
            -R ${ref} \
            -I ${rmdupBam} \
            -V ${somatic_hc_vcf} \
            -o ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \
            --annotation TandemRepeatAnnotator

        # Drop the "##" lines of that VCF, keep column 8, and paste it as an
        # extra trailing column onto the somatic multianno table.
        grep -v "^##" ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \
            | cut -f8 \
            | paste ${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt - \
            > ${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt
    >>>

    output {
        # Paths of the annotation tables (plain strings, not localized files).
        String somatic_anno = "${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt"
        String germline_anno = "${outputDir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno.txt"
        String somatic_all_anno = "${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt"
    }
}
task tmb {
    # Directory that holds tmb.pl.
    String codesDir
    # Sample name, forwarded to tmb.pl and used in the output file name.
    String name
    # Project output directory, forwarded to tmb.pl.
    String outputDir
    # Not referenced by the command; presumably declared only so that the
    # workflow runs this task after the annotation step - confirm in the caller.
    String somatic_anno

    command <<<
        perl ${codesDir}/tmb.pl ${outputDir} ${name}
    >>>

    output {
        # Path where the result is expected (plain string, not a localized file).
        String tmb = "${outputDir}/mutation/${name}.tmb.txt"
    }
}
task fusion {
    # Sample name; prefix of every file written under <outputDir>/fusion.
    String name
    # Reference passed to svtyper through -T.
    String ref
    # Directory that holds the pipeline helper scripts.
    String codesDir
    # Project output directory.
    String outputDir
    # BAM that is scanned for discordant pairs and split reads.
    String rmdupBam
    # Forwarded to fusion_targetTherapy.pl.
    String cancer
    String project
    # Forwarded to fusion.reanno.pl as its first argument.
    String tumor_bamdst_depth

    command <<<
        # -p keeps the step idempotent and tolerates a missing parent directory.
        mkdir -p ${outputDir}/fusion

        # Extract the discordant paired-end alignments.
        samtools view -b -F 1294 ${rmdupBam} > ${outputDir}/fusion/${name}.discordants.bam

        # Extract the split-read alignments.
        samtools view -h ${rmdupBam} \
            | /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
            | samtools view -Sb - \
            > ${outputDir}/fusion/${name}.splitters.bam

        # Structural-variant calling. The option list must stay one uninterrupted
        # continuation: stray revision-timestamp lines previously split it.
        lumpyexpress \
            -B ${rmdupBam} \
            -S ${outputDir}/fusion/${name}.splitters.bam \
            -D ${outputDir}/fusion/${name}.discordants.bam \
            -o ${outputDir}/fusion/${name}.fusion.vcf

        # Project-specific filtering, then genotyping of the filtered calls.
        perl ${codesDir}/fusion.filter.pl ${outputDir}/fusion/${name}.fusion.vcf ${outputDir}/fusion/${name}.fusion.filter.vcf
        svtyper \
            -B ${rmdupBam} \
            -i ${outputDir}/fusion/${name}.fusion.filter.vcf \
            -T ${ref} \
            -o ${outputDir}/fusion/${name}.fusion.gt.vcf

        # Gene-level annotation of the genotyped calls.
        table_annovar.pl \
            ${outputDir}/fusion/${name}.fusion.gt.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene \
            -operation g \
            --outfile ${outputDir}/fusion/${name}.fusion

        # Project-specific post-processing. The second script is referenced by an
        # absolute path outside codesDir.
        perl ${codesDir}/fusion.reanno.pl ${tumor_bamdst_depth} ${outputDir} ${name}
        perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codesDir} ${name} ${outputDir} ${project} ${cancer}
    >>>

    output {
        # Path of the final fusion table (plain string, not a localized file);
        # presumably written by the Perl post-processing above - confirm.
        String fusion = "${outputDir}/fusion/${name}.fusion.pos.txt"
    }
}
task tumor_content {
    # Sample name; used in the names of the files written under <outputDir>/qc.
    String name
    # Pileup inputs passed to bam2seqz (-t tumor, -n normal).
    String tumor_pileup
    String normal_pileup
    # Reference passed to bam2seqz through -F.
    String ref
    # Project output directory; this task writes into its qc/ sub-directory and
    # does not create that directory itself.
    String outputDir
    # Directory that holds sequenza.R.
    String codesDir
    # GC-content file passed to bam2seqz through -gc.
    String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz"

    command <<<
        # Build the seqz file from the two pileups and compress it.
        sequenza-utils bam2seqz -p \
            -gc ${gc_wiggle} \
            -F ${ref} \
            -n ${normal_pileup} \
            -t ${tumor_pileup} \
            | gzip > ${outputDir}/qc/target_${name}.200base.seqz.gz

        # Bin the seqz file with a window of 200 and compress the result.
        sequenza-utils seqz_binning \
            -w 200 \
            -s ${outputDir}/qc/target_${name}.200base.seqz.gz \
            | gzip > ${outputDir}/qc/target_${name}.200base.small.seqz.gz

        # Best effort: a failing R step only prints a message, so the task
        # itself still ends with exit status 0.
        if ! Rscript ${codesDir}/sequenza.R ${name} ${outputDir}/qc/target_${name}.200base.small.seqz.gz ${outputDir}/qc/sequenza; then
            echo "sequenza failed!"
        fi
    >>>

    output {
        # Path where the plot is expected (plain string, not a localized file).
        String purity = "${outputDir}/qc/sequenza/${name}_CP_contours.pdf"
    }
}
task cnvkit {
    # Sample names. "tumor" is used to locate <tumor>.rmdup.cnr/.cns in the
    # cnvkit output directory; "normal" names the reference .cnn file.
    String tumor
    String normal
    # BAM inputs for "cnvkit.py batch".
    String tumor_rmdupBam
    String normal_rmdupBam
    # Reference passed through --fasta and target regions passed through --targets.
    String ref
    String bed
    # Project output directory; results are written to <outputDir>/cnvkit.
    String outputDir
    # Forwarded to cnv_targetTherapy.pl.
    String cancer
    # Directory that holds the pipeline helper scripts.
    String codesDir
    String project
    # Files passed to "cnvkit.py batch" through --access and --annotate.
    String accessBed = "/dataseq/jmdna/software/cnvkit-0.9.7/data/access-5k-mappable.hg19.bed"
    String annotateGene = "/dataseq/jmdna/software/cnvkit-0.9.7/data/refFlat.txt"

    command <<<
        # -p keeps the step idempotent and tolerates a missing parent directory.
        mkdir -p ${outputDir}/cnvkit

        cnvkit.py batch \
            ${tumor_rmdupBam} \
            --normal ${normal_rmdupBam} \
            --targets ${bed} \
            --fasta ${ref} \
            --access ${accessBed} \
            --output-reference ${outputDir}/cnvkit/${normal}_reference.cnn \
            --annotate ${annotateGene} \
            --drop-low-coverage --scatter --output-dir ${outputDir}/cnvkit

        # Redraw the scatter plot with a fixed y range of -3 to 3.
        cnvkit.py scatter \
            ${outputDir}/cnvkit/${tumor}.rmdup.cnr -s ${outputDir}/cnvkit/${tumor}.rmdup.cns \
            --y-max 3 --y-min -3 \
            --title ${tumor}.cns \
            -o ${outputDir}/cnvkit/${tumor}.cnv.png

        # Project-specific post-processing. The second script is referenced by an
        # absolute path outside codesDir.
        perl ${codesDir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn
        perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codesDir} ${tumor} ${outputDir} ${project} ${cancer}
    >>>

    output {
        # Paths of the segment file and the plot (plain strings, not localized
        # files). Stray revision-timestamp lines were removed from this section
        # because they are not valid declarations.
        String cns = "${outputDir}/cnvkit/${tumor}.rmdup.cns"
        String png = "${outputDir}/cnvkit/${tumor}.cnv.png"
    }
}
task chemo {
    # Directory that holds chemo/chemo_panel.py.
    String codesDir
    # Project output directory; the chemo/ sub-directory is created beneath it.
    String outputDir
    # Forwarded to chemo_panel.py through -p.
    String project
    # Forwarded to chemo_panel.py through --n.
    String normal
    # Not referenced by the command; presumably declared only so that the
    # workflow runs this task after the BAM exists - confirm in the caller.
    String rmdupBam

    command <<<
        # Create the chemo directory only when it is absent.
        [ -d ${outputDir}/chemo ] || mkdir ${outputDir}/chemo

        ${codesDir}/chemo/chemo_panel.py -p ${project} -o ${outputDir} --n ${normal}
    >>>
}
task msi {
    # Regions passed to msisensor2 through -e.
    String bed
    # Sample name; prefix of the result file.
    String name
    # Project output directory; results are written to <outputDir>/msi.
    String outputDir
    # BAM inputs (-t tumor, -n normal).
    String tumor_rmdupBam
    String normal_rmdupBam

    command <<<
        # -p keeps the step idempotent and tolerates a missing parent directory.
        mkdir -p ${outputDir}/msi

        # The option list must stay one uninterrupted continuation: stray
        # revision-timestamp lines previously split it.
        msisensor2 msi -d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list \
            -n ${normal_rmdupBam} \
            -t ${tumor_rmdupBam} \
            -e ${bed} -b 10 -o ${outputDir}/msi/${name}.msi
    >>>

    output {
        # Must name the lower-case "msi" directory that the command writes to.
        # The earlier upper-case "MSI" spelling pointed at a path that this task
        # never creates on a case-sensitive file system.
        String target = "${outputDir}/msi/${name}.msi"
    }
}
task hla {
    # Directory that is searched for the FASTQ files of the normal sample.
    String inputDir
    # Project output directory; results are written to <outputDir>/neoantigen/HLA.
    String outputDir
    # Normal sample name; used in the FASTQ glob and as the result prefix.
    String normal

    command <<<
        # Create the HLA directory unconditionally. The earlier guard tested only
        # for <outputDir>/neoantigen, so HLA/ was never created when that parent
        # already existed and every later step failed on the missing directory.
        mkdir -p ${outputDir}/neoantigen/HLA

        # Read 1: align against the OptiType HLA reference, convert the hits to
        # FASTQ, then remove the intermediate BAM.
        razers3 -tc 10 -i 95 -m 1 -dr 0 \
            -o ${outputDir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \
            ${inputDir}/*_${normal}_*1.fq.gz
        samtools bam2fq ${outputDir}/neoantigen/HLA/fished_1.bam > ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq
        rm ${outputDir}/neoantigen/HLA/fished_1.bam

        # Read 2: same procedure.
        razers3 -tc 10 -i 95 -m 1 -dr 0 \
            -o ${outputDir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \
            ${inputDir}/*_${normal}_*2.fq.gz
        samtools bam2fq ${outputDir}/neoantigen/HLA/fished_2.bam > ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq
        rm ${outputDir}/neoantigen/HLA/fished_2.bam

        # HLA typing on the two extracted FASTQ files.
        /dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py \
            -i ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq \
            --dna -v --prefix ${normal} -o ${outputDir}/neoantigen/HLA/
    >>>

    output {
        # Path where the typing result is expected (plain string, not a
        # localized file).
        String hla = "${outputDir}/neoantigen/HLA/${normal}_result.tsv"
    }
}
task neoantigen {
    # Forwarded to predict_neoantigen.sh as its fourth argument.
    String codesDir
    # Project output directory, forwarded as the first argument.
    String outputDir
    # Sample name, forwarded as the second and third argument.
    String name
    # The next three inputs are not referenced by the command; presumably they
    # are declared only so that the workflow runs this task after the steps that
    # produce them - confirm in the caller.
    String normal
    String somatic_hc_vcf
    String hla

    command <<<
        # The script is referenced by an absolute path outside codesDir.
        # NOTE(review): "name" is passed twice while "normal" is declared but
        # unused - confirm that the third argument is not meant to be the
        # normal sample.
        sh /home/jm001/test_duantao/database_update/test_project/20230814_test/predict_neoantigen.sh ${outputDir} ${name} ${name} ${codesDir}
    >>>

    output {
        # Path where the result is expected (plain string, not a localized file).
        String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${name}.all_epitopes.netchop.txt"
    }
}
# Revision: 2023-08-29 17:46:31 +08:00
task dealwithsnvindel {
    # Directory that holds the pipeline helper scripts.
    String codesDir
    # Sample name, forwarded to every script below.
    String name
    # Not referenced by the command; presumably declared only so that the
    # workflow runs this task after annotation - confirm in the caller.
    String somatic_all_anno
    String germline_anno
    # Forwarded to the splice/promoter and target-therapy scripts.
    String project
    # Project output directory, forwarded to every script below.
    String outputDir
    # Forwarded to the two target-therapy scripts.
    String cancer

    command <<<
        # Variant selection with the scripts shipped in codesDir.
        perl ${codesDir}/pick_variant.pl ${outputDir} ${name}
        perl ${codesDir}/pick_mut_splice_promoter.pl ${codesDir} ${name} ${outputDir} ${project}

        # Target-therapy scripts, referenced by absolute paths outside codesDir.
        perl /home/jm001/test_duantao/database_update/codes/682/targetTherapy.pl ${name} ${outputDir} ${project} ${cancer}
        perl /home/jm001/test_duantao/database_update/codes/682/germline_targetTherapy.pl ${name} ${outputDir} ${project} ${cancer}
    >>>

    output {
        # Paths where the filtered tables are expected (plain strings, not
        # localized files).
        String snvindel_filtered = "${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt"
        String germline_filtered = "${outputDir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt"
    }
}
task hereditary {
    # Directory that holds hereditary/hereditary.py.
    String codesDir
    # Sample name, forwarded through --n and used in the output file name.
    String name
    # Project output directory, forwarded through -o.
    String outputDir
    # Forwarded through -p.
    String project
    # Not referenced by the command; presumably declared only so that the
    # workflow runs this task after germline filtering - confirm in the caller.
    String germline_filtered

    command <<<
        ${codesDir}/hereditary/hereditary.py -p ${project} -o ${outputDir} --n ${name}
    >>>

    output {
        # Path where the result is expected (plain string, not a localized file).
        String hereditary_pre = "${outputDir}/hereditary/${name}.hereditary.pre.txt"
    }
}
task conpair {
    # Not referenced by the command.
    String codesDir
    # Sample name; prefix of every file written under <outputDir>/conpair.
    String name
    # BAM inputs for the two pileup runs.
    String tumor_rmdupBam
    String normal_rmdupBam
    # Project output directory.
    String outputDir
    # Reference passed to the pileup script through -R.
    String ref

    command <<<
        # Create the conpair directory only when it is absent.
        [ -d ${outputDir}/conpair ] || mkdir -p ${outputDir}/conpair

        # Values shared by both pileup runs, kept in plain shell variables.
        marker_bed=/dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed
        gatk_jar=/dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar

        # Pileup at the marker positions, tumor first and then normal.
        python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
            -M $marker_bed \
            -B ${tumor_rmdupBam} \
            -O ${outputDir}/conpair/${name}.tumor.gatk.mpileup \
            -R ${ref} \
            -G $gatk_jar
        python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
            -M $marker_bed \
            -B ${normal_rmdupBam} \
            -O ${outputDir}/conpair/${name}.normal.gatk.mpileup \
            -R ${ref} \
            -G $gatk_jar

        # Strip a leading "chr" from every line of both pileups, in place.
        for sample_kind in tumor normal; do
            sed -i 's/^chr//g' ${outputDir}/conpair/${name}.$sample_kind.gatk.mpileup
        done

        # Concordance check between the two pileups.
        python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \
            -H \
            -T ${outputDir}/conpair/${name}.tumor.gatk.mpileup \
            -N ${outputDir}/conpair/${name}.normal.gatk.mpileup \
            -O ${outputDir}/conpair/${name}_concordance.txt

        # Contamination estimate from the same pileups.
        python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \
            -T ${outputDir}/conpair/${name}.tumor.gatk.mpileup \
            -N ${outputDir}/conpair/${name}.normal.gatk.mpileup \
            -O ${outputDir}/conpair/${name}_contamination.txt
    >>>

    output {
        # Paths of the two reports (plain strings, not localized files).
        String concordance = "${outputDir}/conpair/${name}_concordance.txt"
        String contamination = "${outputDir}/conpair/${name}_contamination.txt"
    }
}
task mmr {
    # Directory that holds mmr_controlsample.pl.
    String codesDir
    # Sample name, forwarded to the script and used in the output file name.
    String name
    # Project output directory; the MMR/ sub-directory is created beneath it.
    String outputDir
    # Not referenced by the command; presumably declared only so that the
    # workflow runs this task after germline filtering - confirm in the caller.
    String germline_filtered

    command <<<
        # Create the MMR directory only when it is absent.
        [ -d ${outputDir}/MMR ] || mkdir -p ${outputDir}/MMR

        perl ${codesDir}/mmr_controlsample.pl ${outputDir} ${name}
    >>>

    output {
        # Path where the result is expected (plain string, not a localized file).
        String mmr = "${outputDir}/MMR/${name}_mmr.txt"
    }
}
task hrr {
    # Directory that holds hrr_controlsample_tissue.pl.
    String codesDir
    # Sample name, forwarded to the script and used in the output file name.
    String name
    # Project output directory; the HRR/ sub-directory is created beneath it.
    String outputDir
    # Not referenced by the command; presumably declared only so that the
    # workflow runs this task after germline filtering - confirm in the caller.
    String germline_filtered

    command <<<
        # Create the HRR directory only when it is absent.
        [ -d ${outputDir}/HRR ] || mkdir -p ${outputDir}/HRR

        perl ${codesDir}/hrr_controlsample_tissue.pl ${outputDir} ${name}
    >>>

    output {
        # Path where the result is expected (plain string, not a localized file).
        String hrr = "${outputDir}/HRR/${name}_hrr.txt"
    }
}
task hotspot {
    # Sample name; prefix of every file written under <outputDir>/mutation/hotspot.
    String name
    # Project output directory.
    String outputDir
    # Reference passed to samtools mpileup through -f.
    String ref
    # BAM that is piled up over the hotspot regions.
    String rmdupBam
    # Directory that holds hotspot.bed and the hotspot Perl scripts.
    String codesDir

    command <<<
        # Create the hotspot directory only when it is absent.
        [ -d ${outputDir}/mutation/hotspot/ ] || mkdir -p ${outputDir}/mutation/hotspot/

        # Pileup restricted to the regions listed in hotspot.bed.
        samtools mpileup -Bq 20 -Q 20 \
            -f ${ref} \
            -l ${codesDir}/hotspot.bed \
            -o ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \
            ${rmdupBam}

        # ".L." call set: looser thresholds (variant frequency 0.005, p-value
        # 0.99, two supporting reads, strand filter off).
        java -jar $VARSCAN mpileup2cns \
            ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \
            --min-var-freq 0.005 \
            --min-avg-qual 20 \
            --output-vcf 1 \
            --variants 1 \
            --p-value 0.99 \
            --min-reads2 2 \
            --strand-filter 0 \
            > ${outputDir}/mutation/hotspot/${name}.hotspot.L.snp.indel.vcf

        # ".H." call set: stricter thresholds (variant frequency 0.01, p-value
        # 0.05, three supporting reads, strand filter on).
        java -jar $VARSCAN mpileup2cns \
            ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \
            --min-var-freq 0.01 \
            --min-avg-qual 20 \
            --output-vcf 1 \
            --variants 1 \
            --p-value 0.05 \
            --min-reads2 3 \
            --strand-filter 1 \
            > ${outputDir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf

        # Project-specific step; presumably the one that writes the combined
        # hotspot VCF tested for below - confirm.
        perl ${codesDir}/hotspot.hvl.pl ${outputDir} ${name}

        # Annotate and filter only when the combined VCF exists.
        if [ -e "${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf" ]; then
            table_annovar.pl \
                ${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf \
                /dataseq/jmdna/software/annovar/humandb/ \
                -buildver hg19 \
                -nastring . \
                -vcfinput \
                -remove \
                -otherinfo \
                -protocol refGene \
                -argument '-hgvs' \
                -operation g \
                --outfile ${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.anno
            perl ${codesDir}/hotspot.filter.pl ${outputDir} ${name}
        fi
    >>>

    output {
        # Path of the ".H." call set (plain string, not a localized file).
        String hotspot = "${outputDir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf"
    }
}
task auto_report {
    # Forwarded to indication.pl and to the report script.
    String cancer
    # Directory that holds the report helper scripts.
    String codesDir
    # Project output directory; the report/ sub-directory is created beneath it.
    String outputDir
    # Sample names, forwarded to the scripts below.
    String normal
    String tumor
    # Result files from earlier tasks; each one is linked into report/.
    String cnv_cns
    String cnv_png
    String fusion_pos
    String snvindel_filtered
    String tmb
    String mmr
    String hrr
    String hereditary_pre

    command <<<
        if [ ! -d ${outputDir}/report ];then
            mkdir -p ${outputDir}/report
        fi

        # Report generation. indication.pl is referenced by an absolute path
        # outside codesDir.
        perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer}
        python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor}
        perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor}
        python3 ${codesDir}/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}

        # Link the upstream results into report/. -f replaces an entry left by a
        # previous run. Plain "ln -s" stops with "File exists" on a re-run, and
        # because the status of the last command is the status of the task, the
        # whole task then failed.
        ln -sf ${cnv_cns} ${outputDir}/report/
        ln -sf ${cnv_png} ${outputDir}/report/
        ln -sf ${fusion_pos} ${outputDir}/report/
        ln -sf ${snvindel_filtered} ${outputDir}/report/
        ln -sf ${tmb} ${outputDir}/report/
        ln -sf ${mmr} ${outputDir}/report/
        ln -sf ${hrr} ${outputDir}/report/
        ln -sf ${hereditary_pre} ${outputDir}/report/
    >>>
}