# ---------------------------------------------------------------------------
# Task library for a paired tumor/normal analysis pipeline.
#
# Conventions visible in this file:
#   * Every input and output is declared as String, so the engine passes paths
#     through verbatim and does not check that output files exist.
#   * Several tasks declare inputs that their command never references
#     (e.g. tmb.somatic_anno). NOTE(review): these presumably exist only to
#     order tasks in the calling workflow -- confirm against the workflow.
#   * $VARSCAN and $GATK are shell variables expected in the task environment.
#     NOTE(review): presumably paths to the VarScan and GATK jars -- confirm.
# ---------------------------------------------------------------------------

# Create the project working directory together with its log/ subdirectory.
#   workdir : project root directory
task create_dir {
    String workdir

    command <<<
        # mkdir -p is idempotent, so no existence test is required. The former
        # test was written without a space before the closing bracket, which
        # made the test itself fail and the directory was never created.
        mkdir -p ${workdir}/log
    >>>
}

# Call variants with VarScan "somatic" from tumor/normal pileups, split the
# calls with "processSomatic", then merge the SNP and indel VCFs per class
# (Somatic.hc, Germline, LOH.hc) with GATK MergeVcfs.
#   name          : sample prefix used in every output file name
#   tumor_pileup  : tumor pileup file
#   normal_pileup : normal pileup file
#   outputDir     : project output root; results go to outputDir/mutation
task mutation_calling {
    String name
    String tumor_pileup
    String normal_pileup
    String outputDir

    command <<<
        mkdir -p ${outputDir}/mutation

        java -jar $VARSCAN somatic ${tumor_pileup} ${normal_pileup} \
            --output-snp ${outputDir}/mutation/${name}.snp.vcf \
            --output-indel ${outputDir}/mutation/${name}.indel.vcf \
            --min-var-freq 0.01 \
            --min-freq-for-hom 0.9 \
            --somatic-p-value 0.05 \
            --output-vcf 1 \
            --min-avg-qual 20 \
            --min-coverage-normal 10 \
            --min-coverage-tumor 30 \
            --min-reads2 3

        java -jar $VARSCAN processSomatic \
            ${outputDir}/mutation/${name}.snp.vcf \
            --min-tumor-freq 0.01 \
            --max-normal-freq 0.01 \
            --p-value 0.05

        java -jar $VARSCAN processSomatic \
            ${outputDir}/mutation/${name}.indel.vcf \
            --min-tumor-freq 0.01 \
            --max-normal-freq 0.01 \
            --p-value 0.05

        java -jar $GATK MergeVcfs \
            -I ${outputDir}/mutation/${name}.snp.Somatic.hc.vcf \
            -I ${outputDir}/mutation/${name}.indel.Somatic.hc.vcf \
            -O ${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf \
            -D /dataseq/jmdna/database/genome/hg19/hg19.dict

        java -jar $GATK MergeVcfs \
            -I ${outputDir}/mutation/${name}.snp.Germline.vcf \
            -I ${outputDir}/mutation/${name}.indel.Germline.vcf \
            -O ${outputDir}/mutation/${name}.snp.indel.Germline.vcf \
            -D /dataseq/jmdna/database/genome/hg19/hg19.dict

        java -jar $GATK MergeVcfs \
            -I ${outputDir}/mutation/${name}.snp.LOH.hc.vcf \
            -I ${outputDir}/mutation/${name}.indel.LOH.hc.vcf \
            -O ${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf \
            -D /dataseq/jmdna/database/genome/hg19/hg19.dict
    >>>

    output {
        String somatic_hc_vcf = "${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf"
        String germline_vcf = "${outputDir}/mutation/${name}.snp.indel.Germline.vcf"
        String loh_hc_vcf = "${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf"
    }
}

# Annotate the somatic, germline and LOH VCFs with ANNOVAR (table_annovar.pl),
# run GATK 3.7 VariantAnnotator (TandemRepeatAnnotator) on the somatic VCF and
# paste its INFO column (field 8) onto the somatic ANNOVAR table.
#   name, outputDir : sample prefix and project output root
#   ref             : reference FASTA passed to GATK -R
#   somatic_hc_vcf, germline_vcf, loh_hc_vcf : VCFs to annotate
#   rmdupBam        : BAM passed to GATK -I
# The LOH annotation is produced but not declared as an output.
task annovar {
    String name
    String outputDir
    String ref
    String somatic_hc_vcf
    String germline_vcf
    String loh_hc_vcf
    String rmdupBam

    command <<<
        mkdir -p ${outputDir}/mutation

        table_annovar.pl \
            ${somatic_hc_vcf} \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            --outfile ${outputDir}/mutation/${name}.snp.indel.Somatic.anno

        table_annovar.pl \
            ${germline_vcf} \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            --outfile ${outputDir}/mutation/${name}.snp.indel.Germline.anno

        table_annovar.pl \
            ${loh_hc_vcf} \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            --outfile ${outputDir}/mutation/${name}.snp.indel.LOH.anno

        java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -T VariantAnnotator \
            -R ${ref} \
            -I ${rmdupBam} \
            -V ${somatic_hc_vcf} \
            -o ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \
            --annotation TandemRepeatAnnotator

        # NOTE(review): paste joins line-by-line, so this assumes the annotated
        # VCF (minus its "##" header lines) and the ANNOVAR table have the same
        # row count and order -- confirm.
        grep -v "^##" ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \
            | cut -f8 | paste ${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt - \
            > ${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt
    >>>

    output {
        String somatic_anno = "${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt"
        String germline_anno = "${outputDir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno.txt"
        String somatic_all_anno = "${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt"
    }
}

# Run codesDir/tmb.pl for the sample.
#   somatic_anno : not referenced by the command
task tmb {
    String codesDir
    String name
    String outputDir
    String somatic_anno

    command <<<
        perl ${codesDir}/tmb.pl ${outputDir} ${name}
    >>>

    output {
        String tmb = "${outputDir}/mutation/${name}.tmb.txt"
    }
}

# Structural-variant / fusion calling: extract discordant and split reads,
# call with lumpyexpress, filter, genotype with svtyper, annotate with ANNOVAR
# (refGene) and post-process with project perl scripts.
#   rmdupBam           : input BAM
#   ref                : reference passed to svtyper -T
#   tumor_bamdst_depth : passed to fusion.reanno.pl
#   project, cancer    : passed to fusion_targetTherapy.pl
task fusion {
    String name
    String ref
    String codesDir
    String outputDir
    String rmdupBam
    String cancer
    String project
    String tumor_bamdst_depth

    command <<<
        mkdir -p ${outputDir}/fusion

        # Extract the discordant paired-end alignments.
        samtools view -b -F 1294 ${rmdupBam} > ${outputDir}/fusion/${name}.discordants.bam

        # Extract the split-read alignments
        samtools view -h ${rmdupBam} \
            | /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
            | samtools view -Sb - \
            > ${outputDir}/fusion/${name}.splitters.bam

        lumpyexpress \
            -B ${rmdupBam} \
            -S ${outputDir}/fusion/${name}.splitters.bam \
            -D ${outputDir}/fusion/${name}.discordants.bam \
            -o ${outputDir}/fusion/${name}.fusion.vcf

        perl ${codesDir}/fusion.filter.pl ${outputDir}/fusion/${name}.fusion.vcf ${outputDir}/fusion/${name}.fusion.filter.vcf

        svtyper \
            -B ${rmdupBam} \
            -i ${outputDir}/fusion/${name}.fusion.filter.vcf \
            -T ${ref} \
            -o ${outputDir}/fusion/${name}.fusion.gt.vcf

        table_annovar.pl \
            ${outputDir}/fusion/${name}.fusion.gt.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene \
            -operation g \
            --outfile ${outputDir}/fusion/${name}.fusion

        perl ${codesDir}/fusion.reanno.pl ${tumor_bamdst_depth} ${outputDir} ${name}
        perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codesDir} ${name} ${outputDir} ${project} ${cancer}
    >>>

    output {
        String fusion = "${outputDir}/fusion/${name}.fusion.pos.txt"
    }
}

# Build a binned seqz file with sequenza-utils from the tumor/normal pileups
# and run codesDir/sequenza.R on it.
#   gc_wiggle : GC-content file passed to bam2seqz -gc (has a default)
# A failing Rscript is deliberately tolerated: it only prints a message.
task tumor_content {
    String name
    String tumor_pileup
    String normal_pileup
    String ref
    String outputDir
    String codesDir
    String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz"

    command <<<
        # The redirections below write into qc/; make sure it exists.
        mkdir -p ${outputDir}/qc

        sequenza-utils bam2seqz \
            -p -gc ${gc_wiggle} \
            -F ${ref} \
            -n ${normal_pileup} \
            -t ${tumor_pileup} \
            | gzip > ${outputDir}/qc/target_${name}.200base.seqz.gz

        sequenza-utils seqz_binning -w 200 -s ${outputDir}/qc/target_${name}.200base.seqz.gz \
            | gzip > ${outputDir}/qc/target_${name}.200base.small.seqz.gz

        Rscript ${codesDir}/sequenza.R ${name} ${outputDir}/qc/target_${name}.200base.small.seqz.gz ${outputDir}/qc/sequenza || echo "sequenza failed!"
    >>>

    output {
        String purity = "${outputDir}/qc/sequenza/${name}_CP_contours.pdf"
    }
}

# Copy-number calling with "cnvkit.py batch" (tumor vs. normal), a scatter
# plot, then project perl post-processing.
#   tumor, normal : sample names used in output file names
#   bed           : target regions passed to --targets
#   accessBed, annotateGene : CNVkit resource files (have defaults)
task cnvkit {
    String tumor
    String normal
    String tumor_rmdupBam
    String normal_rmdupBam
    String ref
    String bed
    String outputDir
    String cancer
    String codesDir
    String project
    String accessBed = "/dataseq/jmdna/software/cnvkit-0.9.7/data/access-5k-mappable.hg19.bed"
    String annotateGene = "/dataseq/jmdna/software/cnvkit-0.9.7/data/refFlat.txt"

    command <<<
        mkdir -p ${outputDir}/cnvkit

        cnvkit.py batch \
            ${tumor_rmdupBam} \
            --normal ${normal_rmdupBam} \
            --targets ${bed} \
            --fasta ${ref} \
            --access ${accessBed} \
            --output-reference ${outputDir}/cnvkit/${normal}_reference.cnn \
            --annotate ${annotateGene} \
            --drop-low-coverage --scatter --output-dir ${outputDir}/cnvkit

        # NOTE(review): the .rmdup.cnr/.cns names assume the tumor BAM file is
        # named after the tumor input with a ".rmdup.bam" suffix -- confirm.
        cnvkit.py scatter \
            ${outputDir}/cnvkit/${tumor}.rmdup.cnr -s ${outputDir}/cnvkit/${tumor}.rmdup.cns \
            --y-max 3 --y-min -3 \
            --title ${tumor}.cns \
            -o ${outputDir}/cnvkit/${tumor}.cnv.png

        perl ${codesDir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn
        perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codesDir} ${tumor} ${outputDir} ${project} ${cancer}
    >>>

    output {
        String cns = "${outputDir}/cnvkit/${tumor}.rmdup.cns"
        String png = "${outputDir}/cnvkit/${tumor}.cnv.png"
    }
}

# Run codesDir/chemo/chemo_panel.py for the normal sample.
#   rmdupBam : not referenced by the command
# Declares no outputs.
task chemo {
    String codesDir
    String outputDir
    String project
    String normal
    String rmdupBam

    command <<<
        mkdir -p ${outputDir}/chemo

        ${codesDir}/chemo/chemo_panel.py -p ${project} -o ${outputDir} --n ${normal}
    >>>
}

# Microsatellite-instability scoring with "msisensor2 msi" on the
# tumor/normal BAM pair, restricted to the regions in bed.
task msi {
    String bed
    String name
    String outputDir
    String tumor_rmdupBam
    String normal_rmdupBam

    command <<<
        mkdir -p ${outputDir}/msi

        msisensor2 msi -d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list \
            -n ${normal_rmdupBam} \
            -t ${tumor_rmdupBam} \
            -e ${bed} -b 10 -o ${outputDir}/msi/${name}.msi
    >>>

    output {
        # Must match the -o path above; it previously pointed at "MSI/",
        # a directory this task never creates.
        String target = "${outputDir}/msi/${name}.msi"
    }
}

# HLA typing of the normal sample: align each FASTQ mate against the OptiType
# HLA reference with razers3, convert the hits back to FASTQ, then run
# OptiTypePipeline.py in DNA mode.
#   inputDir : directory holding the FASTQs, matched by the globs
#              *_<normal>_*1.fq.gz and *_<normal>_*2.fq.gz
task hla {
    String inputDir
    String outputDir
    String normal

    command <<<
        # Create the HLA directory unconditionally. The former guard tested
        # only the parent neoantigen/ directory, so HLA/ was skipped whenever
        # the parent already existed.
        mkdir -p ${outputDir}/neoantigen/HLA

        razers3 -tc 10 -i 95 -m 1 -dr 0 \
            -o ${outputDir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \
            ${inputDir}/*_${normal}_*1.fq.gz
        samtools bam2fq ${outputDir}/neoantigen/HLA/fished_1.bam > ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq
        rm ${outputDir}/neoantigen/HLA/fished_1.bam

        razers3 -tc 10 -i 95 -m 1 -dr 0 \
            -o ${outputDir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \
            ${inputDir}/*_${normal}_*2.fq.gz
        samtools bam2fq ${outputDir}/neoantigen/HLA/fished_2.bam > ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq
        rm ${outputDir}/neoantigen/HLA/fished_2.bam

        /dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py \
            -i ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq \
            --dna -v --prefix ${normal} -o ${outputDir}/neoantigen/HLA/
    >>>

    output {
        String hla = "${outputDir}/neoantigen/HLA/${normal}_result.tsv"
    }
}

# Run the neoantigen prediction shell script.
#   normal, somatic_hc_vcf, hla : not referenced by the command
# NOTE(review): the script receives name twice while normal is unused; the
# second argument may have been meant to be normal -- confirm against
# predict_neoantigen.sh before changing.
task neoantigen {
    String codesDir
    String outputDir
    String name
    String normal
    String somatic_hc_vcf
    String hla

    command <<<
        sh /home/jm001/test_duantao/database_update/test_project/20230814_test/predict_neoantigen.sh ${outputDir} ${name} ${name} ${codesDir}
    >>>

    output {
        String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${name}.all_epitopes.netchop.txt"
    }
}

# Filter annotated SNV/indel calls and run the somatic and germline
# targeted-therapy perl scripts.
#   somatic_all_anno, germline_anno : not referenced by the command
task dealwithsnvindel {
    String codesDir
    String name
    String somatic_all_anno
    String germline_anno
    String project
    String outputDir
    String cancer

    command <<<
        perl ${codesDir}/pick_variant.pl ${outputDir} ${name}
        perl ${codesDir}/pick_mut_splice_promoter.pl ${codesDir} ${name} ${outputDir} ${project}
        perl /home/jm001/test_duantao/database_update/codes/682/targetTherapy.pl ${name} ${outputDir} ${project} ${cancer}
        perl /home/jm001/test_duantao/database_update/codes/682/germline_targetTherapy.pl ${name} ${outputDir} ${project} ${cancer}
    >>>

    output {
        String snvindel_filtered = "${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt"
        String germline_filtered = "${outputDir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt"
    }
}

# Run codesDir/hereditary/hereditary.py for the sample.
#   germline_filtered : not referenced by the command
task hereditary {
    String codesDir
    String name
    String outputDir
    String project
    String germline_filtered

    command <<<
        ${codesDir}/hereditary/hereditary.py -p ${project} -o ${outputDir} --n ${name}
    >>>

    output {
        String hereditary_pre = "${outputDir}/hereditary/${name}.hereditary.pre.txt"
    }
}

# Tumor/normal concordance and contamination estimation with Conpair:
# GATK pileups for both BAMs, strip the leading "chr" from each pileup line,
# then verify_concordance.py and estimate_tumor_normal_contamination.py.
#   codesDir : not referenced by the command
task conpair {
    String codesDir
    String name
    String tumor_rmdupBam
    String normal_rmdupBam
    String outputDir
    String ref

    command <<<
        mkdir -p ${outputDir}/conpair

        python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
            -M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
            -B ${tumor_rmdupBam} \
            -O ${outputDir}/conpair/${name}.tumor.gatk.mpileup \
            -R ${ref} \
            -G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar

        python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
            -M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
            -B ${normal_rmdupBam} \
            -O ${outputDir}/conpair/${name}.normal.gatk.mpileup \
            -R ${ref} \
            -G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar

        sed -i 's/^chr//g' ${outputDir}/conpair/${name}.tumor.gatk.mpileup
        sed -i 's/^chr//g' ${outputDir}/conpair/${name}.normal.gatk.mpileup

        python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \
            -H \
            -T ${outputDir}/conpair/${name}.tumor.gatk.mpileup \
            -N ${outputDir}/conpair/${name}.normal.gatk.mpileup \
            -O ${outputDir}/conpair/${name}_concordance.txt

        python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \
            -T ${outputDir}/conpair/${name}.tumor.gatk.mpileup \
            -N ${outputDir}/conpair/${name}.normal.gatk.mpileup \
            -O ${outputDir}/conpair/${name}_contamination.txt
    >>>

    output {
        String concordance = "${outputDir}/conpair/${name}_concordance.txt"
        String contamination = "${outputDir}/conpair/${name}_contamination.txt"
    }
}

# Run codesDir/mmr_controlsample.pl for the sample.
#   germline_filtered : not referenced by the command
task mmr {
    String codesDir
    String name
    String outputDir
    String germline_filtered

    command <<<
        mkdir -p ${outputDir}/MMR

        perl ${codesDir}/mmr_controlsample.pl ${outputDir} ${name}
    >>>

    output {
        String mmr = "${outputDir}/MMR/${name}_mmr.txt"
    }
}

# Run codesDir/hrr_controlsample_tissue.pl for the sample.
#   germline_filtered : not referenced by the command
task hrr {
    String codesDir
    String name
    String outputDir
    String germline_filtered

    command <<<
        mkdir -p ${outputDir}/HRR

        perl ${codesDir}/hrr_controlsample_tissue.pl ${outputDir} ${name}
    >>>

    output {
        String hrr = "${outputDir}/HRR/${name}_hrr.txt"
    }
}

# Hotspot calling: samtools mpileup restricted to codesDir/hotspot.bed, two
# VarScan mpileup2cns passes (the "L" pass uses looser thresholds than the
# "H" pass), a project script that processes them, and ANNOVAR annotation plus
# filtering when the combined hotspot VCF exists.
task hotspot {
    String name
    String outputDir
    String ref
    String rmdupBam
    String codesDir

    command <<<
        mkdir -p ${outputDir}/mutation/hotspot/

        samtools mpileup -Bq 20 -Q 20 \
            -f ${ref} \
            -l ${codesDir}/hotspot.bed \
            -o ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \
            ${rmdupBam}

        java -jar $VARSCAN mpileup2cns \
            ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \
            --min-var-freq 0.005 \
            --min-avg-qual 20 \
            --output-vcf 1 \
            --variants 1 \
            --p-value 0.99 \
            --min-reads2 2 \
            --strand-filter 0 \
            > ${outputDir}/mutation/hotspot/${name}.hotspot.L.snp.indel.vcf

        java -jar $VARSCAN mpileup2cns \
            ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \
            --min-var-freq 0.01 \
            --min-avg-qual 20 \
            --output-vcf 1 \
            --variants 1 \
            --p-value 0.05 \
            --min-reads2 3 \
            --strand-filter 1 \
            > ${outputDir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf

        perl ${codesDir}/hotspot.hvl.pl ${outputDir} ${name}

        # NOTE(review): the combined VCF is presumably written by
        # hotspot.hvl.pl; annotation is skipped when it is absent.
        if [ -e "${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf" ]; then
            table_annovar.pl \
                ${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf \
                /dataseq/jmdna/software/annovar/humandb/ \
                -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
                -protocol refGene \
                -argument '-hgvs' \
                -operation g \
                --outfile ${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.anno
            perl ${codesDir}/hotspot.filter.pl ${outputDir} ${name}
        fi
    >>>

    output {
        String hotspot = "${outputDir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf"
    }
}

# Build the report: run the indication / de-duplication / format-conversion /
# report scripts, then symlink the key result files into outputDir/report.
# Declares no outputs.
task auto_report {
    String cancer
    String codesDir
    String outputDir
    String normal
    String tumor
    String cnv_cns
    String cnv_png
    String fusion_pos
    String snvindel_filtered
    String tmb
    String mmr
    String hrr
    String hereditary_pre

    command <<<
        mkdir -p ${outputDir}/report

        perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer}
        python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor}
        perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor}
        python3 ${codesDir}/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}

        # -f replaces links left by an earlier run. Plain "ln -s" fails with
        # "File exists" on a re-run, and because the last ln is the final
        # command, that failure became the task's exit status.
        ln -sf ${cnv_cns} ${outputDir}/report/
        ln -sf ${cnv_png} ${outputDir}/report/
        ln -sf ${fusion_pos} ${outputDir}/report/
        ln -sf ${snvindel_filtered} ${outputDir}/report/
        ln -sf ${tmb} ${outputDir}/report/
        ln -sf ${mmr} ${outputDir}/report/
        ln -sf ${hrr} ${outputDir}/report/
        ln -sf ${hereditary_pre} ${outputDir}/report/
    >>>
}