workflow pancancer_controlsample{ String normal String tumor String cancer String project = "650gene" String bed = "/dataseq/jmdna/database/bed/650.bed" String bed_plus = "/dataseq/jmdna/database/bed/650plus50bp.bed" String name String outputDir String need_TMB_test = "yes" String need_MSI_test = "yes" String need_neoantigen_test = "yes" String inputDir String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa" String codes_dir = "/dataseq/jmdna/codes/pancancer_controlsample" String accessBed = "/dataseq/jmdna/software/cnvkit-0.9.7/data/access-5k-mappable.hg19.bed" String annotateGene = "/dataseq/jmdna/software/cnvkit-0.9.7/data/refFlat.txt" String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz" #创建目录 call create_dir{ input: outputDir=outputDir, need_neoantigen_test=need_neoantigen_test, need_MSI_test=need_MSI_test } scatter (name in [normal,tumor]){ call qc{ input: name=name, inputDir=inputDir, outputDir=outputDir } call alignment_bwa{ input: name=name, outputDir=outputDir, ref=ref, read1=qc.outputFile[0], read2=qc.outputFile[1] } call markduplicates{ input: name=name, outputDir=outputDir, ref=ref, sortedBam=alignment_bwa.sorted } call generater_mpileup{ input: name=name, ref=ref, bed=bed_plus, outputDir=outputDir, rmdupBam=markduplicates.rmdup } } call mutation_calling{ input: codes_dir=codes_dir, tumor=tumor, outputDir=outputDir, pileup=generater_mpileup.pileup } call annovar{ input: tumor=tumor, normal=normal, outputDir=outputDir, ref=ref, vcf=mutation_calling.vcf } scatter (name in [normal,tumor]){ call qc_2{ input: ref=ref, bed=bed, codes_dir=codes_dir, name=name, tumor=tumor, normal=normal, outputDir=outputDir, anno=annovar.anno } } call dealwithsnvindel{ input: codes_dir=codes_dir, project=project, outputDir=outputDir, tumor=tumor, cancer=cancer, anno=annovar.anno } call fusion{ input: ref=ref, codes_dir=codes_dir, project=project, tumor=tumor, outputDir=outputDir, cancer=cancer, rmdupBam=markduplicates.rmdup } if (need_TMB_test=="yes"){ call TMB{ input: codes_dir=codes_dir, tumor=tumor, outputDir=outputDir, anno=annovar.anno, concordance=conpair.concordance, snv_result=dealwithsnvindel.snv, cnv_result=cnvkit.cnv, fusion_result=fusion.fusion } } if (need_MSI_test=="yes"){ call MSI{ input: bed=bed, tumor=tumor, normal=normal, outputDir=outputDir, rmdupBam=markduplicates.rmdup } } call MMR{ input: codes_dir=codes_dir, tumor=tumor, outputDir=outputDir, snv=dealwithsnvindel.snv } call HRR{ input: codes_dir=codes_dir, tumor=tumor, outputDir=outputDir, snv=dealwithsnvindel.snv } call chemoTherapy{ input: codes_dir=codes_dir, normal=normal, tumor=tumor, outputDir=outputDir, ref=ref, project=project, rmdupBam=markduplicates.rmdup } call hereditary{ input: codes_dir=codes_dir, tumor=tumor, outputDir=outputDir, project=project, snv=dealwithsnvindel.snv } call conpair{ input: name=name, codes_dir=codes_dir, tumor=tumor, normal=normal, ref=ref, outputDir=outputDir, rmdupBam=markduplicates.rmdup } call cnvkit{ input: name=name, normal=normal, tumor=tumor, ref=ref, bed=bed, cancer=cancer, project=project, codes_dir=codes_dir, outputDir=outputDir, rmdupBam=markduplicates.rmdup, accessBed=accessBed, annotateGene=annotateGene, purity=tumor_content.purity } call tumor_content{ input: name=name, ref=ref, tumor=tumor, normal=normal, outputDir=outputDir, gc_wiggle=gc_wiggle, codes_dir=codes_dir, pileup=generater_mpileup.pileup } if (need_neoantigen_test=="yes"){ call HLA{ input: inputDir=inputDir, outputDir=outputDir, normal=normal, newdir=create_dir.newdir } call neoantigen{ input: codes_dir=codes_dir, outputDir=outputDir, name=name, normal=normal, tumor=tumor, hla=HLA.hla, vcf=mutation_calling.vcf } } call auto_report{ input: cancer=cancer, codes_dir=codes_dir, outputDir=outputDir, name=name, normal=normal, tumor=tumor, qc_2=qc_2.qc, concordance=conpair.concordance, snv_result=dealwithsnvindel.snv, cnv_result=cnvkit.cnv, fusion_result=fusion.fusion, tmb=TMB.tmb } call hotspot{ input: tumor=tumor, outputDir=outputDir, ref=ref, rmdupBam=markduplicates.rmdup, codes_dir=codes_dir } } #create project directory task create_dir{ String outputDir String need_MSI_test String need_neoantigen_test command <<< #创建目录 if [ ! -d ${outputDir} ];then mkdir ${outputDir} fi #创建qc目录 if [ ! -d ${outputDir}/qc ];then mkdir ${outputDir}/qc fi #创建alignment目录 if [ ! -d ${outputDir}/alignment ];then mkdir ${outputDir}/alignment fi #创建mutation目录 if [ ! -d ${outputDir}/mutation ];then mkdir ${outputDir}/mutation fi #创建cnv目录 if [ ! -d ${outputDir}/cnvkit ];then mkdir ${outputDir}/cnvkit fi #创建report目录 if [ ! -d ${outputDir}/report ];then mkdir -p ${outputDir}/report fi #创建chemo目录 if [ ! -d ${outputDir}/chemo ];then mkdir ${outputDir}/chemo fi #创建fusion目录 if [ ! -d ${outputDir}/fusion ];then mkdir ${outputDir}/fusion fi #创建MSI目录 if [ ${need_MSI_test} = "yes" ] && [ ! -d ${outputDir}/MSI ];then mkdir ${outputDir}/MSI fi if [ ${need_neoantigen_test} = "yes" ] && [ ! -d ${outputDir}/neoantigen/HLA ];then mkdir -p ${outputDir}/neoantigen/HLA fi #创建MMR目录 if [ ! -d ${outputDir}/MMR ];then mkdir ${outputDir}/MMR fi ##创建HRR目录 if [ ! -d ${outputDir}/HRR ];then mkdir ${outputDir}/HRR fi #创建HPD目录 if [ ! -d ${outputDir}/HPD ];then mkdir ${outputDir}/HPD fi >>> output{ String newdir = "${outputDir}/report" } } #generator raw fastq to clean fastq task qc{ String name String inputDir String outputDir command <<< echo processing raw reads with fastp fastp -i ${inputDir}/*_${name}_*1.fq.gz -o ${outputDir}/qc/${name}_clean_R1.fq.gz \ -I ${inputDir}/*_${name}_*2.fq.gz -O ${outputDir}/qc/${name}_clean_R2.fq.gz \ -w 10 \ --correction \ --overlap_len_require 10 \ -j ${outputDir}/qc/${name}.json \ -h ${outputDir}/qc/${name}.html --report_title ${name} -e 20 # --adapter_sequence AAGTCGGAGGCCAAGCGGTCTTAGGAAGACAA \ # --adapter_sequence_r2 AAGTCGGATCGTAGCCATGTCGTTCTGTGAGCCAAGGAGTTG \ >>> output{ Array[String] outputFile = [ "${outputDir}/qc/${name}_clean_R1.fq.gz ", "${outputDir}/qc/${name}_clean_R2.fq.gz", "${outputDir}/qc/${name}.json", "${outputDir}/qc/${name}.html" ] } } #alignment clean fastq to reference task alignment_bwa{ String name String ref String outputDir String read1 String read2 command<<< bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} \ ${outputDir}/qc/${name}_clean_R1.fq.gz ${outputDir}/qc/${name}_clean_R2.fq.gz | \ samtools view -@ 5 -bh -o - | samtools sort -@ 5 -o ${outputDir}/alignment/${name}.sorted.bam samtools index ${outputDir}/alignment/${name}.sorted.bam >>> output{ String sorted = "${outputDir}/alignment/${name}.sorted.bam" } } #remove PCR duplicates task markduplicates{ String name String ref String outputDir String sortedBam command<<< java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \ I=${outputDir}/alignment/${name}.sorted.bam \ O=${outputDir}/alignment/${name}.rmdup.bam \ CREATE_INDEX=true \ M=${outputDir}/alignment/${name}.rmdup.metrics.txt \ R=${ref} >>> output{ String rmdup = "${outputDir}/alignment/${name}.rmdup.bam" } } # generater mpileup file task generater_mpileup{ String name String ref String bed String outputDir String rmdupBam command<<< samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} \ ${outputDir}/alignment/${name}.rmdup.bam -o ${outputDir}/alignment/${name}.pileup >>> output{ String pileup = "${outputDir}/alignment/${name}.pileup" } } task mutation_calling{ String codes_dir String tumor Array[String] pileup String outputDir command<<< sh ${codes_dir}/mpileup2cns.sh ${sep=" " pileup} ${outputDir} ${tumor} java -jar $GATK MergeVcfs -I ${outputDir}/mutation/${tumor}.snp.Somatic.hc.vcf -I ${outputDir}/mutation/${tumor}.indel.Somatic.hc.vcf \ -O ${outputDir}/mutation/${tumor}.snp.indel.Somatic.hc.vcf -D /dataseq/jmdna/database/genome/hg19/hg19.dict java -jar $GATK MergeVcfs -I ${outputDir}/mutation/${tumor}.snp.Germline.vcf -I ${outputDir}/mutation/${tumor}.indel.Germline.vcf \ -O ${outputDir}/mutation/${tumor}.snp.indel.Germline.vcf -D /dataseq/jmdna/database/genome/hg19/hg19.dict java -jar $GATK MergeVcfs -I ${outputDir}/mutation/${tumor}.snp.LOH.hc.vcf -I ${outputDir}/mutation/${tumor}.indel.LOH.hc.vcf \ -O ${outputDir}/mutation/${tumor}.snp.indel.LOH.hc.vcf -D /dataseq/jmdna/database/genome/hg19/hg19.dict >>> output{ Array[String] vcf = [ "${outputDir}/mutation/${tumor}.snp.indel.Somatic.hc.vcf", "${outputDir}/mutation/${tumor}.snp.indel.Germline.vcf", "${outputDir}/mutation/${tumor}.snp.indel.LOH.hc.vcf" ] } } task annovar{ String tumor String normal String outputDir String ref Array[String] vcf command<<< table_annovar.pl \ ${outputDir}/mutation/${tumor}.snp.indel.Somatic.hc.vcf \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \ -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \ --intronhgvs 50 \ -operation g,f,f,f,f,f,f,f,f,f,r \ --outfile ${outputDir}/mutation/${tumor}.snp.indel.Somatic.anno table_annovar.pl \ ${outputDir}/mutation/${tumor}.snp.indel.Germline.vcf \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \ -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \ --intronhgvs 50 \ -operation g,f,f,f,f,f,f,f,f,f,r \ --outfile ${outputDir}/mutation/${tumor}.snp.indel.Germline.anno table_annovar.pl \ ${outputDir}/mutation/${tumor}.snp.indel.LOH.hc.vcf \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \ -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \ --intronhgvs 50 \ -operation g,f,f,f,f,f,f,f,f,f,r \ --outfile ${outputDir}/mutation/${tumor}.snp.indel.LOH.anno java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -T VariantAnnotator \ -R ${ref} \ -I ${outputDir}/alignment/${tumor}.rmdup.bam \ -V ${outputDir}/mutation/${tumor}.snp.indel.Somatic.hc.vcf \ -o ${outputDir}/mutation/${tumor}.TandemRepeatAnnotator.vcf \ --annotation TandemRepeatAnnotator # -nt 10 grep -v "^##" ${outputDir}/mutation/${tumor}.TandemRepeatAnnotator.vcf | cut -f8| paste ${outputDir}/mutation/${tumor}.snp.indel.Somatic.anno.hg19_multianno.txt - >${outputDir}/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno.txt >>> output{ Array[String] anno = [ "${outputDir}/mutation/${tumor}.snp.indel.Somatic.anno.hg19_multianno.txt", "${outputDir}/mutation/${tumor}.snp.indel.Germline.anno.hg19_multianno.txt", "${outputDir}/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno.txt", ] } } task dealwithsnvindel{ String codes_dir String project String outputDir String tumor Array[String] anno String cancer command <<< perl ${codes_dir}/pick_variant.pl ${outputDir} ${tumor} perl ${codes_dir}/pick_mut_splice_promoter.pl ${codes_dir} ${tumor} ${outputDir} ${project} perl /home/jm001/test_duantao/database_update/codes/682/targetTherapy.pl ${tumor} ${outputDir} ${project} ${cancer} perl /home/jm001/test_duantao/database_update/codes/682/germline_targetTherapy.pl ${tumor} ${outputDir} ${project} ${cancer} >>> output{ String snv = "${outputDir}/mutation/${tumor}.snvindel.pos.txt" } } task qc_2{ String ref String codes_dir String name String tumor String normal String bed String outputDir #for task chain Array[String] anno command <<< samtools flagstat -@ 10 ${outputDir}/alignment/${name}.rmdup.bam >${outputDir}/qc/${name}.rmdup.flagstat samtools stats --reference ${ref} -t ${bed} -@ 10 ${outputDir}/alignment/${name}.rmdup.bam > ${outputDir}/alignment/${name}.stat mkdir ${outputDir}/qc/${name}_bamdst bamdst -p ${bed} -o ${outputDir}/qc/${name}_bamdst ${outputDir}/alignment/${name}.rmdup.bam Rscript ${codes_dir}/InsertAndDepthStat.R ${outputDir}/qc/${name}_InsertAndDepthStat ${outputDir}/qc/${name}_bamdst/insertsize.plot ${outputDir}/qc/${name}_bamdst/depth_distribution.plot python3 ${codes_dir}/sample_post.py -s ${tumor} -o ${outputDir} --n ${normal} python3 ${codes_dir}/qc_stat.py ${outputDir}/qc/${name}.json ${outputDir}/qc/${name}_bamdst/coverage.report ${outputDir}/qc/${name}_bamdst/depth_distribution.plot ${outputDir} ${name} ${tumor} >>> output{ String qc = "${outputDir}/qc/${name}_qc.txt" } } task fusion{ String ref String codes_dir String tumor String outputDir String cancer String project #for task chain Array[String] rmdupBam command<<< # Extract the discordant paired-end alignments. samtools view -b -F 1294 ${outputDir}/alignment/${tumor}.rmdup.bam > ${outputDir}/fusion/${tumor}.discordants.bam # Extract the split-read alignments samtools view -h ${outputDir}/alignment/${tumor}.rmdup.bam \ | /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \ | samtools view -Sb - \ > ${outputDir}/fusion/${tumor}.splitters.bam lumpyexpress \ -B ${outputDir}/alignment/${tumor}.rmdup.bam \ -S ${outputDir}/fusion/${tumor}.splitters.bam \ -D ${outputDir}/fusion/${tumor}.discordants.bam \ -o ${outputDir}/fusion/${tumor}.fusion.vcf perl ${codes_dir}/fusion.filter.pl ${outputDir}/fusion/${tumor}.fusion.vcf ${outputDir}/fusion/${tumor}.fusion.filter.vcf svtyper \ -B ${outputDir}/alignment/${tumor}.rmdup.bam \ -i ${outputDir}/fusion/${tumor}.fusion.filter.vcf \ -T ${ref} \ -o ${outputDir}/fusion/${tumor}.fusion.gt.vcf table_annovar.pl \ ${outputDir}/fusion/${tumor}.fusion.gt.vcf \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene \ -operation g \ --outfile ${outputDir}/fusion/${tumor}.fusion perl ${codes_dir}/fusion.reanno.pl ${outputDir}/qc/${tumor}_bamdst/depth.tsv.gz ${outputDir} ${tumor} perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project} ${cancer} >>> output{ String fusion = "${outputDir}/fusion/${tumor}.fusion.pos.txt" } } task chemoTherapy{ String codes_dir String normal String tumor String outputDir String ref String project Array[String] rmdupBam command <<< # perl ${codes_dir}/chemo/singlecancer_chemo_2.pl ${codes_dir} ${outputDir} ${normal} ${ref} ${project} ${codes_dir}/chemo/chemo_panel.py -p ${project} -o ${outputDir} --n ${normal} >>> } task hereditary{ String codes_dir String tumor String outputDir String project String snv command <<< ${codes_dir}/hereditary/hereditary.py -p ${project} -o ${outputDir} --n ${tumor} >>> } task conpair{ String name String tumor String normal String outputDir String codes_dir String ref #for task chain Array[String] rmdupBam command <<< python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \ -M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \ -B ${outputDir}/alignment/${normal}.rmdup.bam \ -O ${outputDir}/alignment/${normal}.gatk.mpileup \ -R ${ref} \ -G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \ -M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \ -B ${outputDir}/alignment/${tumor}.rmdup.bam \ -O ${outputDir}/alignment/${tumor}.gatk.mpileup \ -R ${ref} \ -G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar sed -i 's/^chr//g' ${outputDir}/alignment/${normal}.gatk.mpileup sed -i 's/^chr//g' ${outputDir}/alignment/${tumor}.gatk.mpileup python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \ -T ${outputDir}/alignment/${tumor}.gatk.mpileup \ -N ${outputDir}/alignment/${normal}.gatk.mpileup \ -H \ -O ${outputDir}/qc/${name}_concordance.txt python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \ -T ${outputDir}/alignment/${tumor}.gatk.mpileup \ -N ${outputDir}/alignment/${normal}.gatk.mpileup \ -O ${outputDir}/qc/${name}_contamination.txt >>> output{ Array[String] concordance = [ "${outputDir}/report/qc/${name}_concordance.txt", "${outputDir}/report/qc/${name}_contamination.txt", ] } } task MSI{ String bed String tumor String normal String outputDir Array[String] rmdupBam command <<< msisensor2 msi -d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list -n ${outputDir}/alignment/${normal}.rmdup.bam -t ${outputDir}/alignment/${tumor}.rmdup.bam -e ${bed} -b 10 -o ${outputDir}/MSI/${tumor}.msi >>> output{ String target="${outputDir}/MSI/${tumor}.msi" } } task MMR{ String codes_dir String tumor String outputDir String snv command <<< perl ${codes_dir}/mmr_controlsample.pl ${outputDir} ${tumor} >>> output{ String mmr_out = "${outputDir}/MMR/${tumor}_mmr.txt" } } task HRR{ String codes_dir String tumor String outputDir String snv command <<< perl ${codes_dir}/hrr_controlsample_tissue.pl ${outputDir} ${tumor} >>> output{ String mmr_out = "${outputDir}/HRR/${tumor}_hrr.txt" } } task TMB{ String codes_dir String tumor String outputDir Array[String] anno Array[String] concordance String snv_result String cnv_result String fusion_result command <<< perl ${codes_dir}/tmb.pl ${outputDir} ${tumor} >>> output{ String tmb="${outputDir}/mutation/${tumor}.tmb.txt" } } task cnvkit{ String ref String bed String name String tumor String normal String outputDir String cancer String accessBed String annotateGene String codes_dir String project #for task chain Array[String] rmdupBam String purity command <<< echo run cnvkit batch to processing cnv calling cnvkit.py batch \ ${outputDir}/alignment/${tumor}.rmdup.bam \ --normal ${outputDir}/alignment/${normal}.rmdup.bam \ --targets ${bed} \ --fasta ${ref} \ --access ${accessBed} \ --output-reference ${outputDir}/cnvkit/${normal}_reference.cnn \ --annotate ${annotateGene} \ --drop-low-coverage --scatter --output-dir ${outputDir}/cnvkit cnvkit.py scatter \ ${outputDir}/cnvkit/${tumor}.rmdup.cnr -s ${outputDir}/cnvkit/${tumor}.rmdup.cns \ --y-max 3 --y-min -3 \ --title ${tumor}.cns \ -o ${outputDir}/cnvkit/${tumor}.cnv.png if [ -e "${outputDir}/qc/sequenza/${name}_confints_CP.txt" ]; then # absolute copy number cnvkit.py call \ -m clonal \ ${outputDir}/cnvkit/${tumor}.rmdup.cns \ -y \ --purity `head -n2 ${outputDir}/qc/sequenza/${name}_confints_CP.txt |tail -n1|cut -f1` \ --drop-low-coverage \ --filter ampdel \ -o ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn.hc fi perl ${codes_dir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project} ${cancer} >>> output{ String cnv = "${outputDir}/cnvkit/${tumor}.rmdup.cns" } } task tumor_content{ String name String ref String tumor String normal String outputDir String gc_wiggle String codes_dir #for task chain Array[String] pileup command <<< sequenza-utils bam2seqz -p -gc ${gc_wiggle} \ -F ${ref} \ -n ${outputDir}/alignment/${normal}.pileup \ -t ${outputDir}/alignment/${tumor}.pileup | gzip >${outputDir}/qc/target_${name}.200base.seqz.gz sequenza-utils seqz_binning -w 200 -s ${outputDir}/qc/target_${name}.200base.seqz.gz | gzip > ${outputDir}/qc/target_${name}.200base.small.seqz.gz Rscript ${codes_dir}/sequenza.R ${name} ${outputDir}/qc/target_${name}.200base.small.seqz.gz ${outputDir}/qc/sequenza || echo "sequenza failed!" # Rscript ${codes_dir}/qc_stat.R ${outputDir} ${name} # cp ${outputDir}/qc/${name}_qcstat.txt ${outputDir}/report/qc/${name}_qcstat.txt >>> output{ String purity = "${outputDir}/qc/sequenza/${name}_CP_contours.pdf" } } task HLA{ String inputDir String outputDir String normal String newdir command <<< razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${outputDir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${inputDir}/*_${normal}_*1.fq.gz samtools bam2fq ${outputDir}/neoantigen/HLA/fished_1.bam > ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq rm ${outputDir}/neoantigen/HLA/fished_1.bam razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${outputDir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${inputDir}/*_${normal}_*2.fq.gz samtools bam2fq ${outputDir}/neoantigen/HLA/fished_2.bam > ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq rm ${outputDir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py -i ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq --dna -v --prefix ${normal} -o ${outputDir}/neoantigen/HLA/ >>> output{ String hla = "${outputDir}/neoantigen/HLA/${normal}_result.tsv" } } task neoantigen{ String codes_dir String outputDir String name String normal String tumor String hla Array[String] vcf command <<< sh /home/jm001/test_duantao/database_update/test_project/t20230818/predict_neoantigen.sh ${outputDir} ${name} ${tumor} ${codes_dir} >>> output{ String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${tumor}.all_epitopes.netchop.txt" } } task auto_report{ String cancer String codes_dir String outputDir String name String normal String tumor Array[String] qc_2 Array[String] concordance # String purity String snv_result String cnv_result String fusion_result String tmb command <<< perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer} python3 ${codes_dir}/qc_check_tissue.py ${outputDir} ${tumor} ${normal} ${name} python3 ${codes_dir}/drug_dedup.py ${outputDir} ${tumor} perl ${codes_dir}/hpd.pl ${outputDir} ${tumor} python3 ${codes_dir}/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer} python3 ${codes_dir}/wdl_check.py -o ${outputDir} perl ${codes_dir}/file_format_change.pl ${outputDir} ${tumor} # cp ${outputDir}/qc/sequenza/${name}_confints_CP.txt ${outputDir}/report/${name}_confints_CP.txt cp ${outputDir}/qc/qc_fail.txt ${outputDir}/report/qc_fail.txt cp ${outputDir}/qc/${tumor}_qc.txt ${outputDir}/report/ cp ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn ${outputDir}/report/ cp ${outputDir}/cnvkit/${tumor}.cnv.png ${outputDir}/report/ cp ${outputDir}/fusion/${tumor}.fuison.vus.txt ${outputDir}/report/ cp ${outputDir}/fusion/${tumor}.fusion.reanno.vcf ${outputDir}/report/ # cp ${outputDir}/mutation/*filter* ${outputDir}/report/ cp ${outputDir}/mutation/${tumor}.snvindel.Germline.target.txt ${outputDir}/report/ cp ${outputDir}/mutation/${tumor}.nontarget.vus.txt ${outputDir}/report/ cp ${outputDir}/mutation/${tumor}.snvindel.vus.txt ${outputDir}/report/ cp ${outputDir}/mutation/${tumor}.target.splicing.txt ${outputDir}/report/ cp ${outputDir}/mutation/${tumor}.target.promoter.txt ${outputDir}/report/ cp ${outputDir}/mutation/${tumor}.tmb.txt ${outputDir}/report/ cp ${outputDir}/MMR/${tumor}.mmr.pre.txt ${outputDir}/report/ cp ${outputDir}/HRR/${tumor}.hrr.pre.txt ${outputDir}/report/ cp ${outputDir}/HPD/${tumor}.hpd.pre.txt ${outputDir}/report/ cp ${outputDir}/hereditary/${tumor}.hereditary.pre.txt ${outputDir}/report/ python3 ${codes_dir}/txt_xlsx.py ${outputDir} ${tumor} >>> } # hotspot task hotspot{ String tumor String outputDir String ref Array[String] rmdupBam String codes_dir command<<< mkdir -p ${outputDir}/mutation/hotspot samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${codes_dir}/hotspot.bed ${outputDir}/alignment/${tumor}.rmdup.bam -o ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.005 --min-avg-qual 20 --output-vcf 1 --variants 1 --p-value 0.99 --min-reads2 2 --strand-filter 0 > ${outputDir}/mutation/hotspot/${tumor}.hotspot.L.snp.indel.vcf java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.01 --min-avg-qual 20 --output-vcf 1 --variants 1 --p-value 0.05 --min-reads2 3 --strand-filter 1 > ${outputDir}/mutation/hotspot/${tumor}.hotspot.H.snp.indel.vcf perl ${codes_dir}/hotspot.hvl.pl ${outputDir} ${tumor} if [ -e "${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf" ]; then table_annovar.pl \ ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene \ -argument '-hgvs' \ -operation g \ --outfile ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno # java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -T VariantAnnotator \ # -R ${ref} \ # -I ${outputDir}/alignment/${tumor}.rmdup.bam \ # -V ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf \ # -o ${outputDir}/mutation/hotspot/${tumor}.hotspot.TandemRepeatAnnotator.vcf \ # --annotation TandemRepeatAnnotator # grep -v "^##" ${outputDir}/mutation/hotspot/${tumor}.hotspot.TandemRepeatAnnotator.vcf | cut -f8| paste ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno.hg19_multianno.txt - >${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.Somatic.annoall.hg19_multianno.txt # cp ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno.hg19_multianno.txt ${outputDir}/report/${tumor}.hotspot.snp.indel.anno.hg19_multianno.txt perl ${codes_dir}/hotspot.filter.pl ${outputDir} ${tumor} fi >>> output{ String hotspot = "${outputDir}/mutation/hotspot/${tumor}.hotspot.H.snp.indel.vcf" } }