# Single-sample (tumor-only) UMI workflow for the lung 17-gene panel.
#
# Steps: create the output tree -> fastp QC -> UMI extraction and BWA
# alignment -> duplex consensus calling and re-alignment -> pileup ->
# VarScan calling -> ANNOVAR annotation -> optional fusion (lumpy/svtyper)
# and CNV (cnvkit) calling -> target-therapy post-processing -> report.
#
# Tasks communicate through files under outputDir; the String values passed
# between calls are paths and exist mainly to give the engine a dependency
# edge so that calls run in the right order.
workflow singlecancer_singlesample_umi {
    String project = "肺癌17基因"
    String bed = "/dataseq/jmdna/database/bed/lung17gene.hg19.liftover.bed"
    String tumor
    String inputDir
    String outputDir
    String need_fusion_test = "yes"
    String need_cnv_test = "yes"
    String need_chemotherapy = "no"
    String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
    String codes_dir = "/dataseq/jmdna/codes/singlecancer_singlesample"
    String accessBed = "/dataseq/jmdna/software/cnvkit-0.9.7/data/access-5k-mappable.hg19.bed"
    String annotateGene = "/dataseq/jmdna/software/cnvkit-0.9.7/data/refFlat.txt"
    # NOTE(review): gc_wiggle is declared but not passed to any call in this file.
    String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz"

    # Create the output directory tree.
    call create_dir {
        input:
            outputDir=outputDir,
            need_fusion_test=need_fusion_test,
            need_cnv_test=need_cnv_test,
            need_chemotherapy=need_chemotherapy
    }

    # outputDir is taken from create_dir's output (same value) so that qc --
    # and, through it, every downstream call -- waits for the directories.
    call qc {
        input:
            tumor=tumor,
            inputDir=inputDir,
            outputDir=create_dir.rootDir
    }

    call alignment_bwa {
        input:
            tumor=tumor,
            outputDir=outputDir,
            ref=ref,
            read1=qc.outputFile[0],
            read2=qc.outputFile[1]
    }

    call consensusreads {
        input:
            tumor=tumor,
            outputDir=outputDir,
            ref=ref,
            bam=alignment_bwa.bam
    }

    # consensusreads.outputFile[1] is the re-aligned consensus BAM.
    call generater_mpileup {
        input:
            tumor=tumor,
            ref=ref,
            bed=bed,
            outputDir=outputDir,
            rmdupBam=consensusreads.outputFile[1]
    }

    call qc_2 {
        input:
            ref=ref,
            bed=bed,
            tumor=tumor,
            outputDir=outputDir,
            codes_dir=codes_dir,
            rmdupBam=consensusreads.outputFile[1]
    }

    call mutation_calling {
        input:
            codes_dir=codes_dir,
            tumor=tumor,
            outputDir=outputDir,
            pileup=generater_mpileup.pileup
    }

    call annovar {
        input:
            tumor=tumor,
            outputDir=outputDir,
            ref=ref,
            vcf=mutation_calling.vcf,
            rmdupBam=consensusreads.outputFile[1]
    }

    if (need_fusion_test=="yes") {
        call fusion {
            input:
                ref=ref,
                codes_dir=codes_dir,
                tumor=tumor,
                outputDir=outputDir,
                rmdupBam=consensusreads.outputFile[1],
                project=project,
                # The fusion command reads qc/consensus_bamdst/depth.tsv.gz,
                # which is written by qc_2.
                qcDepth=qc_2.consensusDepth
        }
    }

    # call chemoTherapy{
    #     input:
    #         codes_dir=codes_dir,
    #         normal=normal,
    #         outputDir=outputDir,
    #         ref=ref,
    #         project=project,
    #         rmdupBam=consensusreads.outputFile[1]
    # }

    if (need_cnv_test=="yes") {
        call cnvkit {
            input:
                tumor=tumor,
                ref=ref,
                bed=bed,
                outputDir=outputDir,
                rmdupBam=consensusreads.outputFile[1],
                accessBed=accessBed,
                codes_dir=codes_dir,
                annotateGene=annotateGene,
                project=project
        }
    }

    call dealwithsnvindel {
        input:
            codes_dir=codes_dir,
            project=project,
            outputDir=outputDir,
            tumor=tumor,
            anno=annovar.anno
    }

    # cnvkit.cnv and fusion.fusion are optional here because both calls sit
    # inside conditionals; auto_report accepts them as optional inputs.
    call auto_report {
        input:
            codes_dir=codes_dir,
            outputDir=outputDir,
            tumor=tumor,
            snv_result=dealwithsnvindel.snv,
            cnv_result=cnvkit.cnv,
            fusion_result=fusion.fusion,
            qc_result=qc_2.consensusDepth
    }

    call hotspot {
        input:
            tumor=tumor,
            outputDir=outputDir,
            ref=ref,
            rmdupBam=consensusreads.outputFile[1],
            codes_dir=codes_dir
    }
}

# Create the project directory tree under outputDir.
#
# Inputs:
#   outputDir          root directory for all results
#   need_fusion_test   "yes" to create the fusion sub-directory
#   need_cnv_test      "yes" to create the cnvkit sub-directory
#   need_chemotherapy  "yes" to create the chemo sub-directory
# Outputs:
#   newdir   path string "<outputDir>/report/qc"
#            NOTE(review): this directory is not created by the command.
#   rootDir  outputDir echoed back, so that callers can depend on this task
task create_dir {
    String outputDir
    String need_fusion_test
    String need_cnv_test
    String need_chemotherapy

    command <<<
        # mkdir -p is a no-op for existing directories and creates missing parents.
        mkdir -p "${outputDir}"
        mkdir -p "${outputDir}/qc"
        mkdir -p "${outputDir}/alignment"
        mkdir -p "${outputDir}/mutation"

        # cnv directory
        if [ "${need_cnv_test}" = "yes" ]; then
            mkdir -p "${outputDir}/cnvkit"
        fi

        # report directory
        mkdir -p "${outputDir}/report"

        # chemo directory
        if [ "${need_chemotherapy}" = "yes" ]; then
            mkdir -p "${outputDir}/chemo"
        fi

        # fusion directory
        if [ "${need_fusion_test}" = "yes" ]; then
            mkdir -p "${outputDir}/fusion"
        fi
    >>>

    output {
        String newdir = "${outputDir}/report/qc"
        String rootDir = "${outputDir}"
    }
}

# Turn raw FASTQ into clean FASTQ with fastp.
#
# Input reads are located by glob: <inputDir>/*_<tumor>_*1.fq.gz and *2.fq.gz.
# Outputs (outputFile): [0] clean R1, [1] clean R2, [2] fastp JSON, [3] fastp HTML.
task qc {
    String tumor
    String inputDir
    String outputDir

    command <<<
        echo processing raw reads with fastp
        fastp -i ${inputDir}/*_${tumor}_*1.fq.gz -o ${outputDir}/qc/${tumor}_clean_R1.fq.gz \
            -I ${inputDir}/*_${tumor}_*2.fq.gz -O ${outputDir}/qc/${tumor}_clean_R2.fq.gz \
            -w 10 \
            --disable_trim_poly_g \
            --disable_quality_filtering \
            --adapter_sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCA \
            --adapter_sequence_r2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
            -j ${outputDir}/qc/${tumor}.json \
            -h ${outputDir}/qc/${tumor}.html --report_title ${tumor}
    >>>

    output {
        Array[String] outputFile = [
            "${outputDir}/qc/${tumor}_clean_R1.fq.gz",
            "${outputDir}/qc/${tumor}_clean_R2.fq.gz",
            "${outputDir}/qc/${tumor}.json",
            "${outputDir}/qc/${tumor}.html"
        ]
    }
}

# Align clean FASTQ to the reference, keeping UMIs.
#
# FASTQ -> unmapped BAM -> UMI extraction (fgbio) -> bwa mem -> merge of the
# aligned reads with the unmapped BAM (Picard MergeBamAlignment).
# Output: bam = <outputDir>/alignment/<tumor>.mapped.bam
task alignment_bwa {
    String tumor
    String ref
    String outputDir
    String read1
    String read2

    command <<<
        # Stop at the first failing step; otherwise the trailing rm would make
        # the task look successful even when no mapped BAM was produced.
        set -eo pipefail

        # FastqToSam
        java -Xmx8G -jar $PICARD FastqToSam \
            FASTQ=${read1} \
            FASTQ2=${read2} \
            OUTPUT=${outputDir}/alignment/unmapped.bam \
            SAMPLE_NAME=${tumor}

        # ExtractUmisFromBam
        java -jar /dataseq/jmdna/software/fgbio/target/scala-2.13/fgbio-1.4.0-468a843-SNAPSHOT.jar ExtractUmisFromBam \
            --input=${outputDir}/alignment/unmapped.bam \
            --output=${outputDir}/alignment/unmapped.withUMI.bam \
            --read-structure=1S3M3S+T 1S3M3S+T \
            --single-tag=RX \
            --molecular-index-tags=ZA ZB

        # align reads
        java -Xmx4G -jar $PICARD SamToFastq \
            I=${outputDir}/alignment/unmapped.withUMI.bam \
            F=/dev/stdout \
            INTERLEAVE=true \
        | bwa mem -p -t 10 ${ref} /dev/stdin \
        | java -Xmx4G -jar $PICARD MergeBamAlignment \
            UNMAPPED=${outputDir}/alignment/unmapped.withUMI.bam \
            ALIGNED=/dev/stdin \
            O=${outputDir}/alignment/${tumor}.mapped.bam \
            R=${ref} \
            SO=coordinate \
            ALIGNER_PROPER_PAIR_FLAGS=True \
            MAX_GAPS=-1 \
            ORIENTATIONS=FR \
            VALIDATION_STRINGENCY=SILENT \
            CREATE_INDEX=True

        # remove intermediates
        rm ${outputDir}/alignment/unmapped.bam ${outputDir}/alignment/unmapped.withUMI.bam
    >>>

    output {
        String bam = "${outputDir}/alignment/${tumor}.mapped.bam"
    }
}

# Group reads by UMI, call duplex consensus reads and re-align them.
#
# Outputs (outputFile): [0] UMI-grouped BAM, [1] re-aligned consensus BAM.
task consensusreads {
    String tumor
    String ref
    String outputDir
    String bam

    command <<<
        # Stop at the first failing step; otherwise the trailing rm would make
        # the task look successful even when no consensus BAM was produced.
        set -eo pipefail

        # GroupReadsByUmi
        java -Xmx4g -jar /dataseq/jmdna/software/fgbio/target/scala-2.13/fgbio-1.4.0-468a843-SNAPSHOT.jar GroupReadsByUmi \
            --input=${bam} \
            --output=${outputDir}/alignment/${tumor}.grouped.bam \
            --strategy=paired \
            --edits=1 \
            --min-map-q=20 \
            --allow-inter-contig=true

        # generate consensus reads
        java -Xmx4g -jar /dataseq/jmdna/software/fgbio/target/scala-2.13/fgbio-1.4.0-468a843-SNAPSHOT.jar CallDuplexConsensusReads \
            --input=${outputDir}/alignment/${tumor}.grouped.bam \
            --output=${outputDir}/alignment/${tumor}.consensus.unmapped.bam \
            --error-rate-pre-umi=45 \
            --error-rate-post-umi=30 \
            --min-input-base-quality=20 \
            --min-reads=1 0 0 \
            --threads 20

        # remap consensus reads
        java -Xmx4G -jar $PICARD SamToFastq \
            I=${outputDir}/alignment/${tumor}.consensus.unmapped.bam \
            F=/dev/stdout \
            INTERLEAVE=true \
        | bwa mem -p -t 10 ${ref} /dev/stdin \
        | java -Xmx4G -jar $PICARD MergeBamAlignment \
            UNMAPPED=${outputDir}/alignment/${tumor}.consensus.unmapped.bam \
            ALIGNED=/dev/stdin \
            O=${outputDir}/alignment/${tumor}.consensus.mapped.bam \
            R=${ref} \
            SO=coordinate \
            ALIGNER_PROPER_PAIR_FLAGS=True \
            MAX_GAPS=-1 \
            ORIENTATIONS=FR \
            VALIDATION_STRINGENCY=SILENT \
            CREATE_INDEX=True

        rm ${outputDir}/alignment/${tumor}.consensus.unmapped.bam
    >>>

    output {
        Array[String] outputFile = [
            "${outputDir}/alignment/${tumor}.grouped.bam",
            "${outputDir}/alignment/${tumor}.consensus.mapped.bam"
        ]
    }
}

# Generate the samtools mpileup file over the panel BED.
# Output: pileup = <outputDir>/alignment/<tumor>.pileup
task generater_mpileup {
    String tumor
    String ref
    String bed
    String outputDir
    String rmdupBam

    command <<<
        samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} \
            ${rmdupBam} -o ${outputDir}/alignment/${tumor}.pileup
    >>>

    output {
        String pileup = "${outputDir}/alignment/${tumor}.pileup"
    }
}

# Call SNVs/indels from the pileup with VarScan mpileup2cns.
# Output: vcf = <outputDir>/mutation/<tumor>.snp.indel.vcf
task mutation_calling {
    # codes_dir is not used by the command.
    String codes_dir
    String tumor
    String pileup
    String outputDir

    command <<<
        java -jar $VARSCAN mpileup2cns ${pileup} \
            --min-var-freq 0.002 --min-avg-qual 20 --output-vcf 1 --variants --p-value 0.99 --min-reads2 3 --strand-filter 1 >${outputDir}/mutation/${tumor}.snp.indel.vcf
    >>>

    output {
        String vcf = "${outputDir}/mutation/${tumor}.snp.indel.vcf"
    }
}

# Hotspot calling: pileup restricted to <codes_dir>/hotspot.bed, two VarScan
# passes (".L" with looser and ".H" with stricter thresholds), a merge step
# (hotspot.hvl.pl) and, when the merged VCF exists, ANNOVAR annotation whose
# result is copied into the report directory.
# Output: hotspot = path of the ".H" VCF.
task hotspot {
    String tumor
    String outputDir
    String ref
    String rmdupBam
    String codes_dir

    command <<<
        mkdir -p ${outputDir}/mutation/hotspot
        samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${codes_dir}/hotspot.bed ${rmdupBam} -o ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup
        java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.001 --min-avg-qual 20 --output-vcf 1 --variants --p-value 0.99 --min-reads2 2 --strand-filter 0 >${outputDir}/mutation/hotspot/${tumor}.hotspot.L.snp.indel.vcf
        java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.002 --min-avg-qual 20 --output-vcf 1 --variants --p-value 0.99 --min-reads2 3 --strand-filter 1 >${outputDir}/mutation/hotspot/${tumor}.hotspot.H.snp.indel.vcf
        perl ${codes_dir}/hotspot.hvl.pl ${outputDir} ${tumor}
        if [ -e "${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf" ]; then
            table_annovar.pl \
                ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf \
                /dataseq/jmdna/software/annovar/humandb/ \
                -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
                -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
                -operation g,f,f,f,f,f,f,f,f,f \
                --outfile ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno
            # java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -T VariantAnnotator \
            # -R ${ref} \
            # -I ${outputDir}/alignment/${tumor}.rmdup.bam \
            # -V ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf \
            # -o ${outputDir}/mutation/hotspot/${tumor}.hotspot.TandemRepeatAnnotator.vcf \
            # --annotation TandemRepeatAnnotator
            # grep -v "^##" ${outputDir}/mutation/hotspot/${tumor}.hotspot.TandemRepeatAnnotator.vcf | cut -f8| paste ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno.hg19_multianno.txt - >${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.Somatic.annoall.hg19_multianno.txt
            cp ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno.hg19_multianno.txt ${outputDir}/report/${tumor}.hotspot.snp.indel.anno.hg19_multianno.txt
        fi
    >>>

    output {
        String hotspot = "${outputDir}/mutation/hotspot/${tumor}.hotspot.H.snp.indel.vcf"
    }
}

# Annotate the SNV/indel VCF with ANNOVAR.
# Output: anno = <outputDir>/mutation/<tumor>.snp.indel.annoall.hg19_multianno.txt
task annovar {
    String tumor
    String outputDir
    # ref, vcf and rmdupBam are only referenced by the commented-out GATK step;
    # vcf also ties this task to mutation_calling.
    String ref
    String vcf
    String rmdupBam

    command <<<
        table_annovar.pl \
            ${outputDir}/mutation/${tumor}.snp.indel.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f \
            --outfile ${outputDir}/mutation/${tumor}.snp.indel.annoall
        # java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -T VariantAnnotator \
        # -R ${ref} \
        # -I ${rmdupBam} \
        # -V ${vcf} \
        # -o ${outputDir}/mutation/${tumor}.TandemRepeatAnnotator.vcf \
        # --annotation TandemRepeatAnnotator
        # grep -v "^##" ${outputDir}/mutation/${tumor}.TandemRepeatAnnotator.vcf | cut -f8| paste ${outputDir}/mutation/${tumor}.snp.indel.hg19_multianno.txt - >${outputDir}/mutation/${tumor}.snp.indel.annoall.hg19_multianno.txt
    >>>

    output {
        String anno = "${outputDir}/mutation/${tumor}.snp.indel.annoall.hg19_multianno.txt"
    }
}

# Post-alignment QC: bamdst coverage on the mapped and consensus BAMs, fgbio
# duplex metrics on the grouped BAM, samtools flagstat/stats and two R scripts.
# Output: consensusDepth = bamdst depth file of the consensus BAM; exposed so
#         that tasks reading files under <outputDir>/qc can wait for this task.
task qc_2 {
    String ref
    String bed
    String tumor
    String outputDir
    String codes_dir
    # for task chain
    String rmdupBam

    command <<<
        # -p keeps re-runs from failing on existing directories
        mkdir -p ${outputDir}/qc/group_bamdst ${outputDir}/qc/consensus_bamdst
        bamdst -p ${bed} -o ${outputDir}/qc/group_bamdst ${outputDir}/alignment/${tumor}.mapped.bam
        bamdst -p ${bed} -o ${outputDir}/qc/consensus_bamdst ${outputDir}/alignment/${tumor}.consensus.mapped.bam
        java -Xmx4g -jar /dataseq/jmdna/software/fgbio/target/scala-2.13/fgbio-1.4.0-468a843-SNAPSHOT.jar CollectDuplexSeqMetrics \
            -i ${outputDir}/alignment/${tumor}.grouped.bam \
            -o ${outputDir}/qc/${tumor}_umi_qc \
            -d ${tumor} \
            -u true
        samtools flagstat -@ 10 ${outputDir}/alignment/${tumor}.mapped.bam >${outputDir}/qc/${tumor}.flagstat
        samtools stats --reference ${ref} -t ${bed} -@ 10 ${outputDir}/alignment/${tumor}.consensus.mapped.bam > ${outputDir}/alignment/${tumor}.stat
        Rscript ${codes_dir}/qc_umi.r ${outputDir} ${tumor}
        Rscript ${codes_dir}/InsertAndDepthStat.R ${outputDir}/qc/${tumor}_InsertAndDepthStat ${outputDir}/qc/group_bamdst/insertsize.plot ${outputDir}/qc/group_bamdst/depth_distribution.plot
        # mv ${outputDir}/qc/${tumor}_qcstat.txt ${outputDir}/report/qc/${tumor}_qcstat.txt
    >>>

    output {
        String consensusDepth = "${outputDir}/qc/consensus_bamdst/depth.tsv.gz"
    }
}

# Fusion detection: extract discordant and split reads, run lumpyexpress,
# filter, genotype with svtyper, annotate with ANNOVAR (refGene) and run the
# project's re-annotation and target-therapy scripts.
# Output: fusion = <outputDir>/fusion/<tumor>.fusion.pos.txt
task fusion {
    String ref
    String codes_dir
    String tumor
    String outputDir
    # for task chain
    String rmdupBam
    String project
    # for task chain: fusion.reanno.pl below reads the bamdst depth file
    # written by qc_2; optional so existing callers remain valid.
    String? qcDepth

    command <<<
        # Extract the discordant paired-end alignments.
        samtools view -b -F 1294 ${rmdupBam} > ${outputDir}/fusion/${tumor}.discordants.bam

        # Extract the split-read alignments
        samtools view -h ${rmdupBam} \
            | /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
            | samtools view -Sb - \
            > ${outputDir}/fusion/${tumor}.splitters.bam

        lumpyexpress \
            -B ${rmdupBam} \
            -S ${outputDir}/fusion/${tumor}.splitters.bam \
            -D ${outputDir}/fusion/${tumor}.discordants.bam \
            -o ${outputDir}/fusion/${tumor}.fusion.vcf

        perl ${codes_dir}/fusion.filter.pl ${outputDir}/fusion/${tumor}.fusion.vcf ${outputDir}/fusion/${tumor}.fusion.filter.vcf

        svtyper \
            -B ${rmdupBam} \
            -i ${outputDir}/fusion/${tumor}.fusion.filter.vcf \
            -T ${ref} \
            -o ${outputDir}/fusion/${tumor}.fusion.gt.vcf

        table_annovar.pl \
            ${outputDir}/fusion/${tumor}.fusion.gt.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene \
            -operation g \
            --outfile ${outputDir}/fusion/${tumor}.fusion

        perl ${codes_dir}/fusion.reanno.pl ${outputDir}/qc/consensus_bamdst/depth.tsv.gz ${outputDir} ${tumor}
        perl ${codes_dir}/fusion_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project}
    >>>

    output {
        String fusion = "${outputDir}/fusion/${tumor}.fusion.pos.txt"
    }
}

# Chemotherapy analysis wrapper around singlecancer_chemo_2.pl.
# Not called by the workflow (the call is commented out there).
task chemoTherapy {
    String codes_dir
    String normal
    String outputDir
    String ref
    String project
    # for task chain
    Array[String] rmdupBam

    command <<<
        perl ${codes_dir}/chemo/singlecancer_chemo_2.pl ${codes_dir} ${outputDir} ${normal} ${ref} ${project}
    >>>
}

# CNV calling with cnvkit against a pre-built pooled reference, followed by
# the project's CNV target-therapy script.
# Output: cnv = <outputDir>/cnvkit/<tumor>.cnv.pos.txt
task cnvkit {
    # ref, bed, accessBed and annotateGene are not used by the command.
    String ref
    String bed
    String tumor
    String outputDir
    String accessBed
    String annotateGene
    String codes_dir
    String project
    # for task chain
    String rmdupBam

    command <<<
        echo run cnvkit batch to processing cnv calling
        cnvkit.py batch \
            ${rmdupBam} \
            -r ${codes_dir}/cnvkit-lung17gene-cfDNA/lung17gene_pool_tumor_reference.cnn \
            --drop-low-coverage --scatter --diagram --output-dir ${outputDir}/cnvkit
        perl ${codes_dir}/cnv_targetTherapy_umi.pl ${codes_dir} ${tumor} ${outputDir} ${project}
    >>>

    output {
        String cnv = "${outputDir}/cnvkit/${tumor}.cnv.pos.txt"
    }
}

# Post-process annotated SNVs/indels with the project's Perl scripts.
# anno is not used by the command; it ties this task to annovar.
# Output: snv = <outputDir>/mutation/<tumor>.snvindel.pos.txt
task dealwithsnvindel {
    String codes_dir
    String project
    String outputDir
    String tumor
    String anno

    command <<<
        perl ${codes_dir}/pick_mut_splice_promoter.pl ${codes_dir} ${tumor} ${outputDir} ${project}
        perl ${codes_dir}/targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project}
    >>>

    output {
        String snv = "${outputDir}/mutation/${tumor}.snvindel.pos.txt"
    }
}

# Build the report and collect result files into <outputDir>/report.
#
# snv_result, cnv_result, fusion_result and qc_result are not used by the
# command; they exist so that this task runs after the tasks producing them.
# cnv_result and fusion_result are optional because the cnvkit and fusion
# calls are conditional in the workflow.
# Output: report = <outputDir>/report/<tumor>_report.docx
task auto_report {
    String codes_dir
    String outputDir
    String tumor
    String snv_result
    String? cnv_result
    String? fusion_result
    # for task chain (qc_2 writes files under <outputDir>/qc)
    String? qc_result

    command <<<
        python ${codes_dir}/sample_post.py -s ${tumor} -o ${outputDir}
        python ${codes_dir}/drug_dedup.py ${outputDir} ${tumor}
        python ${codes_dir}/report_template/lung_17gene_umi_report.py ${outputDir} ${tumor}
        python ${codes_dir}/qc_check.py ${outputDir} ${tumor} c
        python3 ${codes_dir}/wdl_check.py -o ${outputDir}
        # NOTE(review): "fuison" looks like a typo for "fusion"; kept as-is
        # because the producing script is not visible here -- confirm.
        cp ${outputDir}/fusion/${tumor}.fuison.vus.txt ${outputDir}/report/
        cp ${outputDir}/mutation/${tumor}.snvindel.vus.txt ${outputDir}/report/
        cp ${outputDir}/qc/${tumor}_qc.txt ${outputDir}/report/
        cp ${outputDir}/qc/qc_fail.txt ${outputDir}/report/
        # cp ${outputDir}/fusion/${tumor}.longindel.pos.txt ${outputDir}/report/
        # Kept as the last command so the exit status of the script is unchanged.
        cp ${outputDir}/mutation/${tumor}.target.splicing.txt ${outputDir}/report/
    >>>

    output {
        String report = "${outputDir}/report/${tumor}_report.docx"
    }
}