# Tumor/normal paired panel pipeline with UMI handling on the tumor sample.
#
# Conventions used throughout this file:
#   * Tasks exchange *paths as Strings* (not File), so every tool reads and
#     writes directly under outputDir; nothing is localized by the engine.
#   * Many task inputs are never used inside the command; they exist only to
#     create a dependency edge so the engine runs tasks in the right order.
#     Those inputs are marked "for task chain".
#   * Both scatters iterate over [normal, tumor], so index 0 of a gathered
#     array is the normal sample and index 1 is the tumor sample.
workflow pancancer_singlesample_umi{
    String normal
    String tumor
    String cancer
    String project = "624gene"
    String raw_bed = "/dataseq/jmdna/database/bed/624.merge.bed"
    String bed = "/dataseq/jmdna/database/bed/624plus20bp_merge.bed"
    String outputDir
    String inputDir
    String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
    String codes_dir = "/dataseq/jmdna/codes/624"
    # NOTE(review): accessBed and annotateGene are declared but not passed to
    # any call in this file; kept because they are part of the workflow inputs.
    String accessBed = "/dataseq/jmdna/software/cnvkit-0.9.7/data/access-5k-mappable.hg19.bed"
    String annotateGene = "/dataseq/jmdna/software/cnvkit-0.9.7/data/refFlat.txt"
    String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz"

    # Create the output directory tree.
    call create_dir{
        input:
            outputDir=outputDir
    }

    # Rewrite read headers of the tumor FASTQs, once per mate.
    scatter (read in ["1","2"]){
        call changehead{
            input:
                newdir=create_dir.newdir,
                tumor=tumor,
                inputDir=inputDir,
                outputDir=outputDir,
                read=read
        }
    }

    call qc{
        input:
            data=changehead.outputFile,
            tumor=tumor,
            inputDir=inputDir,
            outputDir=outputDir,
            codes_dir=codes_dir
    }

    call qc_normal{
        input:
            data=changehead.outputFile,
            normal=normal,
            inputDir=inputDir,
            outputDir=outputDir
    }

    # Index 0 = normal, index 1 = tumor in every array gathered from here.
    scatter (name in [normal,tumor]){
        call alignment_bwa{
            input:
                name=name,
                outputDir=outputDir,
                ref=ref,
                qc_T=qc.outputFile,
                qc_N=qc_normal.outputFile
        }

        call rmdup{
            input:
                name=name,
                outputDir=outputDir,
                ref=ref,
                bam=alignment_bwa.sorted
        }

        call generater_mpileup{
            input:
                rmdup=rmdup.rmdup,
                name=name,
                ref=ref,
                bed=bed,
                outputDir=outputDir
        }
    }

    call mutation_calling{
        input:
            tumor=tumor,
            outputDir=outputDir,
            ref=ref,
            bed=bed,
            normal=normal,
            codes_dir=codes_dir,
            rmdup=rmdup.rmdup[1]
    }

    call mutation_calling_normal{
        input:
            normal=normal,
            outputDir=outputDir,
            ref=ref,
            bed=bed,
            rmdup=rmdup.rmdup[0]
    }

    call hotspot{
        input:
            rmdupBam=rmdup.rmdup[1],
            tumor=tumor,
            outputDir=outputDir,
            ref=ref,
            codes_dir=codes_dir
    }

    call annovar{
        input:
            codes_dir=codes_dir,
            tumor=tumor,
            normal=normal,
            outputDir=outputDir,
            tvcf=mutation_calling.vcf,
            nvcf=mutation_calling_normal.vcf
    }

    scatter (name in [normal,tumor]){
        call qc_2{
            input:
                ref=ref,
                bed=raw_bed,
                name=name,
                tumor=tumor,
                outputDir=outputDir,
                codes_dir=codes_dir,
                anno=annovar.anno
        }
    }

    # fusion reads qc/<tumor>_sorted_bamdst/depth.tsv.gz, which is written by
    # qc_2, so it must not start before qc_2 has finished (qc_done).
    call fusion{
        input:
            ref=ref,
            codes_dir=codes_dir,
            tumor=tumor,
            cancer=cancer,
            outputDir=outputDir,
            Bam=alignment_bwa.sorted[1],
            project=project,
            qc_done=qc_2.qc
    }

    call TMB{
        input:
            codes_dir=codes_dir,
            tumor=tumor,
            outputDir=outputDir,
            anno=annovar.anno
    }

    call conpair{
        input:
            tumor=tumor,
            normal=normal,
            ref=ref,
            outputDir=outputDir,
            rmdupBam=rmdup.rmdup
    }

    call chemoTherapy{
        input:
            codes_dir=codes_dir,
            normal=normal,
            outputDir=outputDir,
            ref=ref,
            project=project,
            rmdupBam=rmdup.rmdup[0]
    }

    call tumor_content{
        input:
            ref=ref,
            tumor=tumor,
            normal=normal,
            outputDir=outputDir,
            gc_wiggle=gc_wiggle,
            codes_dir=codes_dir,
            pileup=generater_mpileup.pileup
    }

    call cnvkit{
        input:
            tumor=tumor,
            cancer=cancer,
            outputDir=outputDir,
            rmdupBam=rmdup.rmdup[1],
            codes_dir=codes_dir,
            project=project,
            purity=tumor_content.purity
    }

    call dealwithsnvindel{
        input:
            codes_dir=codes_dir,
            project=project,
            outputDir=outputDir,
            tumor=tumor,
            cancer=cancer,
            anno=annovar.anno
    }

    call MMR{
        input:
            codes_dir=codes_dir,
            tumor=tumor,
            outputDir=outputDir,
            snv=dealwithsnvindel.snv
    }

    call HRR{
        input:
            codes_dir=codes_dir,
            tumor=tumor,
            outputDir=outputDir,
            snv=dealwithsnvindel.snv
    }

    call HLA{
        input:
            inputDir=inputDir,
            outputDir=outputDir,
            normal=normal,
            newdir=create_dir.newdir
    }

    call neoantigen{
        input:
            codes_dir=codes_dir,
            outputDir=outputDir,
            normal=normal,
            tumor=tumor,
            hla=HLA.hla,
            vcf=mutation_calling.vcf
    }

    call MSI{
        input:
            bed=bed,
            codes_dir=codes_dir,
            tumor=tumor,
            outputDir=outputDir,
            rmdupBam=rmdup.rmdup[1]
    }

    call hereditary{
        input:
            codes_dir=codes_dir,
            tumor=tumor,
            outputDir=outputDir,
            project=project,
            snv=dealwithsnvindel.snv
    }

    # auto_report copies files out of MMR/, HRR/ and hereditary/, so it is
    # chained on those three tasks in addition to the original dependencies.
    call auto_report{
        input:
            outputDir=outputDir,
            tumor=tumor,
            normal=normal,
            cancer=cancer,
            codes_dir=codes_dir,
            qc_2=qc_2.qc,
            concordance=conpair.concordance,
            snv_result=dealwithsnvindel.snv,
            cnv_result=cnvkit.cnv,
            fusion_result=fusion.fusion,
            tmb=TMB.tmb,
            mmr_result=MMR.mmr_out,
            hrr_result=HRR.hrr_out,
            hereditary_result=hereditary.result
    }
}

# Create the project directory tree under outputDir.
# Output newdir is the report directory path; callers use it for task chaining.
task create_dir{
    String outputDir

    command <<<
        # mkdir -p is a no-op for directories that already exist and also
        # creates missing parents of outputDir.
        mkdir -p \
            ${outputDir} \
            ${outputDir}/qc \
            ${outputDir}/alignment \
            ${outputDir}/mutation \
            ${outputDir}/cnvkit \
            ${outputDir}/chemo \
            ${outputDir}/fusion \
            ${outputDir}/MSI \
            ${outputDir}/MMR \
            ${outputDir}/HPD \
            ${outputDir}/HRR \
            ${outputDir}/neoantigen/HLA \
            ${outputDir}/hereditary \
            ${outputDir}/report
    >>>

    output{
        String newdir = "${outputDir}/report"
    }
}

# Rewrite the headers of one mate of the tumor FASTQ with seqkit: the pattern
# "/<read>" is replaced by " <read>". Writes an uncompressed FASTQ into qc/.
# newdir is only a chaining input (ensures create_dir has run).
task changehead{
    String newdir
    String tumor
    String inputDir
    String outputDir
    String read

    command <<<
        seqkit replace -p "/${read}" -r " ${read}" -j 10 ${inputDir}/*_${tumor}_*${read}.fq.gz -o ${outputDir}/qc/${tumor}_changehead_R${read}.fq
    >>>

    output{
        String outputFile = "${outputDir}/qc/${tumor}_changehead_R${read}.fq"
    }
}

# Run fastp on the raw normal-sample FASTQ pair found in inputDir.
# data is only a chaining input.
task qc_normal{
    Array[String] data
    String normal
    String inputDir
    String outputDir

    command <<<
        echo processing raw reads with fastp
        fastp -i ${inputDir}/*_${normal}_*1.fq.gz -o ${outputDir}/qc/${normal}_clean_R1.fq.gz \
            -I ${inputDir}/*_${normal}_*2.fq.gz -O ${outputDir}/qc/${normal}_clean_R2.fq.gz \
            -w 10 \
            --correction \
            --overlap_len_require 20 \
            --adapter_sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCA \
            --adapter_sequence_r2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
            -j ${outputDir}/qc/${normal}.json \
            -h ${outputDir}/qc/${normal}.html --report_title ${normal} -e 20
    >>>

    output{
        Array[String] outputFile = [
            "${outputDir}/qc/${normal}_clean_R1.fq.gz",
            "${outputDir}/qc/${normal}_clean_R2.fq.gz",
            "${outputDir}/qc/${normal}.json",
            "${outputDir}/qc/${normal}.html"
        ]
    }
}

# Run fastp with UMI options (-U ...) on the header-rewritten tumor FASTQs
# produced by changehead, run UMI_Project.py, then delete the intermediate
# changehead FASTQs. data is only a chaining input (waits for changehead).
task qc{
    String tumor
    String inputDir
    String outputDir
    String codes_dir
    Array[String] data

    command <<<
        echo processing raw reads with fastp
        fastp -i ${outputDir}/qc/${tumor}_changehead_R1.fq -o ${outputDir}/qc/${tumor}_clean_R1.fq.gz \
            -I ${outputDir}/qc/${tumor}_changehead_R2.fq -O ${outputDir}/qc/${tumor}_clean_R2.fq.gz \
            -w 10 \
            -U --umi_loc=per_read --umi_len=4 --umi_prefix=UMI --umi_skip=3 \
            --disable_trim_poly_g \
            --disable_quality_filtering \
            --adapter_sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCA \
            --adapter_sequence_r2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
            --correction \
            --overlap_len_require 10 \
            -j ${outputDir}/qc/${tumor}.json \
            -h ${outputDir}/qc/${tumor}.html --report_title ${tumor}
        # UMI
        python3 ${codes_dir}/UMI_Project.py -i ${inputDir} -o ${outputDir} -t ${tumor}
        rm ${outputDir}/qc/${tumor}_changehead_R1.fq ${outputDir}/qc/${tumor}_changehead_R2.fq
    >>>

    output{
        Array[String] outputFile = [
            "${outputDir}/qc/${tumor}_clean_R1.fq.gz",
            "${outputDir}/qc/${tumor}_clean_R2.fq.gz",
            "${outputDir}/qc/${tumor}.json",
            "${outputDir}/qc/${tumor}.html"
        ]
    }
}

# Align the cleaned FASTQ pair of one sample to the reference with bwa mem,
# then sort and index with samtools.
# qc_T / qc_N are only chaining inputs (wait for both fastp tasks).
task alignment_bwa{
    String name
    String ref
    String outputDir
    Array[String] qc_T
    Array[String] qc_N

    command<<<
        bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} \
            ${outputDir}/qc/${name}_clean_R1.fq.gz ${outputDir}/qc/${name}_clean_R2.fq.gz | \
            samtools view -@ 5 -bh -o - | samtools sort -@ 5 -o ${outputDir}/alignment/${name}.sorted.bam
        samtools index ${outputDir}/alignment/${name}.sorted.bam
    >>>

    output{
        String sorted = "${outputDir}/alignment/${name}.sorted.bam"
    }
}

# Duplicate removal: run gencore on the sorted BAM and index the result.
# bam is only a chaining input; the command derives the path from name.
task rmdup{
    String name
    String outputDir
    String ref
    String bam

    command<<<
        gencore -i ${outputDir}/alignment/${name}.sorted.bam -o ${outputDir}/alignment/${name}.rmdup.bam \
            -r ${ref} -j ${outputDir}/qc/${name}_rmdup.json -h ${outputDir}/qc/${name}_rmdup.html
        samtools index ${outputDir}/alignment/${name}.rmdup.bam
    >>>

    output{
        String rmdup = "${outputDir}/alignment/${name}.rmdup.bam"
    }
}

# Generate a samtools mpileup file for one sample, restricted to bed.
# rmdup is only a chaining input; the command derives the path from name.
task generater_mpileup{
    String rmdup
    String name
    String ref
    String bed
    String outputDir

    command<<<
        samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} \
            ${outputDir}/alignment/${name}.rmdup.bam -o ${outputDir}/alignment/${name}.pileup
    >>>

    output{
        String pileup = "${outputDir}/alignment/${name}.pileup"
    }
}

# Tumor variant calling: two VarDict passes (the full rmdup BAM and the ".2r"
# BAM written by fetch_bam.py), followed by project scripts that merge the two
# call sets, subtract the normal/control calls, and add/filter flags.
# rmdup is only a chaining input.
task mutation_calling{
    String tumor
    String rmdup
    String outputDir
    String ref
    String bed
    String codes_dir
    String normal

    command<<<
        ## call on the "1r" (single-strand) BAM
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
            -f 0.002 -N ${tumor} -b ${outputDir}/alignment/${tumor}.rmdup.bam \
            -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${tumor} -E -f 0.002 >${outputDir}/mutation/${tumor}.1r.snp.indel.vcf

        ## extract the duplex-corrected ("2r") reads and the reads with NM<4
        python3 ${codes_dir}/fetch_bam.py ${outputDir} ${tumor}

        ## call on the duplex-corrected ("2r") BAM
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
            -f 0.0001 -N ${tumor}_2r -b ${outputDir}/alignment/${tumor}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${tumor} -E -f 0.001 >${outputDir}/mutation/${tumor}.2r.snp.indel.vcf

        ## merge: keep 1r calls with AF>0.01, and use the 2r calls to correct
        ## the low-frequency (AF<0.01) region of the 1r calls
        perl ${codes_dir}/1r_plus_2r.pl ${outputDir} ${tumor}

        ## subtract control: drop variants at >=5% in the control; for 1%-5%
        ## in the control, drop when cfDNA/control < 5
        perl ${codes_dir}/t_subs_n_vcf.pl ${outputDir} ${tumor} ${normal}

        ##add msi and strandbias flag
        perl ${codes_dir}/add_flag.pl ${outputDir} ${tumor} ${normal}

        ##add malt flag
        grep -v ^# ${outputDir}/mutation/${tumor}_substract_${normal}.snp.indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - \
            >${outputDir}/mutation/${tumor}_substract_${normal}.snp.indel.bed
        samtools mpileup -aBq 20 -Q 20 -f ${ref} -l ${outputDir}/mutation/${tumor}_substract_${normal}.snp.indel.bed \
            ${outputDir}/alignment/${tumor}.2r.rmdup.bam -o ${outputDir}/alignment/${tumor}_substract_${normal}.snp.indel.pileup
        python ${codes_dir}/add_malt.py ${outputDir} ${tumor} ${normal}

        ##filter flag
        perl ${codes_dir}/filter_flag.pl ${outputDir} ${tumor} ${normal}
    >>>

    output{
        String vcf = "${outputDir}/mutation/${tumor}_substract_${normal}.filter.flag.snp.indel.vcf"
    }
}

# Variant calling on the normal sample with a single VarDict pass.
# rmdup is only a chaining input.
task mutation_calling_normal{
    String normal
    String rmdup
    String outputDir
    String ref
    String bed

    command<<<
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
            -f 0.01 -N ${normal} -b ${outputDir}/alignment/${normal}.rmdup.bam \
            -Q 20 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${normal} -E -f 0.01 >${outputDir}/mutation/${normal}.snp.indel.vcf
    >>>

    output{
        String vcf = "${outputDir}/mutation/${normal}.snp.indel.vcf"
    }
}

# Hotspot calling: pile up the tumor rmdup BAM over a fixed hotspot BED, call
# with VarScan mpileup2cns, annotate with ANNOVAR and filter with a project
# script. Requires the VARSCAN environment variable to point at the VarScan jar.
task hotspot{
    String tumor
    String outputDir
    String ref
    String rmdupBam
    String codes_dir

    command<<<
        mkdir -p ${outputDir}/mutation/hotspot
        samtools mpileup -Bq 20 -Q 20 -f ${ref} -l /dataseq/jmdna/codes/public/hotspot.bed ${rmdupBam} -o ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup
        java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.001 --min-avg-qual 20 --output-vcf 1 --variants --p-value 0.99 --min-reads2 2 --strand-filter 0 >${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf
        # java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.002 --min-avg-qual 20 --output-vcf 1 --variants --p-value 0.99 --min-reads2 3 --strand-filter 1 >${outputDir}/mutation/hotspot/${tumor}.hotspot.H.snp.indel.vcf
        # perl ${codes_dir}/hotspot.hvl.pl ${outputDir} ${tumor}
        # if [ -e "${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf" ]; then
        table_annovar.pl \
            ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene \
            -argument '-hgvs' \
            -operation g \
            --outfile ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno
        # fi
        perl ${codes_dir}/hotspot.filter.pl ${outputDir} ${tumor}
    >>>

    output{
        String hotspot = "${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.filter.anno.hg19_multianno.txt"
    }
}

# Annotate the filtered tumor-minus-normal VCF (Somatic) and the normal VCF
# (Germline) with ANNOVAR. tvcf / nvcf are only chaining inputs.
task annovar{
    String codes_dir
    String tumor
    String normal
    String outputDir
    String tvcf
    String nvcf

    command<<<
        perl ${codes_dir}/t_subs_n_vcf.pl ${outputDir} ${tumor} ${normal}
        table_annovar.pl \
            ${outputDir}/mutation/${tumor}_substract_${normal}.filter.flag.snp.indel.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            --outfile ${outputDir}/mutation/${tumor}.snp.indel.Somatic.annoall

        table_annovar.pl \
            ${outputDir}/mutation/${normal}.snp.indel.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            --outfile ${outputDir}/mutation/${tumor}.snp.indel.Germline.anno
    >>>

    output{
        # No trailing comma after the last element: not every WDL parser
        # accepts one.
        Array[String] anno = [
            "${outputDir}/mutation/${tumor}.snp.indel.Germline.anno.hg19_multianno.txt",
            "${outputDir}/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno.txt"
        ]
    }
}

# Post-alignment QC for one sample: samtools flagstat/stats, bamdst on both
# the sorted and rmdup BAMs, an R plot script and a project summary script.
# The <name>_sorted_bamdst directory written here is later read by fusion.
task qc_2{
    String ref
    String bed
    String name
    String tumor
    String outputDir
    String codes_dir
    #for task chain
    Array[String] anno

    command <<<
        samtools flagstat -@ 10 ${outputDir}/alignment/${name}.rmdup.bam >${outputDir}/qc/${name}.rmdup.flagstat
        samtools stats --reference ${ref} -t ${bed} -@ 10 ${outputDir}/alignment/${name}.rmdup.bam > ${outputDir}/alignment/${name}.stat
        # -p so that a re-run into an existing outputDir does not fail here.
        mkdir -p ${outputDir}/qc/${name}_sorted_bamdst ${outputDir}/qc/${name}_rmdup_bamdst
        bamdst -p ${bed} -o ${outputDir}/qc/${name}_sorted_bamdst ${outputDir}/alignment/${name}.sorted.bam
        bamdst -p ${bed} -o ${outputDir}/qc/${name}_rmdup_bamdst ${outputDir}/alignment/${name}.rmdup.bam
        Rscript ${codes_dir}/InsertAndDepthStat.R ${outputDir}/qc/${name}_InsertAndDepthStat ${outputDir}/qc/${name}_rmdup_bamdst/insertsize.plot ${outputDir}/qc/${name}_rmdup_bamdst/depth_distribution.plot
        python3 ${codes_dir}/qc_stat_umi.py ${outputDir} ${name} ${tumor}
    >>>

    output{
        String qc = "${outputDir}/qc/${name}_qc.txt"
    }
}

# Structural-variant / fusion calling on the tumor sorted BAM:
# Picard MarkDuplicates -> discordant + split reads -> lumpyexpress ->
# project filter -> svtyper -> ANNOVAR -> project re-annotation scripts.
# Requires the PICARD environment variable to point at the Picard jar.
task fusion{
    String ref
    String codes_dir
    String tumor
    String cancer
    String outputDir
    #for task chain
    String Bam
    String project
    #for task chain: the re-annotation step below reads
    #qc/<tumor>_sorted_bamdst/depth.tsv.gz, which is produced by qc_2.
    #Optional so that existing callers of this task keep working.
    Array[String]? qc_done

    command<<<
        java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \
            I=${outputDir}/alignment/${tumor}.sorted.bam \
            O=${outputDir}/alignment/${tumor}.picard.rmdup.bam \
            CREATE_INDEX=true \
            M=${outputDir}/alignment/${tumor}.picard.rmdup.metrics.txt \
            R=${ref}

        # Extract the discordant paired-end alignments.
        samtools view -b -F 1294 ${outputDir}/alignment/${tumor}.picard.rmdup.bam > ${outputDir}/fusion/${tumor}.discordants.bam

        # Extract the split-read alignments
        samtools view -h ${outputDir}/alignment/${tumor}.picard.rmdup.bam \
            | /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
            | samtools view -Sb - \
            > ${outputDir}/fusion/${tumor}.splitters.bam

        lumpyexpress \
            -B ${outputDir}/alignment/${tumor}.picard.rmdup.bam \
            -S ${outputDir}/fusion/${tumor}.splitters.bam \
            -D ${outputDir}/fusion/${tumor}.discordants.bam \
            -o ${outputDir}/fusion/${tumor}.fusion.vcf

        perl ${codes_dir}/fusion.filter.pl ${outputDir}/fusion/${tumor}.fusion.vcf ${outputDir}/fusion/${tumor}.fusion.filter.vcf

        svtyper \
            -B ${outputDir}/alignment/${tumor}.picard.rmdup.bam \
            -i ${outputDir}/fusion/${tumor}.fusion.filter.vcf \
            -T ${ref} \
            -o ${outputDir}/fusion/${tumor}.fusion.gt.vcf

        table_annovar.pl \
            ${outputDir}/fusion/${tumor}.fusion.gt.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene \
            -operation g \
            --outfile ${outputDir}/fusion/${tumor}.fusion

        perl ${codes_dir}/fusion.reanno.624.pl ${outputDir}/qc/${tumor}_sorted_bamdst/depth.tsv.gz ${outputDir} ${tumor}
        perl ${codes_dir}/fusion_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project} ${cancer}
    >>>

    output{
        String fusion = "${outputDir}/fusion/${tumor}.fusion.pos.txt"
    }
}

# Tumor mutational burden: runs two project scripts. anno is only a chaining
# input (waits for annovar).
task TMB{
    String codes_dir
    String tumor
    String outputDir
    Array[String] anno

    command <<<
        perl ${codes_dir}/tmb_umi.pl ${outputDir} ${tumor}
        python3 ${codes_dir}/correct_f1r1_tmb.py ${outputDir} ${tumor}
    >>>

    output{
        String tmb="${outputDir}/mutation/${tumor}.tmb.txt"
    }
}

# Tumor/normal concordance and contamination with Conpair: GATK pileups for
# both samples, "chr" prefix stripped from the pileups, then
# verify_concordance.py and estimate_tumor_normal_contamination.py.
task conpair{
    String tumor
    String normal
    String outputDir
    String ref
    #for task chain
    Array[String] rmdupBam

    command <<<
        python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
            -M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
            -B ${outputDir}/alignment/${normal}.rmdup.bam \
            -O ${outputDir}/alignment/${normal}.gatk.mpileup \
            -R ${ref} \
            -G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar

        python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
            -M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
            -B ${outputDir}/alignment/${tumor}.rmdup.bam \
            -O ${outputDir}/alignment/${tumor}.gatk.mpileup \
            -R ${ref} \
            -G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar

        sed -i 's/^chr//g' ${outputDir}/alignment/${normal}.gatk.mpileup
        sed -i 's/^chr//g' ${outputDir}/alignment/${tumor}.gatk.mpileup

        python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \
            -T ${outputDir}/alignment/${tumor}.gatk.mpileup \
            -N ${outputDir}/alignment/${normal}.gatk.mpileup \
            -H \
            -O ${outputDir}/qc/${tumor}_concordance.txt

        python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \
            -T ${outputDir}/alignment/${tumor}.gatk.mpileup \
            -N ${outputDir}/alignment/${normal}.gatk.mpileup \
            -O ${outputDir}/qc/${tumor}_contamination.txt
    >>>

    output{
        # These are the paths the command above actually writes (under qc/,
        # not report/qc/).
        Array[String] concordance = [
            "${outputDir}/qc/${tumor}_concordance.txt",
            "${outputDir}/qc/${tumor}_contamination.txt"
        ]
    }
}

# Chemotherapy panel: runs the project script on the normal sample.
# rmdupBam is only a chaining input. The task declares no outputs.
task chemoTherapy{
    String codes_dir
    String normal
    String outputDir
    String ref
    String project
    String rmdupBam

    command <<<
        ${codes_dir}/chemo/chemo_panel.py -p ${project} -o ${outputDir} --n ${normal} --r ${ref}
    >>>
}

# Tumor purity estimation with sequenza from the normal and tumor pileups.
# A failure of the R step is reported with an echo and does not fail the task;
# cnvkit checks for the resulting confints file before using it.
task tumor_content{
    String ref
    String tumor
    String normal
    String outputDir
    String gc_wiggle
    String codes_dir
    #for task chain
    Array[String] pileup

    command <<<
        sequenza-utils bam2seqz -p -gc ${gc_wiggle} \
            -F ${ref} \
            -n ${outputDir}/alignment/${normal}.pileup \
            -t ${outputDir}/alignment/${tumor}.pileup | gzip >${outputDir}/qc/target_${tumor}.200base.seqz.gz
        sequenza-utils seqz_binning -w 200 -s ${outputDir}/qc/target_${tumor}.200base.seqz.gz | gzip > ${outputDir}/qc/target_${tumor}.200base.small.seqz.gz
        Rscript ${codes_dir}/sequenza.R ${tumor} ${outputDir}/qc/target_${tumor}.200base.small.seqz.gz ${outputDir}/qc/sequenza || echo "sequenza failed!"
    >>>

    output{
        String purity = "${outputDir}/qc/sequenza/${tumor}_CP_contours.pdf"
    }
}

# Copy-number calling with CNVkit against a pooled-normal reference, plus an
# absolute-copy-number call when a sequenza purity estimate is available.
task cnvkit{
    String tumor
    String outputDir
    String cancer
    String codes_dir
    String project
    #for task chain
    String rmdupBam
    String purity

    command <<<
        echo run cnvkit batch to processing cnv calling
        cnvkit.py batch \
            ${rmdupBam} \
            -r ${codes_dir}/cnvkit/624gene_pool_normal_reference.cnn \
            --drop-low-coverage --scatter --diagram --output-dir ${outputDir}/cnvkit

        cnvkit.py scatter \
            ${outputDir}/cnvkit/${tumor}.rmdup.cnr -s ${outputDir}/cnvkit/${tumor}.rmdup.cns \
            --y-max 3 --y-min -3 \
            --title ${tumor}.cns \
            -o ${outputDir}/cnvkit/${tumor}.cnv.png

        if [ -e "${outputDir}/qc/sequenza/${tumor}_confints_CP.txt" ]; then
            # absolute copy number; purity is column 1 of line 2 of the
            # sequenza confints file
            cnvkit.py call \
                -m clonal \
                ${outputDir}/cnvkit/${tumor}.rmdup.cns \
                -y \
                --purity `head -n2 ${outputDir}/qc/sequenza/${tumor}_confints_CP.txt |tail -n1|cut -f1` \
                --drop-low-coverage \
                --filter ampdel \
                -o ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn.hc
        fi

        perl ${codes_dir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn
        perl ${codes_dir}/cnv_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project} ${cancer}
    >>>

    output{
        String cnv = "${outputDir}/cnvkit/${tumor}.rmdup.cns"
    }
}

# SNV/indel post-processing: a sequence of project scripts run on the
# annotated variants. anno is only a chaining input (waits for annovar).
task dealwithsnvindel{
    String codes_dir
    String project
    String outputDir
    String tumor
    Array[String] anno
    String cancer

    command <<<
        perl ${codes_dir}/pick_variant_control_umi.pl ${outputDir} ${tumor}
        python3 ${codes_dir}/correct_f1r1.py ${outputDir} ${tumor}
        perl ${codes_dir}/pick_mut_splice_promoter_control.pl ${codes_dir} ${tumor} ${outputDir} ${project}
        perl ${codes_dir}/germline_targetTherapy.pl ${tumor} ${outputDir} ${project} ${cancer}
        perl ${codes_dir}/targetTherapy.pl ${tumor} ${outputDir} ${project} ${cancer}
    >>>

    output{
        String snv = "${outputDir}/mutation/${tumor}.snvindel.pos.txt"
    }
}

# MMR analysis via a project script. snv is only a chaining input.
task MMR{
    String codes_dir
    String tumor
    String outputDir
    String snv

    command <<<
        perl ${codes_dir}/mmr_controlsample.pl ${outputDir} ${tumor}
    >>>

    output{
        String mmr_out = "${outputDir}/MMR/${tumor}_mmr.txt"
    }
}

# HRR analysis via a project script. snv is only a chaining input.
task HRR{
    String codes_dir
    String tumor
    String outputDir
    String snv

    command <<<
        perl ${codes_dir}/hrr_controlsample_cfDNA.pl ${outputDir} ${tumor}
    >>>

    output{
        # mmr_out is a historical (copy-pasted) name kept so existing
        # references to HRR.mmr_out keep working; hrr_out is the same path
        # under the correct name.
        String mmr_out = "${outputDir}/HRR/${tumor}_hrr.txt"
        String hrr_out = "${outputDir}/HRR/${tumor}_hrr.txt"
    }
}

# HLA typing of the normal sample: fish HLA reads from each raw FASTQ mate
# with razers3, convert to FASTQ, then run OptiType.
# newdir is only a chaining input (ensures create_dir has run).
task HLA{
    String inputDir
    String outputDir
    String normal
    String newdir

    command <<<
        razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${outputDir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${inputDir}/*_${normal}_*1.fq.gz
        samtools bam2fq ${outputDir}/neoantigen/HLA/fished_1.bam > ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq
        rm ${outputDir}/neoantigen/HLA/fished_1.bam

        razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${outputDir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${inputDir}/*_${normal}_*2.fq.gz
        samtools bam2fq ${outputDir}/neoantigen/HLA/fished_2.bam > ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq
        rm ${outputDir}/neoantigen/HLA/fished_2.bam

        /dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py -i ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq --dna -v --prefix ${normal} -o ${outputDir}/neoantigen/HLA/
    >>>

    output{
        String hla = "${outputDir}/neoantigen/HLA/${normal}_result.tsv"
    }
}

# Neoantigen prediction via a project shell script.
# hla / vcf are only chaining inputs (wait for HLA and mutation_calling).
task neoantigen{
    String codes_dir
    String outputDir
    String normal
    String tumor
    String hla
    String vcf

    command <<<
        sh ${codes_dir}/predict_neoantigen.sh ${outputDir} ${normal} ${tumor} ${codes_dir}
    >>>

    output{
        String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${tumor}.all_epitopes.netchop.txt"
    }
}

# Microsatellite instability: mSINGS (via a project wrapper, whose results are
# moved from alignment/ into MSI/) followed by msisensor2 on the tumor BAM.
task MSI{
    String bed
    String codes_dir
    String tumor
    String outputDir
    String rmdupBam

    command <<<
        ##msings
        echo ${outputDir}/alignment/${tumor}.rmdup.bam >${outputDir}/MSI/bam.list
        ${codes_dir}/MSI/run_msings.sh ${outputDir}/MSI/bam.list
        mv ${outputDir}/alignment/${tumor}.rmdup ${outputDir}/MSI/
        mv ${outputDir}/alignment/msi_run_log.txt ${outputDir}/MSI/
        mv ${outputDir}/alignment/Combined_MSI.txt ${outputDir}/MSI/

        ##msisensor2
        msisensor2 msi -M /dataseq/jmdna/software/msisensor2/models_hg19_GRCh37 -t ${rmdupBam} -e ${bed} -b 10 -o ${outputDir}/MSI/${tumor}.msi
    >>>

    output{
        String target="${outputDir}/MSI/${tumor}.msi"
    }
}

# Hereditary-variant analysis via a project script.
# snv is only a chaining input (waits for dealwithsnvindel).
task hereditary{
    String codes_dir
    String tumor
    String outputDir
    String project
    String snv

    command <<<
        python3 ${codes_dir}/hereditary/hereditary.py -p ${project} -o ${outputDir} --n ${tumor}
    >>>

    output{
        # Exists so downstream tasks can chain on this one. The path is the
        # file auto_report copies from hereditary/.
        # NOTE(review): presumably written by hereditary.py - confirm.
        String result = "${outputDir}/hereditary/${tumor}.hereditary.pre.txt"
    }
}

# Final reporting: runs the project report scripts, then copies the result
# files of the upstream tasks into report/ and converts them with txt_xlsx.py.
# All inputs after codes_dir are chaining inputs only.
task auto_report{
    String tumor
    String normal
    String cancer
    String outputDir
    String codes_dir
    Array[String] qc_2
    Array[String] concordance
    String snv_result
    String cnv_result
    String fusion_result
    String tmb
    #for task chain: the cp commands below read files from MMR/, HRR/ and
    #hereditary/. Optional so that existing callers of this task keep working.
    String? mmr_result
    String? hrr_result
    String? hereditary_result

    command <<<
        perl ${codes_dir}/hpd_control_umi.pl ${outputDir} ${tumor}
        python3 ${codes_dir}/sample_post.py -s ${tumor} -o ${outputDir} --n ${normal}
        perl ${codes_dir}/indication.pl ${outputDir} ${cancer}
        python3 ${codes_dir}/qc_check_control_cfdna.py ${outputDir} ${tumor} ${normal}
        python3 ${codes_dir}/drug_dedup.py ${outputDir} ${tumor}
        python3 ${codes_dir}/report_template/624gene_cfdna_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}
        python3 ${codes_dir}/wdl_check.py -o ${outputDir}
        perl ${codes_dir}/file_format_change.pl ${outputDir} ${tumor}

        cp ${outputDir}/qc/qc_fail.txt ${outputDir}/report/qc_fail.txt
        cp ${outputDir}/qc/${tumor}_qc.txt ${outputDir}/report/
        cp ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn ${outputDir}/report/
        cp ${outputDir}/cnvkit/${tumor}.cnv.png ${outputDir}/report/
        # NOTE(review): "fuison" looks like a typo of "fusion"; left as-is
        # because it may match the name the upstream script really writes.
        cp ${outputDir}/fusion/${tumor}.fuison.vus.txt ${outputDir}/report/
        cp ${outputDir}/fusion/${tumor}.fusion.reanno.vcf ${outputDir}/report/
        # cp ${outputDir}/mutation/*filter* ${outputDir}/report/
        cp ${outputDir}/mutation/${tumor}.snvindel.Germline.target.txt ${outputDir}/report/
        cp ${outputDir}/mutation/${tumor}.nontarget.vus.txt ${outputDir}/report/
        cp ${outputDir}/mutation/${tumor}.snvindel.vus.txt ${outputDir}/report/
        cp ${outputDir}/mutation/${tumor}.target.splicing.txt ${outputDir}/report/
        cp ${outputDir}/mutation/${tumor}.target.promoter.txt ${outputDir}/report/
        cp ${outputDir}/mutation/${tumor}.tmb.txt ${outputDir}/report/
        cp ${outputDir}/MMR/${tumor}.mmr.pre.txt ${outputDir}/report/
        cp ${outputDir}/HRR/${tumor}.hrr.pre.txt ${outputDir}/report/
        cp ${outputDir}/HPD/${tumor}.hpd.pre.txt ${outputDir}/report/
        cp ${outputDir}/hereditary/${tumor}.hereditary.pre.txt ${outputDir}/report/

        python3 ${codes_dir}/txt_xlsx.py ${outputDir} ${tumor}
    >>>
}