From 188e7dd8a4fd06f461b75c6537c7e17d36c7eaa7 Mon Sep 17 00:00:00 2001 From: chaopower Date: Wed, 27 Sep 2023 10:47:03 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4call=5Fmutation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 +- pipeline.wdl | 201 +++++---------------------------------- wdl/alignment.wdl | 122 ++++++++++++++++-------- wdl/call_mutation.wdl | 215 ++++++++++++++++++++++++++++++++++++++++++ wdl/qc.wdl | 118 +++++++++++++++++------ wdl/task.wdl | 69 +++++--------- 6 files changed, 439 insertions(+), 292 deletions(-) create mode 100644 wdl/call_mutation.wdl diff --git a/README.md b/README.md index da61ac9..d16519b 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,10 @@ ## run script example ```bash -/home/zhangchao/soft/jdk-17.0.7+7/bin/java -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf \ --jar /home/zhangchao/soft/cromwell-85.jar run \ +export PATH=/dataseq/product/workflow/software/bin:$PATH + +java17 -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf \ +-jar /dataseq/product/workflow/software/cromwell-85.jar run \ --inputs /home/zhangchao/project/pipeline/workflow/test/20230814.json \ /home/zhangchao/project/pipeline/workflow/pipeline.wdl ``` diff --git a/pipeline.wdl b/pipeline.wdl index 89343c7..eed900a 100644 --- a/pipeline.wdl +++ b/pipeline.wdl @@ -1,21 +1,23 @@ import "./wdl/task.wdl" as mytask import "./wdl/qc.wdl" import "./wdl/alignment.wdl" -import "./wdl/statistics.wdl" +import "./wdl/call_mutation.wdl" workflow pipeline { String tumor String? normal - String inputDir - String outputDir - String cancer - String project="650gene" - String codesDir="/dataseq/jmdna/codes/pancancer_controlsample" - String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa" - String bed = "/dataseq/jmdna/database/bed/650.bed" + Boolean umi=false - String workdir="${outputDir}/${tumor}" + String input_dir + String output_dir + + String bed + + String codesDir="/home/zhangchao/project/pipeline/workflow/script" + String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa" + + String workdir="${output_dir}" call mytask.create_dir as create_dir { input: @@ -26,8 +28,9 @@ workflow pipeline { input: tumor=tumor, normal=normal, - inputDir=inputDir, - outputDir=workdir + umi=umi, + input_dir=input_dir, + output_dir=workdir } call alignment.alignment as alignment { @@ -41,183 +44,23 @@ workflow pipeline { normal_r1=qc.normal_r1, normal_r2=qc.normal_r2, + umi=umi, + ref=ref, bed=bed, - outputDir=workdir + output_dir=workdir } - call statistics.statistics as statistics { + call call_mutation.call_mutation as call_mutation { input: tumor=tumor, - tumor_rmdupBam=alignment.tumor_rmdupBam, - + tumor_rmdup_bam=alignment.tumor_rmdup_bam, normal=normal, - normal_rmdupBam=alignment.normal_rmdupBam, + normal_rmdup_bam=alignment.normal_rmdup_bam, + umi=umi, ref=ref, bed=bed, - outputDir=workdir, - codesDir=codesDir - } - - call mytask.conpair as conpair { - input: - codesDir=codesDir, - name=tumor, - tumor_rmdupBam=alignment.tumor_rmdupBam, - normal_rmdupBam=alignment.normal_rmdupBam, - outputDir=workdir, - ref=ref - } - - call mytask.mutation_calling as mutation_calling { - input: - name=tumor, - tumor_pileup=alignment.tumor_pileup, - normal_pileup=alignment.normal_pileup, - outputDir=workdir - } - - call mytask.annovar as annovar { - input: - name=tumor, - outputDir=workdir, - ref=ref, - somatic_hc_vcf=mutation_calling.somatic_hc_vcf, - germline_vcf=mutation_calling.germline_vcf, - loh_hc_vcf=mutation_calling.loh_hc_vcf, - rmdupBam=alignment.tumor_rmdupBam - } - - call mytask.dealwithsnvindel as dealwithsnvindel { - input: - codesDir=codesDir, - name=tumor, - somatic_all_anno=annovar.somatic_all_anno, - germline_anno=annovar.germline_anno, - project=project, - outputDir=workdir, - cancer=cancer - } - - call mytask.hereditary as hereditary { - input: - codesDir=codesDir, - name=tumor, - outputDir=workdir, - project=project, - germline_filtered = dealwithsnvindel.germline_filtered - } - - call mytask.tmb as tmb { - input: - codesDir=codesDir, - name=tumor, - outputDir=workdir, - somatic_anno=annovar.somatic_anno - } - - call mytask.fusion as fusion { - input: - name=tumor, - ref=ref, - codesDir=codesDir, - outputDir=workdir, - rmdupBam=alignment.tumor_rmdupBam, - cancer=cancer, - project=project, - tumor_bamdst_depth=statistics.tumor_bamdst_depth - } - - call mytask.cnvkit as cnvkit { - input: - tumor=tumor, - normal=normal, - - tumor_rmdupBam=alignment.tumor_rmdupBam, - normal_rmdupBam=alignment.normal_rmdupBam, - ref=ref, - bed=bed, - outputDir=workdir, - cancer=cancer, - codesDir=codesDir, - project=project, - } - - call mytask.chemo as chemo { - input: - codesDir=codesDir, - outputDir=workdir, - normal=normal, - project=project, - rmdupBam=alignment.tumor_rmdupBam, - } - - call mytask.msi as msi { - input: - bed=bed, - name=tumor, - outputDir=workdir, - tumor_rmdupBam = alignment.tumor_rmdupBam, - normal_rmdupBam =alignment.normal_rmdupBam - } - - call mytask.hla as hla { - input: - inputDir=inputDir, - outputDir=workdir, - normal=normal, - } - - call mytask.neoantigen as neoantigen { - input: - codesDir=codesDir, - outputDir=workdir, - name=tumor, - somatic_hc_vcf=mutation_calling.somatic_hc_vcf, - normal=normal, - hla=hla.hla - } - - call mytask.mmr as mmr { - input: - codesDir=codesDir, - name=tumor, - outputDir=workdir, - germline_filtered = dealwithsnvindel.germline_filtered - } - - call mytask.hrr as hrr { - input: - codesDir=codesDir, - name=tumor, - outputDir=workdir, - germline_filtered = dealwithsnvindel.germline_filtered - } - - call mytask.hotspot as hotspot { - input: - name=tumor, - outputDir=workdir, - ref=ref, - rmdupBam=alignment.tumor_rmdupBam, - codesDir=codesDir, - } - - call mytask.auto_report { - input: - tumor=tumor, - normal=normal, - outputDir=workdir, - codesDir=codesDir, - cancer=cancer, - cnv_cns=cnvkit.cns, - cnv_png=cnvkit.png, - fusion_pos=fusion.fusion, - snvindel_filtered=dealwithsnvindel.snvindel_filtered, - tmb=tmb.tmb, - mmr=mmr.mmr, - hrr=hrr.hrr, - hereditary_pre=hereditary.hereditary_pre + output_dir=workdir } } diff --git a/wdl/alignment.wdl b/wdl/alignment.wdl index 304a8ea..8868bed 100644 --- a/wdl/alignment.wdl +++ b/wdl/alignment.wdl @@ -4,48 +4,75 @@ task bwa { String name String read1 String read2 - String outputDir + String output_dir String ref command <<< - if [ ! -d ${outputDir}/alignment ];then - mkdir ${outputDir}/alignment + if [ ! -d ${output_dir}/alignment ];then + mkdir ${output_dir}/alignment fi bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} ${read1} ${read2} | \ - samtools view -@ 10 -bh -o - | samtools sort -@ 10 -o ${outputDir}/alignment/${name}.sorted.bam - samtools index ${outputDir}/alignment/${name}.sorted.bam + samtools view -@ 10 -bh -o - | samtools sort -@ 10 -o ${output_dir}/alignment/${name}.sorted.bam + samtools index ${output_dir}/alignment/${name}.sorted.bam >>> output { - String sortedBam = "${outputDir}/alignment/${name}.sorted.bam" + String sorted_bam = "${output_dir}/alignment/${name}.sorted.bam" } } #remove PCR duplicates -task markduplicates { +task markduplicates_genecore { String name String ref - String sortedBam - String outputDir + String sorted_bam + String output_dir command <<< - if [ ! -d ${outputDir}/alignment ];then - mkdir ${outputDir}/alignment + if [ ! -d ${output_dir}/alignment ];then + mkdir ${output_dir}/alignment fi + + gencore -i ${sorted_bam} \ + -o ${output_dir}/alignment/${name}.rmdup.bam \ + -r ${ref} \ + -u UMI \ + -j ${output_dir}/alignment/${name}_rmdup.json \ + -h ${output_dir}/alignment/${name}_rmdup.html + + samtools index ${output_dir}/alignment/${name}.rmdup.bam + >>> + + output { + String rmdup_bam = "${output_dir}/alignment/${name}.rmdup.bam" + } +} + +task markduplicates_picard { + String name + String ref + String sorted_bam + String output_dir + + command <<< + if [ ! -d ${output_dir}/alignment ];then + mkdir ${output_dir}/alignment + fi + java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \ - I=${sortedBam} \ - O=${outputDir}/alignment/${name}.rmdup.bam \ + I=${sorted_bam} \ + O=${output_dir}/alignment/${name}.rmdup.bam \ CREATE_INDEX=true \ - M=${outputDir}/alignment/${name}.rmdup.metrics.txt \ + M=${output_dir}/alignment/${name}.rmdup.metrics.txt \ R=${ref} >>> output { - String rmdupBam = "${outputDir}/alignment/${name}.rmdup.bam" + String rmdup_bam = "${output_dir}/alignment/${name}.rmdup.bam" } } @@ -56,16 +83,16 @@ task generater_mpileup { String rmdupBam String ref String bed - String outputDir + String output_dir command <<< - samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} ${rmdupBam} -o ${outputDir}/alignment/${name}.pileup + samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} ${rmdupBam} -o ${output_dir}/alignment/${name}.pileup >>> output { - String pileup = "${outputDir}/alignment/${name}.pileup" + String pileup = "${output_dir}/alignment/${name}.pileup" } } @@ -79,9 +106,11 @@ workflow alignment { String? normal_r1 String? normal_r2 + Boolean umi + String ref String bed - String outputDir + String output_dir scatter(name in [tumor, normal]) { if (defined(name)) { @@ -89,36 +118,51 @@ workflow alignment { input: name=name, ref=ref, - outputDir=outputDir, + output_dir=output_dir, read1=if name==tumor then tumor_r1 else normal_r1, read2=if name==tumor then tumor_r2 else normal_r2 } - call markduplicates { - input: - name=name, - ref=ref, - outputDir=outputDir, - sortedBam=bwa.sortedBam + if (name==tumor) { + if (umi) { + call markduplicates_genecore as tumor_markduplicates_genecore { + input: + name=name, + ref=ref, + output_dir=output_dir, + sorted_bam=bwa.sorted_bam, + } + } + if (!umi) { + call markduplicates_picard as tumor_markduplicates_picard { + input: + name=name, + ref=ref, + output_dir=output_dir, + sorted_bam=bwa.sorted_bam, + } + } } - call generater_mpileup { - input: - name=name, - ref=ref, - outputDir=outputDir, - bed=bed, - rmdupBam=markduplicates.rmdupBam + + if (name==select_first([normal, 'None'])) { + call markduplicates_picard as normal_markduplicates_picard { + input: + name=name, + ref=ref, + output_dir=output_dir, + sorted_bam=bwa.sorted_bam, + } } } } output { - String tumor_sortedBam = "${outputDir}/alignment/${tumor}.sorted.bam" - String tumor_rmdupBam = "${outputDir}/alignment/${tumor}.rmdup.bam" - String tumor_pileup = "${outputDir}/alignment/${tumor}.pileup" - String normal_sortedBam = "${outputDir}/alignment/${normal}.sorted.bam" - String normal_rmdupBam = "${outputDir}/alignment/${normal}.rmdup.bam" - String normal_pileup = "${outputDir}/alignment/${normal}.pileup" + String tumor_sorted_bam = "${output_dir}/alignment/${tumor}.sorted.bam" + String tumor_rmdup_bam = "${output_dir}/alignment/${tumor}.rmdup.bam" + String tumor_pileup = "${output_dir}/alignment/${tumor}.pileup" + String normal_sorted_bam = "${output_dir}/alignment/${normal}.sorted.bam" + String normal_rmdup_bam = "${output_dir}/alignment/${normal}.rmdup.bam" + String normal_pileup = "${output_dir}/alignment/${normal}.pileup" } } \ No newline at end of file diff --git a/wdl/call_mutation.wdl b/wdl/call_mutation.wdl new file mode 100644 index 0000000..41efa3f --- /dev/null +++ b/wdl/call_mutation.wdl @@ -0,0 +1,215 @@ +task mutation_calling_umi { + String name + String output_dir + String rmdup_bam + String ref + String bed + command <<< + + if [ ! -d ${output_dir}/mutation ];then + mkdir ${output_dir}/mutation + fi + + + #1条call + # 这个情况是reads数目只有1,但是如果去掉了这个reads数导致数据量减少很多 + # -r 3 是指有3条这样样的reads支撑 + # -f 是指频率 以2条方式的call出来的变异频率可以比1条的方式更可信 + + java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \ + -G ${ref} \ + -f 0.001 \ + -N ${name} \ + -b ${rmdup_bam} \ + -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \ + | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ + | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \ + -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf + + #提取>=2条矫正的序列 + python3 /home/zhangchao/project/pipeline/control/script/fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam + samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam + + # 保证 1r call mut umi family 里面有2条reads + #2条矫正的call + java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \ + -f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \ + -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ + | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf + + #merge突变,以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域(AF<0.01)进行矫正。 + perl /home/zhangchao/project/pipeline/control/script/1r_plus_2r.pl \ + ${output_dir}/mutation/${name}.1r.snp.indel.vcf \ + ${output_dir}/mutation/${name}.2r.snp.indel.vcf \ + ${output_dir}/mutation/${name}.snp.indel.vcf + + table_annovar.pl \ + ${output_dir}/mutation/${name}.snp.indel.vcf \ + /dataseq/jmdna/software/annovar/humandb/ \ + -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ + -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \ + -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \ + --intronhgvs 50 \ + -operation g,f,f,f,f,f,f,f,f,f,r \ + --outfile ${output_dir}/mutation/${name}.snp.indel.anno + + >>> + + output { + String vcf = "${output_dir}/mutation/${name}.filter.flag.snp.indel.vcf" + } +} + +task mutation_calling_tissue { + String name + String bed + String ref + String output_dir + String rmdup_bam + + command <<< + if [ ! -d ${output_dir}/mutation ];then + mkdir ${output_dir}/mutation + fi + + # vardict + java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \ + -G ${ref} \ + -f 0.01 \ + -N ${name} \ + -b ${rmdup_bam} \ + -UN \ + -Q 20 \ + -m 3 \ + -r 3 \ + -th 10 \ + -c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ + |/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf + + table_annovar.pl \ + ${output_dir}/mutation/${name}.snp.indel.vcf \ + /dataseq/jmdna/software/annovar/humandb/ \ + -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ + -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \ + -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \ + -operation g,f,f,f,f,f,f,f,f,f \ + --intronhgvs 50 \ + --outfile ${output_dir}/mutation/${name}.snp.indel.anno + + >>> + + output { + String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf" + } +} + +task mutation_calling_tissue_control { + String name + String bed + String ref + String output_dir + String tumor_rmdup_bam + String normal_rmdup_bam + + command <<< + if [ ! -d ${output_dir}/mutation ];then + mkdir ${output_dir}/mutation + fi + + java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \ + -G ${ref} \ + -f 0.01 \ + -N ${name} \ + -b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \ + -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \ + | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf + + table_annovar.pl \ + ${output_dir}/mutation/${name}.snp.indel.vcf \ + /dataseq/jmdna/software/annovar/humandb/ \ + -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ + -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \ + -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \ + -operation g,f,f,f,f,f,f,f,f,f \ + --intronhgvs 50 \ + --outfile ${output_dir}/mutation/${name}.snp.indel.anno + + >>> + + output { + String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf" + } +} + +workflow call_mutation { + + String tumor + String tumor_rmdup_bam + String? normal + String? normal_rmdup_bam + Boolean umi + String output_dir + String ref + String bed + + scatter(name in [tumor, normal]) { + if (defined(name)) { + if (name==tumor) { + if (umi) { + call mutation_calling_umi as tumor_mutation_calling_umi { + input: + name=name, + output_dir=output_dir, + ref=ref, + bed=bed, + rmdup_bam=tumor_rmdup_bam + } + } + if (!umi) { + # 单样本模式,normal没有定义 + if (name==select_first([normal, tumor])) { + call mutation_calling_tissue as tumor_mutation_calling_tissue { + input: + name=name, + output_dir=output_dir, + ref=ref, + bed=bed, + rmdup_bam=normal_rmdup_bam + } + } + # 双样本模式,normal有定义 + if (name!=select_first([normal, tumor])) { + call mutation_calling_tissue_control as tumor_mutation_calling_tissue_control { + input: + name=name, + output_dir=output_dir, + ref=ref, + bed=bed, + tumor_rmdup_bam=tumor_rmdup_bam, + normal_rmdup_bam=normal_rmdup_bam + } + } + } + } + if (name==select_first([normal, 'None'])) { + if (umi) { + call mutation_calling_tissue as normal_mutation_calling_tissue { + input: + name=name, + output_dir=output_dir, + ref=ref, + bed=bed, + rmdup_bam=normal_rmdup_bam + } + } + + } + + } + } + + output { + String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf" + String somatic_nc_vcf = "${output_dir}/mutation/${normal}.snp.indel.vcf" + } +} \ No newline at end of file diff --git a/wdl/qc.wdl b/wdl/qc.wdl index 60d6930..fccd4bf 100644 --- a/wdl/qc.wdl +++ b/wdl/qc.wdl @@ -1,31 +1,71 @@ task runqc { String name - String inputDir - String outputDir + String input_dir + String output_dir command <<< - echo "###### ${name} fastp beginning at: $(date) ######" + # echo "###### ${name} fastp beginning at: $(date) ######" - if [ ! -d ${outputDir}/qc ];then - mkdir ${outputDir}/qc + if [ ! -d ${output_dir}/qc ];then + mkdir ${output_dir}/qc fi - fastp -i ${inputDir}/*_${name}_*1.fq.gz -o ${outputDir}/qc/${name}_clean_R1.fq.gz \ - -I ${inputDir}/*_${name}_*2.fq.gz -O ${outputDir}/qc/${name}_clean_R2.fq.gz \ + fastp \ + -i ${input_dir}/*_${name}_*1.fq.gz \ + -o ${output_dir}/qc/${name}_clean_R1.fq.gz \ + -I ${input_dir}/*_${name}_*2.fq.gz \ + -O ${output_dir}/qc/${name}_clean_R2.fq.gz \ -w 10 \ + -e 20 \ --correction \ --overlap_len_require 10 \ - -j ${outputDir}/qc/${name}.json \ - -h ${outputDir}/qc/${name}.html \ - --report_title $name \ - -e 20 + -j ${output_dir}/qc/${name}.json \ + -h ${output_dir}/qc/${name}.html \ + --report_title ${name} - echo "###### ${name} fastp end at: $(date) ######" >>> output { - String r1 = "${outputDir}/qc/${name}_clean_R1.fq.gz " - String r2 = "${outputDir}/qc/${name}_clean_R2.fq.gz " - String json = "${outputDir}/qc/${name}.json" + String r1 = "${output_dir}/qc/${name}_clean_R1.fq.gz " + String r2 = "${output_dir}/qc/${name}_clean_R2.fq.gz " + String json = "${output_dir}/qc/${name}.json" + } + +} + +task umiqc { + String name + String input_dir + String output_dir + + command <<< + # echo "###### ${name} fastp beginning at: $(date) ######" + + if [ ! -d ${output_dir}/qc ];then + mkdir ${output_dir}/qc + fi + + fastp -i ${input_dir}/*_${name}_*1.fq.gz -o ${output_dir}/qc/${name}_clean_R1.fq.gz \ + -I ${input_dir}/*_${name}_*2.fq.gz -O ${output_dir}/qc/${name}_clean_R2.fq.gz \ + -w 10 \ + --correction \ + --overlap_len_require=10 \ + --umi \ + --umi_loc=per_read \ + --umi_len=4 \ + --umi_prefix=UMI \ + --umi_skip=3 \ + --umi_delim \ : \ + --disable_trim_poly_g \ + -j ${output_dir}/qc/${name}.json \ + -h ${output_dir}/qc/${name}.html \ + --report_title ${name} + + >>> + + output { + String r1 = "${output_dir}/qc/${name}_clean_R1.fq.gz " + String r2 = "${output_dir}/qc/${name}_clean_R2.fq.gz " + String json = "${output_dir}/qc/${name}.json" } } @@ -34,27 +74,49 @@ workflow qc { String tumor String? normal - String inputDir - String outputDir + Boolean umi + String input_dir + String output_dir scatter(name in [tumor, normal]) { if (defined(name)) { - call runqc { - input: - name=name, - inputDir=inputDir, - outputDir=outputDir + if (name==tumor) { + if (umi) { + call umiqc as run_umi_qc { + input: + name=name, + input_dir=input_dir, + output_dir=output_dir + } + } + if (!umi) { + call runqc as run_tumor_qc { + input: + name=name, + input_dir=input_dir, + output_dir=output_dir + } + } } + if (name==select_first([normal, 'None'])) { + call runqc as run_normal_qc { + input: + name=name, + input_dir=input_dir, + output_dir=output_dir + } + } + } } output { - String tumor_r1 = "${outputDir}/qc/${tumor}_clean_R1.fq.gz " - String tumor_r2 = "${outputDir}/qc/${tumor}_clean_R2.fq.gz " - String tumor_json = "${outputDir}/qc/${tumor}.json" - String normal_r1 = "${outputDir}/qc/${normal}_clean_R1.fq.gz " - String normal_r2 = "${outputDir}/qc/${normal}_clean_R2.fq.gz " - String normal_json = "${outputDir}/qc/${normal}.json" + String tumor_r1 = "${output_dir}/qc/${tumor}_clean_R1.fq.gz " + String tumor_r2 = "${output_dir}/qc/${tumor}_clean_R2.fq.gz " + String tumor_json = "${output_dir}/qc/${tumor}.json" + String normal_r1 = "${output_dir}/qc/${normal}_clean_R1.fq.gz " + String normal_r2 = "${output_dir}/qc/${normal}_clean_R2.fq.gz " + String normal_json = "${output_dir}/qc/${normal}.json" } } diff --git a/wdl/task.wdl b/wdl/task.wdl index 23cbf13..50c742d 100644 --- a/wdl/task.wdl +++ b/wdl/task.wdl @@ -3,7 +3,7 @@ task create_dir { String workdir command <<< - if [ ! -d ${workdir}];then + if [ ! -d ${workdir} ];then mkdir -p ${workdir}/log fi @@ -12,9 +12,10 @@ task create_dir { task mutation_calling { String name - String tumor_pileup - String normal_pileup + String tumor_rmdupBam + String normal_rmdupBam String outputDir + String bed command <<< @@ -22,47 +23,27 @@ task mutation_calling { mkdir ${outputDir}/mutation fi - java -jar $VARSCAN somatic ${tumor_pileup} ${normal_pileup} \ - --output-snp ${outputDir}/mutation/${name}.snp.vcf \ - --output-indel ${outputDir}/mutation/${name}.indel.vcf \ - --min-var-freq 0.01 \ - --min-freq-for-hom 0.9 \ - --somatic-p-value 0.05 \ - --output-vcf 1 \ - --min-avg-qual 20 \ - --min-coverage-normal 10 \ - --min-coverage-tumor 30 \ - --min-reads2 3 + java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \ + -G /dataseq/jmdna/database/genome/hg19/hg19.fa \ + -f 0.01 \ + -N ${name} \ + -b "${tumor_rmdupBam}|${normal_rmdupBam}" \ + -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | \ + /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R | \ + /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \ + > ${outputDir}/mutation/${name}_vardict.snp.indel.vcf - java -jar $VARSCAN processSomatic \ - ${outputDir}/mutation/${name}.snp.vcf \ - --min-tumor-freq 0.01 \ - --max-normal-freq 0.01 \ - --p-value 0.05 - - java -jar $VARSCAN processSomatic \ - ${outputDir}/mutation/${name}.indel.vcf \ - --min-tumor-freq 0.01 \ - --max-normal-freq 0.01 \ - --p-value 0.05 - - java -jar $GATK MergeVcfs \ - -I ${outputDir}/mutation/${name}.snp.Somatic.hc.vcf \ - -I ${outputDir}/mutation/${name}.indel.Somatic.hc.vcf \ - -O ${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf \ - -D /dataseq/jmdna/database/genome/hg19/hg19.dict - - java -jar $GATK MergeVcfs \ - -I ${outputDir}/mutation/${name}.snp.Germline.vcf \ - -I ${outputDir}/mutation/${name}.indel.Germline.vcf \ - -O ${outputDir}/mutation/${name}.snp.indel.Germline.vcf \ - -D /dataseq/jmdna/database/genome/hg19/hg19.dict - - java -jar $GATK MergeVcfs \ - -I ${outputDir}/mutation/${name}.snp.LOH.hc.vcf \ - -I ${outputDir}/mutation/${name}.indel.LOH.hc.vcf \ - -O ${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf \ - -D /dataseq/jmdna/database/genome/hg19/hg19.dict + vep \ + --input_file ${outputDir}/mutation/${name}_vardict.snp.indel.vcf \ + --output_file ${outputDir}/mutation/${name}_vardict_vep.snp.indel.vcf \ + --format vcf \ + --vcf \ + --symbol \ + --terms SO \ + --hgvs \--fasta /dataseq/jmdna/database/genome/hg19/hg19.fa \ + --offline --cache --dir_cache /home/software/.vep \ + --pick \ + --force_overwrite >>> @@ -580,7 +561,7 @@ task auto_report { perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer} python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor} perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor} - python3 ${codesDir}/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer} + python3 ${codesDir}/report_template/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer} ln -s ${cnv_cns} ${outputDir}/report/ ln -s ${cnv_png} ${outputDir}/report/