From d0335deb1835a8472f42b263ce6081a6463c6e90 Mon Sep 17 00:00:00 2001 From: chaopower Date: Tue, 29 Aug 2023 10:31:06 +0800 Subject: [PATCH] =?UTF-8?q?=E8=9E=8D=E5=90=88=EF=BC=8Ccnv=20=E5=AE=8C?= =?UTF-8?q?=E5=96=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pipeline.wdl | 30 ++++++++++++++---------- script/info.txt | 3 +++ wdl/qc.wdl | 4 ++-- wdl/statistics.wdl | 57 ++++++++++++++++++++++++++++++++++++++++++++++ wdl/task.wdl | 32 ++++++++------------------ 5 files changed, 89 insertions(+), 37 deletions(-) create mode 100755 script/info.txt create mode 100644 wdl/statistics.wdl diff --git a/pipeline.wdl b/pipeline.wdl index 6a82116..98e43e0 100644 --- a/pipeline.wdl +++ b/pipeline.wdl @@ -1,6 +1,8 @@ import "./wdl/task.wdl" as mytask import "./wdl/qc.wdl" import "./wdl/alignment.wdl" +import "./wdl/statistics.wdl" + workflow pipeline { @@ -45,6 +47,20 @@ workflow pipeline { outputDir=workdir } + call statistics.statistics as statistics { + input: + tumor=tumor, + tumor_rmdupBam=alignment.tumor_rmdupBam, + + normal=normal, + normal_rmdupBam=alignment.normal_rmdupBam, + + ref=ref, + bed=bed, + outputDir=workdir, + codesDir=codesDir + } + call mytask.mutation_calling as mutation_calling { input: name=tumor, @@ -80,17 +96,8 @@ workflow pipeline { outputDir=workdir, rmdupBam=alignment.tumor_rmdupBam, cancer=cancer, - project=project - } - - call mytask.tumor_content as tumor_content { - input: - name=tumor, - tumor_pileup=alignment.tumor_pileup, - normal_pileup=alignment.normal_pileup, - ref=ref, - outputDir=workdir, - codesDir=codesDir, + project=project, + tumor_bamdst_depth=statistics.tumor_bamdst_depth } call mytask.cnvkit as cnvkit { @@ -106,7 +113,6 @@ workflow pipeline { cancer=cancer, codesDir=codesDir, project=project, - purity=tumor_content.purity } call mytask.chemo as chemo { diff --git a/script/info.txt b/script/info.txt new file mode 100755 index 0000000..d53b55b --- /dev/null +++ b/script/info.txt @@ -0,0 +1,3 @@ +project probe mutation splicing promoter cnv fusion long_indel chemotherapy_drug +160gene /dataseq/jmdna/database/bed/160.bed AKT1/ALK/APC/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/CCND1/CCND2/CCND3/CDK12/CDK4/CDK6/CDKN2A/CHEK1/CHEK2/CSF1R/CTNNB1/DDR2/EGFR/ERBB2/ERBB3/ERBB4/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MET/MTOR/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RB1/RET/ROS1/SMAD4/SMO/STK11/TP53/TSC1/TSC2/VHL MET TERT CDK4/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/MDM2/MDM4/CDKN2A ALK/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 NA +650gene /dataseq/jmdna/database/bed/650.bed ABL1/AKT1/AKT2/AKT3/ALK/APC/ARAF/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/BTK/CCND1/CCND2/CCND3/CDK12/CDK4/CDK6/CDKN2A/CDKN2B/CHEK1/CHEK2/CSF1R/CTNNB1/DDR2/EGFR/ERBB2/ERBB3/ERBB4/ESR1/EZH2/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FGFR4/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MAP2K2/MET/MPL/MTOR/MYCN/MYD88/NF1/NF2/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PTCH1/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RAF1/RB1/RET/ROS1/SMAD4/SMARCB1/SMO/STK11/TP53/TSC1/TSC2/VHL MET TERT CDK4/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/MYCN/MDM2/MDM4/CDKN2A/CDKN2B ALK/BRAF/FGFR1/FGFR2/FGFR3/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 NA diff --git a/wdl/qc.wdl b/wdl/qc.wdl index 2af1b67..60d6930 100644 --- a/wdl/qc.wdl +++ b/wdl/qc.wdl @@ -10,8 +10,8 @@ task runqc { mkdir ${outputDir}/qc fi - fastp -i ${inputDir}/*_$name_*1.fq.gz -o ${outputDir}/qc/${name}_clean_R1.fq.gz \ - -I ${inputDir}/*_$name_*2.fq.gz -O ${outputDir}/qc/${name}_clean_R2.fq.gz \ + fastp -i ${inputDir}/*_${name}_*1.fq.gz -o ${outputDir}/qc/${name}_clean_R1.fq.gz \ + -I ${inputDir}/*_${name}_*2.fq.gz -O ${outputDir}/qc/${name}_clean_R2.fq.gz \ -w 10 \ --correction \ --overlap_len_require 10 \ diff --git a/wdl/statistics.wdl b/wdl/statistics.wdl new file mode 100644 index 0000000..0ed6861 --- /dev/null +++ b/wdl/statistics.wdl @@ -0,0 +1,57 @@ +task runstatistics { + String name + String outputDir + String rmdupBam + + String ref + String codesDir + String bed + + command <<< + + if [ ! -d ${outputDir}/qc/${name}_bamdst ];then + mkdir -p ${outputDir}/qc/${name}_bamdst + fi + + samtools flagstat -@ 10 ${rmdupBam} >${outputDir}/qc/${name}.rmdup.flagstat + samtools stats --reference ${ref} -t ${bed} -@ 10 ${rmdupBam} > ${outputDir}/qc/${name}.rmdup.stat + bamdst -p ${bed} -o ${outputDir}/qc/${name}_bamdst ${rmdupBam} + + Rscript ${codesDir}/InsertAndDepthStat.R \ + ${outputDir}/qc/${name}_InsertAndDepthStat \ + ${outputDir}/qc/${name}_bamdst/insertsize.plot \ + ${outputDir}/qc/${name}_bamdst/depth_distribution.plot + >>> +} + +workflow statistics { + + String tumor + String tumor_rmdupBam + + String? normal + String? normal_rmdupBam + + String ref + String bed + String outputDir + String codesDir + + scatter(name in [tumor, normal]) { + if (defined(name)) { + call runstatistics { + input: + name=name, + outputDir=outputDir, + rmdupBam=if name==tumor then tumor_rmdupBam else normal_rmdupBam, + ref=ref, + codesDir=codesDir, + bed=bed + } + } + } + + output { + String tumor_bamdst_depth = "${outputDir}/qc/${tumor}_bamdst/depth.tsv.gz" + } +} \ No newline at end of file diff --git a/wdl/task.wdl b/wdl/task.wdl index 17ce7f7..f15e5cc 100644 --- a/wdl/task.wdl +++ b/wdl/task.wdl @@ -125,8 +125,6 @@ task annovar { -o ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \ --annotation TandemRepeatAnnotator - # -nt 10 - grep -v "^##" ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \ |cut -f8| paste ${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt - \ > ${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt @@ -161,6 +159,7 @@ task fusion { String rmdupBam String cancer String project + String tumor_bamdst_depth command <<< @@ -178,7 +177,7 @@ task fusion { > ${outputDir}/fusion/${name}.splitters.bam lumpyexpress \ - -B ${rmdupBam}\ + -B ${rmdupBam} \ -S ${outputDir}/fusion/${name}.splitters.bam \ -D ${outputDir}/fusion/${name}.discordants.bam \ -o ${outputDir}/fusion/${name}.fusion.vcf @@ -199,7 +198,7 @@ task fusion { -operation g \ --outfile ${outputDir}/fusion/${name}.fusion - perl ${codesDir}/fusion.reanno.pl ${outputDir}/qc/${name}_bamdst/depth.tsv.gz ${outputDir} ${name} + perl ${codesDir}/fusion.reanno.pl ${tumor_bamdst_depth} ${outputDir} ${name} perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codesDir} ${name} ${outputDir} ${project} ${cancer} >>> @@ -251,7 +250,6 @@ task cnvkit { String cancer String codesDir String project - String purity String accessBed = "/dataseq/jmdna/software/cnvkit-0.9.7/data/access-5k-mappable.hg19.bed" String annotateGene = "/dataseq/jmdna/software/cnvkit-0.9.7/data/refFlat.txt" @@ -277,18 +275,6 @@ task cnvkit { --title ${tumor}.cns \ -o ${outputDir}/cnvkit/${tumor}.cnv.png - if [ -e "${outputDir}/qc/sequenza/${tumor}_confints_CP.txt" ]; then - # absolute copy number - cnvkit.py call \ - -m clonal \ - ${outputDir}/cnvkit/${tumor}.rmdup.cns \ - -y \ - --purity `head -n2 ${outputDir}/qc/sequenza/${tumor}_confints_CP.txt |tail -n1|cut -f1` \ - --drop-low-coverage \ - --filter ampdel \ - -o ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn.hc - fi - perl ${codesDir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codesDir} ${tumor} ${outputDir} ${project} ${cancer} >>> @@ -323,14 +309,14 @@ task msi { String normal_rmdupBam command <<< - if [ ! -d ${outputDir}/MSI ];then - mkdir ${outputDir}/MSI + if [ ! -d ${outputDir}/msi ];then + mkdir ${outputDir}/msi fi msisensor2 msi -d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list \ - -n ${outputDir}/alignment/${normal_rmdupBam} \ - -t ${outputDir}/alignment/${tumor_rmdupBam} \ - -e ${bed} -b 10 -o ${outputDir}/MSI/${name}.msi + -n ${normal_rmdupBam} \ + -t ${tumor_rmdupBam} \ + -e ${bed} -b 10 -o ${outputDir}/msi/${name}.msi >>> output { @@ -385,4 +371,4 @@ task neoantigen { output { String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${name}.all_epitopes.netchop.txt" } -} \ No newline at end of file +}