From 47cbe45d8b5bcb5a3d241f5b01bf10abac87c2fc Mon Sep 17 00:00:00 2001 From: chaopower Date: Thu, 28 Dec 2023 09:14:58 +0800 Subject: [PATCH] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + codes/cromwell.examples.conf | 62 ++------------------- codes/filter_snpindel.pl | 10 ++-- codes/postprocess.py | 2 +- codes/run_wdl.py | 4 +- pipeline.wdl | 33 ++++++----- wdl/alignment.wdl | 105 +++++++++++++++++++++++------------ wdl/call_mutation.wdl | 4 +- wdl/chemo.wdl | 1 + wdl/cnv.wdl | 1 + wdl/fusion.wdl | 1 + wdl/hereditary.wdl | 3 +- wdl/msi.wdl | 4 +- wdl/pollution.wdl | 1 + wdl/postprocess.wdl | 6 +- wdl/qc.wdl | 1 + wdl/statistics.wdl | 3 +- wdl/tmb.wdl | 1 + 18 files changed, 122 insertions(+), 121 deletions(-) diff --git a/README.md b/README.md index d16519b..e66b5dc 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ java17 -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf ## dot ```bash womtool graph pipeline.wdl > other/pipeline.dot +dot -Tpng .\pipeline.dot -o pipeline.png ``` ![流程图](./other/pipeline.svg) diff --git a/codes/cromwell.examples.conf b/codes/cromwell.examples.conf index 326a87f..98822b9 100755 --- a/codes/cromwell.examples.conf +++ b/codes/cromwell.examples.conf @@ -39,7 +39,7 @@ system { # If 'true', a SIGTERM or SIGINT will trigger Cromwell to attempt to gracefully shutdown in server mode, # in particular clearing up all queued database writes before letting the JVM shut down. # The shutdown is a multi-phase process, each phase having its own configurable timeout. See the Dev Wiki for more details. - graceful-server-shutdown = false + graceful-server-shutdown = true # Cromwell will cap the number of running workflows at N max-concurrent-workflows = 10000 @@ -95,7 +95,7 @@ system { # These are the default values in Cromwell, in most circumstances there should not be a need to change them. # How frequently Cromwell should scan for aborts. - scan-frequency: 600 seconds + scan-frequency: 30 seconds # The cache of in-progress aborts. Cromwell will add entries to this cache once a WorkflowActor has been messaged to abort. # If on the next scan an 'Aborting' status is found for a workflow that has an entry in this cache, Cromwell will not ask @@ -134,7 +134,7 @@ workflow-options { default { # When a workflow type is not provided on workflow submission, this specifies the default type. - #workflow-type: WDL + workflow-type: WDL # When a workflow type version is not provided on workflow submission, this specifies the default type version. workflow-type-version: "draft-2" @@ -193,58 +193,6 @@ call-caching { # } } -# Google configuration -google { - - #application-name = "cromwell" - - # Default: just application default - #auths = [ - - # Application default - #{ - # name = "application-default" - # scheme = "application_default" - #}, - - # Use a static service account - #{ - # name = "service-account" - # scheme = "service_account" - # Choose between PEM file and JSON file as a credential format. They're mutually exclusive. - # PEM format: - # service-account-id = "my-service-account" - # pem-file = "/path/to/file.pem" - # JSON format: - # json-file = "/path/to/file.json" - #} - - # Use service accounts provided through workflow options - #{ - # name = "user-service-account" - # scheme = "user_service_account" - #} - #] -} - -docker { - hash-lookup { - # Set this to match your available quota against the Google Container Engine API - #gcr-api-queries-per-100-seconds = 1000 - - # Time in minutes before an entry expires from the docker hashes cache and needs to be fetched again - #cache-entry-ttl = "20 minutes" - - # Maximum number of elements to be kept in the cache. If the limit is reached, old elements will be removed from the cache - #cache-size = 200 - - # How should docker hashes be looked up. Possible values are "local" and "remote" - # "local": Lookup hashes on the local docker daemon using the cli - # "remote": Lookup hashes on docker hub, gcr, gar, quay - #method = "remote" - } -} - engine { # This instructs the engine which filesystems are at its disposal to perform any IO operation that it might need. # For instance, WDL variables declared at the Workflow level will be evaluated using the filesystems declared here. @@ -261,7 +209,7 @@ engine { # project = "google-billing-project" # } local { - #enabled: true + enabled: true } } } @@ -373,7 +321,7 @@ backend { # `script-epilogue` configures a shell command to run after the execution of every command block. # # If this value is not set explicitly, the default value is `sync`, equivalent to: - script-epilogue = "" + # script-epilogue = "" # # To turn off the default `sync` behavior set this value to an empty string: # script-epilogue = "" diff --git a/codes/filter_snpindel.pl b/codes/filter_snpindel.pl index 60fd083..1fad58e 100755 --- a/codes/filter_snpindel.pl +++ b/codes/filter_snpindel.pl @@ -136,13 +136,13 @@ while () { $hgvs =~ s/exon(\d+)/intron$intron;exon$exon/; $line[9] = join(":", ($gene, $hgvs)); } - elsif ($gene eq "MET") { - $line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx")); - $line[8] = 'skipping' - } else { push @reason, 'not_need_spl'; } + if ($gene eq "MET" ) { + $line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx")); + $line[8] = 'skipping' + } $protein = 'Truncating Mutations'; } else { @@ -175,7 +175,7 @@ while () { # tmb 流程去掉 不过滤但是修改 hgvs if ($pipeline eq 'tmb') { - @reason = grep(!/synonymous|benign/, @reason); + @reason = grep(!/synonymous/, @reason); if (($freq < 0.05) and ($sample_type eq 't')) { push @reason, 'lowfreq_tissue_tmb'; } diff --git a/codes/postprocess.py b/codes/postprocess.py index 86281ca..78a9a5e 100755 --- a/codes/postprocess.py +++ b/codes/postprocess.py @@ -369,7 +369,7 @@ class PostProcess: res = df.to_dict('records')[0] msi_res['msi_count'] = res['Total_Number_of_Sites'] msi_res['msi_value'] = res['%'] - if msi_res['msi_value'] >= 0.3: + if msi_res['msi_value'] >= 30: msi_res['msi_result'] = 'MSI-H' msi_res['msi_predict'] = '对免疫检查点抑制剂可能敏感' else: diff --git a/codes/run_wdl.py b/codes/run_wdl.py index 2c732ba..b94a7d7 100755 --- a/codes/run_wdl.py +++ b/codes/run_wdl.py @@ -94,9 +94,9 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl # f'{"-Dcall-caching.enabled=false " if uncache else ""}' # f'-Dconfig.file=/home/zhangchao/project/pipeline/workflow/script/cromwell.examples.conf ' \ - cmd4 = f'/home/install/product/workflow/software/jdk-17.0.7+7/bin/java -DLOG_MODE=standard ' \ + cmd4 = f'/usr/bin/java -DLOG_MODE=standard ' \ f'-Dconfig.file=$WORKFLOW/codes/cromwell.examples.conf ' \ - f'-jar $WORKFLOW/software/cromwell-86.jar run {wdl} --inputs {jsfile_path} ' + f'-jar $WORKFLOW/software/cromwell-51.jar run {wdl} --inputs {jsfile_path} ' # cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}' cmd = f'{cmd3}; {cmd4}' diff --git a/pipeline.wdl b/pipeline.wdl index 30a3e0b..c34e497 100644 --- a/pipeline.wdl +++ b/pipeline.wdl @@ -1,18 +1,18 @@ +# pipeline -import "wdl/qc.wdl" -import "wdl/alignment.wdl" -import "wdl/call_mutation.wdl" -import "wdl/fusion.wdl" -import "wdl/statistics.wdl" -import "wdl/cnv.wdl" -import "wdl/msi.wdl" -import "wdl/chemo.wdl" -import "wdl/hereditary.wdl" -import "wdl/pollution.wdl" -import "wdl/tmb.wdl" -import "wdl/postprocess.wdl" -import "wdl/neoantigen.wdl" - +import "wdl/qc.wdl" as qc +import "wdl/alignment.wdl" as alignment +import "wdl/call_mutation.wdl" as call_mutation +import "wdl/fusion.wdl" as fusion +import "wdl/statistics.wdl" as statistics +import "wdl/cnv.wdl" as cnv +import "wdl/msi.wdl" as msi +import "wdl/chemo.wdl" as chemo +import "wdl/hereditary.wdl" as hereditary +import "wdl/pollution.wdl" as pollution +import "wdl/tmb.wdl" as tmb +import "wdl/postprocess.wdl" as postprocess +import "wdl/neoantigen.wdl" as neoantigen workflow pipeline { @@ -199,6 +199,7 @@ workflow pipeline { fusion=call_fusion.fusion_vcf, cnv=call_cnv.cnv_filter, msi=call_msi.msi_txt, + tmb=call_tmb.tmb_txt, hereditary=call_hereditary.hereditary_txt, chemo=call_chemo.chemo_res, neoantigen=call_neoantigen.neoantigen_txt, @@ -209,4 +210,8 @@ workflow pipeline { cancer=cancer, project=project } + + output { + String result = "${output_dir}/report/${tumor}.merged_file.xlsx" + } } \ No newline at end of file diff --git a/wdl/alignment.wdl b/wdl/alignment.wdl index 2d8e3e2..f741df9 100755 --- a/wdl/alignment.wdl +++ b/wdl/alignment.wdl @@ -115,49 +115,84 @@ workflow alignment { String output_dir if (run) { - scatter(name in [tumor, normal]) { - if (defined(name)) { - call bwa { + # 单样本 + if (!defined(normal)) { + call bwa as bwa_tumor { + input: + name=tumor, + ref=ref, + output_dir=output_dir, + read1=tumor_r1, + read2=tumor_r2 + } + + if (umi) { + call markduplicates_genecore as tumor_markduplicates_genecore { input: - name=name, + name=tumor, ref=ref, output_dir=output_dir, - read1=if name==tumor then tumor_r1 else normal_r1, - read2=if name==tumor then tumor_r2 else normal_r2 - } - if (name==tumor) { - if (umi) { - call markduplicates_genecore as tumor_markduplicates_genecore { - input: - name=name, - ref=ref, - output_dir=output_dir, - sorted_bam=bwa.sorted_bam, - } - } - if (!umi) { - call markduplicates_picard as tumor_markduplicates_picard { - input: - name=name, - ref=ref, - output_dir=output_dir, - sorted_bam=bwa.sorted_bam, - } - } + sorted_bam=bwa_tumor.sorted_bam, } + } - if (name==select_first([normal, 'None'])) { - call markduplicates_picard as normal_markduplicates_picard { - input: - name=name, - ref=ref, - output_dir=output_dir, - sorted_bam=bwa.sorted_bam, - } + if (!umi) { + call markduplicates_picard as tumor_markduplicates_picard { + input: + name=tumor, + ref=ref, + output_dir=output_dir, + sorted_bam=bwa_tumor.sorted_bam, } - } } + # 双样本 + if (defined(normal)) { + call bwa as bwa_tumor_control { + input: + name=tumor, + ref=ref, + output_dir=output_dir, + read1=tumor_r1, + read2=tumor_r2 + } + call bwa as bwa_normal_control { + input: + name=normal, + ref=ref, + output_dir=output_dir, + read1=normal_r1, + read2=normal_r2 + } + call markduplicates_picard as tumor_markduplicates_picard_control { + input: + name=tumor, + ref=ref, + output_dir=output_dir, + sorted_bam=bwa_tumor_control.sorted_bam, + } + + if (umi) { + call markduplicates_genecore as normal_markduplicates_genecore { + input: + name=normal, + ref=ref, + output_dir=output_dir, + sorted_bam=bwa_normal_control.sorted_bam, + } + } + + if (!umi) { + call markduplicates_picard as normal_markduplicates_picard { + input: + name=normal, + ref=ref, + output_dir=output_dir, + sorted_bam=bwa_normal_control.sorted_bam, + } + } + } + } output { diff --git a/wdl/call_mutation.wdl b/wdl/call_mutation.wdl index 2658d98..4c477f7 100755 --- a/wdl/call_mutation.wdl +++ b/wdl/call_mutation.wdl @@ -1,3 +1,4 @@ +# mutation task mutation_calling_umi { String name @@ -266,7 +267,7 @@ task mutation_calling_tissue_control { vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t vcf_filter.py -i ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf \ - -o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \ + -o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \AF[0] > 3*FORMAT/AF[1] -e 'INFO/STATUS="StrongSomatic" | ( INFO/STATUS="LikelySomatic" && FORMAT/AF[0] > 3*FORMAT/AF[1] )' vcf_filter.py -i ${output_dir}/mutation/${name}.raw.snp_indel.vcf \ @@ -718,6 +719,7 @@ workflow call_mutation { cancer=cancer } } + if (!umi) { call mutation_calling_tissue_control { input: diff --git a/wdl/chemo.wdl b/wdl/chemo.wdl index bba4d73..7f9aa5e 100755 --- a/wdl/chemo.wdl +++ b/wdl/chemo.wdl @@ -1,3 +1,4 @@ +# chemo task run_chemo { String name diff --git a/wdl/cnv.wdl b/wdl/cnv.wdl index 9e145c4..a44a349 100755 --- a/wdl/cnv.wdl +++ b/wdl/cnv.wdl @@ -1,3 +1,4 @@ +# cnv task cnv_single { String name diff --git a/wdl/fusion.wdl b/wdl/fusion.wdl index b04a7cd..7995c58 100755 --- a/wdl/fusion.wdl +++ b/wdl/fusion.wdl @@ -1,3 +1,4 @@ + task rmdup_picard { String name String sorted_bam diff --git a/wdl/hereditary.wdl b/wdl/hereditary.wdl index 596f7df..c2dda62 100755 --- a/wdl/hereditary.wdl +++ b/wdl/hereditary.wdl @@ -1,3 +1,4 @@ +# hereditary task run_hereditary { String name @@ -41,4 +42,4 @@ workflow call_hereditary { output { String hereditary_txt = "${output_dir}/hereditary/${name}.hereditary.txt" } -} \ No newline at end of file +} diff --git a/wdl/msi.wdl b/wdl/msi.wdl index 53a76d7..b566ed2 100755 --- a/wdl/msi.wdl +++ b/wdl/msi.wdl @@ -1,3 +1,4 @@ +# msi task msi_single { String name @@ -89,6 +90,3 @@ workflow call_msi { String msi_txt = "${output_dir}/msi/${tumor}.msi.txt" } } - - - diff --git a/wdl/pollution.wdl b/wdl/pollution.wdl index b548e47..737cf8f 100755 --- a/wdl/pollution.wdl +++ b/wdl/pollution.wdl @@ -1,3 +1,4 @@ + task run_pollution { String name String output_dir diff --git a/wdl/postprocess.wdl b/wdl/postprocess.wdl index 0b43dfc..34ae89a 100755 --- a/wdl/postprocess.wdl +++ b/wdl/postprocess.wdl @@ -1,9 +1,11 @@ +# postprocess task run_post { String? mutation String? fusion String? cnv - String? msi + String? msi + String? tmb String? hereditary String? chemo String? neoantigen @@ -38,6 +40,7 @@ workflow call_postprocess { String? fusion String? cnv String? msi + String? tmb String? hereditary String? pollution String? chemo @@ -55,6 +58,7 @@ workflow call_postprocess { fusion=fusion, cnv=cnv, msi=msi, + tmb=tmb, hereditary=hereditary, chemo=chemo, neoantigen=neoantigen, diff --git a/wdl/qc.wdl b/wdl/qc.wdl index 13f4465..eaa4dd9 100755 --- a/wdl/qc.wdl +++ b/wdl/qc.wdl @@ -1,3 +1,4 @@ +#qc task runqc { String name diff --git a/wdl/statistics.wdl b/wdl/statistics.wdl index 0199cd0..6068bc7 100755 --- a/wdl/statistics.wdl +++ b/wdl/statistics.wdl @@ -1,3 +1,4 @@ +# statistics task run_statistics { String name @@ -17,7 +18,7 @@ task run_statistics { samtools stats --reference ${ref} -t ${bed} -@ 10 ${rmdupBam} > ${output_dir}/qc/${name}.rmdup.stat bamdst -p ${bed} -o ${output_dir}/qc/${name}_bamdst ${rmdupBam} qc_stat.py ${output_dir}/qc/${name}.json ${output_dir}/qc/${name}_bamdst/ ${output_dir}/qc/${name}_qc.txt - # + # InsertAndDepthStat.R \ # ${output_dir}/qc/${name}_InsertAndDepthStat \ # ${output_dir}/qc/${name}_bamdst/insertsize.plot \ diff --git a/wdl/tmb.wdl b/wdl/tmb.wdl index a0fe4dd..4b9e93f 100755 --- a/wdl/tmb.wdl +++ b/wdl/tmb.wdl @@ -1,3 +1,4 @@ +# tmb task run_tmb { String name