微调

2023-12-28 09:14:58 +08:00 · 2023-12-28 09:14:58 +08:00 · 47cbe45d8b
parent b54741ee46
commit 47cbe45d8b
18 changed files with 122 additions and 121 deletions
--- a/README.md
+++ b/README.md
@ -13,6 +13,7 @@ java17 -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf
 ## dot 
 ```bash
 womtool graph pipeline.wdl > other/pipeline.dot
 dot -Tsvg other/pipeline.dot -o other/pipeline.svg
 ```
 ![流程图](./other/pipeline.svg)
--- a/codes/cromwell.examples.conf
+++ b/codes/cromwell.examples.conf
@ -39,7 +39,7 @@ system {
  # If 'true', a SIGTERM or SIGINT will trigger Cromwell to attempt to gracefully shutdown in server mode,
  # in particular clearing up all queued database writes before letting the JVM shut down.
  # The shutdown is a multi-phase process, each phase having its own configurable timeout. See the Dev Wiki for more details.
-  graceful-server-shutdown = false
+  graceful-server-shutdown = true
  # Cromwell will cap the number of running workflows at N
  max-concurrent-workflows = 10000
@ -95,7 +95,7 @@ system {
    # These are the default values in Cromwell, in most circumstances there should not be a need to change them.
    # How frequently Cromwell should scan for aborts.
-    scan-frequency: 600 seconds
+    scan-frequency: 30 seconds
    # The cache of in-progress aborts. Cromwell will add entries to this cache once a WorkflowActor has been messaged to abort.
    # If on the next scan an 'Aborting' status is found for a workflow that has an entry in this cache, Cromwell will not ask
@ -134,7 +134,7 @@ workflow-options {
  default {
    # When a workflow type is not provided on workflow submission, this specifies the default type.
-    #workflow-type: WDL
+    workflow-type: WDL
    # When a workflow type version is not provided on workflow submission, this specifies the default type version.
    workflow-type-version: "draft-2"
@ -193,58 +193,6 @@ call-caching {
  # }
 }
 # Google configuration
 google {
  #application-name = "cromwell"
  # Default: just application default
  #auths = [
  # Application default
  #{
  #  name = "application-default"
  #  scheme = "application_default"
  #},
  # Use a static service account
  #{
  #  name = "service-account"
  #  scheme = "service_account"
  #  Choose between PEM file and JSON file as a credential format. They're mutually exclusive.
  #  PEM format:
  #  service-account-id = "my-service-account"
  #  pem-file = "/path/to/file.pem"
  #  JSON format:
  #  json-file = "/path/to/file.json"
  #}
  # Use service accounts provided through workflow options
  #{
  #   name = "user-service-account"
  #   scheme = "user_service_account"
  #}
  #]
 }
 docker {
  hash-lookup {
    # Set this to match your available quota against the Google Container Engine API
    #gcr-api-queries-per-100-seconds = 1000
    # Time in minutes before an entry expires from the docker hashes cache and needs to be fetched again
    #cache-entry-ttl = "20 minutes"
    # Maximum number of elements to be kept in the cache. If the limit is reached, old elements will be removed from the cache
    #cache-size = 200
    # How should docker hashes be looked up. Possible values are "local" and "remote"
    # "local": Lookup hashes on the local docker daemon using the cli
    # "remote": Lookup hashes on docker hub, gcr, gar, quay
    #method = "remote"
  }
 }
 engine {
  # This instructs the engine which filesystems are at its disposal to perform any IO operation that it might need.
  # For instance, WDL variables declared at the Workflow level will be evaluated using the filesystems declared here.
@ -261,7 +209,7 @@ engine {
    #    project = "google-billing-project"
    #  }
    local {
-      #enabled: true
+      enabled: true
    }
  }
 }
@ -373,7 +321,7 @@ backend {
        # `script-epilogue` configures a shell command to run after the execution of every command block.
        #
        # If this value is not set explicitly, the default value is `sync`, equivalent to:
-        script-epilogue = ""
+        # script-epilogue = "sync"
        #
        # To turn off the default `sync` behavior set this value to an empty string:
        # script-epilogue = ""
--- a/codes/filter_snpindel.pl
+++ b/codes/filter_snpindel.pl
@ -136,13 +136,13 @@ while (<IN>) {
                $hgvs =~ s/exon(\d+)/intron$intron;exon$exon/;
                $line[9] = join(":", ($gene, $hgvs));
            }
            elsif ($gene eq "MET") {
                $line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx"));
                $line[8] = 'skipping'
            }
            else {
                push @reason, 'not_need_spl';
            }
            if ($gene eq "MET" ) {
                $line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx"));
                $line[8] = 'skipping'
            }
            $protein = 'Truncating Mutations';
        }
        else {
@ -175,7 +175,7 @@ while (<IN>) {
    # tmb 流程去掉 不过滤但是修改 hgvs
    if ($pipeline eq 'tmb') {
-        @reason = grep(!/synonymous|benign/, @reason);
+        @reason = grep(!/synonymous/, @reason);
        if (($freq < 0.05) and ($sample_type eq 't')) {
            push @reason, 'lowfreq_tissue_tmb';
        }
--- a/codes/postprocess.py
+++ b/codes/postprocess.py
@ -369,7 +369,7 @@ class PostProcess:
            res = df.to_dict('records')[0]
            msi_res['msi_count'] = res['Total_Number_of_Sites']
            msi_res['msi_value'] = res['%']
-            if msi_res['msi_value'] >= 0.3:
+            if msi_res['msi_value'] >= 30:
                msi_res['msi_result'] = 'MSI-H'
                msi_res['msi_predict'] = '对免疫检查点抑制剂可能敏感'
            else:
--- a/codes/run_wdl.py
+++ b/codes/run_wdl.py
@ -94,9 +94,9 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl
    # f'{"-Dcall-caching.enabled=false " if uncache else ""}'
    # f'-Dconfig.file=/home/zhangchao/project/pipeline/workflow/script/cromwell.examples.conf ' \
-    cmd4 = f'/home/install/product/workflow/software/jdk-17.0.7+7/bin/java -DLOG_MODE=standard ' \
+    cmd4 = f'/usr/bin/java -DLOG_MODE=standard ' \
           f'-Dconfig.file=$WORKFLOW/codes/cromwell.examples.conf ' \
-           f'-jar $WORKFLOW/software/cromwell-86.jar run {wdl} --inputs {jsfile_path} '
+           f'-jar $WORKFLOW/software/cromwell-51.jar run {wdl} --inputs {jsfile_path} '
    # cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}'
    cmd = f'{cmd3}; {cmd4}'
--- a/pipeline.wdl
+++ b/pipeline.wdl
@ -1,18 +1,18 @@
 # pipeline
-import "wdl/qc.wdl"
+import "wdl/qc.wdl" as qc
-import "wdl/alignment.wdl"
+import "wdl/alignment.wdl" as alignment
-import "wdl/call_mutation.wdl"
+import "wdl/call_mutation.wdl" as call_mutation
-import "wdl/fusion.wdl"
+import "wdl/fusion.wdl" as fusion
-import "wdl/statistics.wdl"
+import "wdl/statistics.wdl" as statistics
-import "wdl/cnv.wdl"
+import "wdl/cnv.wdl" as cnv
-import "wdl/msi.wdl"
+import "wdl/msi.wdl" as msi
-import "wdl/chemo.wdl"
+import "wdl/chemo.wdl" as chemo
-import "wdl/hereditary.wdl"
+import "wdl/hereditary.wdl" as hereditary
-import "wdl/pollution.wdl"
+import "wdl/pollution.wdl" as pollution
-import "wdl/tmb.wdl"
+import "wdl/tmb.wdl" as tmb
-import "wdl/postprocess.wdl"
+import "wdl/postprocess.wdl" as postprocess
-import "wdl/neoantigen.wdl"
+import "wdl/neoantigen.wdl" as neoantigen
 workflow pipeline {
@ -199,6 +199,7 @@ workflow pipeline {
            fusion=call_fusion.fusion_vcf,
            cnv=call_cnv.cnv_filter,
            msi=call_msi.msi_txt,
            tmb=call_tmb.tmb_txt,
            hereditary=call_hereditary.hereditary_txt,
            chemo=call_chemo.chemo_res,
            neoantigen=call_neoantigen.neoantigen_txt,
@ -209,4 +210,8 @@ workflow pipeline {
            cancer=cancer,
            project=project
    }
    output {
        String result = "${output_dir}/report/${tumor}.merged_file.xlsx"
    }
 }
--- a/wdl/alignment.wdl
+++ b/wdl/alignment.wdl
@ -115,50 +115,85 @@ workflow alignment {
    String output_dir
    if (run) {
-        scatter(name in [tumor, normal]) {
+        # 单样本
-            if (defined(name)) {
+        if (!defined(normal)) {
-                call bwa {
+            call bwa as bwa_tumor {
                input:
-                        name=name,
+                    name=tumor,
                    ref=ref,
                    output_dir=output_dir,
-                        read1=if name==tumor then tumor_r1 else normal_r1,
+                    read1=tumor_r1,
-                        read2=if name==tumor then tumor_r2 else normal_r2
+                    read2=tumor_r2
            }
-                if (name==tumor) {
+
            if (umi) {
                call markduplicates_genecore as tumor_markduplicates_genecore {
                    input:
-                                name=name,
+                        name=tumor,
                        ref=ref,
                        output_dir=output_dir,
-                                sorted_bam=bwa.sorted_bam,
+                        sorted_bam=bwa_tumor.sorted_bam,
                }
            }
            if (!umi) {
                call markduplicates_picard as tumor_markduplicates_picard {
                    input:
-                                name=name,
+                        name=tumor,
                        ref=ref,
                        output_dir=output_dir,
-                                sorted_bam=bwa.sorted_bam,
+                        sorted_bam=bwa_tumor.sorted_bam,
                }
            }
        }
        # 双样本
        if (defined(normal)) {
            call bwa as bwa_tumor_control {
                input:
                    name=tumor,
                    ref=ref,
                    output_dir=output_dir,
                    read1=tumor_r1,
                    read2=tumor_r2
            }
            call bwa as bwa_normal_control {
                input:
                    name=normal,
                    ref=ref,
                    output_dir=output_dir,
                    read1=normal_r1,
                    read2=normal_r2
            }
            call markduplicates_picard as tumor_markduplicates_picard_control {
                input:
                    name=tumor,
                    ref=ref,
                    output_dir=output_dir,
                    sorted_bam=bwa_tumor_control.sorted_bam,
            }
            if (umi) {
                call markduplicates_genecore as normal_markduplicates_genecore {
                    input:
                        name=normal,
                        ref=ref,
                        output_dir=output_dir,
                        sorted_bam=bwa_normal_control.sorted_bam,
                }
            }
-                if (name==select_first([normal, 'None'])) {
+            if (!umi) {
                call markduplicates_picard as normal_markduplicates_picard {
                    input:
-                            name=name,
+                        name=normal,
                        ref=ref,
                        output_dir=output_dir,
-                            sorted_bam=bwa.sorted_bam,
+                        sorted_bam=bwa_normal_control.sorted_bam,
                }
            }
        }
    }
        }
    }
    output {
        String tumor_sorted_bam = "${output_dir}/alignment/${tumor}.sorted.bam"
--- a/wdl/call_mutation.wdl
+++ b/wdl/call_mutation.wdl
@ -1,3 +1,4 @@
 # mutation
 task mutation_calling_umi {
    String name
@ -266,7 +267,7 @@ task mutation_calling_tissue_control {
        vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t
        vcf_filter.py -i ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf \
-        -o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \
+        -o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \
        -e 'INFO/STATUS="StrongSomatic" | ( INFO/STATUS="LikelySomatic" && FORMAT/AF[0] > 3*FORMAT/AF[1] )'
        vcf_filter.py -i ${output_dir}/mutation/${name}.raw.snp_indel.vcf \
@ -718,6 +719,7 @@ workflow call_mutation {
                        cancer=cancer
                }
            }
            if (!umi) {
                call mutation_calling_tissue_control {
                    input:
--- a/wdl/chemo.wdl
+++ b/wdl/chemo.wdl
@ -1,3 +1,4 @@
 # chemo
 task run_chemo {
    String name
--- a/wdl/cnv.wdl
+++ b/wdl/cnv.wdl
@ -1,3 +1,4 @@
 # cnv
 task cnv_single {
    String name
--- a/wdl/fusion.wdl
+++ b/wdl/fusion.wdl
@ -1,3 +1,4 @@
 task rmdup_picard {
    String name
    String sorted_bam
--- a/wdl/hereditary.wdl
+++ b/wdl/hereditary.wdl
@ -1,3 +1,4 @@
 # hereditary
 task run_hereditary {
    String name
--- a/wdl/msi.wdl
+++ b/wdl/msi.wdl
@ -1,3 +1,4 @@
 # msi
 task msi_single {
    String name
@ -89,6 +90,3 @@ workflow call_msi {
        String msi_txt = "${output_dir}/msi/${tumor}.msi.txt"
    }
 }
--- a/wdl/pollution.wdl
+++ b/wdl/pollution.wdl
@ -1,3 +1,4 @@
 task run_pollution {
    String name
    String output_dir
--- a/wdl/postprocess.wdl
+++ b/wdl/postprocess.wdl
@ -1,9 +1,11 @@
 # postprocess
 task run_post {
    String?  mutation
    String? fusion
    String? cnv
    String? msi
    String? tmb
    String? hereditary
    String? chemo
    String? neoantigen
@ -38,6 +40,7 @@ workflow call_postprocess {
    String? fusion
    String? cnv
    String? msi
    String? tmb
    String? hereditary
    String? pollution
    String? chemo
@ -55,6 +58,7 @@ workflow call_postprocess {
                fusion=fusion,
                cnv=cnv,
                msi=msi,
                tmb=tmb,
                hereditary=hereditary,
                chemo=chemo,
                neoantigen=neoantigen,
--- a/wdl/qc.wdl
+++ b/wdl/qc.wdl
@ -1,3 +1,4 @@
 #qc
 task runqc {
    String name
--- a/wdl/statistics.wdl
+++ b/wdl/statistics.wdl
@ -1,3 +1,4 @@
 # statistics
 task run_statistics {
    String name
@ -17,7 +18,7 @@ task run_statistics {
        samtools stats --reference ${ref} -t ${bed} -@ 10 ${rmdupBam} > ${output_dir}/qc/${name}.rmdup.stat
        bamdst -p ${bed} -o ${output_dir}/qc/${name}_bamdst ${rmdupBam}
        qc_stat.py ${output_dir}/qc/${name}.json ${output_dir}/qc/${name}_bamdst/ ${output_dir}/qc/${name}_qc.txt
-        #
+
        #        InsertAndDepthStat.R \
        #        ${output_dir}/qc/${name}_InsertAndDepthStat \
        #        ${output_dir}/qc/${name}_bamdst/insertsize.plot \
--- a/wdl/tmb.wdl
+++ b/wdl/tmb.wdl
@ -1,3 +1,4 @@
 # tmb
 task run_tmb {
    String name