From ac08a562d00e5faf9465fe7417b233b55182daa1 Mon Sep 17 00:00:00 2001
From: chaopower
Date: Tue, 26 Dec 2023 10:18:15 +0800
Subject: [PATCH] Adjust
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 codes/cromwell.examples.conf | 771 +++++++++++++++++++++++++++++------
 codes/run_wdl.py             |   4 +-
 pipeline.wdl                 |  27 +-
 wdl/chemo.wdl                |   1 +
 wdl/hereditary.wdl           |   1 +
 wdl/msi.wdl                  |   1 +
 wdl/neoantigen.wdl           |   1 +
 wdl/pollution.wdl            |   5 +-
 wdl/postprocess.wdl          |   2 +
 9 files changed, 683 insertions(+), 130 deletions(-)

diff --git a/codes/cromwell.examples.conf b/codes/cromwell.examples.conf
index f40a815..8dd5d60 100755
--- a/codes/cromwell.examples.conf
+++ b/codes/cromwell.examples.conf
@@ -1,122 +1,120 @@
-# This is an example of how you can use the LocalExample backend to define
-# a new backend provider. *This is not a complete configuration file!* The
-# content here should be copy pasted into the backend -> providers section
-# of cromwell.example.backends/cromwell.examples.conf in the root of the repository.
-# You should uncomment lines that you want to define, and read carefully to customize
-# the file. If you have any questions, please open an issue at
-# https://www.github.com/broadinstitute/cromwell/issues
+# This is a "default" Cromwell example that is intended for you to start with
+# and edit for your needs. Specifically, you will be interested to customize
+# the configuration based on your preferred backend (see the backends section
+# below in the file). For backend-specific examples for you to copy-paste here,
+# please see the cromwell.backend.examples folder in the repository. The files
+# there also include links to online documentation (if it exists)
-# Documentation
-# https://cromwell.readthedocs.io/en/stable/backends/Local/
+# This line is required. It pulls in default overrides from the embedded cromwell
+# `reference.conf` (in core/src/main/resources) needed for proper performance of cromwell.
+include required(classpath("application"))
-# Define a new backend provider.
+# Cromwell HTTP server settings
+webservice {
+  #port = 8000
+  #interface = 0.0.0.0
+  #binding-timeout = 5s
+  #instance.name = "reference"
+}
-LocalExample {
-
-  # The actor that runs the backend. In this case, it's the Shared File System (SFS) ConfigBackend.
-  actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
-
-  # The backend custom configuration.
-  config {
-
-    # Optional limits on the number of concurrent jobs
-    #concurrent-job-limit = 5
-
-    # If true submits scripts to the bash background using "&". Only usefull for dispatchers that do NOT submit
-    # the job and then immediately return a scheduled job id.
-    run-in-background = true
-
-    # `temporary-directory` creates the temporary directory for commands.
-    #
-    # If this value is not set explicitly, the default value creates a unique temporary directory, equivalent to:
-    # temporary-directory = "$(mktemp -d \"$PWD\"/tmp.XXXXXX)"
-    #
-    # The expression is run from the execution directory for the script. The expression must create the directory
-    # if it does not exist, and then return the full path to the directory.
-    #
-    # To create and return a non-random temporary directory, use something like:
-    # temporary-directory = "$(mkdir -p /tmp/mydir && echo /tmp/mydir)"
-    # `script-epilogue` configures a shell command to run after the execution of every command block.
-    #
-    # If this value is not set explicitly, the default value is `sync`, equivalent to:
-    # script-epilogue = "sync"
-    #
-    # To turn off the default `sync` behavior set this value to an empty string:
-    # script-epilogue = ""
-
-    # `glob-link-command` specifies command used to link glob outputs, by default using hard-links.
-    # If filesystem doesn't allow hard-links (e.g., beeGFS), change to soft-links as follows:
-    # glob-link-command = "ln -sL GLOB_PATTERN GLOB_DIRECTORY"
-
-    # The list of possible runtime custom attributes.
-    runtime-attributes = """
-    String? docker
-    String? docker_user
-    """
-
-    # Submit string when there is no "docker" runtime attribute.
-    submit = "/usr/bin/env bash ${script}"
-
-    # Submit string when there is a "docker" runtime attribute.
-    submit-docker = """
-    docker run \
-      --rm -i \
-      ${"--user " + docker_user} \
-      --entrypoint ${job_shell} \
-      -v ${cwd}:${docker_cwd} \
-      ${docker} ${script}
-    """
-
-    # Root directory where Cromwell writes job results. This directory must be
-    # visible and writeable by the Cromwell process as well as the jobs that Cromwell
-    # launches.
-    root = "cromwell-executions"
-
-    # Root directory where Cromwell writes job results in the container. This value
-    # can be used to specify where the execution folder is mounted in the container.
-    # it is used for the construction of the docker_cwd string in the submit-docker
-    # value above.
-    dockerRoot = "/cromwell-executions"
-
-    # File system configuration.
-    filesystems {
-
-      # For SFS backends, the "local" configuration specifies how files are handled.
-      local {
-
-        # Try to hard link (ln), then soft-link (ln -s), and if both fail, then copy the files.
-        localization: [
-          "hard-link", "soft-link", "copy"
-        ]
-
-        # Call caching strategies
-        caching {
-          # When copying a cached result, what type of file duplication should occur.
-          # For more information check: https://cromwell.readthedocs.io/en/stable/backends/HPC/#shared-filesystem
-          duplication-strategy: [
-            "hard-link", "soft-link", "copy"
-          ]
-
-          # Strategy to determine if a file has been used before.
-          # For extended explanation and alternative strategies check: https://cromwell.readthedocs.io/en/stable/Configuring/#call-caching
-          hashing-strategy: "md5"
-
-          # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash.
-          # If false or the md5 does not exist, will proceed with the above-defined hashing strategy.
-          check-sibling-md5: false
-        }
-      }
-    }
-
-    # The defaults for runtime attributes if not provided.
-    default-runtime-attributes {
-      failOnStderr: false
-      continueOnReturnCode: 0
-    }
-  }
-}
+akka {
+  # Optionally set / override any akka settings
+  http {
+    server {
+      # Increasing these timeouts allows REST API responses for very large jobs
+      # to be returned to the user. When the timeout is reached the server would respond
+      # `The server was not able to produce a timely response to your request.`
+      # https://gatkforums.broadinstitute.org/wdl/discussion/10209/retrieving-metadata-for-large-workflows
+      # request-timeout = 20s
+      # idle-timeout = 20s
+    }
+  }
+}
+
+# Cromwell "system" settings
+system {
+  # If 'true', a SIGINT will trigger Cromwell to attempt to abort all currently running jobs before exiting
+  abort-jobs-on-terminate = true
+
+  # If 'true', a SIGTERM or SIGINT will trigger Cromwell to attempt to gracefully shutdown in server mode,
+  # in particular clearing up all queued database writes before letting the JVM shut down.
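+  # For example, `kill -TERM <cromwell-pid>` (pid being a placeholder) takes this graceful
+  # path, while `kill -9` cannot be caught and skips the cleanup entirely.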
+  # The shutdown is a multi-phase process, each phase having its own configurable timeout. See the Dev Wiki for more details.
+  graceful-server-shutdown = true
+
+  # Cromwell will cap the number of running workflows at N
+  max-concurrent-workflows = 10000
+
+  # Cromwell will launch up to N submitted workflows at a time, regardless of how many open workflow slots exist
+  max-workflow-launch-count = 100
+
+  # Number of seconds between workflow launches
+  #new-workflow-poll-rate = 20
+
+  # Since the WorkflowLogCopyRouter is initialized in code, this is the number of workers
+  #number-of-workflow-log-copy-workers = 10
+
+  # Default number of cache read workers
+  #number-of-cache-read-workers = 25
+
+  io {
+    # throttle {
+    #   # Global Throttling - This is mostly useful for GCS and can be adjusted to match
+    #   # the quota available on the GCS API
+    #   #number-of-requests = 100000
+    #   #per = 100 seconds
+    # }
+
+    # Number of times an I/O operation should be attempted before giving up and failing it.
+    number-of-attempts = 5
+  }
+
+  # Maximum number of input file bytes allowed in order to read each type.
+  # If exceeded a FileSizeTooBig exception will be thrown.
+  input-read-limits {
+
+    #lines = 128000
+
+    #bool = 7
+
+    #int = 19
+
+    #float = 50
+
+    #string = 128000
+
+    #json = 128000
+
+    #tsv = 128000
+
+    #map = 128000
+
+    #object = 128000
+  }
+
+  abort {
+    # These are the default values in Cromwell; in most circumstances there should not be a need to change them.
+
+    # How frequently Cromwell should scan for aborts.
+    scan-frequency: 30 seconds
+
+    # The cache of in-progress aborts. Cromwell will add entries to this cache once a WorkflowActor has been messaged to abort.
+    # If on the next scan an 'Aborting' status is found for a workflow that has an entry in this cache, Cromwell will not ask
+    # the associated WorkflowActor to abort again.
+    cache {
+      enabled: true
+      # Guava cache concurrency.
+      concurrency: 1
+      # How long entries in the cache should live from the time they are added to the cache.
+      ttl: 10 minutes
+      # Maximum number of entries in the cache.
+      size: 200000
+    }
+  }
+
+  # Cromwell reads this value into the JVM's `networkaddress.cache.ttl` setting to control DNS cache expiration
+  dns-cache-ttl: 3 minutes
+}
+
 workflow-options {
     # These workflow options will be encrypted when stored in the database
     #encrypted-fields: []
     #encryption-key: "workflow-options-encryption-key"
     #unencrypted-fields: []
     #aes-256-key: "key here"
 
     # Directory where to write per workflow logs
     #workflow-log-dir: "cromwell-workflow-logs"
 
     # When true, per workflow logs will be deleted after copying
     #workflow-log-temporary: true
 
     # Workflow type defaults to WDL
     #workflow-type: WDL
@@ -139,9 +137,554 @@ workflow-options {
 
     # When a workflow type version is not provided on workflow submission, this specifies the default type version.
-    #workflow-type-version: "draft-2"
+    workflow-type-version: "draft-2"
 
     # To set a default hog group rather than defaulting to workflow ID:
-    # hogGroup: "static"
+    hogGroup: "static"
 }
+}
+
+# Optional call-caching configuration.
+call-caching {
+  # Allows re-use of existing results for jobs you've already run
+  # (default: false)
+  enabled = false
+
+  # Whether to invalidate a cache result forever if it cannot be reused. Disable this if you expect some cache copies
+  # to fail for external reasons which should not invalidate the cache (e.g. auth differences between users):
+  # (default: true)
+  #invalidate-bad-cache-results = true
+
+  # The maximum number of times Cromwell will attempt to copy cache hits before giving up and running the job.
+  #max-failed-copy-attempts = 1000000
+
+  # blacklist-cache {
+  #   # The call caching blacklist cache is off by default. This cache is used to blacklist cache hits based on cache
+  #   # hit ids or buckets of cache hit paths that Cromwell has previously failed to copy for permissions reasons.
+  #   enabled: true
+  #
+  #   # A blacklist grouping can be specified in workflow options which will inform the blacklister which workflows
+  #   # should share a blacklist cache.
+  #   groupings {
+  #     workflow-option: call-cache-blacklist-group
+  #     concurrency: 10000
+  #     ttl: 2 hours
+  #     size: 1000
+  #   }
+  #
+  #   buckets {
+  #     # Guava cache concurrency.
+  #     concurrency: 10000
+  #     # How long entries in the cache should live from the time of their last access.
+  #     ttl: 20 minutes
+  #     # Maximum number of entries in the cache.
+  #     size: 1000
+  #   }
+  #
+  #   hits {
+  #     # Guava cache concurrency.
+  #     concurrency: 10000
+  #     # How long entries in the cache should live from the time of their last access.
+  #     ttl: 20 minutes
+  #     # Maximum number of entries in the cache.
+  #     size: 100000
+  #   }
+  #
+  # }
+}
+
+# Google configuration
+google {
+
+  #application-name = "cromwell"
+
+  # Default: just application default
+  #auths = [
+
+    # Application default
+    #{
+    #  name = "application-default"
+    #  scheme = "application_default"
+    #},
+
+    # Use a static service account
+    #{
+    #  name = "service-account"
+    #  scheme = "service_account"
+    #  Choose between PEM file and JSON file as a credential format. They're mutually exclusive.
+    #  PEM format:
+    #  service-account-id = "my-service-account"
+    #  pem-file = "/path/to/file.pem"
+    #  JSON format:
+    #  json-file = "/path/to/file.json"
+    #}
+
+    # Use service accounts provided through workflow options
+    #{
+    #  name = "user-service-account"
+    #  scheme = "user_service_account"
+    #}
+  #]
+}
+
+docker {
+  hash-lookup {
+    # Set this to match your available quota against the Google Container Engine API
+    #gcr-api-queries-per-100-seconds = 1000
+
+    # Time in minutes before an entry expires from the docker hashes cache and needs to be fetched again
+    #cache-entry-ttl = "20 minutes"
+
+    # Maximum number of elements to be kept in the cache. If the limit is reached, old elements will be removed from the cache
+    #cache-size = 200
+
+    # How should docker hashes be looked up. Possible values are "local" and "remote"
+    # "local": Lookup hashes on the local docker daemon using the cli
+    # "remote": Lookup hashes on docker hub, gcr, gar, quay
+    #method = "remote"
+  }
+}
+
+engine {
+  # This instructs the engine which filesystems are at its disposal to perform any IO operation that it might need.
+  # For instance, WDL variables declared at the Workflow level will be evaluated using the filesystems declared here.
+  # If you intend to be able to run workflows with this kind of declaration:
+  # workflow {
+  #   String str = read_string("gs://bucket/my-file.txt")
+  # }
+  # You will need to provide the engine with a gcs filesystem
+  # Note that the default filesystem (local) is always available.
+  filesystems {
+    # gcs {
+    #   auth = "application-default"
+    #   # Google project which will be billed for the requests
+    #   project = "google-billing-project"
+    # }
+    local {
+      #enabled: true
+    }
+  }
+}
+
+# You probably don't want to override the language factories here, but the strict-validation and enabled fields might be of interest:
+#
+# `enabled`: Defaults to `true`. Set to `false` to disallow workflows of this language/version from being run.
+# `strict-validation`: Specifies whether workflows fail if the inputs JSON (or YAML) file contains values which the workflow did not ask for (and will therefore have no effect).
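+#
+# As an illustrative sketch (workflow and key names hypothetical): with strict validation on,
+# an inputs file such as { "pipeline.sample": "s1", "pipeline.unused_key": 1 } is rejected when
+# the workflow never declares `unused_key`; with it off, the extra key is silently ignored.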
+languages {
+  WDL {
+    versions {
+      "draft-2" {
+        language-factory = "languages.wdl.draft2.WdlDraft2LanguageFactory"
+        config {
+          # strict-validation: true
+          enabled: true
+          caching {
+            # WDL Draft 2 namespace caching is off by default; this value must be set to true to enable it.
+            enabled: true
+            # Guava cache concurrency
+            concurrency: 4
+            # How long entries in the cache should live from the time of their last access.
+            ttl: 20 minutes
+            # Maximum number of entries in the cache (i.e. the number of workflow source + imports => namespace entries).
+            size: 10000
+          }
+        }
+      }
+      # draft-3 is the same as 1.0 so files should be able to be submitted to Cromwell as 1.0
+      # "draft-3" {
+      #   language-factory = "languages.wdl.draft3.WdlDraft3LanguageFactory"
+      #   config {
+      #     strict-validation: true
+      #     enabled: true
+      #   }
+      # }
+      "1.0" {
+        # 1.0 is just a rename of draft-3, so yes, they really do use the same factory:
+        # language-factory = "languages.wdl.draft3.WdlDraft3LanguageFactory"
+        # config {
+        #   strict-validation: true
+        #   enabled: true
+        # }
+      }
+    }
+  }
+}
+
+# Here is where you can define the backend providers that Cromwell understands.
+# The default is a local provider.
+# To add additional backend providers, you should copy-paste additional backends
+# of interest that you can find in the cromwell.example.backends folder
+# at https://www.github.com/broadinstitute/cromwell
+# Other backend providers include SGE, SLURM, Docker, udocker, Singularity, etc.
+# Don't forget you will need to customize them for your particular use case.
+backend {
+  # Override the default backend.
+  default = "LocalExample"
+
+  # The list of providers.
+  providers {
+    # Copy-paste the contents of a backend provider in this section
+    # Examples in cromwell.example.backends include:
+    # LocalExample: What you should use if you want to define a new backend provider
+    # AWS: Amazon Web Services
+    # TES: protocol defined by GA4GH
+    # TESK: the same, with kubernetes support
+    # Google Pipelines, v2 (PAPIv2)
+    # Docker
+    # Singularity: a container safe for HPC
+    # Singularity+Slurm: and an example on Slurm
+    # udocker: another rootless container solution
+    # udocker+slurm: also exemplified on slurm
+    # HtCondor: workload manager at UW-Madison
+    # LSF: the Platform Load Sharing Facility backend
+    # SGE: Sun Grid Engine
+    # SLURM: workload manager
+
+    # Note that these other backend examples will need tweaking and configuration.
+    # Please open an issue at https://www.github.com/broadinstitute/cromwell if you have any questions
+
+    # The local provider is included by default. This is an example.
+    # Define a new backend provider.
+    LocalExample {
+      # The actor that runs the backend. In this case, it's the Shared File System (SFS) ConfigBackend.
+      actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
+
+      # The backend custom configuration.
+      config {
+
+        # Optional limits on the number of concurrent jobs
+        #concurrent-job-limit = 5
+
+        # If true submits scripts to the bash background using "&". Only useful for dispatchers that do NOT submit
+        # the job and then immediately return a scheduled job id.
+        run-in-background = true
+
+        # `temporary-directory` creates the temporary directory for commands.
+        #
+        # If this value is not set explicitly, the default value creates a unique temporary directory, equivalent to:
+        # temporary-directory = "$(mktemp -d \"$PWD\"/tmp.XXXXXX)"
+        #
+        # The expression is run from the execution directory for the script. The expression must create the directory
+        # if it does not exist, and then return the full path to the directory.
+        #
+        # To create and return a non-random temporary directory, use something like:
+        # temporary-directory = "$(mkdir -p /tmp/mydir && echo /tmp/mydir)"
+
+        # `script-epilogue` configures a shell command to run after the execution of every command block.
+        #
+        # If this value is not set explicitly, the default value is `sync`, equivalent to:
+        # script-epilogue = "sync"
+        #
+        # The default `sync` behavior is turned off here by setting this value to an empty string:
+        script-epilogue = ""
+
+        # `glob-link-command` specifies command used to link glob outputs, by default using hard-links.
+        # If filesystem doesn't allow hard-links (e.g., beeGFS), change to soft-links as follows:
+        # glob-link-command = "ln -sL GLOB_PATTERN GLOB_DIRECTORY"
+
+        # The list of possible runtime custom attributes.
+        runtime-attributes = """
+        String? docker
+        String? docker_user
+        """
+
+        # Submit string when there is no "docker" runtime attribute.
+        submit = "/usr/bin/env bash ${script}"
+
+        # Submit string when there is a "docker" runtime attribute.
+        submit-docker = """
+        docker run \
+          --rm -i \
+          ${"--user " + docker_user} \
+          --entrypoint ${job_shell} \
+          -v ${cwd}:${docker_cwd} \
+          ${docker} ${docker_script}
+        """
+
+        # Root directory where Cromwell writes job results. This directory must be
+        # visible and writeable by the Cromwell process as well as the jobs that Cromwell
+        # launches.
+        root = "cromwell-executions"
+
+        # Root directory where Cromwell writes job results in the container. This value
+        # can be used to specify where the execution folder is mounted in the container.
+        # It is used for the construction of the docker_cwd string in the submit-docker
+        # value above.
+        dockerRoot = "/cromwell-executions"
+
+        # File system configuration.
+        filesystems {
+
+          # For SFS backends, the "local" configuration specifies how files are handled.
+          local {
+
+            # Try to hard link (ln), then soft-link (ln -s), and if both fail, then copy the files.
+            localization: [
+              "hard-link", "soft-link", "copy"
+            ]
+            # An experimental localization strategy called "cached-copy" is also available for SFS backends.
+            # This will copy a file to a cache and then hard-link from the cache. It will copy the file to the cache again
+            # when the maximum number of hardlinks for a file is reached. The maximum number of hardlinks can be set with:
+            # max-hardlinks: 950
+
+            # Call caching strategies
+            caching {
+              # When copying a cached result, what type of file duplication should occur. Attempted in the order listed below:
+              duplication-strategy: [
+                "hard-link", "soft-link", "copy"
+              ]
+
+              # Possible values: file, path, path+modtime
+              # "file" will compute an md5 hash of the file content.
+              # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link",
+              # in order to allow for the original file path to be hashed.
+              # "path+modtime" will compute an md5 hash of the file path and the last modified time. The same conditions as for "path" apply here.
+              # Default: file
+              hashing-strategy: "file"
+
+              # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash.
+              # If false or the md5 does not exist, will proceed with the above-defined hashing strategy.
+              check-sibling-md5: false
+            }
+          }
+        }
+
+        # The defaults for runtime attributes if not provided.
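+        # As a sketch of the semantics of the two values below: a task that writes to stderr
+        # still succeeds, and any exit code other than 0 fails the task, unless the task's own
+        # runtime section overrides these defaults.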
+        default-runtime-attributes {
+          failOnStderr: false
+          continueOnReturnCode: 0
+        }
+      }
+    }
+  }
+}
+
+services {
+  MetadataService {
+
+    # This class is the "default" database backed implementation:
+    # class = "cromwell.services.metadata.impl.MetadataServiceActor"
+    # config {
+    #   # For the standard MetadataService implementation, cromwell.services.metadata.impl.MetadataServiceActor:
+    #   # Set this value to "Inf" to turn off metadata summary refresh. The default value is currently "1 second".
+    #   metadata-summary-refresh-interval = "1 second"
+    #
+    #   # Set this value to the maximum number of metadata rows to be considered per summarization cycle.
+    #   metadata-summary-refresh-limit = 5000
+    #
+    #   # For higher scale environments, e.g. many workflows and/or jobs, DB write performance for metadata events
+    #   # can be improved by writing to the database in batches. Increasing this value can dramatically improve overall
+    #   # performance but will both lead to a higher memory usage as well as increase the risk that metadata events
+    #   # might not have been persisted in the event of a Cromwell crash.
+    #   #
+    #   # For normal usage the default value of 200 should be fine but for larger/production environments we recommend a
+    #   # value of at least 500. There'll be no one-size-fits-all number here so we recommend benchmarking performance and
+    #   # tuning the value to match your environment.
+    #   db-batch-size = 200
+    #
+    #   # Periodically the stored metadata events will be forcibly written to the DB regardless of whether the batch size
+    #   # has been reached. This is to prevent situations where events wind up never being written to an incomplete batch
+    #   # with no new events being generated. The default value is currently 5 seconds
+    #   db-flush-rate = 5 seconds
+    #
+    #   # Kill metadata SQL queries that run so long that the associated request will likely already have timed out.
+    #   # The intention is to return resources to the system within a reasonable timeframe to avoid OOM incidents.
+    #   # See also `akka.http.server.request-timeout`.
+    #   metadata-read-query-timeout = "Inf"
+    #
+    #   # Limit the number of rows from METADATA_ENTRY that will be fetched to produce metadata responses.
+    #   # This limit takes into account the effects of `includeKey`, `excludeKey` and `includeSubworkflows`
+    #   # request parameters; only the rows required to be retrieved from the database to compose the response
+    #   # count against this limit.
+    #   metadata-read-row-number-safety-threshold = 1000000
+    #
+    #   metadata-write-statistics {
+    #     # Not strictly necessary since the 'metadata-write-statistics' section itself is enough for statistics to be recorded.
+    #     # However, this can be set to 'false' to disable statistics collection without deleting the section.
+    #     enabled: true
+    #
+    #     # How many workflows to maintain statistics for concurrently. At ~4x "max-concurrent-workflows", this would be
+    #     # *relatively* resilient to large scatters of subworkflows without risking an uncapped expansion in memory usage.
+    #     # Note that cache entries expire after 4h of not being accessed, regardless of whether this limit is reached or not.
+    #     cache-size = 20000
+    #
+    #     # How many metadata rows to alert at each increment of. At 100k, there will be alert messages every time a
+    #     # workflow publishes another 100k rows of metadata.
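+    #     # (Worked example: a workflow that ultimately writes 350k rows would alert as it
+    #     # crosses 100k, 200k and 300k.)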
+    #     metadata-row-alert-interval = 100000
+    #
+    #     # Whether to include subworkflow rows in both individual counts and also include them in their parent counts
+    #     # (and their parent's parent, and so on up to the root)
+    #     sub-workflow-bundling = true
+    #   }
+    # }
+
+    # Alternative 1: Pub sub implementation:
+    # class = "cromwell.services.metadata.impl.MetadataServiceActor"
+    # config {
+    #   # For the Google PubSub MetadataService implementation: cromwell.services.metadata.impl.pubsub.PubSubMetadataServiceActor:
+    #   # Google project
+    #   project = "my-project"
+    #   # The auth *must* be a service-account auth with JSON auth.
+    #   auth = "service-account"
+    #   # The PubSub topic to write to. Will be created if it doesn't already exist. Defaults to "cromwell-metadata"
+    #   topic = "cromwell-metadata"
+    #   # An optional PubSub subscription name. If supplied and if it doesn't already exist, it will be created and
+    #   # subscribed to the topic
+    #   # subscription = "optional-subscription"
+    #   # An application name to set on your PubSub interaction.
+    #   appName = "cromwell"
+    # }
+  }
+
+  Instrumentation {
+    # StatsD - Send metrics to a StatsD server over UDP
+    # class = "cromwell.services.instrumentation.impl.statsd.StatsDInstrumentationServiceActor"
+    # config {
+    #   hostname = "localhost"
+    #   port = 8125
+    #   prefix = "" # can be used to prefix all metrics with an api key for example
+    #   flush-rate = 1 second # rate at which aggregated metrics will be sent to statsd
+    # }
+
+    # Stackdriver - Send metrics to Google's monitoring API
+    # class = "cromwell.services.instrumentation.impl.stackdriver.StackdriverInstrumentationServiceActor"
+    # config {
+    #   # auth scheme can be `application_default` or `service_account`
+    #   auth = "service-account"
+    #   google-project = "my-project"
+    #   # rate at which aggregated metrics will be sent to the Stackdriver API, must be 1 minute or more.
+    #   flush-rate = 1 minute
+    #   # the 3 keys below are attached as labels to each metric. `cromwell-perf-test-case` is specifically meant for the perf env.
+    #   cromwell-instance-identifier = "cromwell-101"
+    #   cromwell-instance-role = "role"
+    #   cromwell-perf-test-case = "perf-test-1"
+    # }
+  }
+  HealthMonitor {
+    config {
+
+      #####
+      # Choose what to monitor:
+      #####
+
+      # If you want to check the availability of the PAPI or PAPIv2 services, list them here.
+      # If provided, all values here *MUST* be valid PAPI or PAPIv2 backend names in the Backends stanza.
+      # NB: requires 'google-auth-name' to be set
+      # check-papi-backends: [ PAPIv2 ]
+
+      # If you want to check connection to GCS (NB: requires 'google-auth-name' and 'gcs-bucket-to-check' to be set):
+      # check-gcs: true
+
+      # If you want to check database connectivity:
+      # check-engine-database: true
+
+      # If you want to check dockerhub availability:
+      # check-dockerhub: true
+
+      #####
+      # General health monitor configuration:
+      #####
+
+      # How long to wait between status check sweeps
+      # check-refresh-time = 5 minutes
+
+      # For any given status check, how long to wait before assuming failure
+      # check-timeout = 1 minute
+
+      # For any given status datum, the maximum time a value will be kept before reverting back to "Unknown"
+      # status-ttl = 15 minutes
+
+      # For any given status check, how many times to retry a failure before setting status to failed. Note this
+      # is the number of retries before declaring failure, not the total number of tries, which is 1 more than
+      # the number of retries.
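+      # (Worked example: with 3 retries a single check may therefore run up to 4 times in total.)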
+      # check-failure-retry-count = 3
+
+      # For any given status check, how long to wait between failure retries.
+      # check-failure-retry-interval = 30 seconds
+
+      #####
+      # GCS- and PAPI-specific configuration options:
+      #####
+
+      # The name of an authentication scheme to use for e.g. pinging PAPI and GCS. This should be either an application
+      # default or service account auth, otherwise things won't work as there'll not be a refresh token where you need
+      # it.
+      # google-auth-name = application-default
+
+      # A *bucket* in GCS to periodically stat to check for connectivity. This must be accessible by the auth mode
+      # specified by google-auth-name
+      # NB: This is a *bucket name*, not a URL and not an *object*. With 'some-bucket-name', Cromwell would ping gs://some-bucket-name
+      # gcs-bucket-to-check = some-bucket-name
+    }
+  }
+  LoadController {
+    config {
+      # The load controller service will periodically look at the status of the various metrics it is collecting and make an
+      # assessment of the system's load. If necessary an alert will be sent to the rest of the system.
+      # This option sets how frequently this should happen
+      # To disable load control, set this to "Inf"
+      # control-frequency = 5 seconds
+    }
+  }
+}
+
+database {
+  # mysql example
+  #driver = "slick.driver.MySQLDriver$"
+
+  # see all possible parameters and default values here:
+  # http://slick.lightbend.com/doc/3.2.0/api/index.html#slick.jdbc.JdbcBackend$DatabaseFactoryDef@forConfig(String,Config,Driver):Database
+  #db {
+  #  driver = "com.mysql.jdbc.Driver"
+  #  url = "jdbc:mysql://host/cromwell?rewriteBatchedStatements=true"
+  #  user = "user"
+  #  password = "pass"
+  #  connectionTimeout = 5000
+  #}
+
+  # For batch inserts the number of inserts to send to the DB at a time
+  # insert-batch-size = 2000
+
+  migration {
+    # For databases with a very large number of symbols, selecting all the rows at once can generate a variety of
+    # problems. In order to avoid any issue, the selection is paginated. This value sets how many rows should be
+    # retrieved and processed at a time, before asking for the next chunk.
+    #read-batch-size = 100000
+
+    # Because a symbol row can contain any arbitrary wdl value, the amount of metadata rows to insert from a single
+    # symbol row can vary from 1 to several thousands (or more). To keep the size of the insert batch from growing out
+    # of control we monitor its size and execute/commit when it reaches or exceeds writeBatchSize.
+    #write-batch-size = 100000
+  }
+
+  # To customize the metadata database connection, create a block under `database` with the metadata database settings.
+  #
+  # For example, the default database stores all data in memory. This commented-out block would store `metadata` in an
+  # hsqldb file, without modifying the internal engine database connection.
+  #
+  # The value `${uniqueSchema}` is always replaced with a unique UUID on each cromwell startup.
+  #
+  # This feature should be considered experimental and likely to change in the future.
+
+  #metadata {
+  #  profile = "slick.jdbc.HsqldbProfile$"
+  #  db {
+  #    driver = "org.hsqldb.jdbcDriver"
+  #    url = "jdbc:hsqldb:file:metadata-${uniqueSchema};shutdown=false;hsqldb.tx=mvcc"
+  #    connectionTimeout = 3000
+  #  }
+  #}
+
+  # Postgresql example
+  #database {
+  #  profile = "slick.jdbc.PostgresProfile$"
+  #  db {
+  #    driver = "org.postgresql.Driver"
+  #    url = "jdbc:postgresql://localhost:5432/cromwell"
+  #    user = ""
+  #    password = ""
+  #    port = 5432
+  #    connectionTimeout = 5000
+  #  }
+  #}
+}
\ No newline at end of file
diff --git a/codes/run_wdl.py b/codes/run_wdl.py
index 6289f4a..214f6a5 100755
--- a/codes/run_wdl.py
+++ b/codes/run_wdl.py
@@ -94,7 +94,9 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl
     # f'{"-Dcall-caching.enabled=false " if uncache else ""}'
     # f'-Dconfig.file=/home/zhangchao/project/pipeline/workflow/script/cromwell.examples.conf ' \
-    cmd4 = f'/usr/bin/java -jar $WORKFLOW/software/cromwell-51.jar run --inputs {jsfile_path} {wdl}'
+    cmd4 = f'/home/install/product/workflow/software/jdk-17.0.7+7/bin/java ' \
+           f'-Dconfig.file=$WORKFLOW/codes/cromwell.examples.conf ' \
+           f'-jar $WORKFLOW/software/cromwell-86.jar run {wdl} --inputs {jsfile_path} --metadata-output run.log'
 
     # cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}'
     cmd = f'{cmd3}; {cmd4}'
diff --git a/pipeline.wdl b/pipeline.wdl
index 7f92fdc..b341c7d 100644
--- a/pipeline.wdl
+++ b/pipeline.wdl
@@ -1,17 +1,18 @@
-import "./wdl/qc.wdl"
-import "./wdl/alignment.wdl"
-import "./wdl/call_mutation.wdl"
-import "./wdl/fusion.wdl"
-import "./wdl/statistics.wdl"
-import "./wdl/cnv.wdl"
-import "./wdl/msi.wdl"
-import "./wdl/chemo.wdl"
-import "./wdl/hereditary.wdl"
-import "./wdl/pollution.wdl"
-import "./wdl/tmb.wdl"
-import "./wdl/postprocess.wdl"
-import "./wdl/neoantigen.wdl"
+import "wdl/qc.wdl"
+import "wdl/alignment.wdl"
+import "wdl/call_mutation.wdl"
+import "wdl/fusion.wdl"
+import "wdl/statistics.wdl"
+import "wdl/cnv.wdl"
+import "wdl/msi.wdl"
+import "wdl/chemo.wdl"
+import "wdl/hereditary.wdl"
+import "wdl/pollution.wdl"
+import "wdl/tmb.wdl"
+import "wdl/postprocess.wdl"
+import "wdl/neoantigen.wdl"
+
 
 workflow pipeline {
 
diff --git a/wdl/chemo.wdl b/wdl/chemo.wdl
index d5598b9..bba4d73 100755
--- a/wdl/chemo.wdl
+++ b/wdl/chemo.wdl
@@ -13,6 +13,7 @@ task run_chemo {
         fi
 
         chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project}
+
     >>>
 
     output {
diff --git a/wdl/hereditary.wdl b/wdl/hereditary.wdl
index fdc0926..596f7df 100755
--- a/wdl/hereditary.wdl
+++ b/wdl/hereditary.wdl
@@ -11,6 +11,7 @@ task run_hereditary {
         fi
 
         hereditary.py -d $DATABASE/hereditary_database.xlsx -p ${project} -n ${name} -f ${filter_txt} -o ${output_dir}/hereditary
+
     >>>
 
     output {
diff --git a/wdl/msi.wdl b/wdl/msi.wdl
index 3b31417..53a76d7 100755
--- a/wdl/msi.wdl
+++ b/wdl/msi.wdl
@@ -42,6 +42,7 @@ task msi_paired {
             -e ${bed} \
             -b 10 \
             -o ${output_dir}/msi/${name}.msi.txt
+
    >>>
 
    output {
diff --git a/wdl/neoantigen.wdl b/wdl/neoantigen.wdl
index d8f01ef..a283fe0 100755
--- a/wdl/neoantigen.wdl
+++ b/wdl/neoantigen.wdl
@@ -147,6 +147,7 @@ task run_neoantigen {
         dos2unix ${output_dir}/neoantigen/MHC_Class_I/*.all_epitopes.tsv
 
         netchop.pl ${output_dir} ${tumor}
+
    >>>
 }
 
diff --git a/wdl/pollution.wdl b/wdl/pollution.wdl
index a52bd8f..737cf8f 100755
--- a/wdl/pollution.wdl
+++ b/wdl/pollution.wdl
@@ -18,6 +18,7 @@ task run_pollution {
             -p ${probe} \
             -b $PUBLIC/pollution/${probe}_contaminate_ref.bed \
             -c $PUBLIC/pollution/${probe}_contaminate_cnvkit.bed
+
     >>>
 
     output {
@@ -39,7 +40,7 @@ workflow call_pollution {
 
     if (run) {
         if (defined(normal)) {
-            call run_pollution as run_pollution_paired {
+            call run_pollution as run_pollution_paired {
                 input:
                     name=tumor,
                     output_dir=output_dir,
@@ -50,7 +51,7 @@ workflow call_pollution {
         }
 
         if (!defined(normal)) {
-            call run_pollution as run_pollution_single {
+            call run_pollution as run_pollution_single {
                 input:
                     name=tumor,
                     output_dir=output_dir,
diff --git a/wdl/postprocess.wdl b/wdl/postprocess.wdl
index c19816e..0b43dfc 100755
--- a/wdl/postprocess.wdl
+++ b/wdl/postprocess.wdl
@@ -13,6 +13,7 @@ task run_post {
     String output_dir
     String cancer
     String project
+
     command <<<
 
         if [ ! -d ${output_dir}/report ];then
@@ -21,6 +22,7 @@ task run_post {
         indication.pl ${output_dir} ${cancer} ${project}
         sample_post.py -s ${name} -o ${output_dir}
         postprocess.py -n ${name} -s ${normal} -c ${output_dir} -o ${output_dir}/report/${name}.merged_file.xlsx
+
     >>>
 
     output {
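
For reviewers who want to exercise the patched launch command outside of run_wdl.py, below is a
minimal Python sketch of the invocation that the new cmd4 assembles. The jdk and cromwell paths
are taken from the diff; the $WORKFLOW fallback, the WDL file and the inputs file names are
assumptions for illustration only.

    #!/usr/bin/env python3
    """Sketch: rebuild and run the Cromwell command that the patched cmd4 assembles."""
    import os
    import subprocess

    workflow_root = os.environ.get("WORKFLOW", "/home/install/product/workflow")  # assumed root
    java = "/home/install/product/workflow/software/jdk-17.0.7+7/bin/java"        # path from the diff
    wdl = "pipeline.wdl"            # hypothetical example
    inputs = "sample.inputs.json"   # hypothetical example

    cmd = [
        java,
        f"-Dconfig.file={workflow_root}/codes/cromwell.examples.conf",  # pick up the config above
        "-jar", f"{workflow_root}/software/cromwell-86.jar",
        "run", wdl,
        "--inputs", inputs,
        "--metadata-output", "run.log",  # per-run metadata JSON, as in the patched cmd4
    ]
    subprocess.run(cmd, check=True)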