master
chaopower 2023-12-28 09:14:58 +08:00
parent b54741ee46
commit 47cbe45d8b
18 changed files with 122 additions and 121 deletions

View File

@ -13,6 +13,7 @@ java17 -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf
## dot ## dot
```bash ```bash
womtool graph pipeline.wdl > other/pipeline.dot womtool graph pipeline.wdl > other/pipeline.dot
dot -Tsvg other/pipeline.dot -o other/pipeline.svg
``` ```
![流程图](./other/pipeline.svg) ![流程图](./other/pipeline.svg)

View File

@ -39,7 +39,7 @@ system {
# If 'true', a SIGTERM or SIGINT will trigger Cromwell to attempt to gracefully shutdown in server mode, # If 'true', a SIGTERM or SIGINT will trigger Cromwell to attempt to gracefully shutdown in server mode,
# in particular clearing up all queued database writes before letting the JVM shut down. # in particular clearing up all queued database writes before letting the JVM shut down.
# The shutdown is a multi-phase process, each phase having its own configurable timeout. See the Dev Wiki for more details. # The shutdown is a multi-phase process, each phase having its own configurable timeout. See the Dev Wiki for more details.
graceful-server-shutdown = false graceful-server-shutdown = true
# Cromwell will cap the number of running workflows at N # Cromwell will cap the number of running workflows at N
max-concurrent-workflows = 10000 max-concurrent-workflows = 10000
@ -95,7 +95,7 @@ system {
# These are the default values in Cromwell, in most circumstances there should not be a need to change them. # These are the default values in Cromwell, in most circumstances there should not be a need to change them.
# How frequently Cromwell should scan for aborts. # How frequently Cromwell should scan for aborts.
scan-frequency: 600 seconds scan-frequency: 30 seconds
# The cache of in-progress aborts. Cromwell will add entries to this cache once a WorkflowActor has been messaged to abort. # The cache of in-progress aborts. Cromwell will add entries to this cache once a WorkflowActor has been messaged to abort.
# If on the next scan an 'Aborting' status is found for a workflow that has an entry in this cache, Cromwell will not ask # If on the next scan an 'Aborting' status is found for a workflow that has an entry in this cache, Cromwell will not ask
@ -134,7 +134,7 @@ workflow-options {
default { default {
# When a workflow type is not provided on workflow submission, this specifies the default type. # When a workflow type is not provided on workflow submission, this specifies the default type.
#workflow-type: WDL workflow-type: WDL
# When a workflow type version is not provided on workflow submission, this specifies the default type version. # When a workflow type version is not provided on workflow submission, this specifies the default type version.
workflow-type-version: "draft-2" workflow-type-version: "draft-2"
@ -193,58 +193,6 @@ call-caching {
# } # }
} }
# Google configuration
google {
#application-name = "cromwell"
# Default: just application default
#auths = [
# Application default
#{
# name = "application-default"
# scheme = "application_default"
#},
# Use a static service account
#{
# name = "service-account"
# scheme = "service_account"
# Choose between PEM file and JSON file as a credential format. They're mutually exclusive.
# PEM format:
# service-account-id = "my-service-account"
# pem-file = "/path/to/file.pem"
# JSON format:
# json-file = "/path/to/file.json"
#}
# Use service accounts provided through workflow options
#{
# name = "user-service-account"
# scheme = "user_service_account"
#}
#]
}
docker {
hash-lookup {
# Set this to match your available quota against the Google Container Engine API
#gcr-api-queries-per-100-seconds = 1000
# Time in minutes before an entry expires from the docker hashes cache and needs to be fetched again
#cache-entry-ttl = "20 minutes"
# Maximum number of elements to be kept in the cache. If the limit is reached, old elements will be removed from the cache
#cache-size = 200
# How should docker hashes be looked up. Possible values are "local" and "remote"
# "local": Lookup hashes on the local docker daemon using the cli
# "remote": Lookup hashes on docker hub, gcr, gar, quay
#method = "remote"
}
}
engine { engine {
# This instructs the engine which filesystems are at its disposal to perform any IO operation that it might need. # This instructs the engine which filesystems are at its disposal to perform any IO operation that it might need.
# For instance, WDL variables declared at the Workflow level will be evaluated using the filesystems declared here. # For instance, WDL variables declared at the Workflow level will be evaluated using the filesystems declared here.
@ -261,7 +209,7 @@ engine {
# project = "google-billing-project" # project = "google-billing-project"
# } # }
local { local {
#enabled: true enabled: true
} }
} }
} }
@ -373,7 +321,7 @@ backend {
# `script-epilogue` configures a shell command to run after the execution of every command block. # `script-epilogue` configures a shell command to run after the execution of every command block.
# #
# If this value is not set explicitly, the default value is `sync`, equivalent to: # If this value is not set explicitly, the default value is `sync`, equivalent to:
script-epilogue = "" # script-epilogue = "sync"
# #
# To turn off the default `sync` behavior set this value to an empty string: # To turn off the default `sync` behavior set this value to an empty string:
# script-epilogue = "" # script-epilogue = ""

View File

@ -136,13 +136,13 @@ while (<IN>) {
$hgvs =~ s/exon(\d+)/intron$intron;exon$exon/; $hgvs =~ s/exon(\d+)/intron$intron;exon$exon/;
$line[9] = join(":", ($gene, $hgvs)); $line[9] = join(":", ($gene, $hgvs));
} }
elsif ($gene eq "MET") {
$line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx"));
$line[8] = 'skipping'
}
else { else {
push @reason, 'not_need_spl'; push @reason, 'not_need_spl';
} }
if ($gene eq "MET" ) {
$line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx"));
$line[8] = 'skipping'
}
$protein = 'Truncating Mutations'; $protein = 'Truncating Mutations';
} }
else { else {
@ -175,7 +175,7 @@ while (<IN>) {
# tmb 流程去掉 不过滤但是修改 hgvs # tmb 流程去掉 不过滤但是修改 hgvs
if ($pipeline eq 'tmb') { if ($pipeline eq 'tmb') {
@reason = grep(!/synonymous|benign/, @reason); @reason = grep(!/synonymous/, @reason);
if (($freq < 0.05) and ($sample_type eq 't')) { if (($freq < 0.05) and ($sample_type eq 't')) {
push @reason, 'lowfreq_tissue_tmb'; push @reason, 'lowfreq_tissue_tmb';
} }

View File

@ -369,7 +369,7 @@ class PostProcess:
res = df.to_dict('records')[0] res = df.to_dict('records')[0]
msi_res['msi_count'] = res['Total_Number_of_Sites'] msi_res['msi_count'] = res['Total_Number_of_Sites']
msi_res['msi_value'] = res['%'] msi_res['msi_value'] = res['%']
if msi_res['msi_value'] >= 0.3: if msi_res['msi_value'] >= 30:
msi_res['msi_result'] = 'MSI-H' msi_res['msi_result'] = 'MSI-H'
msi_res['msi_predict'] = '对免疫检查点抑制剂可能敏感' msi_res['msi_predict'] = '对免疫检查点抑制剂可能敏感'
else: else:

View File

@ -94,9 +94,9 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl
# f'{"-Dcall-caching.enabled=false " if uncache else ""}' # f'{"-Dcall-caching.enabled=false " if uncache else ""}'
# f'-Dconfig.file=/home/zhangchao/project/pipeline/workflow/script/cromwell.examples.conf ' \ # f'-Dconfig.file=/home/zhangchao/project/pipeline/workflow/script/cromwell.examples.conf ' \
cmd4 = f'/home/install/product/workflow/software/jdk-17.0.7+7/bin/java -DLOG_MODE=standard ' \ cmd4 = f'/usr/bin/java -DLOG_MODE=standard ' \
f'-Dconfig.file=$WORKFLOW/codes/cromwell.examples.conf ' \ f'-Dconfig.file=$WORKFLOW/codes/cromwell.examples.conf ' \
f'-jar $WORKFLOW/software/cromwell-86.jar run {wdl} --inputs {jsfile_path} ' f'-jar $WORKFLOW/software/cromwell-51.jar run {wdl} --inputs {jsfile_path} '
# cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}' # cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}'
cmd = f'{cmd3}; {cmd4}' cmd = f'{cmd3}; {cmd4}'

View File

@ -1,18 +1,18 @@
# pipeline
import "wdl/qc.wdl" import "wdl/qc.wdl" as qc
import "wdl/alignment.wdl" import "wdl/alignment.wdl" as alignment
import "wdl/call_mutation.wdl" import "wdl/call_mutation.wdl" as call_mutation
import "wdl/fusion.wdl" import "wdl/fusion.wdl" as fusion
import "wdl/statistics.wdl" import "wdl/statistics.wdl" as statistics
import "wdl/cnv.wdl" import "wdl/cnv.wdl" as cnv
import "wdl/msi.wdl" import "wdl/msi.wdl" as msi
import "wdl/chemo.wdl" import "wdl/chemo.wdl" as chemo
import "wdl/hereditary.wdl" import "wdl/hereditary.wdl" as hereditary
import "wdl/pollution.wdl" import "wdl/pollution.wdl" as pollution
import "wdl/tmb.wdl" import "wdl/tmb.wdl" as tmb
import "wdl/postprocess.wdl" import "wdl/postprocess.wdl" as postprocess
import "wdl/neoantigen.wdl" import "wdl/neoantigen.wdl" as neoantigen
workflow pipeline { workflow pipeline {
@ -199,6 +199,7 @@ workflow pipeline {
fusion=call_fusion.fusion_vcf, fusion=call_fusion.fusion_vcf,
cnv=call_cnv.cnv_filter, cnv=call_cnv.cnv_filter,
msi=call_msi.msi_txt, msi=call_msi.msi_txt,
tmb=call_tmb.tmb_txt,
hereditary=call_hereditary.hereditary_txt, hereditary=call_hereditary.hereditary_txt,
chemo=call_chemo.chemo_res, chemo=call_chemo.chemo_res,
neoantigen=call_neoantigen.neoantigen_txt, neoantigen=call_neoantigen.neoantigen_txt,
@ -209,4 +210,8 @@ workflow pipeline {
cancer=cancer, cancer=cancer,
project=project project=project
} }
output {
String result = "${output_dir}/report/${tumor}.merged_file.xlsx"
}
} }

View File

@ -115,50 +115,85 @@ workflow alignment {
String output_dir String output_dir
if (run) { if (run) {
scatter(name in [tumor, normal]) { # 单样本
if (defined(name)) { if (!defined(normal)) {
call bwa { call bwa as bwa_tumor {
input: input:
name=name, name=tumor,
ref=ref, ref=ref,
output_dir=output_dir, output_dir=output_dir,
read1=if name==tumor then tumor_r1 else normal_r1, read1=tumor_r1,
read2=if name==tumor then tumor_r2 else normal_r2 read2=tumor_r2
} }
if (name==tumor) {
if (umi) { if (umi) {
call markduplicates_genecore as tumor_markduplicates_genecore { call markduplicates_genecore as tumor_markduplicates_genecore {
input: input:
name=name, name=tumor,
ref=ref, ref=ref,
output_dir=output_dir, output_dir=output_dir,
sorted_bam=bwa.sorted_bam, sorted_bam=bwa_tumor.sorted_bam,
} }
} }
if (!umi) { if (!umi) {
call markduplicates_picard as tumor_markduplicates_picard { call markduplicates_picard as tumor_markduplicates_picard {
input: input:
name=name, name=tumor,
ref=ref, ref=ref,
output_dir=output_dir, output_dir=output_dir,
sorted_bam=bwa.sorted_bam, sorted_bam=bwa_tumor.sorted_bam,
} }
} }
} }
# 双样本
if (defined(normal)) {
call bwa as bwa_tumor_control {
input:
name=tumor,
ref=ref,
output_dir=output_dir,
read1=tumor_r1,
read2=tumor_r2
}
call bwa as bwa_normal_control {
input:
name=normal,
ref=ref,
output_dir=output_dir,
read1=normal_r1,
read2=normal_r2
}
call markduplicates_picard as tumor_markduplicates_picard_control {
input:
name=tumor,
ref=ref,
output_dir=output_dir,
sorted_bam=bwa_tumor_control.sorted_bam,
}
if (umi) {
call markduplicates_genecore as normal_markduplicates_genecore {
input:
name=normal,
ref=ref,
output_dir=output_dir,
sorted_bam=bwa_normal_control.sorted_bam,
}
}
if (name==select_first([normal, 'None'])) { if (!umi) {
call markduplicates_picard as normal_markduplicates_picard { call markduplicates_picard as normal_markduplicates_picard {
input: input:
name=name, name=normal,
ref=ref, ref=ref,
output_dir=output_dir, output_dir=output_dir,
sorted_bam=bwa.sorted_bam, sorted_bam=bwa_normal_control.sorted_bam,
}
} }
} }
} }
}
}
output { output {
String tumor_sorted_bam = "${output_dir}/alignment/${tumor}.sorted.bam" String tumor_sorted_bam = "${output_dir}/alignment/${tumor}.sorted.bam"

View File

@ -1,3 +1,4 @@
# mutation
task mutation_calling_umi { task mutation_calling_umi {
String name String name
@ -266,7 +267,7 @@ task mutation_calling_tissue_control {
vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t vcf_add_tag_msi.pl ${output_dir}/mutation/${name}.raw.snp_indel.vcf ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf ${probe} t
vcf_filter.py -i ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf \ vcf_filter.py -i ${output_dir}/mutation/${name}.raw.addtagmsi.snp_indel.vcf \
-o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \ -o ${output_dir}/mutation/${name}.snp_indel.somatic.vcf \
-e 'INFO/STATUS="StrongSomatic" | ( INFO/STATUS="LikelySomatic" && FORMAT/AF[0] > 3*FORMAT/AF[1] )' -e 'INFO/STATUS="StrongSomatic" | ( INFO/STATUS="LikelySomatic" && FORMAT/AF[0] > 3*FORMAT/AF[1] )'
vcf_filter.py -i ${output_dir}/mutation/${name}.raw.snp_indel.vcf \ vcf_filter.py -i ${output_dir}/mutation/${name}.raw.snp_indel.vcf \
@ -718,6 +719,7 @@ workflow call_mutation {
cancer=cancer cancer=cancer
} }
} }
if (!umi) { if (!umi) {
call mutation_calling_tissue_control { call mutation_calling_tissue_control {
input: input:

View File

@ -1,3 +1,4 @@
# chemo
task run_chemo { task run_chemo {
String name String name

View File

@ -1,3 +1,4 @@
# cnv
task cnv_single { task cnv_single {
String name String name

View File

@ -1,3 +1,4 @@
task rmdup_picard { task rmdup_picard {
String name String name
String sorted_bam String sorted_bam

View File

@ -1,3 +1,4 @@
# hereditary
task run_hereditary { task run_hereditary {
String name String name

View File

@ -1,3 +1,4 @@
# msi
task msi_single { task msi_single {
String name String name
@ -89,6 +90,3 @@ workflow call_msi {
String msi_txt = "${output_dir}/msi/${tumor}.msi.txt" String msi_txt = "${output_dir}/msi/${tumor}.msi.txt"
} }
} }

View File

@ -1,3 +1,4 @@
task run_pollution { task run_pollution {
String name String name
String output_dir String output_dir

View File

@ -1,9 +1,11 @@
# postprocess
task run_post { task run_post {
String? mutation String? mutation
String? fusion String? fusion
String? cnv String? cnv
String? msi String? msi
String? tmb
String? hereditary String? hereditary
String? chemo String? chemo
String? neoantigen String? neoantigen
@ -38,6 +40,7 @@ workflow call_postprocess {
String? fusion String? fusion
String? cnv String? cnv
String? msi String? msi
String? tmb
String? hereditary String? hereditary
String? pollution String? pollution
String? chemo String? chemo
@ -55,6 +58,7 @@ workflow call_postprocess {
fusion=fusion, fusion=fusion,
cnv=cnv, cnv=cnv,
msi=msi, msi=msi,
tmb=tmb,
hereditary=hereditary, hereditary=hereditary,
chemo=chemo, chemo=chemo,
neoantigen=neoantigen, neoantigen=neoantigen,

View File

@ -1,3 +1,4 @@
#qc
task runqc { task runqc {
String name String name

View File

@ -1,3 +1,4 @@
# statistics
task run_statistics { task run_statistics {
String name String name
@ -17,7 +18,7 @@ task run_statistics {
samtools stats --reference ${ref} -t ${bed} -@ 10 ${rmdupBam} > ${output_dir}/qc/${name}.rmdup.stat samtools stats --reference ${ref} -t ${bed} -@ 10 ${rmdupBam} > ${output_dir}/qc/${name}.rmdup.stat
bamdst -p ${bed} -o ${output_dir}/qc/${name}_bamdst ${rmdupBam} bamdst -p ${bed} -o ${output_dir}/qc/${name}_bamdst ${rmdupBam}
qc_stat.py ${output_dir}/qc/${name}.json ${output_dir}/qc/${name}_bamdst/ ${output_dir}/qc/${name}_qc.txt qc_stat.py ${output_dir}/qc/${name}.json ${output_dir}/qc/${name}_bamdst/ ${output_dir}/qc/${name}_qc.txt
#
# InsertAndDepthStat.R \ # InsertAndDepthStat.R \
# ${output_dir}/qc/${name}_InsertAndDepthStat \ # ${output_dir}/qc/${name}_InsertAndDepthStat \
# ${output_dir}/qc/${name}_bamdst/insertsize.plot \ # ${output_dir}/qc/${name}_bamdst/insertsize.plot \

View File

@ -1,3 +1,4 @@
# tmb
task run_tmb { task run_tmb {
String name String name