From b82438461c30e2fa297c5484b62ba35683d6d742 Mon Sep 17 00:00:00 2001 From: chaopower Date: Tue, 2 Jan 2024 02:01:20 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- codes/cromwell.examples.conf | 87 ++++----- codes/filter_snpindel.pl | 4 +- codes/public/msi/624_650_160_intersect_74.bed | 74 ++++++++ .../msi/624_650_intersect_depth50_177.bed | 177 ++++++++++++++++++ codes/run_wdl.py | 55 +++++- pipeline.wdl | 3 +- wdl/msi.wdl | 88 ++++----- wdl/postprocess.wdl | 1 - 8 files changed, 386 insertions(+), 103 deletions(-) create mode 100644 codes/public/msi/624_650_160_intersect_74.bed create mode 100644 codes/public/msi/624_650_intersect_depth50_177.bed diff --git a/codes/cromwell.examples.conf b/codes/cromwell.examples.conf index e164a50..74ea2e2 100755 --- a/codes/cromwell.examples.conf +++ b/codes/cromwell.examples.conf @@ -56,6 +56,7 @@ system { # Default number of cache read workers number-of-cache-read-workers = 25 + io { # throttle { # # Global Throttling - This is mostly useful for GCS and can be adjusted to match @@ -158,19 +159,19 @@ call-caching { # The maximum number of times Cromwell will attempt to copy cache hits before giving up and running the job. #max-failed-copy-attempts = 1000000 - blacklist-cache { - # # The call caching blacklist cache is off by default. This cache is used to blacklist cache hits based on cache - # # hit ids or buckets of cache hit paths that Cromwell has previously failed to copy for permissions reasons. - enabled: false - # - # # A blacklist grouping can be specified in workflow options which will inform the blacklister which workflows - # # should share a blacklist cache. - # groupings { - # workflow-option: call-cache-blacklist-group - # concurrency: 10000 - # ttl: 2 hours - # size: 1000 - } + blacklist-cache { + # # The call caching blacklist cache is off by default. 
This cache is used to blacklist cache hits based on cache + # # hit ids or buckets of cache hit paths that Cromwell has previously failed to copy for permissions reasons. + enabled: false + # + # # A blacklist grouping can be specified in workflow options which will inform the blacklister which workflows + # # should share a blacklist cache. + # groupings { + # workflow-option: call-cache-blacklist-group + # concurrency: 10000 + # ttl: 2 hours + # size: 1000 + } # # buckets { # # Guava cache concurrency. @@ -201,29 +202,29 @@ google { # Default: just application default #auths = [ - # Application default - #{ - # name = "application-default" - # scheme = "application_default" - #}, + # Application default + #{ + # name = "application-default" + # scheme = "application_default" + #}, - # Use a static service account - #{ - # name = "service-account" - # scheme = "service_account" - # Choose between PEM file and JSON file as a credential format. They're mutually exclusive. - # PEM format: - # service-account-id = "my-service-account" - # pem-file = "/path/to/file.pem" - # JSON format: - # json-file = "/path/to/file.json" - #} + # Use a static service account + #{ + # name = "service-account" + # scheme = "service_account" + # Choose between PEM file and JSON file as a credential format. They're mutually exclusive. 
+ # PEM format: + # service-account-id = "my-service-account" + # pem-file = "/path/to/file.pem" + # JSON format: + # json-file = "/path/to/file.json" + #} - # Use service accounts provided through workflow options - #{ - # name = "user-service-account" - # scheme = "user_service_account" - #} + # Use service accounts provided through workflow options + #{ + # name = "user-service-account" + # scheme = "user_service_account" + #} #] } @@ -292,11 +293,11 @@ languages { } # draft-3 is the same as 1.0 so files should be able to be submitted to Cromwell as 1.0 # "draft-3" { - # language-factory = "languages.wdl.draft3.WdlDraft3LanguageFactory" - # config { - # strict-validation: true - # enabled: true - # } + # language-factory = "languages.wdl.draft3.WdlDraft3LanguageFactory" + # config { + # strict-validation: true + # enabled: true + # } # } "1.0" { # 1.0 is just a rename of draft-3, so yes, they really do use the same factory: @@ -378,11 +379,11 @@ backend { # To turn off the default `sync` behavior set this value to an empty string: # script-epilogue = "" - # `glob-link-command` specifies command used to link glob outputs, by default using hard-links. - # If filesystem doesn't allow hard-links (e.g., beeGFS), change to soft-links as follows: - # glob-link-command = "ln -sL GLOB_PATTERN GLOB_DIRECTORY" + # `glob-link-command` specifies command used to link glob outputs, by default using hard-links. + # If filesystem doesn't allow hard-links (e.g., beeGFS), change to soft-links as follows: + # glob-link-command = "ln -sL GLOB_PATTERN GLOB_DIRECTORY" - # The list of possible runtime custom attributes. + # The list of possible runtime custom attributes. runtime-attributes = """ String? docker String? 
docker_user diff --git a/codes/filter_snpindel.pl b/codes/filter_snpindel.pl index 0f50d25..445910f 100755 --- a/codes/filter_snpindel.pl +++ b/codes/filter_snpindel.pl @@ -8,11 +8,11 @@ die "useage:perl $0 input out tag_out project sample_type pipeline" unless @ARGV my ($input, $out, $tag_out, $project, $sample_type, $pipeline) = @ARGV; my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public"; -print "SnpIndel过滤使用public路径:$public_path\n"; +print "$pipeline 过滤使用public路径:$public_path\n"; my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase"; -print "SnpIndel过滤使用database路径:$database_path\n"; +print "$pipeline 过滤使用database路径:$database_path\n"; # open OUT, ">$**.hg19_multianno_filter.txt"; open OUT, "> $out"; diff --git a/codes/public/msi/624_650_160_intersect_74.bed b/codes/public/msi/624_650_160_intersect_74.bed new file mode 100644 index 0000000..c4b49c2 --- /dev/null +++ b/codes/public/msi/624_650_160_intersect_74.bed @@ -0,0 +1,74 @@ +chr1 161293492 161293503 +chr1 161309335 161309346 +chr1 161332091 161332105 +chr1 204495595 204495609 +chr2 29449344 29449368 +chr2 47600581 47600591 +chr2 47635523 47635536 +chr2 47641559 47641586 +chr2 47702451 47702470 +chr2 48032740 48032753 +chr2 48033890 48033908 +chr2 58390218 58390230 +chr2 58453843 58453855 +chr2 209101905 209101915 +chr2 212495336 212495347 +chr2 212578379 212578393 +chr2 215657182 215657198 +chr3 37067099 37067120 +chr3 142231062 142231080 +chr3 142241692 142241704 +chr3 142259706 142259720 +chr3 142274739 142274749 +chr4 25666785 25666805 +chr4 55598211 55598236 +chr4 55976947 55976960 +chr4 153268227 153268241 +chr5 112111309 112111322 +chr5 131924363 131924373 +chr5 131927550 131927560 +chr6 32790099 32790109 +chr6 117642992 117643012 +chr7 6037057 6037074 +chr7 116409675 116409690 +chr7 140434574 140434585 +chr7 140434586 140434596 +chr9 5073681 5073691 +chr9 80343587 80343601 +chr9 135773000 135773018 +chr10 8115668 
8115686 +chr10 43595836 43595850 +chr10 89720633 89720648 +chr11 108114661 108114676 +chr11 108119615 108119629 +chr11 108121410 108121425 +chr11 108141955 108141970 +chr11 108151697 108151707 +chr11 108188266 108188279 +chr12 69214081 69214093 +chr12 133237753 133237767 +chr13 28609813 28609823 +chr13 32893197 32893207 +chr13 32907535 32907546 +chr13 48954159 48954172 +chr13 48954282 48954293 +chr13 48955363 48955373 +chr13 49039094 49039118 +chr14 68944343 68944357 +chr14 69006883 69006908 +chr14 69149592 69149604 +chr14 69149627 69149647 +chr17 7572154 7572172 +chr17 7578711 7578729 +chr17 29482977 29482987 +chr17 29508819 29508835 +chr17 29528406 29528416 +chr17 29559061 29559087 +chr17 29663614 29663625 +chr17 29663625 29663636 +chr17 37855713 37855727 +chr17 59857599 59857610 +chr20 43962585 43962595 +chr20 43962640 43962652 +chr20 43963436 43963446 +chr19 50911947 50911959 diff --git a/codes/public/msi/624_650_intersect_depth50_177.bed b/codes/public/msi/624_650_intersect_depth50_177.bed new file mode 100644 index 0000000..9cbac98 --- /dev/null +++ b/codes/public/msi/624_650_intersect_depth50_177.bed @@ -0,0 +1,177 @@ +chr1 16200729 16200739 +chr1 16248728 16248739 +chr1 78414310 78414328 +chr1 78432647 78432658 +chr1 78432658 78432668 +chr1 78435701 78435713 +chr1 161293492 161293503 +chr1 161309335 161309346 +chr1 161332091 161332105 +chr1 204495595 204495609 +chr1 243736210 243736225 +chr2 29449344 29449368 +chr2 39536689 39536716 +chr2 47600581 47600591 +chr2 47635523 47635536 +chr2 47641559 47641586 +chr2 47702451 47702470 +chr2 48032740 48032753 +chr2 48033890 48033908 +chr2 58390218 58390230 +chr2 58453843 58453855 +chr2 61143993 61144003 +chr2 61145498 61145511 +chr2 61726050 61726061 +chr2 95849361 95849384 +chr2 136873827 136873837 +chr2 141625834 141625853 +chr2 173435578 173435592 +chr2 190717517 190717527 +chr2 198257673 198257683 +chr2 198267243 198267256 +chr2 209101905 209101915 +chr2 212495336 212495347 +chr2 212578379 212578393 +chr2 
215657182 215657198 +chr2 225422600 225422622 +chr3 30691871 30691881 +chr3 47103861 47103875 +chr3 52696310 52696321 +chr3 69988437 69988451 +chr3 89448876 89448888 +chr3 138400782 138400795 +chr3 142231062 142231080 +chr3 142241692 142241704 +chr3 142259706 142259720 +chr3 142274739 142274749 +chr3 156413615 156413632 +chr3 169988364 169988384 +chr3 169992975 169992993 +chr3 185009691 185009711 +chr3 185010850 185010860 +chr3 185080912 185080922 +chr3 185155430 185155441 +chr3 191888452 191888465 +chr4 1919852 1919862 +chr4 25666785 25666805 +chr4 55135777 55135789 +chr4 55135811 55135827 +chr4 55598211 55598236 +chr4 55976947 55976960 +chr4 106162144 106162154 +chr4 142950186 142950196 +chr4 153268227 153268241 +chr4 185340725 185340745 +chr5 38959431 38959441 +chr5 38978758 38978769 +chr5 67576330 67576345 +chr5 67584512 67584524 +chr5 112111309 112111322 +chr5 131924363 131924373 +chr5 131927550 131927560 +chr5 170818290 170818300 +chr5 170837513 170837526 +chr6 32790099 32790109 +chr6 106534484 106534496 +chr6 117642992 117643012 +chr6 134494708 134494721 +chr6 138192324 138192335 +chr6 162683814 162683835 +chr6 163899794 163899806 +chr6 163991476 163991492 +chr6 163991598 163991608 +chr7 6037057 6037074 +chr7 116409675 116409690 +chr7 116414203 116414214 +chr7 140421095 140421107 +chr7 140425499 140425511 +chr7 140482263 140482279 +chr7 140484450 140484462 +chr7 140487100 140487124 +chr7 140489019 140489029 +chr7 140496148 140496164 +chr7 140498359 140498380 +chr7 148543693 148543704 +chrX 39930433 39930446 +chrX 44920541 44920551 +chrX 44935924 44935937 +chrX 44949951 44949962 +chrX 123184949 123184968 +chrX 123204978 123204992 +chr8 38138963 38138974 +chr8 38175279 38175290 +chr8 38316067 38316077 +chr8 38321488 38321499 +chr8 38321830 38321853 +chr8 117864952 117864966 +chr8 117868531 117868554 +chr8 141754888 141754904 +chr9 5073681 5073691 +chr9 80343587 80343601 +chr9 87357642 87357652 +chr9 87430433 87430449 +chr9 87487518 87487529 +chr9 135773000 
135773018 +chr10 8115668 8115686 +chr10 43595836 43595850 +chr10 89720633 89720648 +chr11 94212930 94212941 +chr11 102193508 102193534 +chr11 108114661 108114676 +chr11 108119615 108119629 +chr11 108121410 108121425 +chr11 108141955 108141970 +chr11 108151697 108151707 +chr11 108188266 108188279 +chr11 118369265 118369280 +chr11 119144791 119144808 +chr11 125490765 125490786 +chr12 463405 463415 +chr12 69214081 69214093 +chr12 112893675 112893692 +chr12 133237753 133237767 +chr13 26959325 26959335 +chr13 26967477 26967488 +chr13 28961945 28961960 +chr13 28962591 28962602 +chr13 28963283 28963296 +chr13 28980036 28980046 +chr13 32893197 32893207 +chr13 32907535 32907546 +chr13 48954159 48954172 +chr13 48954282 48954293 +chr13 48955363 48955373 +chr13 49039094 49039118 +chr14 23652346 23652367 +chr14 68944343 68944357 +chr14 68964202 68964214 +chr14 69006883 69006908 +chr14 69149592 69149604 +chr14 69149627 69149647 +chr15 99439963 99439973 +chr16 3808052 3808065 +chr16 9934670 9934690 +chr17 7572154 7572172 +chr17 7577678 7577694 +chr17 7578711 7578729 +chr17 29482977 29482987 +chr17 29508819 29508835 +chr17 29528406 29528416 +chr17 29559061 29559087 +chr17 29663614 29663625 +chr17 29663625 29663636 +chr17 37855713 37855727 +chr17 47696764 47696774 +chr17 59857599 59857610 +chr17 66526937 66526947 +chr17 66527166 66527177 +chr18 60969534 60969548 +chr20 43962585 43962595 +chr20 43962640 43962652 +chr20 43963436 43963446 +chr20 52188399 52188409 +chr19 1225275 1225288 +chr19 1228058 1228069 +chr19 50911947 50911959 +chr22 41545024 41545038 +chr22 41550984 41550995 diff --git a/codes/run_wdl.py b/codes/run_wdl.py index 3d7e61b..9ef956f 100755 --- a/codes/run_wdl.py +++ b/codes/run_wdl.py @@ -3,6 +3,7 @@ import argparse import json import os +import re import subprocess import time from datetime import datetime @@ -98,7 +99,6 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl # 记录开始时间 start_time = time.time() - print(cmd) ret = 
subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8") pidnum = ret.pid with open(os.path.join(output_dir, 'pid'), 'w') as pidfile: @@ -106,16 +106,61 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl # ret.wait() # 等待进程完成,并获取输出和错误 output, error = ret.communicate() + print(output, error) + + stdout = open(os.path.join(output_dir, f'{args.barcode}_{logname}_stdout.log'), 'w') + stderr = open(os.path.join(output_dir, f'{args.barcode}_{logname}_stderr.log'), 'w') + # 记录结束时间 end_time = time.time() # 计算运行时间 elapsed_time = end_time - start_time - print("\n运行时间:{:.2f} 秒".format(elapsed_time)) + # 使用正则表达式提取UUID + match = re.search(r'UUID\(([^)]+)\)', output, re.MULTILINE) - print(output, error) - print('#' * 50) - print('读取日志') + workflow_id = '' + if match: + workflow_id = match.group(1) + stdout.write(f"任务 ID: {workflow_id}\n\n") + else: + stderr.write("未提取到任务 ID\n\n") + + stdout.write("任务耗时: {:.2f} 秒\n\n".format(elapsed_time)) + + workflow_path = os.path.join(output_dir, 'cromwell-executions', 'pipeline', workflow_id) + stdout.write(f'workflow 地址: {workflow_path}\n\n') + + stdout_files = list() + stderr_files = list() + for root, dirs, files in os.walk(workflow_path): + for file in files: + if str(file).endswith('stdout'): + file_path = os.path.join(root, file) + stdout_files.append(str(file_path)) + if str(file).endswith('stderr'): + file_path = os.path.join(root, file) + stderr_files.append(str(file_path)) + + # 按照文件的修改时间倒序排序 + sorted_stdout_files = sorted(stdout_files, key=lambda x: os.path.getmtime(x), reverse=True) + sorted_stderr_files = sorted(stderr_files, key=lambda x: os.path.getmtime(x), reverse=True) + + for file_path in sorted_stdout_files: + with open(file_path, 'r') as f: + stdout.write('#' * 50 + '\n') + stdout.write(file_path + '\n') + stdout.write(f.read()) + stdout.write('\n\n') + + for file_path in sorted_stderr_files: + with open(file_path, 'r') as f: + stderr.write('#' * 
50 + '\n') + stderr.write(file_path + '\n') + stderr.write(f.read()) + stderr.write('\n\n') + stdout.close() + stderr.close() if __name__ == '__main__': diff --git a/pipeline.wdl b/pipeline.wdl index 8535217..d9a4b81 100644 --- a/pipeline.wdl +++ b/pipeline.wdl @@ -140,9 +140,8 @@ workflow pipeline { run=catecode['addMsi'], tumor=tumor, tumor_rmdup_bam=alignment.tumor_rmdup_bam, - normal=normal, normal_rmdup_bam=alignment.normal_rmdup_bam, - bed=bed, + probe=probe, output_dir=workdir } diff --git a/wdl/msi.wdl b/wdl/msi.wdl index d615c9d..f8e15a5 100755 --- a/wdl/msi.wdl +++ b/wdl/msi.wdl @@ -1,29 +1,8 @@ # msi -task msi_single { - String name - String bed - String output_dir - String rmdup_bam - - command <<< - if [ ! -d ${output_dir}/msi ];then - mkdir ${output_dir}/msi - fi - - msisensor2 msi \ - -M /dataseq/jmdna/software/msisensor2/models_hg19_GRCh37 \ - -t ${output_dir}/alignment/${name}.rmdup.bam \ - -e ${bed} \ - -b 10 \ - -o ${output_dir}/msi/${name}.msi.txt - >>> - -} - -task msi_paired { - String bed +task run_msi { String name + String probe String output_dir String tumor_rmdup_bam String normal_rmdup_bam @@ -33,14 +12,36 @@ task msi_paired { mkdir ${output_dir}/msi fi + if [ "${probe}" == "682" ]; then + + echo "msi 682探针 " msisensor2 msi \ - -d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list \ - -n ${normal_rmdup_bam} \ - -t ${tumor_rmdup_bam} \ - -e ${bed} \ - -b 10 \ + -f 0.01 \ + -d /dataseq/jmdna/software/msisensor2/msi.10-50.homosite.list \ + -n ${normal_rmdup_bam} -t ${tumor_rmdup_bam} \ + -e $PUBLIC/msi/624_650_intersect_depth50_177.bed \ + -b 10 -c 30 -o ${output_dir}/msi/${name}.msi.txt + + elif [ "${probe}" == "624" ]; then + + echo "msi 624 探针 " + msisensor2 msi \ + -M /dataseq/jmdna/software/msisensor2/models_hg19_GRCh37 \ + -t ${tumor_rmdup_bam} -e $PUBLIC/msi/624_650_intersect_depth50_177.bed -b 10 \ -o ${output_dir}/msi/${name}.msi.txt + elif [ "${probe}" == "160" ]; then + + echo "msi 160 探针 " + msisensor2 msi \ + -M 
/dataseq/jmdna/software/msisensor2/models_hg19_GRCh37 \ + -t ${tumor_rmdup_bam} -e $PUBLIC/msi/624_650_160_intersect_74.bed \ + -b 10 -o ${output_dir}/msi/${name}.msi.txt + + else + echo "未知的选项: ${probe}" + fi + >>> } @@ -48,34 +49,21 @@ task msi_paired { workflow call_msi { Boolean run=true + String probe String tumor String tumor_rmdup_bam - String? normal String? normal_rmdup_bam - String bed String output_dir if (run) { - # 单样本 - if (!defined(normal)) { - call msi_single { - input: - name=tumor, - output_dir=output_dir, - rmdup_bam=tumor_rmdup_bam, - bed=bed - } - } - # 双样本 - if (defined(normal)) { - call msi_paired { - input: - name=tumor, - bed=bed, - output_dir=output_dir, - tumor_rmdup_bam=tumor_rmdup_bam, - normal_rmdup_bam=normal_rmdup_bam - } + + call run_msi { + input: + name=tumor, + probe=probe, + output_dir=output_dir, + tumor_rmdup_bam=tumor_rmdup_bam, + normal_rmdup_bam=normal_rmdup_bam } } diff --git a/wdl/postprocess.wdl b/wdl/postprocess.wdl index bc5c7a6..033dd66 100755 --- a/wdl/postprocess.wdl +++ b/wdl/postprocess.wdl @@ -23,7 +23,6 @@ task run_post { indication.pl ${output_dir} ${cancer} ${project} sample_post.py -s ${name} -o ${output_dir} postprocess.py -n ${name} -s ${normal} -c ${output_dir} -o ${output_dir}/report/${name}.merged_file.xlsx - sleep 2m } output {