master
chaopower 2024-01-02 02:01:20 +08:00
parent ab356a15f0
commit b82438461c
8 changed files with 386 additions and 103 deletions

View File

@ -56,6 +56,7 @@ system {
# Default number of cache read workers
number-of-cache-read-workers = 25
io {
# throttle {
# # Global Throttling - This is mostly useful for GCS and can be adjusted to match
@ -158,19 +159,19 @@ call-caching {
# The maximum number of times Cromwell will attempt to copy cache hits before giving up and running the job.
#max-failed-copy-attempts = 1000000
blacklist-cache {
# # The call caching blacklist cache is off by default. This cache is used to blacklist cache hits based on cache
# # hit ids or buckets of cache hit paths that Cromwell has previously failed to copy for permissions reasons.
enabled: false
#
# # A blacklist grouping can be specified in workflow options which will inform the blacklister which workflows
# # should share a blacklist cache.
# groupings {
# workflow-option: call-cache-blacklist-group
# concurrency: 10000
# ttl: 2 hours
# size: 1000
}
blacklist-cache {
# # The call caching blacklist cache is off by default. This cache is used to blacklist cache hits based on cache
# # hit ids or buckets of cache hit paths that Cromwell has previously failed to copy for permissions reasons.
enabled: false
#
# # A blacklist grouping can be specified in workflow options which will inform the blacklister which workflows
# # should share a blacklist cache.
# groupings {
# workflow-option: call-cache-blacklist-group
# concurrency: 10000
# ttl: 2 hours
# size: 1000
}
#
# buckets {
# # Guava cache concurrency.
@ -201,29 +202,29 @@ google {
# Default: just application default
#auths = [
# Application default
#{
# name = "application-default"
# scheme = "application_default"
#},
# Application default
#{
# name = "application-default"
# scheme = "application_default"
#},
# Use a static service account
#{
# name = "service-account"
# scheme = "service_account"
# Choose between PEM file and JSON file as a credential format. They're mutually exclusive.
# PEM format:
# service-account-id = "my-service-account"
# pem-file = "/path/to/file.pem"
# JSON format:
# json-file = "/path/to/file.json"
#}
# Use a static service account
#{
# name = "service-account"
# scheme = "service_account"
# Choose between PEM file and JSON file as a credential format. They're mutually exclusive.
# PEM format:
# service-account-id = "my-service-account"
# pem-file = "/path/to/file.pem"
# JSON format:
# json-file = "/path/to/file.json"
#}
# Use service accounts provided through workflow options
#{
# name = "user-service-account"
# scheme = "user_service_account"
#}
# Use service accounts provided through workflow options
#{
# name = "user-service-account"
# scheme = "user_service_account"
#}
#]
}
@ -292,11 +293,11 @@ languages {
}
# draft-3 is the same as 1.0 so files should be able to be submitted to Cromwell as 1.0
# "draft-3" {
# language-factory = "languages.wdl.draft3.WdlDraft3LanguageFactory"
# config {
# strict-validation: true
# enabled: true
# }
# language-factory = "languages.wdl.draft3.WdlDraft3LanguageFactory"
# config {
# strict-validation: true
# enabled: true
# }
# }
"1.0" {
# 1.0 is just a rename of draft-3, so yes, they really do use the same factory:
@ -378,11 +379,11 @@ backend {
# To turn off the default `sync` behavior set this value to an empty string:
# script-epilogue = ""
# `glob-link-command` specifies command used to link glob outputs, by default using hard-links.
# If filesystem doesn't allow hard-links (e.g., beeGFS), change to soft-links as follows:
# glob-link-command = "ln -sL GLOB_PATTERN GLOB_DIRECTORY"
# `glob-link-command` specifies command used to link glob outputs, by default using hard-links.
# If filesystem doesn't allow hard-links (e.g., beeGFS), change to soft-links as follows:
# glob-link-command = "ln -sL GLOB_PATTERN GLOB_DIRECTORY"
# The list of possible runtime custom attributes.
# The list of possible runtime custom attributes.
runtime-attributes = """
String? docker
String? docker_user

View File

@ -8,11 +8,11 @@ die "useage:perl $0 input out tag_out project sample_type pipeline" unless @ARGV
# Positional command-line arguments, in the order listed by the usage message.
# $pipeline is interpolated into the log lines below.
my ($input, $out, $tag_out, $project, $sample_type, $pipeline) = @ARGV;
# Root of the shared "public" resources; the PUBLIC environment variable,
# when defined, overrides the built-in default path.
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public";
print "SnpIndel过滤使用public路径$public_path\n";
print "$pipeline 过滤使用public路径$public_path\n";
# Root of the report database; the DATABASE environment variable, when
# defined, overrides the built-in default path.
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "SnpIndel过滤使用database路径$database_path\n";
print "$pipeline 过滤使用database路径$database_path\n";
# open OUT, ">$**.hg19_multianno_filter.txt";
# Three-argument open keeps the file name from being parsed for mode
# characters, and the explicit check stops the script immediately when the
# output file cannot be created instead of silently discarding later writes.
# NOTE(review): the bareword handle OUT is kept because code outside this
# excerpt presumably prints to it -- confirm before converting to a lexical.
open(OUT, '>', $out) or die "Cannot open '$out' for writing: $!";

View File

@ -0,0 +1,74 @@
chr1 161293492 161293503
chr1 161309335 161309346
chr1 161332091 161332105
chr1 204495595 204495609
chr2 29449344 29449368
chr2 47600581 47600591
chr2 47635523 47635536
chr2 47641559 47641586
chr2 47702451 47702470
chr2 48032740 48032753
chr2 48033890 48033908
chr2 58390218 58390230
chr2 58453843 58453855
chr2 209101905 209101915
chr2 212495336 212495347
chr2 212578379 212578393
chr2 215657182 215657198
chr3 37067099 37067120
chr3 142231062 142231080
chr3 142241692 142241704
chr3 142259706 142259720
chr3 142274739 142274749
chr4 25666785 25666805
chr4 55598211 55598236
chr4 55976947 55976960
chr4 153268227 153268241
chr5 112111309 112111322
chr5 131924363 131924373
chr5 131927550 131927560
chr6 32790099 32790109
chr6 117642992 117643012
chr7 6037057 6037074
chr7 116409675 116409690
chr7 140434574 140434585
chr7 140434586 140434596
chr9 5073681 5073691
chr9 80343587 80343601
chr9 135773000 135773018
chr10 8115668 8115686
chr10 43595836 43595850
chr10 89720633 89720648
chr11 108114661 108114676
chr11 108119615 108119629
chr11 108121410 108121425
chr11 108141955 108141970
chr11 108151697 108151707
chr11 108188266 108188279
chr12 69214081 69214093
chr12 133237753 133237767
chr13 28609813 28609823
chr13 32893197 32893207
chr13 32907535 32907546
chr13 48954159 48954172
chr13 48954282 48954293
chr13 48955363 48955373
chr13 49039094 49039118
chr14 68944343 68944357
chr14 69006883 69006908
chr14 69149592 69149604
chr14 69149627 69149647
chr17 7572154 7572172
chr17 7578711 7578729
chr17 29482977 29482987
chr17 29508819 29508835
chr17 29528406 29528416
chr17 29559061 29559087
chr17 29663614 29663625
chr17 29663625 29663636
chr17 37855713 37855727
chr17 59857599 59857610
chr20 43962585 43962595
chr20 43962640 43962652
chr20 43963436 43963446
chr19 50911947 50911959

View File

@ -0,0 +1,177 @@
chr1 16200729 16200739
chr1 16248728 16248739
chr1 78414310 78414328
chr1 78432647 78432658
chr1 78432658 78432668
chr1 78435701 78435713
chr1 161293492 161293503
chr1 161309335 161309346
chr1 161332091 161332105
chr1 204495595 204495609
chr1 243736210 243736225
chr2 29449344 29449368
chr2 39536689 39536716
chr2 47600581 47600591
chr2 47635523 47635536
chr2 47641559 47641586
chr2 47702451 47702470
chr2 48032740 48032753
chr2 48033890 48033908
chr2 58390218 58390230
chr2 58453843 58453855
chr2 61143993 61144003
chr2 61145498 61145511
chr2 61726050 61726061
chr2 95849361 95849384
chr2 136873827 136873837
chr2 141625834 141625853
chr2 173435578 173435592
chr2 190717517 190717527
chr2 198257673 198257683
chr2 198267243 198267256
chr2 209101905 209101915
chr2 212495336 212495347
chr2 212578379 212578393
chr2 215657182 215657198
chr2 225422600 225422622
chr3 30691871 30691881
chr3 47103861 47103875
chr3 52696310 52696321
chr3 69988437 69988451
chr3 89448876 89448888
chr3 138400782 138400795
chr3 142231062 142231080
chr3 142241692 142241704
chr3 142259706 142259720
chr3 142274739 142274749
chr3 156413615 156413632
chr3 169988364 169988384
chr3 169992975 169992993
chr3 185009691 185009711
chr3 185010850 185010860
chr3 185080912 185080922
chr3 185155430 185155441
chr3 191888452 191888465
chr4 1919852 1919862
chr4 25666785 25666805
chr4 55135777 55135789
chr4 55135811 55135827
chr4 55598211 55598236
chr4 55976947 55976960
chr4 106162144 106162154
chr4 142950186 142950196
chr4 153268227 153268241
chr4 185340725 185340745
chr5 38959431 38959441
chr5 38978758 38978769
chr5 67576330 67576345
chr5 67584512 67584524
chr5 112111309 112111322
chr5 131924363 131924373
chr5 131927550 131927560
chr5 170818290 170818300
chr5 170837513 170837526
chr6 32790099 32790109
chr6 106534484 106534496
chr6 117642992 117643012
chr6 134494708 134494721
chr6 138192324 138192335
chr6 162683814 162683835
chr6 163899794 163899806
chr6 163991476 163991492
chr6 163991598 163991608
chr7 6037057 6037074
chr7 116409675 116409690
chr7 116414203 116414214
chr7 140421095 140421107
chr7 140425499 140425511
chr7 140482263 140482279
chr7 140484450 140484462
chr7 140487100 140487124
chr7 140489019 140489029
chr7 140496148 140496164
chr7 140498359 140498380
chr7 148543693 148543704
chrX 39930433 39930446
chrX 44920541 44920551
chrX 44935924 44935937
chrX 44949951 44949962
chrX 123184949 123184968
chrX 123204978 123204992
chr8 38138963 38138974
chr8 38175279 38175290
chr8 38316067 38316077
chr8 38321488 38321499
chr8 38321830 38321853
chr8 117864952 117864966
chr8 117868531 117868554
chr8 141754888 141754904
chr9 5073681 5073691
chr9 80343587 80343601
chr9 87357642 87357652
chr9 87430433 87430449
chr9 87487518 87487529
chr9 135773000 135773018
chr10 8115668 8115686
chr10 43595836 43595850
chr10 89720633 89720648
chr11 94212930 94212941
chr11 102193508 102193534
chr11 108114661 108114676
chr11 108119615 108119629
chr11 108121410 108121425
chr11 108141955 108141970
chr11 108151697 108151707
chr11 108188266 108188279
chr11 118369265 118369280
chr11 119144791 119144808
chr11 125490765 125490786
chr12 463405 463415
chr12 69214081 69214093
chr12 112893675 112893692
chr12 133237753 133237767
chr13 26959325 26959335
chr13 26967477 26967488
chr13 28961945 28961960
chr13 28962591 28962602
chr13 28963283 28963296
chr13 28980036 28980046
chr13 32893197 32893207
chr13 32907535 32907546
chr13 48954159 48954172
chr13 48954282 48954293
chr13 48955363 48955373
chr13 49039094 49039118
chr14 23652346 23652367
chr14 68944343 68944357
chr14 68964202 68964214
chr14 69006883 69006908
chr14 69149592 69149604
chr14 69149627 69149647
chr15 99439963 99439973
chr16 3808052 3808065
chr16 9934670 9934690
chr17 7572154 7572172
chr17 7577678 7577694
chr17 7578711 7578729
chr17 29482977 29482987
chr17 29508819 29508835
chr17 29528406 29528416
chr17 29559061 29559087
chr17 29663614 29663625
chr17 29663625 29663636
chr17 37855713 37855727
chr17 47696764 47696774
chr17 59857599 59857610
chr17 66526937 66526947
chr17 66527166 66527177
chr18 60969534 60969548
chr20 43962585 43962595
chr20 43962640 43962652
chr20 43963436 43963446
chr20 52188399 52188409
chr19 1225275 1225288
chr19 1228058 1228069
chr19 50911947 50911959
chr22 41545024 41545038
chr22 41550984 41550995

View File

@ -3,6 +3,7 @@
import argparse
import json
import os
import re
import subprocess
import time
from datetime import datetime
@ -98,7 +99,6 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl
# 记录开始时间
start_time = time.time()
print(cmd)
ret = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8")
pidnum = ret.pid
with open(os.path.join(output_dir, 'pid'), 'w') as pidfile:
@ -106,16 +106,61 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl
# ret.wait()
# 等待进程完成,并获取输出和错误
output, error = ret.communicate()
print(output, error)
stdout = open(os.path.join(output_dir, f'{args.barcode}_{logname}_stdout.log'), 'w')
stderr = open(os.path.join(output_dir, f'{args.barcode}_{logname}_stderr.log'), 'w')
# 记录结束时间
end_time = time.time()
# 计算运行时间
elapsed_time = end_time - start_time
print("\n运行时间:{:.2f}".format(elapsed_time))
# 使用正则表达式提取UUID
match = re.search(r'UUID\(([^)]+)\)', output, re.MULTILINE)
print(output, error)
print('#' * 50)
print('读取日志')
workflow_id = ''
if match:
workflow_id = match.group(1)
stdout.write(f"任务 ID: {workflow_id}\n\n")
else:
stderr.write("未提取到任务 ID\n\n")
stdout.write("任务耗时: {:.2f}\n\n".format(elapsed_time))
workflow_path = os.path.join(output_dir, 'cromwell-executions', 'pipeline', workflow_id)
stdout.write(f'workflow 地址: {workflow_path}\n\n')
stdout_files = list()
stderr_files = list()
for root, dirs, files in os.walk(workflow_path):
for file in files:
if str(file).endswith('stdout'):
file_path = os.path.join(root, file)
stdout_files.append(str(file_path))
if str(file).endswith('stderr'):
file_path = os.path.join(root, file)
stderr_files.append(str(file_path))
# 按照文件的修改时间倒序排序
sorted_stdout_files = sorted(stdout_files, key=lambda x: os.path.getmtime(x), reverse=True)
sorted_stderr_files = sorted(stderr_files, key=lambda x: os.path.getmtime(x), reverse=True)
for file_path in sorted_stdout_files:
with open(file_path, 'r') as f:
stdout.write('#' * 50 + '\n')
stdout.write(file_path + '\n')
stdout.write(f.read())
stdout.write('\n\n')
for file_path in sorted_stderr_files:
with open(file_path, 'r') as f:
stderr.write('#' * 50 + '\n')
stderr.write(file_path + '\n')
stderr.write(f.read())
stderr.write('\n\n')
stdout.close()
stderr.close()
if __name__ == '__main__':

View File

@ -140,9 +140,8 @@ workflow pipeline {
run=catecode['addMsi'],
tumor=tumor,
tumor_rmdup_bam=alignment.tumor_rmdup_bam,
normal=normal,
normal_rmdup_bam=alignment.normal_rmdup_bam,
bed=bed,
probe=probe,
output_dir=workdir
}

View File

@ -1,29 +1,8 @@
# msi
task msi_single {
String name
String bed
String output_dir
String rmdup_bam
command <<<
if [ ! -d ${output_dir}/msi ];then
mkdir ${output_dir}/msi
fi
msisensor2 msi \
-M /dataseq/jmdna/software/msisensor2/models_hg19_GRCh37 \
-t ${output_dir}/alignment/${name}.rmdup.bam \
-e ${bed} \
-b 10 \
-o ${output_dir}/msi/${name}.msi.txt
>>>
}
task msi_paired {
String bed
task run_msi {
String name
String probe
String output_dir
String tumor_rmdup_bam
String normal_rmdup_bam
@ -33,14 +12,36 @@ task msi_paired {
mkdir ${output_dir}/msi
fi
if [ "${probe}" == "682" ]; then
echo "msi 682探针 "
msisensor2 msi \
-d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list \
-n ${normal_rmdup_bam} \
-t ${tumor_rmdup_bam} \
-e ${bed} \
-b 10 \
-f 0.01 \
-d /dataseq/jmdna/software/msisensor2/msi.10-50.homosite.list \
-n ${normal_rmdup_bam} -t ${tumor_rmdup_bam} \
-e $PUBLIC/msi/624_650_intersect_depth50_177.bed \
-b 10 -c 30 -o ${output_dir}/msi/${name}.msi.txt
elif [ "${probe}" == "624" ]; then
echo "msi 624 探针 "
msisensor2 msi \
-M /dataseq/jmdna/software/msisensor2/models_hg19_GRCh37 \
-t ${tumor_rmdup_bam} -e $PUBLIC/msi//624_650_intersect_depth50_177.bed -b 10 \
-o ${output_dir}/msi/${name}.msi.txt
elif [ "${probe}" == "160" ]; then
# 160 probe panel: tumor-only, model-based call restricted to the
# 160-panel BED file. The tumor BAM must follow -t and the result path
# must be introduced by -o; without them the following option is consumed
# as the BAM path and the output path becomes a stray positional argument.
echo "msi 160 探针 "
msisensor2 msi \
-M /dataseq/jmdna/software/msisensor2/models_hg19_GRCh37 \
-t ${tumor_rmdup_bam} -e $PUBLIC/msi/624_650_160_intersect_74.bed \
-b 10 -o ${output_dir}/msi/${name}.msi.txt
else
echo "未知的选项: ${probe}"
fi
>>>
}
@ -48,34 +49,21 @@ task msi_paired {
workflow call_msi {
Boolean run=true
String probe
String tumor
String tumor_rmdup_bam
String? normal
String? normal_rmdup_bam
String bed
String output_dir
if (run) {
# 单样本
if (!defined(normal)) {
call msi_single {
input:
name=tumor,
output_dir=output_dir,
rmdup_bam=tumor_rmdup_bam,
bed=bed
}
}
# 双样本
if (defined(normal)) {
call msi_paired {
input:
name=tumor,
bed=bed,
output_dir=output_dir,
tumor_rmdup_bam=tumor_rmdup_bam,
normal_rmdup_bam=normal_rmdup_bam
}
call run_msi {
input:
name=tumor,
probe=probe,
output_dir=output_dir,
tumor_rmdup_bam=tumor_rmdup_bam,
normal_rmdup_bam=normal_rmdup_bam
}
}

View File

@ -23,7 +23,6 @@ task run_post {
indication.pl ${output_dir} ${cancer} ${project}
sample_post.py -s ${name} -o ${output_dir}
postprocess.py -n ${name} -s ${normal} -c ${output_dir} -o ${output_dir}/report/${name}.merged_file.xlsx
sleep 2m
}
output {