添加pollution
parent
2a9eff2226
commit
b9685fd29e
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
13
pipeline.wdl
13
pipeline.wdl
|
|
@ -1,5 +1,4 @@
|
|||
|
||||
import "./wdl/catecode.wdl"
|
||||
import "./wdl/qc.wdl"
|
||||
import "./wdl/alignment.wdl"
|
||||
import "./wdl/call_mutation.wdl"
|
||||
|
|
@ -12,6 +11,7 @@ import "./wdl/hereditary.wdl"
|
|||
import "./wdl/tmb.wdl"
|
||||
import "./wdl/postprocess.wdl"
|
||||
|
||||
|
||||
workflow pipeline {
|
||||
|
||||
String tumor
|
||||
|
|
@ -168,6 +168,17 @@ workflow pipeline {
|
|||
project=project
|
||||
}
|
||||
|
||||
call pollution.call_pollution as call_pollution {
|
||||
input:
|
||||
name=tumor,
|
||||
normal=normal,
|
||||
output_dir=workdir,
|
||||
probe=probe,
|
||||
raw_vcf=call_mutation.raw_vcf,
|
||||
somatic_vcf=call_mutation.somatic_vcf,
|
||||
germline_vcf=call_mutation.germline_vcf
|
||||
}
|
||||
|
||||
call postprocess.call_postprocess as call_postprocess {
|
||||
input:
|
||||
run=catecode['addAutoReport'],
|
||||
|
|
|
|||
|
|
@ -225,8 +225,7 @@ class ChemoRun:
|
|||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Chemotherapy Process Script")
|
||||
|
||||
parser.add_argument('-d', '--database', help="Path to chemo_drug's database",
|
||||
default='/home/zhangchao/project/pipeline/workflow/script/public/chemo_database.xlsx')
|
||||
parser.add_argument('-d', '--database', help="Path to chemo_drug's database", required=True)
|
||||
parser.add_argument('-probe', '--probe', help="Probe name", required=True)
|
||||
parser.add_argument('-n', '--name', help="Name for sample", required=True)
|
||||
parser.add_argument('-v', '--vcf', help="germline vcf", required=True)
|
||||
|
|
|
|||
|
|
@ -7,9 +7,10 @@ open IN, "$ARGV[0]";
|
|||
open OUT, ">$ARGV[1]";
|
||||
my $project = $ARGV[2];
|
||||
|
||||
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
|
||||
# my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
|
||||
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
|
||||
|
||||
print "Cnv过滤使用public路径:$public_path\n";
|
||||
print "Cnv过滤使用database路径:$database_path\n";
|
||||
|
||||
my $cnv = info();
|
||||
my @cnv_list = @$cnv;
|
||||
|
|
@ -22,7 +23,7 @@ print OUT join("\t", (@head[0 .. 4], "ref_gene", "copy", @head[5 .. 9])), "\n";
|
|||
while (<IN>) {
|
||||
chomp;
|
||||
my @line = split(/\t/, $_);
|
||||
my $cn = sprintf("%.2f", 2 ** (1 + $line[4]));
|
||||
my $cn = sprintf("%.1f", 2 ** (1 + $line[4]));
|
||||
my @gene_list = split(/,/, $line[3]);
|
||||
my %uniq;
|
||||
foreach my $element (@gene_list) {
|
||||
|
|
@ -38,7 +39,7 @@ while (<IN>) {
|
|||
}
|
||||
|
||||
sub info {
|
||||
open INFO, "$public_path/info.csv";
|
||||
open INFO, "$database_path/info.csv";
|
||||
# 读取并解析表头
|
||||
my $header = <INFO>;
|
||||
chomp($header);
|
||||
|
|
|
|||
|
|
@ -6,8 +6,10 @@ die "usage:perl $0 depth_file input out project" unless @ARGV == 4;
|
|||
my ($depth_file, $input, $out, $project) = @ARGV;
|
||||
|
||||
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
|
||||
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
|
||||
|
||||
print "Fusion过滤使用public路径:$public_path\n";
|
||||
print "Fusion过滤使用database路径:$database_path\n";
|
||||
|
||||
open IN, "$input";
|
||||
open OUT1, "> $out";
|
||||
|
|
@ -189,7 +191,7 @@ sub gene2strand {
|
|||
}
|
||||
|
||||
sub info {
|
||||
open INFO, "$public_path/info.csv";
|
||||
open INFO, "$database_path/info.csv";
|
||||
# 读取并解析表头
|
||||
my $header = <INFO>;
|
||||
chomp($header);
|
||||
|
|
|
|||
|
|
@ -4,9 +4,12 @@ use warnings;
|
|||
|
||||
die "usage:perl $0 input out project" unless @ARGV == 3;
|
||||
my ($input, $out, $project) = @ARGV;
|
||||
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
|
||||
# my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
|
||||
#
|
||||
# print "LongIndel过滤使用public路径:$public_path\n";
|
||||
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
|
||||
|
||||
print "LongIndel过滤使用public路径:$public_path\n";
|
||||
print "longindel过滤使用database路径:$database_path\n";
|
||||
|
||||
open IN, "$input";
|
||||
open LONGINDEL, "> $out";
|
||||
|
|
@ -40,7 +43,7 @@ while (<IN>) {
|
|||
}
|
||||
|
||||
sub info {
|
||||
open INFO, "$public_path/info.csv";
|
||||
open INFO, "$database_path/info.csv";
|
||||
# 读取并解析表头
|
||||
my $header = <INFO>;
|
||||
chomp($header);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,52 @@
|
|||
#!/usr/bin/env perl
# Collect TERT promoter hotspot variants for one sample.
#
# Usage: perl <script> codes_dir name output_dir project
#
#   codes_dir   accepted for interface compatibility only; the resource
#               directory is taken from $ENV{PUBLIC} (or the built-in default)
#   name        sample name used to build the input/output file names
#   output_dir  directory that contains the "mutation" sub-directory
#   project     project identifier looked up in the first column of info.csv
#
# The script reads "<public>/info.csv" (tab separated). For every row whose
# first column equals the project and whose fifth column is not "NA", the fifth
# column is split on "/" into the promoter gene list and the output file
# "<output_dir>/mutation/<name>.target.promoter.txt" is created.
# It then scans "<name>.snp.indel.Somatic.annoall.hg19_multianno.txt" and
# writes the matching TERT rows (see the filter below) to that output file.
use strict;
use warnings;

die "useage:perl $0 codes_dir name output_dir project" unless @ARGV == 4;

# The usage contract has four arguments; the first one (codes_dir) is not used
# by this script, so it is discarded instead of being mistaken for the name.
my (undef, $name, $output_dir, $project) = @ARGV;

my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";

# ---- 1. Look up the promoter gene list configured for this project ----
my $info_file = "$public_path/info.csv";
open my $info_fh, '<', $info_file or die "Cannot open $info_file: $!";
scalar <$info_fh>;    # skip the header line

my @promoter;
my $prmt_fh;
while (my $row = <$info_fh>) {
    chomp $row;
    my @line = split(/\t/, $row);
    my $row_project = defined $line[0] ? $line[0] : '';
    next unless $row_project eq $project;

    my $genes = defined $line[4] ? $line[4] : '';
    next if $genes eq "NA";

    @promoter = split(/\//, $genes);
    my $target = "$output_dir/mutation/${name}.target.promoter.txt";
    open $prmt_fh, '>', $target or die "Cannot write $target: $!";
}
close $info_fh;

# ---- 2. Scan the annotated somatic variants ----
my $anno_file = "$output_dir/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt";
open my $anno_fh, '<', $anno_file or die "Cannot open $anno_file: $!";

# Keep the column names: they are needed for the header of the output table.
my $header = <$anno_fh>;
$header = '' unless defined $header;
chomp $header;
my @head = split(/\t/, $header);

my @prmt;
while (my $row = <$anno_fh>) {
    # Missing annotation values (e.g. ".") and absent columns must keep
    # comparing as 0 / empty string, exactly as they do without warnings.
    no warnings qw(numeric uninitialized);

    chomp $row;
    my @line = split(/\t/, $row);

    # Gene (column 7) must be one of the configured promoter genes.
    next unless @promoter and grep { $line[6] eq $_ } @promoter;

    # Region must be UTR3/upstream and every listed frequency column < 0.01.
    next unless $line[5] =~ /UTR3|upstream/
        and $line[17] < 0.01
        and $line[18] < 0.01
        and $line[19] < 0.01
        and $line[20] < 0.01
        and $line[23] < 0.01
        and $line[28] < 0.01
        and $line[32] < 0.01;

    # Values taken from the colon-separated second-to-last column.
    my $freq = (split(/:/, $line[-2]))[5];
    my $dp4  = join(",", (split(/:/, $line[-2]))[2, 4, -1]);

    # Count how many of the three predictor columns carry a benign-type code.
    my $predict_benign = 0;
    $predict_benign++ if ($line[50] eq "N" or $line[50] eq "P");
    $predict_benign++ if $line[56] eq "T";
    $predict_benign++ if $line[64] eq "T";

    # Only the two TERT hotspot positions with alternate allele A are reported.
    next unless $line[6] eq 'TERT';
    next unless (($line[1] eq '1295228' and $line[4] eq 'A')
        or ($line[1] eq '1295250' and $line[4] eq 'A'));

    push @prmt, join("\t", @line[0 .. 9], $freq, $dp4, $predict_benign, @line[10, 11, 16, 18, $#line]);
}
close $anno_fh;

# ---- 3. Write the result table (only when something was found) ----
# @prmt can only be non-empty when @promoter is, i.e. when $prmt_fh was opened.
if (@prmt) {
    print {$prmt_fh} join("\t", @head[0 .. 9]), "\tFreq", "\tDP-AD-DP4", "\tpredict_benign(MutationTaster/FATHMM/MetaSVM)\t", join("\t", @head[10, 11, 16, 18]), "\tSTR", "\n";
    print {$prmt_fh} join("\n", @prmt) . "\n";
}

if (defined $prmt_fh) {
    close $prmt_fh or die "Cannot close promoter output: $!";
}
|
||||
|
||||
|
|
@ -243,7 +243,7 @@ sub blacklist {
|
|||
|
||||
sub info {
|
||||
|
||||
open INFO, "$public_path/info.csv";
|
||||
open INFO, "$database_path/info.csv";
|
||||
# 读取并解析表头
|
||||
my $header = <INFO>;
|
||||
chomp($header);
|
||||
|
|
|
|||
|
|
@ -65,8 +65,7 @@ class HereditaryRun:
|
|||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="hereditary Process Script")
|
||||
|
||||
parser.add_argument('-d', '--database', help="Path to hereditary_mut's database",
|
||||
default='/home/zhangchao/project/pipeline/workflow/script/public/hereditary_database.xlsx')
|
||||
parser.add_argument('-d', '--database', help="Path to hereditary_mut's database", required=True)
|
||||
parser.add_argument('-p', '--project', help="Project name", required=True)
|
||||
parser.add_argument('-n', '--name', help="Name for sample", required=True)
|
||||
parser.add_argument('-f', '--file', help="germline filter file", required=True)
|
||||
|
|
|
|||
|
|
@ -26,16 +26,16 @@ while(<DIS>){
|
|||
}
|
||||
=cut
|
||||
|
||||
open DIS,"/dataseq/jmdna/codes/reportbase/oncotree.cancertype.20230801.txt";
|
||||
open DIS, "$database_path/oncotree.cancertype.20230801.txt";
|
||||
<DIS>;
|
||||
my (%dis, @id, %dis2);
|
||||
while (<DIS>) {
|
||||
chomp;
|
||||
my @line = split(/\t/);
|
||||
$dis{lc $line[2]} = $line[3];
|
||||
$dis{lc$line[4]}=$line[5];
|
||||
# $dis{lc $line[4]} = $line[5];
|
||||
push @{$dis2{$line[0]}}, lc $line[2];
|
||||
push @{$dis2{$line[0]}},lc$line[4];
|
||||
# push @{$dis2{$line[0]}}, lc $line[4];
|
||||
push @id, $line[0];
|
||||
}
|
||||
foreach my $ID ($cancer_type) {
|
||||
|
|
@ -66,9 +66,11 @@ while(<THERAPY>){
|
|||
push @{$cancer{$line[0]}}, $dis{lc $line[2]} if !(grep {$_ eq $dis{lc $line[2]}} @{$cancer{$line[0]}});
|
||||
if ($line[1] =~ /fusion/i) {
|
||||
push @{$therapy{$line[0]}}, '融合' if !(grep {$_ eq '融合'} @{$therapy{$line[0]}});
|
||||
}elsif($line[1] eq "Deletion" or $line[1]=~ /Amplification/){
|
||||
}
|
||||
elsif ($line[1] eq "Deletion" or $line[1] =~ /Amplification/) {
|
||||
push @{$therapy{$line[0]}}, '扩增' if !(grep {$_ eq '扩增'} @{$therapy{$line[0]}});
|
||||
}else{
|
||||
}
|
||||
else {
|
||||
push @{$therapy{$line[0]}}, '突变' if !(grep {$_ eq '突变'} @{$therapy{$line[0]}});;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,287 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def single_monitoring(name, vcf, bed, freq_range, output_dir):
    """Contamination check for a single sample.

    Selects the single-base VCF records located at the positions listed in
    ``bed`` whose allele frequency lies in [0.1, 0.9], writes them to
    ``<output_dir>/<name>_select.vcf`` and writes a one-row, tab-separated
    summary to ``<output_dir>/<name>_pollution.csv``.

    Args:
        name: Sample name; used as file-name prefix and as the ``barcode`` value.
        vcf: Path to the input VCF. The frequency is read from the 5th
            colon-separated item of the 10th column and may be a fraction
            or a percentage ending in ``%``.
        bed: Path to a tab-separated file without header; column 1 is the
            chromosome and column 3 the position to look up.
        freq_range: Two-element sequence ``[low, high]``; selected records
            whose frequency falls inside are counted as normal, the others
            as exceptions.
        output_dir: Directory that receives both output files.

    Returns:
        None. Results are written to disk.
    """
    vcf_header = []   # header lines (starting with '#'), copied verbatim
    vcf_records = []  # (raw line, whitespace-split fields) of every data line

    with open(vcf, 'r') as vcf_file:
        for line in vcf_file:
            if line.startswith("#"):
                vcf_header.append(line)
            else:
                # Tokenise once so a record is not re-split for every BED row.
                vcf_records.append((line, line.split()))

    df_position = pd.read_csv(bed, sep='\t', header=None, names=range(4))
    position_list = list(df_position[2])
    chr_list = list(df_position[0])

    result_data = []   # selected VCF lines, in BED order
    p_value_list = []  # allele frequency of every selected line

    for chrom, position in zip(chr_list, position_list):
        chrom, position = str(chrom), str(position)
        # Same position and chromosome, REF and ALT both a single character.
        matches = [(line, fields) for line, fields in vcf_records
                   if fields[1] == position and fields[0] == chrom
                   and len(fields[3]) < 2 and len(fields[4]) < 2]
        for line, fields in matches:
            p_value_str = fields[9].split(":")[4]
            p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str)
            if 0.1 <= p_value <= 0.9:
                result_data.append(line)
                p_value_list.append(p_value)

    select_vcf = os.path.join(output_dir, f'{name}_select.vcf')
    with open(select_vcf, 'w') as output:
        output.writelines(vcf_header)
        output.writelines(result_data)

    count_normal = sum(1 for p_value in p_value_list if freq_range[0] <= p_value <= freq_range[1])
    count_exception = len(p_value_list) - count_normal
    count_all = count_exception + count_normal
    # Fraction of selected sites outside the expected range (0 when nothing was selected).
    z_score = 0 if count_all == 0 else count_exception / count_all

    res = dict(
        barcode=name,
        count_normal=count_normal,
        count_exception=count_exception,
        z_score=z_score
    )
    # Write the summary next to the selected VCF instead of the current
    # working directory (the original join dropped output_dir).
    pd.DataFrame([res]).to_csv(os.path.join(output_dir, f'{name}_pollution.csv'), sep="\t", index=False)
|
||||
|
||||
|
||||
# Filter a VCF by the positions of the small BED file.
def select_position(vcf, bed, matched_file, unmatched_file):
    """Split the BED positions into those found in the VCF and those not found.

    For every BED row (chromosome in column 1, position in column 3) the VCF
    data lines with the same chromosome and position and with single-character
    REF and ALT are collected. The VCF header followed by all collected lines
    is written to ``matched_file``; for every BED row without any such line a
    placeholder row ``chrom, position, ., ., ., ., .`` is written to
    ``unmatched_file``.

    Returns:
        The tuple ``(matched_file, unmatched_file)``.
    """
    header_lines = []
    record_lines = []
    with open(vcf, 'r') as handle:
        for raw in handle:
            bucket = header_lines if raw.startswith("#") else record_lines
            bucket.append(raw)

    bed_table = pd.read_csv(bed, sep='\t', header=None, names=range(4))
    positions = list(bed_table[2])
    chromosomes = list(bed_table[0])

    hits = []      # VCF lines found at a BED position, in BED order
    misses = []    # placeholder rows for BED positions with no VCF line

    for chrom, pos in zip(chromosomes, positions):
        found = []
        for raw in record_lines:
            cols = raw.split()
            if (cols[1] == str(pos) and cols[0] == str(chrom)
                    and len(cols[3]) < 2 and len(cols[4]) < 2):
                found.append(raw)
        if len(found) == 0:
            misses.append(f"{chrom}\t{pos}\t.\t.\t.\t.\t.\n")
        hits.extend(found)

    with open(matched_file, 'w') as matched_out:
        matched_out.writelines(header_lines)
        matched_out.writelines(hits)

    with open(unmatched_file, 'w') as unmatched_out:
        for placeholder in misses:
            unmatched_out.write(placeholder)

    return matched_file, unmatched_file
|
||||
|
||||
|
||||
# Reduce a somatic/germline VCF to the columns needed for the comparison.
def process_judge_vcf(input_vcf, output_vcf):
    """Convert VCF data lines into a 7-column table with a genotype class.

    Each non-header, non-blank line of ``input_vcf`` becomes one tab-separated
    row in ``output_vcf``:
    chromosome, position, REF, ALT, raw frequency text, class, and the 3rd
    colon-separated item of the 10th column.

    The frequency is the 5th colon-separated item of the 10th column. It may
    be a fraction or a percentage ending in ``%``, the same two forms the
    other selection functions in this script accept. The class is 0.5 for
    frequencies in [0.1, 0.9], 0 below 0.1 and 1 otherwise.

    Args:
        input_vcf: Path of the VCF to read (tab-separated data lines).
        output_vcf: Path of the table to write.

    Returns:
        ``output_vcf``.
    """
    with open(input_vcf, 'r') as input_file, open(output_vcf, 'w') as output_file:
        for line in input_file:
            # Skip header lines and blank lines (the latter have no 10th column).
            if line.startswith("#") or not line.strip():
                continue

            fields = line.strip().split('\t')
            info = fields[9].split(":")
            raw_freq = info[4]
            # Accept "45%" as well as "0.45".
            if raw_freq[-1] == "%":
                percentage = float(raw_freq[:-1]) / 100
            else:
                percentage = float(raw_freq)

            # Every record gets its own class; nothing is carried over
            # from the previous line.
            if 0.1 <= percentage <= 0.9:
                b = 0.5
            elif percentage < 0.1:
                b = 0
            else:
                b = 1

            # The frequency is written exactly as it appeared in the input.
            new_line = '\t'.join([fields[0], fields[1], fields[3], fields[4], raw_freq, str(b), info[2]])
            output_file.write(new_line + '\n')
    return output_vcf
|
||||
|
||||
|
||||
def merge_and_sort_files(matched_file, unmatched_file, output_file):
    """Concatenate the matched and unmatched tables and sort them.

    Both inputs are tab-separated files without header. Their rows are
    combined, sorted by the first and then the second column, and written to
    ``output_file`` (tab-separated, no header, no index).

    Either input may be empty (zero bytes): an empty file contributes no rows
    instead of making ``pandas.read_csv`` fail. If both are empty an empty
    output file is created.

    Args:
        matched_file: Path of the table of positions found in the VCF.
        unmatched_file: Path of the placeholder table of positions not found.
        output_file: Path of the merged, sorted table to write.

    Returns:
        ``output_file``.
    """
    # read_csv raises on a zero-byte file, so only non-empty inputs are loaded.
    frames = [pd.read_csv(path, sep='\t', header=None)
              for path in (matched_file, unmatched_file)
              if os.stat(path).st_size > 0]

    if not frames:
        # Nothing to merge: still produce the (empty) output file.
        open(output_file, 'w').close()
        return output_file

    combined_df = frames[0] if len(frames) == 1 else pd.concat(frames)

    # Sort by the first and second column.
    sorted_df = combined_df.sort_values(by=[0, 1])
    sorted_df.to_csv(output_file, sep='\t', header=False, index=False)
    return output_file
|
||||
|
||||
|
||||
# Join the somatic and germline tables side by side and compare them.
def merge_and_compare_files(somatic_file, germline_file, output_merged_file, output_final_file):
    """Pair up the rows of two tables and flag whether their classes agree.

    Row *i* of ``somatic_file`` is joined with row *i* of ``germline_file``
    by a tab (pairing stops at the shorter file) and the joined rows are
    written to ``output_merged_file``. That file is then read back and each
    row is copied to ``output_final_file`` with an extra column: "yes" when
    the 6th and 13th tab-separated fields are equal, otherwise "no".

    Returns:
        The tuple ``(output_merged_file, output_final_file)``.
    """
    paired_rows = []
    with open(somatic_file, 'r') as somatic_handle, open(germline_file, 'r') as germline_handle:
        for somatic_row, germline_row in zip(somatic_handle, germline_handle):
            paired_rows.append(somatic_row.strip() + "\t" + germline_row.strip())

    # Rows are newline-joined, so the merged file has no trailing newline.
    with open(output_merged_file, 'w') as merged_handle:
        merged_handle.write('\n'.join(paired_rows))

    # Re-read the merged file and append the yes/no comparison column.
    with open(output_merged_file, 'r') as merged_handle, open(output_final_file, 'w') as final_handle:
        for row in merged_handle:
            stripped = row.strip()
            columns = stripped.split('\t')
            verdict = "yes" if columns[5] == columns[12] else "no"
            final_handle.write(f"{stripped}\t{verdict}\n")
    return output_merged_file, output_final_file
|
||||
|
||||
|
||||
# Filter a VCF by the large BED file; the result is used for the cnvkit plot.
def select_cnvkit_vcf(vcf, bed, output_file):
    """Write the VCF records at the BED positions with frequency in [0.1, 0.9].

    For every BED row (chromosome in column 1, position in column 3) the VCF
    data lines with that chromosome and position and single-character REF and
    ALT are looked up. A line is kept when its frequency -- the 5th
    colon-separated item of the 10th column, given as a fraction or as a
    percentage ending in ``%`` -- lies between 0.1 and 0.9 inclusive.
    ``output_file`` receives the VCF header followed by the kept lines in
    BED order.

    Returns:
        ``output_file``.
    """
    header_lines = []
    record_lines = []
    with open(vcf, 'r') as handle:
        for raw in handle:
            bucket = header_lines if raw.startswith("#") else record_lines
            bucket.append(raw)

    bed_table = pd.read_csv(bed, sep='\t', header=None, names=range(4))
    positions = list(bed_table[2])
    chromosomes = list(bed_table[0])

    def at_target(raw, chrom, pos):
        # Same position and chromosome, REF and ALT one character each.
        cols = raw.split()
        return (cols[1] == str(pos) and cols[0] == str(chrom)
                and len(cols[3]) < 2 and len(cols[4]) < 2)

    kept = []
    for chrom, pos in zip(chromosomes, positions):
        candidates = [raw for raw in record_lines if at_target(raw, chrom, pos)]
        for raw in candidates:
            freq_text = raw.split()[9].split(":")[4]
            if freq_text[-1] == "%":
                freq = float(freq_text[:-1]) / 100
            else:
                freq = float(freq_text)
            if 0.1 <= freq <= 0.9:
                kept.append(raw)

    with open(output_file, 'w') as out_handle:
        out_handle.writelines(header_lines)
        out_handle.writelines(kept)

    return output_file
|
||||
|
||||
|
||||
def paired_monitoring(name, somatic_vcf, germline_vcf, ref_bed, cnvkit_ref_bed, output_dir):
    """Run the paired (somatic + germline) contamination workflow.

    Steps, all files being created inside ``output_dir``:
      1. Split the ``ref_bed`` positions into matched/unmatched for the
         somatic and for the germline VCF (``select_position``).
      2. Reduce both matched files to the judged 7-column form
         (``process_judge_vcf``).
      3. Merge each judged file with its unmatched placeholders and sort
         (``merge_and_sort_files``).
      4. Join the two merged tables and compare them
         (``merge_and_compare_files``); the final table is
         ``<name>_contaminate_result.txt``.
      5. Select the germline records at the ``cnvkit_ref_bed`` positions
         into ``<name>_select_cnvkit.vcf`` (``select_cnvkit_vcf``).
      6. Delete all intermediate files.

    Returns:
        None.
    """
    # All output locations are derived from the sample name up front.
    somatic_matched_path = os.path.join(output_dir, f'{name}_somatic_matched.vcf')
    somatic_unmatched_path = os.path.join(output_dir, f'{name}_somatic_unmatched.vcf')
    germline_matched_path = os.path.join(output_dir, f'{name}_germline_matched.vcf')
    germline_unmatched_path = os.path.join(output_dir, f'{name}_germline_unmatched.vcf')
    somatic_judged_path = os.path.join(output_dir, f'{name}_somatic_matched_add_judge.vcf')
    germline_judged_path = os.path.join(output_dir, f'{name}_germline_matched_add_judge.vcf')
    somatic_merged_path = os.path.join(output_dir, f'{name}_somatic_merged.vcf')
    germline_merged_path = os.path.join(output_dir, f'{name}_germline__merged.vcf')
    result_pro_file = os.path.join(output_dir, f'{name}_result_pro.txt')
    result_file = os.path.join(output_dir, f'{name}_contaminate_result.txt')
    cnvkit_output_file = os.path.join(output_dir, f'{name}_select_cnvkit.vcf')

    # Step 1: look up the reference positions in the somatic, then the germline VCF.
    som_hit, som_miss = select_position(somatic_vcf, ref_bed,
                                        somatic_matched_path, somatic_unmatched_path)
    germ_hit, germ_miss = select_position(germline_vcf, ref_bed,
                                          germline_matched_path, germline_unmatched_path)

    # Step 2: convert frequencies into classes.
    som_judged = process_judge_vcf(som_hit, somatic_judged_path)
    germ_judged = process_judge_vcf(germ_hit, germline_judged_path)

    # Step 3: put matched and unmatched positions back together, sorted.
    som_merged = merge_and_sort_files(som_judged, som_miss, somatic_merged_path)
    germ_merged = merge_and_sort_files(germ_judged, germ_miss, germline_merged_path)

    # Step 4: join somatic with germline and compare.
    merge_and_compare_files(som_merged, germ_merged, result_pro_file, result_file)

    # Step 5: VCF used for plotting.
    select_cnvkit_vcf(germline_vcf, cnvkit_ref_bed, cnvkit_output_file)

    # Step 6: remove the intermediate files, in the order they were created.
    intermediates = (
        somatic_matched_path,
        somatic_unmatched_path,
        germline_matched_path,
        germline_unmatched_path,
        somatic_judged_path,
        germline_judged_path,
        somatic_merged_path,
        germline_merged_path,
        result_pro_file,
    )
    for leftover in intermediates:
        os.remove(leftover)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point.
    # Probes 160 and 17 run the single-sample check on one VCF; probes 682 and
    # 624 run the paired check and additionally need --vcf2 and --cnvkit_bed.
    parser = argparse.ArgumentParser(description="Pollution Process Script")

    parser.add_argument('-n', '--name', help="Name for sample", required=True)
    parser.add_argument('-b', '--ref_bed', help="ref_bed", required=True)
    parser.add_argument('-v', '--vcf', help="raw vcf for prbe 160 or 17 ; somatic vcf for prbe 682 or 624",
                        required=True)
    parser.add_argument('-v2', '--vcf2', help="germline vcf; required when prbe 682 or 624")
    parser.add_argument('-c', '--cnvkit_bed', help="cnvkit_bed; required when prbe 682 or 624")
    parser.add_argument('-p', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True)
    parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='')
    args = parser.parse_args()

    bed_path = os.path.realpath(args.ref_bed)
    print(f'污染检测使用ref_bed: {bed_path}')
    probe = args.probe

    if probe == '160' or probe == '17':
        # Expected frequency window per probe for the single-sample check.
        freq_range = {"17": [0.3452, 0.6512], "160": [0.2930, 0.6753]}.get(probe)
        single_monitoring(args.name, args.vcf, bed_path, freq_range, args.output_dir)
    elif probe == '682' or probe == '624':
        if not args.vcf2:
            parser.error('--vcf2 is required in prbe 682 or 624')
        if not args.cnvkit_bed:
            parser.error('--cnvkit_bed is required in prbe 682 or 624')
        cnvkit_bed_path = os.path.realpath(args.cnvkit_bed)
        print(f'污染检测使用cnvkit_bed: {cnvkit_bed_path}')
        # Pass the resolved path that was just logged, as is done for ref_bed.
        paired_monitoring(args.name, args.vcf, args.vcf2, bed_path, cnvkit_bed_path, args.output_dir)
    else:
        parser.error('probe error. 682, 624, 160, 17 for now')
|
||||
|
||||
|
|
@ -17,7 +17,9 @@ def database():
|
|||
增持匹配信息
|
||||
"""
|
||||
|
||||
path = os.environ.get('DATABASE', '/home/zhangchao/project/pipeline/workflow/database/')
|
||||
path = os.environ.get('DATABASE')
|
||||
if not path:
|
||||
raise UserWarning('未设置DATABASE环境变量')
|
||||
# gene function 基因功能描述信息
|
||||
gene_function_path = os.path.join(path, 'gene_function.txt')
|
||||
gene_function_df = pd.read_csv(gene_function_path, sep='\t')
|
||||
|
|
|
|||
|
|
@ -0,0 +1,634 @@
|
|||
chr5 224633 224633
|
||||
chr5 226160 226160
|
||||
chr5 228362 228362
|
||||
chr5 230980 230980
|
||||
chr5 231111 231111
|
||||
chr5 231143 231143
|
||||
chr5 233734 233734
|
||||
chr5 251784 251784
|
||||
chr5 256451 256451
|
||||
chr5 256472 256472
|
||||
chr11 534242 534242
|
||||
chr18 673447 673447
|
||||
chr18 673449 673449
|
||||
chr19 1208289 1208289
|
||||
chr19 1220321 1220321
|
||||
chr19 1221161 1221161
|
||||
chr19 1222012 1222012
|
||||
chr4 1801064 1801064
|
||||
chr4 1805478 1805478
|
||||
chr4 1806519 1806519
|
||||
chr4 1807894 1807894
|
||||
chr16 2103408 2103408
|
||||
chr16 2120402 2120402
|
||||
chr16 2138219 2138219
|
||||
chr19 3110349 3110349
|
||||
chr19 3113305 3113305
|
||||
chr19 3114954 3114954
|
||||
chr19 3115124 3115124
|
||||
chr19 3119184 3119184
|
||||
chr19 3119239 3119239
|
||||
chr11 4104007 4104007
|
||||
chr11 4104060 4104060
|
||||
chr11 4104087 4104087
|
||||
chr11 4113395 4113395
|
||||
chr11 4113470 4113470
|
||||
chr11 4113471 4113471
|
||||
chr11 4115487 4115487
|
||||
chr11 4115541 4115541
|
||||
chr11 4159457 4159457
|
||||
chr11 4159466 4159466
|
||||
chr12 4388084 4388084
|
||||
chr9 5050706 5050706
|
||||
chr9 5078291 5078291
|
||||
chr9 5081780 5081780
|
||||
chr9 5090810 5090810
|
||||
chr9 5090934 5090934
|
||||
chr9 5557672 5557672
|
||||
chr9 5557708 5557708
|
||||
chr7 6013049 6013049
|
||||
chr7 6022626 6022626
|
||||
chr7 6022629 6022629
|
||||
chr7 6026384 6026384
|
||||
chr7 6026775 6026775
|
||||
chr7 6026942 6026942
|
||||
chr7 6026988 6026988
|
||||
chr7 6031102 6031102
|
||||
chr7 6036980 6036980
|
||||
chr7 6038722 6038722
|
||||
chr7 6043386 6043386
|
||||
chr7 6045627 6045627
|
||||
chr17 7578645 7578645
|
||||
chr17 7579472 7579472
|
||||
chr5 7870973 7870973
|
||||
chr10 8111409 8111409
|
||||
chr1 11174331 11174331
|
||||
chr1 11174851 11174851
|
||||
chr1 11181327 11181327
|
||||
chr1 11181457 11181457
|
||||
chr1 11190646 11190646
|
||||
chr1 11190730 11190730
|
||||
chr1 11199518 11199518
|
||||
chr1 11199541 11199541
|
||||
chr1 11205058 11205058
|
||||
chr1 11272468 11272468
|
||||
chr1 11288758 11288758
|
||||
chr1 11300338 11300338
|
||||
chr1 11301714 11301714
|
||||
chr1 11303153 11303153
|
||||
chr1 11303383 11303383
|
||||
chr1 11854457 11854457
|
||||
chr1 11854476 11854476
|
||||
chr1 11856378 11856378
|
||||
chr3 14187449 14187449
|
||||
chr1 17380497 17380497
|
||||
chr2 17938501 17938501
|
||||
chr19 17942005 17942005
|
||||
chr19 17946054 17946054
|
||||
chr19 17952609 17952609
|
||||
chr19 17953321 17953321
|
||||
chr19 17955021 17955021
|
||||
chr1 20915507 20915507
|
||||
chr1 20915531 20915531
|
||||
chr1 20915590 20915590
|
||||
chr1 20915701 20915701
|
||||
chr12 21331599 21331599
|
||||
chr12 21331625 21331625
|
||||
chr9 21968199 21968199
|
||||
chr9 21968712 21968712
|
||||
chr12 25362777 25362777
|
||||
chr12 25368462 25368462
|
||||
chr2 25469502 25469502
|
||||
chr2 25536827 25536827
|
||||
chr4 25666099 25666099
|
||||
chr1 27089690 27089690
|
||||
chr13 28592546 28592546
|
||||
chr13 28608459 28608459
|
||||
chr13 28609825 28609825
|
||||
chr13 28610183 28610183
|
||||
chr13 28622544 28622544
|
||||
chr13 28623699 28623699
|
||||
chr13 28623759 28623759
|
||||
chr13 28624294 28624294
|
||||
chr13 28636084 28636084
|
||||
chr13 28674628 28674628
|
||||
chr22 29104955 29104955
|
||||
chr22 29104959 29104959
|
||||
chr22 29130458 29130458
|
||||
chr2 29416366 29416366
|
||||
chr2 29416481 29416481
|
||||
chr2 29416572 29416572
|
||||
chr2 29420550 29420550
|
||||
chr2 29443617 29443617
|
||||
chr2 29443749 29443749
|
||||
chr2 29444076 29444076
|
||||
chr2 29444095 29444095
|
||||
chr2 29445458 29445458
|
||||
chr2 29445602 29445602
|
||||
chr2 29446178 29446178
|
||||
chr2 29446184 29446184
|
||||
chr2 29446701 29446701
|
||||
chr2 29446721 29446721
|
||||
chr2 29446880 29446880
|
||||
chr2 29447108 29447108
|
||||
chr2 29447253 29447253
|
||||
chr2 29448760 29448760
|
||||
chr2 29449005 29449005
|
||||
chr2 29449526 29449526
|
||||
chr2 29449819 29449819
|
||||
chr2 29455267 29455267
|
||||
chr17 29508775 29508775
|
||||
chr17 29541437 29541437
|
||||
chr2 29543663 29543663
|
||||
chr17 29553485 29553485
|
||||
chr17 29653293 29653293
|
||||
chr17 29663624 29663624
|
||||
chr17 29663625 29663625
|
||||
chr17 29670190 29670190
|
||||
chr17 29679246 29679246
|
||||
chr17 29686024 29686024
|
||||
chr17 29705947 29705947
|
||||
chr2 29940529 29940529
|
||||
chr2 30143499 30143499
|
||||
chr6 32782149 32782149
|
||||
chr6 32782387 32782387
|
||||
chr6 32784645 32784645
|
||||
chr6 32796653 32796653
|
||||
chr6 32796685 32796685
|
||||
chr6 32796751 32796751
|
||||
chr6 32796793 32796793
|
||||
chr6 32797168 32797168
|
||||
chr6 32797297 32797297
|
||||
chr6 32797361 32797361
|
||||
chr6 32797684 32797684
|
||||
chr6 32797773 32797773
|
||||
chr6 32797809 32797809
|
||||
chr6 32797876 32797876
|
||||
chr6 32798608 32798608
|
||||
chr6 32800224 32800224
|
||||
chr6 32800412 32800412
|
||||
chr6 32805470 32805470
|
||||
chr6 32805849 32805849
|
||||
chr6 32814975 32814975
|
||||
chr6 32818774 32818774
|
||||
chr6 32819865 32819865
|
||||
chr6 32821447 32821447
|
||||
chr13 32890572 32890572
|
||||
chr13 32906480 32906480
|
||||
chr13 32906729 32906729
|
||||
chr13 32906980 32906980
|
||||
chr13 32910351 32910351
|
||||
chr13 32910721 32910721
|
||||
chr13 32911463 32911463
|
||||
chr13 32911888 32911888
|
||||
chr13 32913055 32913055
|
||||
chr13 32915005 32915005
|
||||
chr13 32929232 32929232
|
||||
chr13 32929387 32929387
|
||||
chr13 32936646 32936646
|
||||
chr15 32965139 32965139
|
||||
chr15 32965147 32965147
|
||||
chr15 32981978 32981978
|
||||
chr17 33433487 33433487
|
||||
chr17 33445549 33445549
|
||||
chr20 36030939 36030939
|
||||
chr20 36031097 36031097
|
||||
chr3 37067050 37067050
|
||||
chr3 37083740 37083740
|
||||
chr21 37518706 37518706
|
||||
chr17 37868715 37868715
|
||||
chr17 37879588 37879588
|
||||
chr17 37879762 37879762
|
||||
chr17 37884037 37884037
|
||||
chr8 38286811 38286811
|
||||
chr2 38298150 38298150
|
||||
chr2 38298203 38298203
|
||||
chr17 41223094 41223094
|
||||
chr17 41234470 41234470
|
||||
chr17 41244000 41244000
|
||||
chr17 41244435 41244435
|
||||
chr17 41244936 41244936
|
||||
chr17 41244982 41244982
|
||||
chr17 41245237 41245237
|
||||
chr17 41245466 41245466
|
||||
chr17 41267763 41267763
|
||||
chr17 41277187 41277187
|
||||
chr3 41280827 41280827
|
||||
chr6 41903782 41903782
|
||||
chr22 42526694 42526694
|
||||
chr10 43572832 43572832
|
||||
chr10 43595968 43595968
|
||||
chr10 43600689 43600689
|
||||
chr10 43604950 43604950
|
||||
chr10 43605247 43605247
|
||||
chr10 43605392 43605392
|
||||
chr10 43605860 43605860
|
||||
chr10 43605902 43605902
|
||||
chr10 43606687 43606687
|
||||
chr10 43606856 43606856
|
||||
chr10 43607756 43607756
|
||||
chr10 43607760 43607760
|
||||
chr10 43610366 43610366
|
||||
chr10 43610455 43610455
|
||||
chr10 43610558 43610558
|
||||
chr10 43611708 43611708
|
||||
chr10 43611865 43611865
|
||||
chr10 43613843 43613843
|
||||
chr10 43615505 43615505
|
||||
chr10 43622217 43622217
|
||||
chr20 43961971 43961971
|
||||
chr20 43961997 43961997
|
||||
chr20 43963047 43963047
|
||||
chr20 43963489 43963489
|
||||
chr20 43963735 43963735
|
||||
chr20 43963739 43963739
|
||||
chr20 43964288 43964288
|
||||
chr20 43964407 43964407
|
||||
chr19 44055726 44055726
|
||||
chr22 44324676 44324676
|
||||
chr22 44324727 44324727
|
||||
chr22 44324730 44324730
|
||||
chr14 45606287 45606287
|
||||
chr14 45642287 45642287
|
||||
chr14 45644589 45644589
|
||||
chr14 45650900 45650900
|
||||
chr14 45652938 45652938
|
||||
chr14 45658156 45658156
|
||||
chr14 45664721 45664721
|
||||
chr14 45665468 45665468
|
||||
chr1 45796269 45796269
|
||||
chr1 45797505 45797505
|
||||
chr1 45798555 45798555
|
||||
chr19 45854919 45854919
|
||||
chr19 45867259 45867259
|
||||
chr19 45912736 45912736
|
||||
chr19 45923653 45923653
|
||||
chr1 46726951 46726951
|
||||
chr1 46736386 46736386
|
||||
chr1 46740268 46740268
|
||||
chr1 46743900 46743900
|
||||
chr21 46957794 46957794
|
||||
chr2 47601106 47601106
|
||||
chr2 47604176 47604176
|
||||
chr2 47613768 47613768
|
||||
chr2 47630550 47630550
|
||||
chr2 47637337 47637337
|
||||
chr2 47693959 47693959
|
||||
chr2 47703500 47703500
|
||||
chr2 47739551 47739551
|
||||
chr2 48010488 48010488
|
||||
chr2 48029894 48029894
|
||||
chr2 48030692 48030692
|
||||
chr2 48030838 48030838
|
||||
chr18 48577091 48577091
|
||||
chr18 48577782 48577782
|
||||
chr13 49051012 49051012
|
||||
chr13 49051481 49051481
|
||||
chr19 50902164 50902164
|
||||
chr19 50902331 50902331
|
||||
chr19 50905042 50905042
|
||||
chr19 50905074 50905074
|
||||
chr19 50905089 50905089
|
||||
chr19 50905189 50905189
|
||||
chr19 50909765 50909765
|
||||
chr19 50919797 50919797
|
||||
chr19 50919828 50919828
|
||||
chr15 51502844 51502844
|
||||
chr15 51630835 51630835
|
||||
chr15 51631279 51631279
|
||||
chr3 52439508 52439508
|
||||
chr2 54395247 54395247
|
||||
chr4 55130078 55130078
|
||||
chr4 55130154 55130154
|
||||
chr4 55133726 55133726
|
||||
chr4 55139771 55139771
|
||||
chr4 55141055 55141055
|
||||
chr4 55143577 55143577
|
||||
chr4 55152040 55152040
|
||||
chr4 55161254 55161254
|
||||
chr4 55161391 55161391
|
||||
chr7 55214348 55214348
|
||||
chr7 55214443 55214443
|
||||
chr7 55221655 55221655
|
||||
chr7 55229255 55229255
|
||||
chr7 55238087 55238087
|
||||
chr7 55238874 55238874
|
||||
chr7 55249063 55249063
|
||||
chr7 55266417 55266417
|
||||
chr7 55268916 55268916
|
||||
chr4 55602765 55602765
|
||||
chr4 55948108 55948108
|
||||
chr4 55961159 55961159
|
||||
chr4 55968053 55968053
|
||||
chr4 55968651 55968651
|
||||
chr4 55972974 55972974
|
||||
chr12 56477694 56477694
|
||||
chr12 56491740 56491740
|
||||
chr12 56493822 56493822
|
||||
chr12 56494991 56494991
|
||||
chr17 56769979 56769979
|
||||
chr20 57600655 57600655
|
||||
chr12 58145156 58145156
|
||||
chr2 58388696 58388696
|
||||
chr17 59760996 59760996
|
||||
chr17 59763114 59763114
|
||||
chr17 59763347 59763347
|
||||
chr17 59763465 59763465
|
||||
chr17 59808391 59808391
|
||||
chr17 59857809 59857809
|
||||
chr17 59859299 59859299
|
||||
chr17 59892541 59892541
|
||||
chr1 65310489 65310489
|
||||
chr1 65311262 65311262
|
||||
chr1 65321388 65321388
|
||||
chr1 65325970 65325970
|
||||
chr1 65330682 65330682
|
||||
chr1 65351896 65351896
|
||||
chr15 66745890 66745890
|
||||
chr15 66774267 66774267
|
||||
chr15 66782048 66782048
|
||||
chr14 68331675 68331675
|
||||
chr16 68771372 68771372
|
||||
chr16 68857441 68857441
|
||||
chr16 68862165 68862165
|
||||
chr14 69006913 69006913
|
||||
chr14 69006922 69006922
|
||||
chr16 69143577 69143577
|
||||
chr14 69149814 69149814
|
||||
chr8 69389217 69389217
|
||||
chr11 69462910 69462910
|
||||
chr16 69745145 69745145
|
||||
chr15 75012985 75012985
|
||||
chr14 75483812 75483812
|
||||
chr14 75485489 75485489
|
||||
chr14 75497239 75497239
|
||||
chr14 75513534 75513534
|
||||
chr14 75513828 75513828
|
||||
chr14 75513883 75513883
|
||||
chr9 80409345 80409345
|
||||
chr7 80532112 80532112
|
||||
chr9 86900926 86900926
|
||||
chr7 87138645 87138645
|
||||
chr7 87160561 87160561
|
||||
chr7 87160618 87160618
|
||||
chr7 87171152 87171152
|
||||
chr7 87179601 87179601
|
||||
chr7 87180198 87180198
|
||||
chr9 87359220 87359220
|
||||
chr9 87360107 87360107
|
||||
chr9 87360111 87360111
|
||||
chr9 87360153 87360153
|
||||
chr9 87360720 87360720
|
||||
chr9 87481287 87481287
|
||||
chr9 87483094 87483094
|
||||
chr15 88423463 88423463
|
||||
chr15 88476365 88476365
|
||||
chr15 88479504 88479504
|
||||
chr15 88479928 88479928
|
||||
chr15 88480751 88480751
|
||||
chr15 88481623 88481623
|
||||
chr15 88481913 88481913
|
||||
chr15 88482810 88482810
|
||||
chr15 88483456 88483456
|
||||
chr15 88483594 88483594
|
||||
chr15 88575886 88575886
|
||||
chr15 88576185 88576185
|
||||
chr10 88648670 88648670
|
||||
chr10 88648672 88648672
|
||||
chr15 88679785 88679785
|
||||
chr4 89052323 89052323
|
||||
chr4 89052361 89052361
|
||||
chr10 89624218 89624218
|
||||
chr16 89805261 89805261
|
||||
chr16 89805914 89805914
|
||||
chr16 89805977 89805977
|
||||
chr16 89807233 89807233
|
||||
chr16 89809171 89809171
|
||||
chr16 89809319 89809319
|
||||
chr16 89816314 89816314
|
||||
chr16 89816333 89816333
|
||||
chr16 89825065 89825065
|
||||
chr16 89825157 89825157
|
||||
chr16 89828437 89828437
|
||||
chr16 89831243 89831243
|
||||
chr16 89831520 89831520
|
||||
chr16 89836323 89836323
|
||||
chr16 89838078 89838078
|
||||
chr16 89839766 89839766
|
||||
chr16 89845194 89845194
|
||||
chr16 89845287 89845287
|
||||
chr16 89849480 89849480
|
||||
chr16 89857935 89857935
|
||||
chr16 89857964 89857964
|
||||
chr16 89858417 89858417
|
||||
chr16 89858505 89858505
|
||||
chr16 89858525 89858525
|
||||
chr16 89866043 89866043
|
||||
chr16 89869761 89869761
|
||||
chr16 89882807 89882807
|
||||
chr16 89882826 89882826
|
||||
chr15 90628537 90628537
|
||||
chr8 90948273 90948273
|
||||
chr8 90955253 90955253
|
||||
chr8 90958422 90958422
|
||||
chr8 90958530 90958530
|
||||
chr8 90967711 90967711
|
||||
chr8 90970935 90970935
|
||||
chr8 90982803 90982803
|
||||
chr8 90990479 90990479
|
||||
chr8 90995019 90995019
|
||||
chr7 92244422 92244422
|
||||
chr10 96798524 96798524
|
||||
chr1 97915624 97915624
|
||||
chr1 97981395 97981395
|
||||
chr1 98348885 98348885
|
||||
chr7 99382096 99382096
|
||||
chr10 101542578 101542578
|
||||
chr11 103418158 103418158
|
||||
chr11 103418177 103418177
|
||||
chr11 103418196 103418196
|
||||
chr10 104897985 104897985
|
||||
chr14 105239894 105239894
|
||||
chr14 105246407 105246407
|
||||
chr11 108114632 108114632
|
||||
chr11 108128353 108128353
|
||||
chr11 108137867 108137867
|
||||
chr11 108139120 108139120
|
||||
chr11 108159732 108159732
|
||||
chr11 108183167 108183167
|
||||
chr11 108192078 108192078
|
||||
chr11 108236783 108236783
|
||||
chr2 111885244 111885244
|
||||
chr5 112162854 112162854
|
||||
chr5 112164561 112164561
|
||||
chr5 112175770 112175770
|
||||
chr5 112176325 112176325
|
||||
chr5 112176559 112176559
|
||||
chr5 112176756 112176756
|
||||
chr5 112177171 112177171
|
||||
chr6 114265587 114265587
|
||||
chr6 114281236 114281236
|
||||
chr10 115438204 115438204
|
||||
chr10 115439530 115439530
|
||||
chr10 115439569 115439569
|
||||
chr10 115489152 115489152
|
||||
chr10 115489167 115489167
|
||||
chr10 115489589 115489589
|
||||
chr10 115489650 115489650
|
||||
chr7 116335811 116335811
|
||||
chr7 116339672 116339672
|
||||
chr7 116340262 116340262
|
||||
chr7 116397572 116397572
|
||||
chr7 116435768 116435768
|
||||
chr7 116436022 116436022
|
||||
chr7 116436097 116436097
|
||||
chr6 117622184 117622184
|
||||
chr6 117622188 117622188
|
||||
chr6 117622233 117622233
|
||||
chr6 117639419 117639419
|
||||
chr6 117641016 117641016
|
||||
chr6 117641246 117641246
|
||||
chr6 117641330 117641330
|
||||
chr6 117641474 117641474
|
||||
chr6 117641819 117641819
|
||||
chr6 117642418 117642418
|
||||
chr6 117642495 117642495
|
||||
chr6 117643433 117643433
|
||||
chr6 117643659 117643659
|
||||
chr6 117643897 117643897
|
||||
chr6 117643994 117643994
|
||||
chr6 117644054 117644054
|
||||
chr6 117644132 117644132
|
||||
chr6 117644841 117644841
|
||||
chr6 117644866 117644866
|
||||
chr6 117645041 117645041
|
||||
chr6 117645239 117645239
|
||||
chr6 117645604 117645604
|
||||
chr6 117646289 117646289
|
||||
chr6 117646380 117646380
|
||||
chr6 117646772 117646772
|
||||
chr6 117648064 117648064
|
||||
chr6 117648340 117648340
|
||||
chr6 117649173 117649173
|
||||
chr6 117649988 117649988
|
||||
chr6 117650131 117650131
|
||||
chr6 117658193 117658193
|
||||
chr6 117662682 117662682
|
||||
chr6 117678083 117678083
|
||||
chr6 117686943 117686943
|
||||
chr6 117710661 117710661
|
||||
chr6 117724462 117724462
|
||||
chr6 117725448 117725448
|
||||
chr6 117725578 117725578
|
||||
chr6 117730819 117730819
|
||||
chr10 123239112 123239112
|
||||
chr10 123247644 123247644
|
||||
chr10 123263478 123263478
|
||||
chr10 123274846 123274846
|
||||
chr10 123298158 123298158
|
||||
chr10 123310871 123310871
|
||||
chr3 124456742 124456742
|
||||
chr11 125514573 125514573
|
||||
chr11 125525195 125525195
|
||||
chr7 128845088 128845088
|
||||
chr7 128846207 128846207
|
||||
chr7 128846328 128846328
|
||||
chr7 128846469 128846469
|
||||
chr5 131892979 131892979
|
||||
chr5 131915213 131915213
|
||||
chr5 131923393 131923393
|
||||
chr12 133202215 133202215
|
||||
chr12 133208886 133208886
|
||||
chr12 133208979 133208979
|
||||
chr12 133210985 133210985
|
||||
chr12 133212582 133212582
|
||||
chr12 133214768 133214768
|
||||
chr12 133218277 133218277
|
||||
chr12 133219831 133219831
|
||||
chr12 133219989 133219989
|
||||
chr12 133233705 133233705
|
||||
chr12 133236000 133236000
|
||||
chr12 133238076 133238076
|
||||
chr12 133240782 133240782
|
||||
chr12 133250118 133250118
|
||||
chr12 133250197 133250197
|
||||
chr12 133253995 133253995
|
||||
chr12 133257887 133257887
|
||||
chr12 133263825 133263825
|
||||
chr9 135781239 135781239
|
||||
chr5 138132255 138132255
|
||||
chr9 139391338 139391338
|
||||
chr9 139391543 139391543
|
||||
chr9 139391636 139391636
|
||||
chr9 139397707 139397707
|
||||
chr9 139402663 139402663
|
||||
chr9 139405261 139405261
|
||||
chr9 139407932 139407932
|
||||
chr9 139412197 139412197
|
||||
chr9 139418260 139418260
|
||||
chr7 140426257 140426257
|
||||
chr7 140434463 140434463
|
||||
chr7 140449071 140449071
|
||||
chr7 140449150 140449150
|
||||
chr3 142168331 142168331
|
||||
chr3 142178144 142178144
|
||||
chr3 142215178 142215178
|
||||
chr3 142222284 142222284
|
||||
chr3 142277427 142277427
|
||||
chr3 142277536 142277536
|
||||
chr3 142277575 142277575
|
||||
chr3 142281612 142281612
|
||||
chr5 149433857 149433857
|
||||
chr5 149435759 149435759
|
||||
chr5 149439458 149439458
|
||||
chr5 149450132 149450132
|
||||
chr5 149456811 149456811
|
||||
chr5 149457678 149457678
|
||||
chr5 149460343 149460343
|
||||
chr5 149460553 149460553
|
||||
chr5 149495537 149495537
|
||||
chr5 149499672 149499672
|
||||
chr5 149500427 149500427
|
||||
chr5 149513626 149513626
|
||||
chr5 149782420 149782420
|
||||
chr5 149783085 149783085
|
||||
chr5 149783694 149783694
|
||||
chr5 149783879 149783879
|
||||
chr7 152351316 152351316
|
||||
chr4 153252061 153252061
|
||||
chr1 156785617 156785617
|
||||
chr1 156843264 156843264
|
||||
chr1 156844979 156844979
|
||||
chr1 156845095 156845095
|
||||
chr1 156845848 156845848
|
||||
chr1 156846233 156846233
|
||||
chr1 156848995 156848995
|
||||
chr1 156849780 156849780
|
||||
chr6 159189454 159189454
|
||||
chr6 159190019 159190019
|
||||
chr6 159190184 159190184
|
||||
chr6 159191788 159191788
|
||||
chr6 160113872 160113872
|
||||
chr1 162737116 162737116
|
||||
chr1 162740327 162740327
|
||||
chr1 162743418 162743418
|
||||
chr3 178922274 178922274
|
||||
chr3 178938747 178938747
|
||||
chr3 178942431 178942431
|
||||
chr1 204501383 204501383
|
||||
chr1 204512100 204512100
|
||||
chr1 204516025 204516025
|
||||
chr2 212251864 212251864
|
||||
chr2 212652698 212652698
|
||||
chr2 215593233 215593233
|
||||
chr2 215595645 215595645
|
||||
chr2 215632192 215632192
|
||||
chr2 215632255 215632255
|
||||
chr2 215632256 215632256
|
||||
chr2 215634055 215634055
|
||||
chr2 215645464 215645464
|
||||
chr2 215645545 215645545
|
||||
chr2 215674224 215674224
|
||||
chr2 215674323 215674323
|
||||
chr2 216212339 216212339
|
||||
chr2 234669144 234669144
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,128 @@
|
|||
chr1 2488153 2488153
|
||||
chr1 23885599 23885599
|
||||
chr1 65321388 65321388
|
||||
chr1 120612006 120612006
|
||||
chr1 193094375 193094375
|
||||
chr1 226570840 226570840
|
||||
chr2 47693959 47693959
|
||||
chr2 48030838 48030838
|
||||
chr2 61749716 61749716
|
||||
chr2 141032088 141032088
|
||||
chr2 215645464 215645464
|
||||
chr3 30686414 30686414
|
||||
chr3 47125385 47125385
|
||||
chr3 134644636 134644636
|
||||
chr3 142281612 142281612
|
||||
chr3 192053274 192053274
|
||||
chr4 55599436 55599436
|
||||
chr4 143007419 143007419
|
||||
chr4 153252061 153252061
|
||||
chr4 185310218 185310218
|
||||
chr4 25666099 25666099
|
||||
chr5 35857177 35857177
|
||||
chr5 38955796 38955796
|
||||
chr5 39074296 39074296
|
||||
chr5 67522722 67522722
|
||||
chr5 112162854 112162854
|
||||
chr5 149435759 149435759
|
||||
chr5 161119125 161119125
|
||||
chr5 170819887 170819887
|
||||
chr5 180046344 180046344
|
||||
chr6 30858857 30858857
|
||||
chr6 32797876 32797876
|
||||
chr6 36645696 36645696
|
||||
chr6 117678083 117678083
|
||||
chr6 117714346 117714346
|
||||
chr7 2946461 2946461
|
||||
chr7 2962753 2962753
|
||||
chr7 87138645 87138645
|
||||
chr7 151970931 151970931
|
||||
chr8 90955624 90955624
|
||||
chr8 90970935 90970935
|
||||
chr8 90995019 90995019
|
||||
chr8 68864728 68864728
|
||||
chr8 38322346 38322346
|
||||
chr8 145742879 145742879
|
||||
chr9 5081780 5081780
|
||||
chr9 8389364 8389364
|
||||
chr9 93657761 93657761
|
||||
chr9 98209594 98209594
|
||||
chr9 21991923 21991923
|
||||
chr9 37020622 37020622
|
||||
chr9 21975017 21975017
|
||||
chr10 43600689 43600689
|
||||
chr10 89720907 89720907
|
||||
chr10 104596981 104596981
|
||||
chr10 104849468 104849468
|
||||
chr10 104855656 104855656
|
||||
chr10 123239112 123239112
|
||||
chr11 32410774 32410774
|
||||
chr11 69462910 69462910
|
||||
chr11 94197260 94197260
|
||||
chr11 94225807 94225807
|
||||
chr11 94225920 94225920
|
||||
chr11 125497466 125497466
|
||||
chr12 1040373 1040373
|
||||
chr12 4553383 4553383
|
||||
chr12 6711147 6711147
|
||||
chr12 21331625 21331625
|
||||
chr12 49427652 49427652
|
||||
chr12 49444545 49444545
|
||||
chr12 121416622 121416622
|
||||
chr12 133202215 133202215
|
||||
chr13 21562948 21562948
|
||||
chr13 28609825 28609825
|
||||
chr13 32936646 32936646
|
||||
chr13 113889474 113889474
|
||||
chr13 113907391 113907391
|
||||
chr14 105239894 105239894
|
||||
chr14 105246325 105246325
|
||||
chr14 38064215 38064215
|
||||
chr14 20820537 20820537
|
||||
chr14 35871217 35871217
|
||||
chr15 41865488 41865488
|
||||
chr15 51502986 51502986
|
||||
chr15 51529112 51529112
|
||||
chr15 99456253 99456253
|
||||
chr15 99478225 99478225
|
||||
chr15 51558731 51558731
|
||||
chr16 68857441 68857441
|
||||
chr16 359953 359953
|
||||
chr16 89805977 89805977
|
||||
chr16 89838078 89838078
|
||||
chr16 89857964 89857964
|
||||
chr17 7983969 7983969
|
||||
chr17 29486152 29486152
|
||||
chr17 29508775 29508775
|
||||
chr17 29546175 29546175
|
||||
chr17 29559932 29559932
|
||||
chr17 29653293 29653293
|
||||
chr17 37879762 37879762
|
||||
chr17 62007498 62007498
|
||||
chr17 78919558 78919558
|
||||
chr19 3110349 3110349
|
||||
chr19 3119239 3119239
|
||||
chr19 4101062 4101062
|
||||
chr19 11136215 11136215
|
||||
chr19 15289613 15289613
|
||||
chr19 15295134 15295134
|
||||
chr19 41725271 41725271
|
||||
chr20 43956527 43956527
|
||||
chr20 43956636 43956636
|
||||
chr20 43958850 43958850
|
||||
chr20 43958872 43958872
|
||||
chr20 54959296 54959296
|
||||
chr20 54961541 54961541
|
||||
chr21 37518706 37518706
|
||||
chr21 39752673 39752673
|
||||
chr21 39753375 39753375
|
||||
chr22 41568480 41568480
|
||||
chr22 30038152 30038152
|
||||
chr22 30079213 30079213
|
||||
chrX 76937963 76937963
|
||||
chrX 39922359 39922359
|
||||
chrX 39932907 39932907
|
||||
chrX 44938563 44938563
|
||||
chrX 100608191 100608191
|
||||
chrX 100611285 100611285
|
||||
chrX 76940534 76940534
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,128 @@
|
|||
chr1 2488153 2488153
|
||||
chr1 23885599 23885599
|
||||
chr1 65321388 65321388
|
||||
chr1 120612006 120612006
|
||||
chr1 193094375 193094375
|
||||
chr1 226570840 226570840
|
||||
chr2 47693959 47693959
|
||||
chr2 48030838 48030838
|
||||
chr2 61749716 61749716
|
||||
chr2 141032088 141032088
|
||||
chr2 215645464 215645464
|
||||
chr3 30686414 30686414
|
||||
chr3 47125385 47125385
|
||||
chr3 134644636 134644636
|
||||
chr3 142281612 142281612
|
||||
chr3 192053274 192053274
|
||||
chr4 55599436 55599436
|
||||
chr4 143007419 143007419
|
||||
chr4 153252061 153252061
|
||||
chr4 185310218 185310218
|
||||
chr4 25666099 25666099
|
||||
chr5 35857177 35857177
|
||||
chr5 38955796 38955796
|
||||
chr5 39074296 39074296
|
||||
chr5 67522722 67522722
|
||||
chr5 112162854 112162854
|
||||
chr5 149435759 149435759
|
||||
chr5 161119125 161119125
|
||||
chr5 170819887 170819887
|
||||
chr5 180046344 180046344
|
||||
chr6 30858857 30858857
|
||||
chr6 32797876 32797876
|
||||
chr6 36645696 36645696
|
||||
chr6 117678083 117678083
|
||||
chr6 117714346 117714346
|
||||
chr7 2946461 2946461
|
||||
chr7 2962753 2962753
|
||||
chr7 87138645 87138645
|
||||
chr7 151970931 151970931
|
||||
chr8 90955624 90955624
|
||||
chr8 90970935 90970935
|
||||
chr8 90995019 90995019
|
||||
chr8 68864728 68864728
|
||||
chr8 38322346 38322346
|
||||
chr8 145742879 145742879
|
||||
chr9 5081780 5081780
|
||||
chr9 8389364 8389364
|
||||
chr9 93657761 93657761
|
||||
chr9 98209594 98209594
|
||||
chr9 21991923 21991923
|
||||
chr9 37020622 37020622
|
||||
chr9 21975017 21975017
|
||||
chr10 43600689 43600689
|
||||
chr10 89720907 89720907
|
||||
chr10 104596981 104596981
|
||||
chr10 104849468 104849468
|
||||
chr10 104855656 104855656
|
||||
chr10 123239112 123239112
|
||||
chr11 32410774 32410774
|
||||
chr11 69462910 69462910
|
||||
chr11 94197260 94197260
|
||||
chr11 94225807 94225807
|
||||
chr11 94225920 94225920
|
||||
chr11 125497466 125497466
|
||||
chr12 1040373 1040373
|
||||
chr12 4553383 4553383
|
||||
chr12 6711147 6711147
|
||||
chr12 21331625 21331625
|
||||
chr12 49427652 49427652
|
||||
chr12 49444545 49444545
|
||||
chr12 121416622 121416622
|
||||
chr12 133202215 133202215
|
||||
chr13 21562948 21562948
|
||||
chr13 28609825 28609825
|
||||
chr13 32936646 32936646
|
||||
chr13 113889474 113889474
|
||||
chr13 113907391 113907391
|
||||
chr14 105239894 105239894
|
||||
chr14 105246325 105246325
|
||||
chr14 38064215 38064215
|
||||
chr14 20820537 20820537
|
||||
chr14 35871217 35871217
|
||||
chr15 41865488 41865488
|
||||
chr15 51502986 51502986
|
||||
chr15 51529112 51529112
|
||||
chr15 99456253 99456253
|
||||
chr15 99478225 99478225
|
||||
chr15 51558731 51558731
|
||||
chr16 68857441 68857441
|
||||
chr16 359953 359953
|
||||
chr16 89805977 89805977
|
||||
chr16 89838078 89838078
|
||||
chr16 89857964 89857964
|
||||
chr17 7983969 7983969
|
||||
chr17 29486152 29486152
|
||||
chr17 29508775 29508775
|
||||
chr17 29546175 29546175
|
||||
chr17 29559932 29559932
|
||||
chr17 29653293 29653293
|
||||
chr17 37879762 37879762
|
||||
chr17 62007498 62007498
|
||||
chr17 78919558 78919558
|
||||
chr19 3110349 3110349
|
||||
chr19 3119239 3119239
|
||||
chr19 4101062 4101062
|
||||
chr19 11136215 11136215
|
||||
chr19 15289613 15289613
|
||||
chr19 15295134 15295134
|
||||
chr19 41725271 41725271
|
||||
chr20 43956527 43956527
|
||||
chr20 43956636 43956636
|
||||
chr20 43958850 43958850
|
||||
chr20 43958872 43958872
|
||||
chr20 54959296 54959296
|
||||
chr20 54961541 54961541
|
||||
chr21 37518706 37518706
|
||||
chr21 39752673 39752673
|
||||
chr21 39753375 39753375
|
||||
chr22 41568480 41568480
|
||||
chr22 30038152 30038152
|
||||
chr22 30079213 30079213
|
||||
chrX 76937963 76937963
|
||||
chrX 39922359 39922359
|
||||
chrX 39932907 39932907
|
||||
chrX 44938563 44938563
|
||||
chrX 100608191 100608191
|
||||
chrX 100611285 100611285
|
||||
chrX 76940534 76940534
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
chr1 2488153 2488153
|
||||
chr1 23885599 23885599
|
||||
chr1 65321388 65321388
|
||||
chr1 120612006 120612006
|
||||
chr1 193094375 193094375
|
||||
chr1 226570840 226570840
|
||||
chr2 47693959 47693959
|
||||
chr2 48030838 48030838
|
||||
chr2 61749716 61749716
|
||||
chr2 141032088 141032088
|
||||
chr2 215645464 215645464
|
||||
chr3 30686414 30686414
|
||||
chr3 47125385 47125385
|
||||
chr3 134644636 134644636
|
||||
chr3 142281612 142281612
|
||||
chr3 192053274 192053274
|
||||
chr4 55599436 55599436
|
||||
chr4 143007419 143007419
|
||||
chr4 153252061 153252061
|
||||
chr4 185310218 185310218
|
||||
chr4 25666099 25666099
|
||||
chr5 35857177 35857177
|
||||
chr5 38955796 38955796
|
||||
chr5 39074296 39074296
|
||||
chr5 67522722 67522722
|
||||
chr5 112162854 112162854
|
||||
chr5 149435759 149435759
|
||||
chr5 161119125 161119125
|
||||
chr5 170819887 170819887
|
||||
chr5 180046344 180046344
|
||||
chr6 30858857 30858857
|
||||
chr6 32797876 32797876
|
||||
chr6 36645696 36645696
|
||||
chr6 117678083 117678083
|
||||
chr6 117714346 117714346
|
||||
chr7 2946461 2946461
|
||||
chr7 2962753 2962753
|
||||
chr7 87138645 87138645
|
||||
chr7 151970931 151970931
|
||||
chr8 90955624 90955624
|
||||
chr8 90970935 90970935
|
||||
chr8 90995019 90995019
|
||||
chr8 68864728 68864728
|
||||
chr8 38322346 38322346
|
||||
chr8 145742879 145742879
|
||||
chr9 5081780 5081780
|
||||
chr9 8389364 8389364
|
||||
chr9 93657761 93657761
|
||||
chr9 98209594 98209594
|
||||
chr9 21991923 21991923
|
||||
chr9 37020622 37020622
|
||||
chr9 21975017 21975017
|
||||
chr10 43600689 43600689
|
||||
chr10 89720907 89720907
|
||||
chr10 104596981 104596981
|
||||
chr10 104849468 104849468
|
||||
chr10 104855656 104855656
|
||||
chr10 123239112 123239112
|
||||
chr11 32410774 32410774
|
||||
chr11 69462910 69462910
|
||||
chr11 94197260 94197260
|
||||
chr11 94225807 94225807
|
||||
chr11 94225920 94225920
|
||||
chr11 125497466 125497466
|
||||
chr12 1040373 1040373
|
||||
chr12 4553383 4553383
|
||||
chr12 6711147 6711147
|
||||
chr12 21331625 21331625
|
||||
chr12 49427652 49427652
|
||||
chr12 49444545 49444545
|
||||
chr12 121416622 121416622
|
||||
chr12 133202215 133202215
|
||||
chr13 21562948 21562948
|
||||
chr13 28609825 28609825
|
||||
chr13 32936646 32936646
|
||||
chr13 113889474 113889474
|
||||
chr13 113907391 113907391
|
||||
chr14 105239894 105239894
|
||||
chr14 105246325 105246325
|
||||
chr14 38064215 38064215
|
||||
chr14 20820537 20820537
|
||||
chr14 35871217 35871217
|
||||
chr15 41865488 41865488
|
||||
chr15 51502986 51502986
|
||||
chr15 51529112 51529112
|
||||
chr15 99456253 99456253
|
||||
chr15 99478225 99478225
|
||||
chr15 51558731 51558731
|
||||
chr16 68857441 68857441
|
||||
chr16 359953 359953
|
||||
chr16 89805977 89805977
|
||||
chr16 89838078 89838078
|
||||
chr16 89857964 89857964
|
||||
chr17 7983969 7983969
|
||||
chr17 29486152 29486152
|
||||
chr17 29508775 29508775
|
||||
chr17 29546175 29546175
|
||||
chr17 29559932 29559932
|
||||
chr17 29653293 29653293
|
||||
chr17 37879762 37879762
|
||||
chr17 62007498 62007498
|
||||
chr17 78919558 78919558
|
||||
chr19 3110349 3110349
|
||||
chr19 3119239 3119239
|
||||
chr19 4101062 4101062
|
||||
chr19 11136215 11136215
|
||||
chr19 15289613 15289613
|
||||
chr19 15295134 15295134
|
||||
chr19 41725271 41725271
|
||||
chr20 43956527 43956527
|
||||
chr20 43956636 43956636
|
||||
chr20 43958850 43958850
|
||||
chr20 43958872 43958872
|
||||
chr20 54959296 54959296
|
||||
chr20 54961541 54961541
|
||||
chr21 37518706 37518706
|
||||
chr21 39752673 39752673
|
||||
chr21 39753375 39753375
|
||||
chr22 41568480 41568480
|
||||
chr22 30038152 30038152
|
||||
chr22 30079213 30079213
|
||||
chrX 76937963 76937963
|
||||
chrX 39922359 39922359
|
||||
chrX 39932907 39932907
|
||||
chrX 44938563 44938563
|
||||
chrX 100608191 100608191
|
||||
chrX 100611285 100611285
|
||||
chrX 76940534 76940534
|
||||
|
|
@ -5,6 +5,15 @@ import os
|
|||
|
||||
run_wdl_path = os.path.join(os.path.dirname(__file__), 'run_wdl.py')
|
||||
|
||||
|
||||
def environ_test():
    """Verify that the environment variables the pipeline relies on are set.

    Reads WORKFLOW, PUBLIC and DATABASE from the process environment.

    Raises:
        UserWarning: if any of the three variables is unset or empty.
    """
    workflow_path = os.environ.get('WORKFLOW', None)
    public_path = os.environ.get('PUBLIC', None)
    database_path = os.environ.get('DATABASE', None)
    # An empty string is falsy, so a variable exported as '' counts as missing.
    if not (workflow_path and public_path and database_path):
        raise UserWarning('未设置WORKFLOW, PUBLIC, DATABASE环境')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description="JM to run pipeline")
|
||||
|
||||
|
|
@ -20,10 +29,13 @@ if __name__ == '__main__':
|
|||
parser.add_argument('-node', '--start_node',
|
||||
help="node begain to run; 'addQc', 'addAlignment', "
|
||||
"'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo',"
|
||||
" 'addHcs, addTmb, addAutoReport' and also run more than one node ,like this 'addTarget,addFusion'",
|
||||
" 'addHcs, addTmb, addAutoReport' and also run more than one "
|
||||
"node ,like this 'addTarget,addFusion'",
|
||||
default='addQc')
|
||||
args = parser.parse_args()
|
||||
|
||||
environ_test()
|
||||
|
||||
res_path = os.path.realpath(os.path.join(args.output_dir, args.barcode))
|
||||
|
||||
if not os.path.exists(res_path):
|
||||
|
|
@ -36,6 +48,6 @@ if __name__ == '__main__':
|
|||
f'-o {res_path} -b {args.probe} -p {args.project} -c {args.cancer} -w {args.wdl} ' \
|
||||
f'> {res_path}/{args.barcode}_run.log ' \
|
||||
f'2>> {res_path}/{args.barcode}_run.log &'
|
||||
with open(os.path.join(res_path, 'exec'), 'w') as execfile:
|
||||
execfile.write(cmd + '\n')
|
||||
# with open(os.path.join(res_path, 'exec'), 'w') as execfile:
|
||||
# execfile.write(cmd + '\n')
|
||||
os.system(cmd)
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ def get_catecode(project, start_node='addQc'):
|
|||
}
|
||||
|
||||
runcode = get_branch_nodes(dag, start_node)
|
||||
public = os.environ.get('PUBLIC', '/home/zhangchao/project/pipeline/workflow/script/public')
|
||||
public = os.environ.get('DATABASE')
|
||||
path = os.path.join(public, 'info.csv')
|
||||
info = pd.read_csv(path)
|
||||
project = info[info['project'] == project]['CateCode']
|
||||
|
|
@ -83,15 +83,16 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl
|
|||
jsfile.write(json.dumps(arg, indent=4, ensure_ascii=False))
|
||||
|
||||
# run pipeline
|
||||
cmd1 = 'export PATH=/home/zhangchao/project/pipeline/workflow/script:$PATH'
|
||||
cmd2 = 'export PUBLIC=/home/zhangchao/project/pipeline/workflow/script/public'
|
||||
# cmd1 = 'export PATH=/home/zhangchao/project/pipeline/workflow/script:$PATH'
|
||||
# cmd2 = 'export PUBLIC=/home/zhangchao/project/pipeline/workflow/script/public'
|
||||
cmd3 = f'cd {output_dir}'
|
||||
|
||||
# f'{"-Dcall-caching.enabled=false " if uncache else ""}'
|
||||
# f'-Dconfig.file=/home/zhangchao/project/pipeline/workflow/script/cromwell.examples.conf ' \
|
||||
|
||||
cmd4 = f'/usr/bin/java -jar /home/zhangchao/project/pipeline/workflow/software/cromwell-51.jar run --inputs {jsfile_path} {wdl}'
|
||||
cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}'
|
||||
cmd4 = f'/usr/bin/java -jar $WORKFLOW/software/cromwell-51.jar run --inputs {jsfile_path} {wdl}'
|
||||
# cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}'
|
||||
cmd = f'{cmd3}; {cmd4}'
|
||||
|
||||
# 记录开始时间
|
||||
start_time = time.time()
|
||||
|
|
@ -128,7 +129,8 @@ if __name__ == '__main__':
|
|||
parser.add_argument('-node', '--start_node',
|
||||
help="node begain to run; 'addQc', 'addAlignment', "
|
||||
"'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo',"
|
||||
" 'addHcs, 'addTmb', addAutoReport' and also run more than one node ,like this 'addTarget,addFusion'",
|
||||
" 'addHcs, 'addTmb', addAutoReport' and also run more than one node ,"
|
||||
"like this 'addTarget,addFusion'",
|
||||
default='addQc')
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,13 @@
|
|||
# Directory layout: everything below is derived from WORKFLOW.
export WORKFLOW=/home/zhangchao/project/pipeline/workflow
# Expansions are quoted so the values survive word splitting in shells that
# do not treat `export` as a declaration utility.
export PUBLIC="$WORKFLOW/script/public"
export DATABASE="$WORKFLOW/database"

# python
export PATH="/dataseq/product/workflow/software/Python-3.8.11/bin:$PATH"

# script
export PATH="$WORKFLOW/script:$PATH"

# software
export PATH="$WORKFLOW/software/bin:$PATH"
|
||||
|
|
@ -9,7 +9,7 @@ my ($input, $output, $cancer_type) = @ARGV;
|
|||
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
|
||||
print "Cnv药物注释使用路径:$database_path\n";
|
||||
|
||||
open THERAPY, "/dataseq/jmdna/codes/reportbase/targetTherapy.txt";
|
||||
open THERAPY, "$database_path/targetTherapy.txt";
|
||||
my $h1 = <THERAPY>;
|
||||
chomp $h1;
|
||||
my %therapy;
|
||||
|
|
@ -21,7 +21,7 @@ while (<THERAPY>) {
|
|||
|
||||
|
||||
##药物翻译信息
|
||||
open DRUG, "/dataseq/jmdna/codes/reportbase/target_drug.txt";
|
||||
open DRUG, "$database_path/target_drug.txt";
|
||||
my %drug;
|
||||
<DRUG>;
|
||||
while (<DRUG>) {
|
||||
|
|
@ -55,7 +55,7 @@ sub drug {
|
|||
}
|
||||
|
||||
|
||||
open DIS, "/dataseq/jmdna/codes/reportbase/oncotree.cancertype.20230801.txt";
|
||||
open DIS, "$database_path/oncotree.cancertype.20230801.txt";
|
||||
<DIS>;
|
||||
my (%dis, @id, %dis2);
|
||||
while (<DIS>) {
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ while (<IN>) {
|
|||
($codon =~ /dup/) ? ("Exon $exon insertion") :
|
||||
("Exon $exon mutation");
|
||||
}
|
||||
elsif ($record{'AAChange_refGene'} =~ /splicing/) {
|
||||
elsif ($record{'Func_refGene'} =~ /splicing/) {
|
||||
$protein = 'Truncating Mutations';
|
||||
$mut_type = '';
|
||||
}
|
||||
|
|
@ -123,7 +123,7 @@ while (<IN>) {
|
|||
$mut_type = '';
|
||||
}
|
||||
else {
|
||||
print $_;
|
||||
print "药物注释未匹配到正确的protein或者mut_type";
|
||||
}
|
||||
|
||||
##若突变不存在于%mut,写入@vus,若突变存在于%mut且neutral,写入@neg;若基因不存在于%therapy,写入@vus;
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ task run_chemo {
|
|||
if [ ! -d ${output_dir}/chemo ];then
|
||||
mkdir ${output_dir}/chemo
|
||||
fi
|
||||
chemo.py -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project}
|
||||
chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project}
|
||||
>>>
|
||||
|
||||
output {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ task run_hereditary {
|
|||
if [ ! -d ${output_dir}/hereditary ];then
|
||||
mkdir ${output_dir}/hereditary
|
||||
fi
|
||||
hereditary.py -p ${project} -n ${name} -f ${filter_txt} -o ${output_dir}/hereditary
|
||||
hereditary.py -d $DATABASE/hereditary_database.xlsx -p ${project} -n ${name} -f ${filter_txt} -o ${output_dir}/hereditary
|
||||
>>>
|
||||
|
||||
output {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,65 @@
|
|||
# Runs pollution.py for one sample (presumably a sample-contamination check,
# judging by the file names -- confirm against pollution.py).
#
# Inputs:
#   name        sample name; prefix of the expected result file
#   output_dir  sample working directory; results go to <output_dir>/pollution
#   probe       probe name; passed as -p and used to pick the two BED files
#               under $PUBLIC/pollution
#   vcf         VCF path passed to pollution.py as -v
#   vcf2        optional second VCF path passed as -v2
#
# Output:
#   pollution_res  path string <output_dir>/pollution/<name>_pollution.csv
#                  (built from the inputs; the task does not check it exists)
task run_pollution {
    String name
    String output_dir
    String probe
    String vcf
    String? vcf2

    command <<<

        if [ ! -d ${output_dir}/pollution ];then
            mkdir ${output_dir}/pollution
        fi
        # The -v2 flag is emitted only when vcf2 is defined; a bare "-v2"
        # with no value would otherwise be followed directly by "-o".
        pollution.py -n ${name} \
            -v ${vcf} \
            ${"-v2 " + vcf2} \
            -o ${output_dir}/pollution \
            -p ${probe} \
            -b $PUBLIC/pollution/${probe}_contaminate_ref.bed \
            -c $PUBLIC/pollution/${probe}_contaminate_cnvkit.bed
    >>>

    output {
        String pollution_res = "${output_dir}/pollution/${name}_pollution.csv"
    }
}
|
||||
|
||||
# Dispatches run_pollution in paired or single-sample mode.
#
# Inputs:
#   run           when false, no task is called (the output string is still built)
#   tumor         tumor sample name; passed to the task as `name`
#   normal        optional normal sample name; only its presence is tested here
#   output_dir    sample working directory
#   probe         probe name forwarded to the task
#   raw_vcf       VCF used when no normal sample is given
#   somatic_vcf   VCF used as the primary input when a normal sample is given
#   germline_vcf  VCF used as the second input when a normal sample is given
#
# NOTE(review): the call site in pipeline.wdl supplies `name=tumor`, while this
# workflow declares the input as `tumor` -- confirm which name is intended and
# align the two.
workflow call_pollution {

    Boolean run=true

    String tumor
    String? normal
    String output_dir
    String probe
    String raw_vcf
    String somatic_vcf
    String germline_vcf

    if (run) {
        # Paired mode: somatic calls plus germline calls as the second VCF.
        if (defined(normal)) {
            call run_pollution as run_pollution_paired {
                input:
                    name=tumor,
                    output_dir=output_dir,
                    probe=probe,
                    vcf=somatic_vcf,
                    vcf2=germline_vcf
            }
        }

        # Single-sample mode: only the raw VCF is available.
        if (!defined(normal)) {
            call run_pollution as run_pollution_single {
                input:
                    name=tumor,
                    output_dir=output_dir,
                    probe=probe,
                    vcf=raw_vcf
            }
        }
    }

    output {
        # The sample name is declared as `tumor` in this workflow; `name`
        # exists only inside the task, so it cannot be referenced here.
        String pollution_res = "${output_dir}/pollution/${tumor}_pollution.csv"
    }
}
|
||||
|
|
@ -33,6 +33,7 @@ workflow call_postprocess {
|
|||
String? cnv
|
||||
String? msi
|
||||
String? hereditary
|
||||
String? pollution
|
||||
String? chemo
|
||||
String name
|
||||
String? normal
|
||||
|
|
@ -48,9 +49,9 @@ workflow call_postprocess {
|
|||
msi=msi,
|
||||
hereditary=hereditary,
|
||||
chemo=chemo,
|
||||
pollution=pollution,
|
||||
name=name,
|
||||
normal=normal,
|
||||
normal=normal,
|
||||
output_dir=output_dir,
|
||||
cancer=cancer
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue