添加pollution

master
chaopower 2023-11-30 15:31:35 +08:00
parent 2a9eff2226
commit b9685fd29e
38 changed files with 9400 additions and 120 deletions

Binary file not shown.

Binary file not shown.

80
database/info.csv 100644 → 100755

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,4 @@
import "./wdl/catecode.wdl"
import "./wdl/qc.wdl"
import "./wdl/alignment.wdl"
import "./wdl/call_mutation.wdl"
@ -12,6 +11,7 @@ import "./wdl/hereditary.wdl"
import "./wdl/tmb.wdl"
import "./wdl/postprocess.wdl"
workflow pipeline {
String tumor
@ -33,10 +33,10 @@ workflow pipeline {
Map[String, Boolean] catecode
# call mytask.create_dir as create_dir {
# input:
# workdir=workdir
# }
# call mytask.create_dir as create_dir {
# input:
# workdir=workdir
# }
call qc.qc as qc {
input:
@ -168,6 +168,17 @@ workflow pipeline {
project=project
}
call pollution.call_pollution as call_pollution {
input:
name=tumor,
normal=normal,
output_dir=workdir,
probe=probe,
raw_vcf=call_mutation.raw_vcf,
somatic_vcf=call_mutation.somatic_vcf,
germline_vcf=call_mutation.germline_vcf
}
call postprocess.call_postprocess as call_postprocess {
input:
run=catecode['addAutoReport'],

View File

@ -225,8 +225,7 @@ class ChemoRun:
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Chemotherapy Process Script")
parser.add_argument('-d', '--database', help="Path to chemo_drug's database",
default='/home/zhangchao/project/pipeline/workflow/script/public/chemo_database.xlsx')
parser.add_argument('-d', '--database', help="Path to chemo_drug's database", required=True)
parser.add_argument('-probe', '--probe', help="Probe name", required=True)
parser.add_argument('-n', '--name', help="Name for sample", required=True)
parser.add_argument('-v', '--vcf', help="germline vcf", required=True)

View File

@ -7,9 +7,10 @@ open IN, "$ARGV[0]";
open OUT, ">$ARGV[1]";
my $project = $ARGV[2];
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
# my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "Cnv过滤使用public路径$public_path\n";
print "Cnv过滤使用database路径$database_path\n";
my $cnv = info();
my @cnv_list = @$cnv;
@ -22,7 +23,7 @@ print OUT join("\t", (@head[0 .. 4], "ref_gene", "copy", @head[5 .. 9])), "\n";
while (<IN>) {
chomp;
my @line = split(/\t/, $_);
my $cn = sprintf("%.2f", 2 ** (1 + $line[4]));
my $cn = sprintf("%.1f", 2 ** (1 + $line[4]));
my @gene_list = split(/,/, $line[3]);
my %uniq;
foreach my $element (@gene_list) {
@ -38,7 +39,7 @@ while (<IN>) {
}
sub info {
open INFO, "$public_path/info.csv";
open INFO, "$database_path/info.csv";
# 读取并解析表头
my $header = <INFO>;
chomp($header);

View File

@ -6,8 +6,10 @@ die "usage:perl $0 depth_file input out project" unless @ARGV == 4;
my ($depth_file, $input, $out, $project) = @ARGV;
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "Fusion过滤使用public路径$public_path\n";
print "Fusion过滤使用database路径$database_path\n";
open IN, "$input";
open OUT1, "> $out";
@ -189,7 +191,7 @@ sub gene2strand {
}
sub info {
open INFO, "$public_path/info.csv";
open INFO, "$database_path/info.csv";
# 读取并解析表头
my $header = <INFO>;
chomp($header);

View File

@ -4,9 +4,12 @@ use warnings;
die "usage:perl $0 input out project" unless @ARGV == 3;
my ($input, $out, $project) = @ARGV;
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
# my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
#
# print "LongIndel过滤使用public路径$public_path\n";
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "LongIndel过滤使用public路径$public_path\n";
print "longindel过滤使用database路径$database_path\n";
open IN, "$input";
open LONGINDEL, "> $out";
@ -40,7 +43,7 @@ while (<IN>) {
}
sub info {
open INFO, "$public_path/info.csv";
open INFO, "$database_path/info.csv";
# 读取并解析表头
my $header = <INFO>;
chomp($header);

View File

@ -0,0 +1,52 @@
#!/usr/bin/env perl
# Writes the TERT promoter hotspot calls of one sample to
#   <output_dir>/mutation/<name>.target.promoter.txt
#
# Arguments: [codes_dir] name output_dir project
#   codes_dir   optional legacy first argument; accepted and ignored
#   name        sample name used to build the input and output file names
#   output_dir  directory that contains the "mutation" sub-directory
#   project     value matched against the first column of info.csv
#
# The promoter gene list is the fifth tab-separated column of
# $PUBLIC/info.csv (split on "/"); "NA" means the project has no promoter
# targets and no output file is created.
use strict;
use warnings;

# The usage text has always listed four arguments while only three were
# unpacked, which shifted every value by one. Accept both forms instead.
die "useage:perl $0 [codes_dir] name output_dir project" unless @ARGV == 3 or @ARGV == 4;
shift @ARGV if @ARGV == 4;
my ($name, $output_dir, $project) = @ARGV;

my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";

# True when every given population-frequency field is below 0.01.
# Missing values (".", undef) numify to 0 and therefore count as rare, which
# is what the comparison did before warnings were enabled.
sub _all_rare {
    my (@values) = @_;
    no warnings qw(numeric uninitialized);
    return !grep { !($_ < 0.01) } @values;
}

# --- project configuration -------------------------------------------------
my $info_file = "$public_path/info.csv";
open my $info_fh, '<', $info_file or die "cannot open $info_file: $!";
my $info_header = <$info_fh>;    # header row is not used
my @promoter;
my $prmt_fh;
while (my $row = <$info_fh>) {
    chomp $row;
    my @line = split(/\t/, $row);
    next unless defined $line[0] and $line[0] eq $project;
    my $genes = defined $line[4] ? $line[4] : '';
    next if $genes eq "NA";
    @promoter = split(/\//, $genes);
    # The output file is created as soon as the project has promoter targets,
    # so an empty file means "checked, nothing found".
    my $prmt_file = "$output_dir/mutation/${name}.target.promoter.txt";
    open $prmt_fh, '>', $prmt_file or die "cannot write $prmt_file: $!";
}
close $info_fh;

# --- annotated somatic variants --------------------------------------------
my $anno_file = "$output_dir/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt";
my $anno_fh;
unless (open $anno_fh, '<', $anno_file) {
    # A missing annotation file used to be silently treated as "no variants";
    # keep the successful exit status but say why nothing was reported.
    warn "cannot open $anno_file: $!\n";
    exit 0;
}

# The column names are needed for the report header, so keep the first line
# instead of throwing it away.
my $header = <$anno_fh>;
$header = '' unless defined $header;
chomp $header;
my @head = split(/\t/, $header);

my @prmt;
while (my $row = <$anno_fh>) {
    chomp $row;
    my @line = split(/\t/, $row);
    next unless @promoter and defined $line[6];
    next unless grep { $line[6] eq $_ } @promoter;
    next unless defined $line[5] and $line[5] =~ /UTR3|upstream/;
    next unless _all_rare(@line[17, 18, 19, 20, 23, 28, 32]);

    # Only the two TERT positions below are reported, both with alternate
    # allele A.
    next unless $line[6] eq 'TERT';
    next unless ($line[1] eq '1295228' and $line[4] eq 'A')
        or ($line[1] eq '1295250' and $line[4] eq 'A');

    # The second-to-last column is split on ":"; sub-field 5 is reported as
    # the frequency and sub-fields 2, 4 and the last one as "DP-AD-DP4".
    # NOTE(review): assumes the caller's FORMAT layout - confirm.
    my @sample = split(/:/, $line[-2]);
    my $freq   = $sample[5];
    my $dp4    = join(",", @sample[2, 4, -1]);

    # Number of predictors (columns 50, 56, 64) that call the variant benign.
    my $predict_benign = 0;
    $predict_benign++ if ($line[50] eq "N" or $line[50] eq "P");
    $predict_benign++ if $line[56] eq "T";
    $predict_benign++ if $line[64] eq "T";

    push @prmt, join("\t", @line[0 .. 9], $freq, $dp4, $predict_benign, @line[10, 11, 16, 18, $#line]);
}
close $anno_fh;

if (@prmt) {
    print {$prmt_fh} join("\t", @head[0 .. 9]), "\tFreq", "\tDP-AD-DP4", "\tpredict_benign(MutationTaster/FATHMM/MetaSVM)\t", join("\t", @head[10, 11, 16, 18]), "\tSTR", "\n";
    print {$prmt_fh} join("\n", @prmt) . "\n";
}
if (defined $prmt_fh) {
    close $prmt_fh or die "cannot finish writing promoter report: $!";
}

View File

@ -243,7 +243,7 @@ sub blacklist {
sub info {
open INFO, "$public_path/info.csv";
open INFO, "$database_path/info.csv";
# 读取并解析表头
my $header = <INFO>;
chomp($header);

View File

@ -65,8 +65,7 @@ class HereditaryRun:
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="hereditary Process Script")
parser.add_argument('-d', '--database', help="Path to hereditary_mut's database",
default='/home/zhangchao/project/pipeline/workflow/script/public/hereditary_database.xlsx')
parser.add_argument('-d', '--database', help="Path to hereditary_mut's database", required=True)
parser.add_argument('-p', '--project', help="Project name", required=True)
parser.add_argument('-n', '--name', help="Name for sample", required=True)
parser.add_argument('-f', '--file', help="germline filter file", required=True)

View File

@ -2,14 +2,14 @@
use strict;
use warnings;
my ($output_dir,$cancer_type)=@ARGV;
die "useage:perl $0 output_dir cancer_type" unless @ARGV==2;
my ($output_dir, $cancer_type) = @ARGV;
die "useage:perl $0 output_dir cancer_type" unless @ARGV == 2;
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "Indication药物注释使用路径$database_path\n";
open OUT,">$output_dir/mutation/indication.txt";
open OUT, ">$output_dir/mutation/indication.txt";
print OUT "基因\t检测内容\t检测情况\t肿瘤类型\n";
##本癌种FDA/NMPA/NCCN批准基因检测
=pod
@ -26,56 +26,58 @@ while(<DIS>){
}
=cut
open DIS,"/dataseq/jmdna/codes/reportbase/oncotree.cancertype.20230801.txt";
open DIS, "$database_path/oncotree.cancertype.20230801.txt";
<DIS>;
my (%dis,@id,%dis2);
while(<DIS>){
my (%dis, @id, %dis2);
while (<DIS>) {
chomp;
my @line=split(/\t/);
$dis{lc$line[2]}=$line[3];
$dis{lc$line[4]}=$line[5];
push @{$dis2{$line[0]}},lc$line[2];
push @{$dis2{$line[0]}},lc$line[4];
push @id,$line[0];
my @line = split(/\t/);
$dis{lc $line[2]} = $line[3];
# $dis{lc $line[4]} = $line[5];
push @{$dis2{$line[0]}}, lc $line[2];
# push @{$dis2{$line[0]}}, lc $line[4];
push @id, $line[0];
}
foreach my $ID($cancer_type){
foreach my $ID ($cancer_type) {
my @family;
my @ids=split("",$ID);
for (my $i=1;$i<@ids;$i=$i+2){
push @family,join("",@ids[0..$i]);
my @ids = split("", $ID);
for (my $i = 1; $i < @ids; $i = $i + 2) {
push @family, join("", @ids[0 .. $i]);
}
push @family,(grep{/^$ID/}@id);
foreach my $t(@family){
push @{$dis2{$ID}},@{$dis2{$t}};
push @family, (grep {/^$ID/} @id);
foreach my $t (@family) {
push @{$dis2{$ID}}, @{$dis2{$t}};
}
}
foreach my $key(keys(%dis2)){
foreach my $key (keys(%dis2)) {
my %uniq;
@{$dis2{$key}}=grep{++$uniq{$_}<2}@{$dis2{$key}};
@{$dis2{$key}} = grep {++$uniq{$_} < 2} @{$dis2{$key}};
}
##靶向用药信息
open THERAPY,"$database_path/targetTherapy.txt";
<THERAPY>;
open THERAPY, "$database_path/targetTherapy.txt";
<THERAPY>;
my %therapy;
my %cancer;
while(<THERAPY>){
while (<THERAPY>) {
chomp;
my @line=split("\t");
if($line[9] eq 'V' and $line[14] eq 'A' and (grep{lc$line[2] eq lc$_}@{$dis2{$cancer_type}}) and $line[0]!~/,/){
push @{$cancer{$line[0]}},$dis{lc$line[2]} if !(grep{$_ eq $dis{lc$line[2]}}@{$cancer{$line[0]}});
if($line[1]=~/fusion/i){
push @{$therapy{$line[0]}},'融合' if !(grep{$_ eq '融合'}@{$therapy{$line[0]}});
}elsif($line[1] eq "Deletion" or $line[1]=~ /Amplification/){
push @{$therapy{$line[0]}},'扩增' if !(grep{$_ eq '扩增'}@{$therapy{$line[0]}});
}else{
push @{$therapy{$line[0]}},'突变' if !(grep{$_ eq '突变'}@{$therapy{$line[0]}});;
my @line = split("\t");
if ($line[9] eq 'V' and $line[14] eq 'A' and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) and $line[0] !~ /,/) {
push @{$cancer{$line[0]}}, $dis{lc $line[2]} if !(grep {$_ eq $dis{lc $line[2]}} @{$cancer{$line[0]}});
if ($line[1] =~ /fusion/i) {
push @{$therapy{$line[0]}}, '融合' if !(grep {$_ eq '融合'} @{$therapy{$line[0]}});
}
elsif ($line[1] eq "Deletion" or $line[1] =~ /Amplification/) {
push @{$therapy{$line[0]}}, '扩增' if !(grep {$_ eq '扩增'} @{$therapy{$line[0]}});
}
else {
push @{$therapy{$line[0]}}, '突变' if !(grep {$_ eq '突变'} @{$therapy{$line[0]}});;
}
}
}
for my $gene(sort keys %therapy){
print OUT "$gene\t",join("/",@{$therapy{$gene}}),"\t未检出变异\t",join("/",@{$cancer{$gene}}),"\n";
for my $gene (sort keys %therapy) {
print OUT "$gene\t", join("/", @{$therapy{$gene}}), "\t未检出变异\t", join("/", @{$cancer{$gene}}), "\n";
}

287
script/pollution.py 100755
View File

@ -0,0 +1,287 @@
#!/usr/bin/env python3
import argparse
import os
import pandas as pd
def single_monitoring(name, vcf, bed, freq_range, output_dir):
    """Single-sample contamination check based on SNV frequencies at BED sites.

    For every BED row (column 0 = chromosome, column 2 = position) the
    single-base REF/ALT records of ``vcf`` at that site are collected. Records
    whose frequency lies in [0.1, 0.9] are written to
    ``<output_dir>/<name>_select.vcf`` and counted as "normal" when the
    frequency is inside ``freq_range`` and as "exception" otherwise.

    The frequency is the 5th ``:``-separated sub-field of the 10th VCF column,
    given either as a fraction or as ``NN%``.
    NOTE(review): assumes the caller's FORMAT layout puts the allele frequency
    there - confirm.

    :param name: sample name, used as output file prefix and ``barcode`` value
    :param vcf: path of the input VCF
    :param bed: path of the tab-separated site list (no header)
    :param freq_range: ``[low, high]`` inclusive bounds of the expected range
    :param output_dir: directory for ``<name>_select.vcf``
    :return: None; writes ``<name>_select.vcf`` and ``<name>_pollution.csv``
    """
    vcf_header = []  # header lines, copied through unchanged
    snv_lines_by_site = {}  # (chrom, pos) -> SNV records in file order

    # Split each record once and index it by site; the previous version
    # re-split every record several times for every BED row.
    with open(vcf, 'r') as vcf_file:
        for line in vcf_file:
            if line.startswith("#"):
                vcf_header.append(line)
                continue
            fields = line.split()
            if len(fields) < 5:
                # blank or truncated line: nothing to match against
                continue
            if len(fields[3]) < 2 and len(fields[4]) < 2:
                snv_lines_by_site.setdefault((fields[0], fields[1]), []).append(line)

    df_position = pd.read_csv(bed, sep='\t', header=None, names=range(4))

    result_data = []
    p_value_list = []
    # Walk the BED rows in order so the output keeps the BED ordering.
    for chrom, position in zip(df_position[0], df_position[2]):
        for line in snv_lines_by_site.get((str(chrom), str(position)), ()):
            p_value_str = line.split()[9].split(":")[4]
            p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str)
            if 0.1 <= p_value <= 0.9:
                result_data.append(line)
                p_value_list.append(p_value)

    select_vcf = os.path.join(output_dir, f'{name}_select.vcf')
    with open(select_vcf, 'w') as output:
        output.writelines(vcf_header)
        output.writelines(result_data)

    count_normal = sum(1 for p_value in p_value_list if freq_range[0] <= p_value <= freq_range[1])
    count_exception = len(p_value_list) - count_normal
    count_all = count_exception + count_normal

    # "z_score" is the fraction of selected sites outside freq_range.
    z_score = 0 if count_all == 0 else count_exception / count_all

    res = dict(
        barcode=name,
        count_normal=count_normal,
        count_exception=count_exception,
        z_score=z_score
    )
    # NOTE(review): the report goes to the current working directory, not to
    # output_dir like the selected VCF. Left unchanged because callers may
    # collect it from there - confirm and move it if that is unintended.
    pd.DataFrame([res]).to_csv(f'{name}_pollution.csv', sep="\t", index=False)
# Filter a VCF down to the sites listed in the (small) BED file.
def select_position(vcf, bed, matched_file, unmatched_file):
    """Split the BED sites into those with and without an SNV record in ``vcf``.

    A record matches a BED row when its chromosome equals BED column 0, its
    position equals BED column 2, and both REF and ALT are a single character.

    :param vcf: path of the input VCF
    :param bed: path of the tab-separated site list (no header)
    :param matched_file: output path; receives the VCF header followed by the
        matching records, grouped in BED row order
    :param unmatched_file: output path; receives one placeholder line
        ``chrom<TAB>pos<TAB>.<TAB>.<TAB>.<TAB>.<TAB>.`` per BED row without a match
    :return: ``(matched_file, unmatched_file)``
    """
    vcf_header = []  # header lines, copied through unchanged
    snv_lines_by_site = {}  # (chrom, pos) -> SNV records in file order

    # Index the records once; the previous version re-split every record
    # several times for every BED row.
    with open(vcf, 'r') as vcf_file:
        for line in vcf_file:
            if line.startswith("#"):
                vcf_header.append(line)
                continue
            fields = line.split()
            if len(fields) < 5:
                # blank or truncated line: nothing to match against
                continue
            if len(fields[3]) < 2 and len(fields[4]) < 2:
                snv_lines_by_site.setdefault((fields[0], fields[1]), []).append(line)

    df_position = pd.read_csv(bed, sep='\t', header=None, names=range(4))

    result_data = []
    unmatched_data = []  # placeholder rows for sites without any record
    for chrom, position in zip(df_position[0], df_position[2]):
        site_lines = snv_lines_by_site.get((str(chrom), str(position)), [])
        if not site_lines:
            unmatched_data.append(f"{chrom}\t{position}\t.\t.\t.\t.\t.\n")
        result_data.extend(site_lines)

    with open(matched_file, 'w') as output:
        output.writelines(vcf_header)
        output.writelines(result_data)

    with open(unmatched_file, 'w') as unmatched_output:
        unmatched_output.writelines(unmatched_data)

    return matched_file, unmatched_file
# Reduce a somatic or germline VCF to the fields needed for the comparison.
def process_judge_vcf(input_vcf, output_vcf):
    """Convert each VCF record into a 7-column line with a genotype class.

    Output columns: CHROM, POS, REF, ALT, frequency (as written in the VCF),
    class, and the 3rd ``:``-separated sub-field of the 10th column. The class
    is ``0`` below 0.1, ``1`` above 0.9 and ``0.5`` in between.

    The frequency is the 5th ``:``-separated sub-field of the 10th column and
    may be a fraction or ``NN%``; the percent form is what the other filters
    in this file already accept.
    NOTE(review): assumes the caller's FORMAT layout - confirm.

    :param input_vcf: path of the VCF to read; header and blank lines are skipped
    :param output_vcf: path of the tab-separated file to write
    :return: ``output_vcf``
    """
    with open(input_vcf, 'r') as input_file, open(output_vcf, 'w') as output_file:
        for line in input_file:
            if line.startswith("#") or not line.strip():
                continue
            fields = line.strip().split('\t')
            info = fields[9].split(":")
            freq_str = info[4]
            percentage = float(freq_str[:-1]) / 100 if freq_str.endswith("%") else float(freq_str)

            # The final else keeps ``b`` defined for every record.
            if percentage < 0.1:
                b = 0
            elif percentage > 0.9:
                b = 1
            else:
                b = 0.5

            # The frequency is written back exactly as read.
            new_line = '\t'.join([fields[0], fields[1], fields[3], fields[4], info[4], str(b), info[2]])
            output_file.write(new_line + '\n')
    return output_vcf
def merge_and_sort_files(matched_file, unmatched_file, output_file):
    """Concatenate two header-less TSV files and sort the rows by columns 0 and 1.

    Either input may be empty; an empty input is simply left out, and if both
    are empty an empty ``output_file`` is written.

    Field text is written back exactly as read. Letting pandas infer column
    types turned ``1`` into ``1.0`` or ``0.50`` into ``0.5`` depending on what
    else was in the column, so the same class value could be spelled
    differently in the somatic and the germline file and then fail the string
    comparison done on the merged result.

    :param matched_file: path of the first TSV (may be empty)
    :param unmatched_file: path of the second TSV (may be empty)
    :param output_file: path of the sorted TSV to write
    :return: ``output_file``
    """
    sources = [path for path in (matched_file, unmatched_file) if os.stat(path).st_size > 0]
    if not sources:
        # Nothing to merge: read_csv would raise on an empty file.
        open(output_file, 'w').close()
        return output_file

    # Typed copy: used only to obtain the row order, so the ordering is the
    # same as before.
    typed_df = pd.concat(
        [pd.read_csv(path, sep='\t', header=None) for path in sources],
        ignore_index=True)
    # Text copy: the rows that are actually written.
    text_df = pd.concat(
        [pd.read_csv(path, sep='\t', header=None, dtype=str, keep_default_na=False)
         for path in sources],
        ignore_index=True)

    row_order = typed_df.sort_values(by=[0, 1]).index
    text_df.loc[row_order].to_csv(output_file, sep='\t', header=False, index=False)
    return output_file
# Join the somatic and germline tables side by side and compare them.
def merge_and_compare_files(somatic_file, germline_file, output_merged_file, output_final_file):
    """Pair the two files line by line and flag whether their class columns agree.

    Line *i* of ``somatic_file`` and line *i* of ``germline_file`` are stripped
    and joined with a tab; pairing stops at the end of the shorter file. The
    joined lines go to ``output_merged_file``. That file is then read back and
    every line is copied to ``output_final_file`` with an extra column:
    ``yes`` when tab-separated fields 5 and 12 are equal as strings, ``no``
    otherwise.

    :return: ``(output_merged_file, output_final_file)``
    """
    paired_rows = []
    with open(somatic_file, 'r') as somatic, open(germline_file, 'r') as germline:
        for somatic_line, germline_line in zip(somatic, germline):
            paired_rows.append(somatic_line.strip() + "\t" + germline_line.strip())

    # No trailing newline after the last row.
    with open(output_merged_file, 'w') as output_file:
        output_file.write('\n'.join(paired_rows))

    with open(output_merged_file, 'r') as merged, open(output_final_file, 'w') as final_output:
        for line in merged:
            record = line.strip()
            fields = record.split('\t')
            comparison_result = "yes" if fields[5] == fields[12] else "no"
            final_output.write(f"{record}\t{comparison_result}\n")

    return output_merged_file, output_final_file
# Filter a VCF with the (large) BED file to get the records used for the cnvkit plot.
def select_cnvkit_vcf(vcf, bed, output_file):
    """Write the SNV records at the BED sites whose frequency is in [0.1, 0.9].

    A record matches a BED row when its chromosome equals BED column 0, its
    position equals BED column 2, and both REF and ALT are a single character.
    The frequency is the 5th ``:``-separated sub-field of the 10th VCF column,
    given either as a fraction or as ``NN%``.
    NOTE(review): assumes the caller's FORMAT layout - confirm.

    :param vcf: path of the input VCF
    :param bed: path of the tab-separated site list (no header)
    :param output_file: output path; receives the VCF header followed by the
        selected records, grouped in BED row order
    :return: ``output_file``
    """
    vcf_header = []  # header lines, copied through unchanged
    snv_lines_by_site = {}  # (chrom, pos) -> SNV records in file order

    # Index the records once; the previous version re-split every record
    # several times for every BED row.
    with open(vcf, 'r') as vcf_file:
        for line in vcf_file:
            if line.startswith("#"):
                vcf_header.append(line)
                continue
            fields = line.split()
            if len(fields) < 5:
                # blank or truncated line: nothing to match against
                continue
            if len(fields[3]) < 2 and len(fields[4]) < 2:
                snv_lines_by_site.setdefault((fields[0], fields[1]), []).append(line)

    df_position = pd.read_csv(bed, sep='\t', header=None, names=range(4))

    result_data = []
    for chrom, position in zip(df_position[0], df_position[2]):
        for line in snv_lines_by_site.get((str(chrom), str(position)), ()):
            p_value_str = line.split()[9].split(":")[4]
            p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str)
            if 0.1 <= p_value <= 0.9:
                result_data.append(line)

    with open(output_file, 'w') as output:
        output.writelines(vcf_header)
        output.writelines(result_data)

    return output_file
def paired_monitoring(name, somatic_vcf, germline_vcf, ref_bed, cnvkit_ref_bed, output_dir):
    """Paired contamination check comparing a somatic and a germline VCF.

    Both VCFs are reduced to the ``ref_bed`` sites, converted to class tables,
    padded with placeholder rows for sites without a record, sorted, and
    finally compared line by line. The comparison is kept as
    ``<name>_contaminate_result.txt``; the germline records selected with
    ``cnvkit_ref_bed`` are kept as ``<name>_select_cnvkit.vcf``. Every other
    file written along the way is deleted before returning.

    :param name: sample name, used as prefix of every file created
    :param somatic_vcf: path of the somatic VCF
    :param germline_vcf: path of the germline VCF
    :param ref_bed: site list used for the comparison
    :param cnvkit_ref_bed: site list used for the plotting VCF
    :param output_dir: directory that receives all files
    :return: None
    """

    def in_output_dir(filename):
        # All files of this run live directly under output_dir.
        return os.path.join(output_dir, filename)

    # Step 1: restrict each VCF to the reference sites.
    somatic_matched_path = in_output_dir(f'{name}_somatic_matched.vcf')
    somatic_unmatched_path = in_output_dir(f'{name}_somatic_unmatched.vcf')
    somatic_matched_file, somatic_unmatched_file = select_position(
        somatic_vcf, ref_bed, somatic_matched_path, somatic_unmatched_path)

    germline_matched_path = in_output_dir(f'{name}_germline_matched.vcf')
    germline_unmatched_path = in_output_dir(f'{name}_germline_unmatched.vcf')
    germline_matched_file, germline_unmatched_file = select_position(
        germline_vcf, ref_bed, germline_matched_path, germline_unmatched_path)

    # Step 2: turn the matched records into class tables.
    somatic_judge_path = in_output_dir(f'{name}_somatic_matched_add_judge.vcf')
    somatic_judge_file = process_judge_vcf(somatic_matched_file, somatic_judge_path)

    germline_judge_path = in_output_dir(f'{name}_germline_matched_add_judge.vcf')
    germline_judge_file = process_judge_vcf(germline_matched_file, germline_judge_path)

    # Step 3: add the placeholder rows of the unmatched sites and sort.
    somatic_merged_path = in_output_dir(f'{name}_somatic_merged.vcf')
    somatic_merged_file = merge_and_sort_files(
        somatic_judge_file, somatic_unmatched_file, somatic_merged_path)

    germline_merged_path = in_output_dir(f'{name}_germline__merged.vcf')
    germline_merged_file = merge_and_sort_files(
        germline_judge_file, germline_unmatched_file, germline_merged_path)

    # Step 4: compare somatic against germline.
    result_pro_file = in_output_dir(f'{name}_result_pro.txt')
    result_file = in_output_dir(f'{name}_contaminate_result.txt')
    merge_and_compare_files(somatic_merged_file, germline_merged_file, result_pro_file, result_file)

    # Step 5: select the germline records used for plotting.
    cnvkit_output_file = in_output_dir(f'{name}_select_cnvkit.vcf')
    select_cnvkit_vcf(germline_vcf, cnvkit_ref_bed, cnvkit_output_file)

    # Step 6: drop the intermediates, in the order they were created.
    intermediates = (
        somatic_matched_path,
        somatic_unmatched_path,
        germline_matched_path,
        germline_unmatched_path,
        somatic_judge_path,
        germline_judge_path,
        somatic_merged_path,
        germline_merged_path,
        result_pro_file,
    )
    for leftover in intermediates:
        os.remove(leftover)
# Command-line entry point: picks single-sample or paired monitoring by probe.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Pollution Process Script")
    parser.add_argument('-n', '--name', help="Name for sample", required=True)
    parser.add_argument('-b', '--ref_bed', help="ref_bed", required=True)
    parser.add_argument('-v', '--vcf', help="raw vcf for prbe 160 or 17 ; somatic vcf for prbe 682 or 624",
                        required=True)
    parser.add_argument('-v2', '--vcf2', help="germline vcf; required when prbe 682 or 624")
    parser.add_argument('-c', '--cnvkit_bed', help="cnvkit_bed; required when prbe 682 or 624")
    parser.add_argument('-p', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True)
    # An empty output_dir makes os.path.join produce paths relative to the
    # current directory.
    parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='')
    args = parser.parse_args()

    bed_path = os.path.realpath(args.ref_bed)
    print(f'污染检测使用ref_bed: {bed_path}')

    probe = args.probe
    if probe == '160' or probe == '17':
        # Probes 160 and 17 use the single-sample check with a per-probe
        # expected frequency range.
        freq_range = {"17": [0.3452, 0.6512], "160": [0.2930, 0.6753]}.get(probe)
        single_monitoring(args.name, args.vcf, bed_path, freq_range, args.output_dir)
    elif probe == '682' or probe == '624':
        # Probes 682 and 624 use the paired check and need both extra inputs.
        if not args.vcf2:
            parser.error('--vcf2 is required in prbe 682 or 624')
        if not args.cnvkit_bed:
            parser.error('--cnvkit_bed is required in prbe 682 or 624')
        cnvkit_bed_path = os.path.realpath(args.cnvkit_bed)
        print(f'污染检测使用cnvkit_bed: {cnvkit_bed_path}')
        # Pass the resolved path that was just logged, as is done for ref_bed.
        paired_monitoring(args.name, args.vcf, args.vcf2, bed_path, cnvkit_bed_path, args.output_dir)
    else:
        parser.error('probe error. 682, 624, 160, 17 for now')

View File

@ -17,7 +17,9 @@ def database():
增持匹配信息
"""
path = os.environ.get('DATABASE', '/home/zhangchao/project/pipeline/workflow/database/')
path = os.environ.get('DATABASE')
if not path:
raise UserWarning('未设置DATABASE环境变量')
# gene function 基因功能描述信息
gene_function_path = os.path.join(path, 'gene_function.txt')
gene_function_df = pd.read_csv(gene_function_path, sep='\t')

View File

@ -0,0 +1,634 @@
chr5 224633 224633
chr5 226160 226160
chr5 228362 228362
chr5 230980 230980
chr5 231111 231111
chr5 231143 231143
chr5 233734 233734
chr5 251784 251784
chr5 256451 256451
chr5 256472 256472
chr11 534242 534242
chr18 673447 673447
chr18 673449 673449
chr19 1208289 1208289
chr19 1220321 1220321
chr19 1221161 1221161
chr19 1222012 1222012
chr4 1801064 1801064
chr4 1805478 1805478
chr4 1806519 1806519
chr4 1807894 1807894
chr16 2103408 2103408
chr16 2120402 2120402
chr16 2138219 2138219
chr19 3110349 3110349
chr19 3113305 3113305
chr19 3114954 3114954
chr19 3115124 3115124
chr19 3119184 3119184
chr19 3119239 3119239
chr11 4104007 4104007
chr11 4104060 4104060
chr11 4104087 4104087
chr11 4113395 4113395
chr11 4113470 4113470
chr11 4113471 4113471
chr11 4115487 4115487
chr11 4115541 4115541
chr11 4159457 4159457
chr11 4159466 4159466
chr12 4388084 4388084
chr9 5050706 5050706
chr9 5078291 5078291
chr9 5081780 5081780
chr9 5090810 5090810
chr9 5090934 5090934
chr9 5557672 5557672
chr9 5557708 5557708
chr7 6013049 6013049
chr7 6022626 6022626
chr7 6022629 6022629
chr7 6026384 6026384
chr7 6026775 6026775
chr7 6026942 6026942
chr7 6026988 6026988
chr7 6031102 6031102
chr7 6036980 6036980
chr7 6038722 6038722
chr7 6043386 6043386
chr7 6045627 6045627
chr17 7578645 7578645
chr17 7579472 7579472
chr5 7870973 7870973
chr10 8111409 8111409
chr1 11174331 11174331
chr1 11174851 11174851
chr1 11181327 11181327
chr1 11181457 11181457
chr1 11190646 11190646
chr1 11190730 11190730
chr1 11199518 11199518
chr1 11199541 11199541
chr1 11205058 11205058
chr1 11272468 11272468
chr1 11288758 11288758
chr1 11300338 11300338
chr1 11301714 11301714
chr1 11303153 11303153
chr1 11303383 11303383
chr1 11854457 11854457
chr1 11854476 11854476
chr1 11856378 11856378
chr3 14187449 14187449
chr1 17380497 17380497
chr2 17938501 17938501
chr19 17942005 17942005
chr19 17946054 17946054
chr19 17952609 17952609
chr19 17953321 17953321
chr19 17955021 17955021
chr1 20915507 20915507
chr1 20915531 20915531
chr1 20915590 20915590
chr1 20915701 20915701
chr12 21331599 21331599
chr12 21331625 21331625
chr9 21968199 21968199
chr9 21968712 21968712
chr12 25362777 25362777
chr12 25368462 25368462
chr2 25469502 25469502
chr2 25536827 25536827
chr4 25666099 25666099
chr1 27089690 27089690
chr13 28592546 28592546
chr13 28608459 28608459
chr13 28609825 28609825
chr13 28610183 28610183
chr13 28622544 28622544
chr13 28623699 28623699
chr13 28623759 28623759
chr13 28624294 28624294
chr13 28636084 28636084
chr13 28674628 28674628
chr22 29104955 29104955
chr22 29104959 29104959
chr22 29130458 29130458
chr2 29416366 29416366
chr2 29416481 29416481
chr2 29416572 29416572
chr2 29420550 29420550
chr2 29443617 29443617
chr2 29443749 29443749
chr2 29444076 29444076
chr2 29444095 29444095
chr2 29445458 29445458
chr2 29445602 29445602
chr2 29446178 29446178
chr2 29446184 29446184
chr2 29446701 29446701
chr2 29446721 29446721
chr2 29446880 29446880
chr2 29447108 29447108
chr2 29447253 29447253
chr2 29448760 29448760
chr2 29449005 29449005
chr2 29449526 29449526
chr2 29449819 29449819
chr2 29455267 29455267
chr17 29508775 29508775
chr17 29541437 29541437
chr2 29543663 29543663
chr17 29553485 29553485
chr17 29653293 29653293
chr17 29663624 29663624
chr17 29663625 29663625
chr17 29670190 29670190
chr17 29679246 29679246
chr17 29686024 29686024
chr17 29705947 29705947
chr2 29940529 29940529
chr2 30143499 30143499
chr6 32782149 32782149
chr6 32782387 32782387
chr6 32784645 32784645
chr6 32796653 32796653
chr6 32796685 32796685
chr6 32796751 32796751
chr6 32796793 32796793
chr6 32797168 32797168
chr6 32797297 32797297
chr6 32797361 32797361
chr6 32797684 32797684
chr6 32797773 32797773
chr6 32797809 32797809
chr6 32797876 32797876
chr6 32798608 32798608
chr6 32800224 32800224
chr6 32800412 32800412
chr6 32805470 32805470
chr6 32805849 32805849
chr6 32814975 32814975
chr6 32818774 32818774
chr6 32819865 32819865
chr6 32821447 32821447
chr13 32890572 32890572
chr13 32906480 32906480
chr13 32906729 32906729
chr13 32906980 32906980
chr13 32910351 32910351
chr13 32910721 32910721
chr13 32911463 32911463
chr13 32911888 32911888
chr13 32913055 32913055
chr13 32915005 32915005
chr13 32929232 32929232
chr13 32929387 32929387
chr13 32936646 32936646
chr15 32965139 32965139
chr15 32965147 32965147
chr15 32981978 32981978
chr17 33433487 33433487
chr17 33445549 33445549
chr20 36030939 36030939
chr20 36031097 36031097
chr3 37067050 37067050
chr3 37083740 37083740
chr21 37518706 37518706
chr17 37868715 37868715
chr17 37879588 37879588
chr17 37879762 37879762
chr17 37884037 37884037
chr8 38286811 38286811
chr2 38298150 38298150
chr2 38298203 38298203
chr17 41223094 41223094
chr17 41234470 41234470
chr17 41244000 41244000
chr17 41244435 41244435
chr17 41244936 41244936
chr17 41244982 41244982
chr17 41245237 41245237
chr17 41245466 41245466
chr17 41267763 41267763
chr17 41277187 41277187
chr3 41280827 41280827
chr6 41903782 41903782
chr22 42526694 42526694
chr10 43572832 43572832
chr10 43595968 43595968
chr10 43600689 43600689
chr10 43604950 43604950
chr10 43605247 43605247
chr10 43605392 43605392
chr10 43605860 43605860
chr10 43605902 43605902
chr10 43606687 43606687
chr10 43606856 43606856
chr10 43607756 43607756
chr10 43607760 43607760
chr10 43610366 43610366
chr10 43610455 43610455
chr10 43610558 43610558
chr10 43611708 43611708
chr10 43611865 43611865
chr10 43613843 43613843
chr10 43615505 43615505
chr10 43622217 43622217
chr20 43961971 43961971
chr20 43961997 43961997
chr20 43963047 43963047
chr20 43963489 43963489
chr20 43963735 43963735
chr20 43963739 43963739
chr20 43964288 43964288
chr20 43964407 43964407
chr19 44055726 44055726
chr22 44324676 44324676
chr22 44324727 44324727
chr22 44324730 44324730
chr14 45606287 45606287
chr14 45642287 45642287
chr14 45644589 45644589
chr14 45650900 45650900
chr14 45652938 45652938
chr14 45658156 45658156
chr14 45664721 45664721
chr14 45665468 45665468
chr1 45796269 45796269
chr1 45797505 45797505
chr1 45798555 45798555
chr19 45854919 45854919
chr19 45867259 45867259
chr19 45912736 45912736
chr19 45923653 45923653
chr1 46726951 46726951
chr1 46736386 46736386
chr1 46740268 46740268
chr1 46743900 46743900
chr21 46957794 46957794
chr2 47601106 47601106
chr2 47604176 47604176
chr2 47613768 47613768
chr2 47630550 47630550
chr2 47637337 47637337
chr2 47693959 47693959
chr2 47703500 47703500
chr2 47739551 47739551
chr2 48010488 48010488
chr2 48029894 48029894
chr2 48030692 48030692
chr2 48030838 48030838
chr18 48577091 48577091
chr18 48577782 48577782
chr13 49051012 49051012
chr13 49051481 49051481
chr19 50902164 50902164
chr19 50902331 50902331
chr19 50905042 50905042
chr19 50905074 50905074
chr19 50905089 50905089
chr19 50905189 50905189
chr19 50909765 50909765
chr19 50919797 50919797
chr19 50919828 50919828
chr15 51502844 51502844
chr15 51630835 51630835
chr15 51631279 51631279
chr3 52439508 52439508
chr2 54395247 54395247
chr4 55130078 55130078
chr4 55130154 55130154
chr4 55133726 55133726
chr4 55139771 55139771
chr4 55141055 55141055
chr4 55143577 55143577
chr4 55152040 55152040
chr4 55161254 55161254
chr4 55161391 55161391
chr7 55214348 55214348
chr7 55214443 55214443
chr7 55221655 55221655
chr7 55229255 55229255
chr7 55238087 55238087
chr7 55238874 55238874
chr7 55249063 55249063
chr7 55266417 55266417
chr7 55268916 55268916
chr4 55602765 55602765
chr4 55948108 55948108
chr4 55961159 55961159
chr4 55968053 55968053
chr4 55968651 55968651
chr4 55972974 55972974
chr12 56477694 56477694
chr12 56491740 56491740
chr12 56493822 56493822
chr12 56494991 56494991
chr17 56769979 56769979
chr20 57600655 57600655
chr12 58145156 58145156
chr2 58388696 58388696
chr17 59760996 59760996
chr17 59763114 59763114
chr17 59763347 59763347
chr17 59763465 59763465
chr17 59808391 59808391
chr17 59857809 59857809
chr17 59859299 59859299
chr17 59892541 59892541
chr1 65310489 65310489
chr1 65311262 65311262
chr1 65321388 65321388
chr1 65325970 65325970
chr1 65330682 65330682
chr1 65351896 65351896
chr15 66745890 66745890
chr15 66774267 66774267
chr15 66782048 66782048
chr14 68331675 68331675
chr16 68771372 68771372
chr16 68857441 68857441
chr16 68862165 68862165
chr14 69006913 69006913
chr14 69006922 69006922
chr16 69143577 69143577
chr14 69149814 69149814
chr8 69389217 69389217
chr11 69462910 69462910
chr16 69745145 69745145
chr15 75012985 75012985
chr14 75483812 75483812
chr14 75485489 75485489
chr14 75497239 75497239
chr14 75513534 75513534
chr14 75513828 75513828
chr14 75513883 75513883
chr9 80409345 80409345
chr7 80532112 80532112
chr9 86900926 86900926
chr7 87138645 87138645
chr7 87160561 87160561
chr7 87160618 87160618
chr7 87171152 87171152
chr7 87179601 87179601
chr7 87180198 87180198
chr9 87359220 87359220
chr9 87360107 87360107
chr9 87360111 87360111
chr9 87360153 87360153
chr9 87360720 87360720
chr9 87481287 87481287
chr9 87483094 87483094
chr15 88423463 88423463
chr15 88476365 88476365
chr15 88479504 88479504
chr15 88479928 88479928
chr15 88480751 88480751
chr15 88481623 88481623
chr15 88481913 88481913
chr15 88482810 88482810
chr15 88483456 88483456
chr15 88483594 88483594
chr15 88575886 88575886
chr15 88576185 88576185
chr10 88648670 88648670
chr10 88648672 88648672
chr15 88679785 88679785
chr4 89052323 89052323
chr4 89052361 89052361
chr10 89624218 89624218
chr16 89805261 89805261
chr16 89805914 89805914
chr16 89805977 89805977
chr16 89807233 89807233
chr16 89809171 89809171
chr16 89809319 89809319
chr16 89816314 89816314
chr16 89816333 89816333
chr16 89825065 89825065
chr16 89825157 89825157
chr16 89828437 89828437
chr16 89831243 89831243
chr16 89831520 89831520
chr16 89836323 89836323
chr16 89838078 89838078
chr16 89839766 89839766
chr16 89845194 89845194
chr16 89845287 89845287
chr16 89849480 89849480
chr16 89857935 89857935
chr16 89857964 89857964
chr16 89858417 89858417
chr16 89858505 89858505
chr16 89858525 89858525
chr16 89866043 89866043
chr16 89869761 89869761
chr16 89882807 89882807
chr16 89882826 89882826
chr15 90628537 90628537
chr8 90948273 90948273
chr8 90955253 90955253
chr8 90958422 90958422
chr8 90958530 90958530
chr8 90967711 90967711
chr8 90970935 90970935
chr8 90982803 90982803
chr8 90990479 90990479
chr8 90995019 90995019
chr7 92244422 92244422
chr10 96798524 96798524
chr1 97915624 97915624
chr1 97981395 97981395
chr1 98348885 98348885
chr7 99382096 99382096
chr10 101542578 101542578
chr11 103418158 103418158
chr11 103418177 103418177
chr11 103418196 103418196
chr10 104897985 104897985
chr14 105239894 105239894
chr14 105246407 105246407
chr11 108114632 108114632
chr11 108128353 108128353
chr11 108137867 108137867
chr11 108139120 108139120
chr11 108159732 108159732
chr11 108183167 108183167
chr11 108192078 108192078
chr11 108236783 108236783
chr2 111885244 111885244
chr5 112162854 112162854
chr5 112164561 112164561
chr5 112175770 112175770
chr5 112176325 112176325
chr5 112176559 112176559
chr5 112176756 112176756
chr5 112177171 112177171
chr6 114265587 114265587
chr6 114281236 114281236
chr10 115438204 115438204
chr10 115439530 115439530
chr10 115439569 115439569
chr10 115489152 115489152
chr10 115489167 115489167
chr10 115489589 115489589
chr10 115489650 115489650
chr7 116335811 116335811
chr7 116339672 116339672
chr7 116340262 116340262
chr7 116397572 116397572
chr7 116435768 116435768
chr7 116436022 116436022
chr7 116436097 116436097
chr6 117622184 117622184
chr6 117622188 117622188
chr6 117622233 117622233
chr6 117639419 117639419
chr6 117641016 117641016
chr6 117641246 117641246
chr6 117641330 117641330
chr6 117641474 117641474
chr6 117641819 117641819
chr6 117642418 117642418
chr6 117642495 117642495
chr6 117643433 117643433
chr6 117643659 117643659
chr6 117643897 117643897
chr6 117643994 117643994
chr6 117644054 117644054
chr6 117644132 117644132
chr6 117644841 117644841
chr6 117644866 117644866
chr6 117645041 117645041
chr6 117645239 117645239
chr6 117645604 117645604
chr6 117646289 117646289
chr6 117646380 117646380
chr6 117646772 117646772
chr6 117648064 117648064
chr6 117648340 117648340
chr6 117649173 117649173
chr6 117649988 117649988
chr6 117650131 117650131
chr6 117658193 117658193
chr6 117662682 117662682
chr6 117678083 117678083
chr6 117686943 117686943
chr6 117710661 117710661
chr6 117724462 117724462
chr6 117725448 117725448
chr6 117725578 117725578
chr6 117730819 117730819
chr10 123239112 123239112
chr10 123247644 123247644
chr10 123263478 123263478
chr10 123274846 123274846
chr10 123298158 123298158
chr10 123310871 123310871
chr3 124456742 124456742
chr11 125514573 125514573
chr11 125525195 125525195
chr7 128845088 128845088
chr7 128846207 128846207
chr7 128846328 128846328
chr7 128846469 128846469
chr5 131892979 131892979
chr5 131915213 131915213
chr5 131923393 131923393
chr12 133202215 133202215
chr12 133208886 133208886
chr12 133208979 133208979
chr12 133210985 133210985
chr12 133212582 133212582
chr12 133214768 133214768
chr12 133218277 133218277
chr12 133219831 133219831
chr12 133219989 133219989
chr12 133233705 133233705
chr12 133236000 133236000
chr12 133238076 133238076
chr12 133240782 133240782
chr12 133250118 133250118
chr12 133250197 133250197
chr12 133253995 133253995
chr12 133257887 133257887
chr12 133263825 133263825
chr9 135781239 135781239
chr5 138132255 138132255
chr9 139391338 139391338
chr9 139391543 139391543
chr9 139391636 139391636
chr9 139397707 139397707
chr9 139402663 139402663
chr9 139405261 139405261
chr9 139407932 139407932
chr9 139412197 139412197
chr9 139418260 139418260
chr7 140426257 140426257
chr7 140434463 140434463
chr7 140449071 140449071
chr7 140449150 140449150
chr3 142168331 142168331
chr3 142178144 142178144
chr3 142215178 142215178
chr3 142222284 142222284
chr3 142277427 142277427
chr3 142277536 142277536
chr3 142277575 142277575
chr3 142281612 142281612
chr5 149433857 149433857
chr5 149435759 149435759
chr5 149439458 149439458
chr5 149450132 149450132
chr5 149456811 149456811
chr5 149457678 149457678
chr5 149460343 149460343
chr5 149460553 149460553
chr5 149495537 149495537
chr5 149499672 149499672
chr5 149500427 149500427
chr5 149513626 149513626
chr5 149782420 149782420
chr5 149783085 149783085
chr5 149783694 149783694
chr5 149783879 149783879
chr7 152351316 152351316
chr4 153252061 153252061
chr1 156785617 156785617
chr1 156843264 156843264
chr1 156844979 156844979
chr1 156845095 156845095
chr1 156845848 156845848
chr1 156846233 156846233
chr1 156848995 156848995
chr1 156849780 156849780
chr6 159189454 159189454
chr6 159190019 159190019
chr6 159190184 159190184
chr6 159191788 159191788
chr6 160113872 160113872
chr1 162737116 162737116
chr1 162740327 162740327
chr1 162743418 162743418
chr3 178922274 178922274
chr3 178938747 178938747
chr3 178942431 178942431
chr1 204501383 204501383
chr1 204512100 204512100
chr1 204516025 204516025
chr2 212251864 212251864
chr2 212652698 212652698
chr2 215593233 215593233
chr2 215595645 215595645
chr2 215632192 215632192
chr2 215632255 215632255
chr2 215632256 215632256
chr2 215634055 215634055
chr2 215645464 215645464
chr2 215645545 215645545
chr2 215674224 215674224
chr2 215674323 215674323
chr2 216212339 216212339
chr2 234669144 234669144

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,128 @@
chr1 2488153 2488153
chr1 23885599 23885599
chr1 65321388 65321388
chr1 120612006 120612006
chr1 193094375 193094375
chr1 226570840 226570840
chr2 47693959 47693959
chr2 48030838 48030838
chr2 61749716 61749716
chr2 141032088 141032088
chr2 215645464 215645464
chr3 30686414 30686414
chr3 47125385 47125385
chr3 134644636 134644636
chr3 142281612 142281612
chr3 192053274 192053274
chr4 55599436 55599436
chr4 143007419 143007419
chr4 153252061 153252061
chr4 185310218 185310218
chr4 25666099 25666099
chr5 35857177 35857177
chr5 38955796 38955796
chr5 39074296 39074296
chr5 67522722 67522722
chr5 112162854 112162854
chr5 149435759 149435759
chr5 161119125 161119125
chr5 170819887 170819887
chr5 180046344 180046344
chr6 30858857 30858857
chr6 32797876 32797876
chr6 36645696 36645696
chr6 117678083 117678083
chr6 117714346 117714346
chr7 2946461 2946461
chr7 2962753 2962753
chr7 87138645 87138645
chr7 151970931 151970931
chr8 90955624 90955624
chr8 90970935 90970935
chr8 90995019 90995019
chr8 68864728 68864728
chr8 38322346 38322346
chr8 145742879 145742879
chr9 5081780 5081780
chr9 8389364 8389364
chr9 93657761 93657761
chr9 98209594 98209594
chr9 21991923 21991923
chr9 37020622 37020622
chr9 21975017 21975017
chr10 43600689 43600689
chr10 89720907 89720907
chr10 104596981 104596981
chr10 104849468 104849468
chr10 104855656 104855656
chr10 123239112 123239112
chr11 32410774 32410774
chr11 69462910 69462910
chr11 94197260 94197260
chr11 94225807 94225807
chr11 94225920 94225920
chr11 125497466 125497466
chr12 1040373 1040373
chr12 4553383 4553383
chr12 6711147 6711147
chr12 21331625 21331625
chr12 49427652 49427652
chr12 49444545 49444545
chr12 121416622 121416622
chr12 133202215 133202215
chr13 21562948 21562948
chr13 28609825 28609825
chr13 32936646 32936646
chr13 113889474 113889474
chr13 113907391 113907391
chr14 105239894 105239894
chr14 105246325 105246325
chr14 38064215 38064215
chr14 20820537 20820537
chr14 35871217 35871217
chr15 41865488 41865488
chr15 51502986 51502986
chr15 51529112 51529112
chr15 99456253 99456253
chr15 99478225 99478225
chr15 51558731 51558731
chr16 68857441 68857441
chr16 359953 359953
chr16 89805977 89805977
chr16 89838078 89838078
chr16 89857964 89857964
chr17 7983969 7983969
chr17 29486152 29486152
chr17 29508775 29508775
chr17 29546175 29546175
chr17 29559932 29559932
chr17 29653293 29653293
chr17 37879762 37879762
chr17 62007498 62007498
chr17 78919558 78919558
chr19 3110349 3110349
chr19 3119239 3119239
chr19 4101062 4101062
chr19 11136215 11136215
chr19 15289613 15289613
chr19 15295134 15295134
chr19 41725271 41725271
chr20 43956527 43956527
chr20 43956636 43956636
chr20 43958850 43958850
chr20 43958872 43958872
chr20 54959296 54959296
chr20 54961541 54961541
chr21 37518706 37518706
chr21 39752673 39752673
chr21 39753375 39753375
chr22 41568480 41568480
chr22 30038152 30038152
chr22 30079213 30079213
chrX 76937963 76937963
chrX 39922359 39922359
chrX 39932907 39932907
chrX 44938563 44938563
chrX 100608191 100608191
chrX 100611285 100611285
chrX 76940534 76940534

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,128 @@
chr1 2488153 2488153
chr1 23885599 23885599
chr1 65321388 65321388
chr1 120612006 120612006
chr1 193094375 193094375
chr1 226570840 226570840
chr2 47693959 47693959
chr2 48030838 48030838
chr2 61749716 61749716
chr2 141032088 141032088
chr2 215645464 215645464
chr3 30686414 30686414
chr3 47125385 47125385
chr3 134644636 134644636
chr3 142281612 142281612
chr3 192053274 192053274
chr4 55599436 55599436
chr4 143007419 143007419
chr4 153252061 153252061
chr4 185310218 185310218
chr4 25666099 25666099
chr5 35857177 35857177
chr5 38955796 38955796
chr5 39074296 39074296
chr5 67522722 67522722
chr5 112162854 112162854
chr5 149435759 149435759
chr5 161119125 161119125
chr5 170819887 170819887
chr5 180046344 180046344
chr6 30858857 30858857
chr6 32797876 32797876
chr6 36645696 36645696
chr6 117678083 117678083
chr6 117714346 117714346
chr7 2946461 2946461
chr7 2962753 2962753
chr7 87138645 87138645
chr7 151970931 151970931
chr8 90955624 90955624
chr8 90970935 90970935
chr8 90995019 90995019
chr8 68864728 68864728
chr8 38322346 38322346
chr8 145742879 145742879
chr9 5081780 5081780
chr9 8389364 8389364
chr9 93657761 93657761
chr9 98209594 98209594
chr9 21991923 21991923
chr9 37020622 37020622
chr9 21975017 21975017
chr10 43600689 43600689
chr10 89720907 89720907
chr10 104596981 104596981
chr10 104849468 104849468
chr10 104855656 104855656
chr10 123239112 123239112
chr11 32410774 32410774
chr11 69462910 69462910
chr11 94197260 94197260
chr11 94225807 94225807
chr11 94225920 94225920
chr11 125497466 125497466
chr12 1040373 1040373
chr12 4553383 4553383
chr12 6711147 6711147
chr12 21331625 21331625
chr12 49427652 49427652
chr12 49444545 49444545
chr12 121416622 121416622
chr12 133202215 133202215
chr13 21562948 21562948
chr13 28609825 28609825
chr13 32936646 32936646
chr13 113889474 113889474
chr13 113907391 113907391
chr14 105239894 105239894
chr14 105246325 105246325
chr14 38064215 38064215
chr14 20820537 20820537
chr14 35871217 35871217
chr15 41865488 41865488
chr15 51502986 51502986
chr15 51529112 51529112
chr15 99456253 99456253
chr15 99478225 99478225
chr15 51558731 51558731
chr16 68857441 68857441
chr16 359953 359953
chr16 89805977 89805977
chr16 89838078 89838078
chr16 89857964 89857964
chr17 7983969 7983969
chr17 29486152 29486152
chr17 29508775 29508775
chr17 29546175 29546175
chr17 29559932 29559932
chr17 29653293 29653293
chr17 37879762 37879762
chr17 62007498 62007498
chr17 78919558 78919558
chr19 3110349 3110349
chr19 3119239 3119239
chr19 4101062 4101062
chr19 11136215 11136215
chr19 15289613 15289613
chr19 15295134 15295134
chr19 41725271 41725271
chr20 43956527 43956527
chr20 43956636 43956636
chr20 43958850 43958850
chr20 43958872 43958872
chr20 54959296 54959296
chr20 54961541 54961541
chr21 37518706 37518706
chr21 39752673 39752673
chr21 39753375 39753375
chr22 41568480 41568480
chr22 30038152 30038152
chr22 30079213 30079213
chrX 76937963 76937963
chrX 39922359 39922359
chrX 39932907 39932907
chrX 44938563 44938563
chrX 100608191 100608191
chrX 100611285 100611285
chrX 76940534 76940534

View File

@ -0,0 +1,128 @@
chr1 2488153 2488153
chr1 23885599 23885599
chr1 65321388 65321388
chr1 120612006 120612006
chr1 193094375 193094375
chr1 226570840 226570840
chr2 47693959 47693959
chr2 48030838 48030838
chr2 61749716 61749716
chr2 141032088 141032088
chr2 215645464 215645464
chr3 30686414 30686414
chr3 47125385 47125385
chr3 134644636 134644636
chr3 142281612 142281612
chr3 192053274 192053274
chr4 55599436 55599436
chr4 143007419 143007419
chr4 153252061 153252061
chr4 185310218 185310218
chr4 25666099 25666099
chr5 35857177 35857177
chr5 38955796 38955796
chr5 39074296 39074296
chr5 67522722 67522722
chr5 112162854 112162854
chr5 149435759 149435759
chr5 161119125 161119125
chr5 170819887 170819887
chr5 180046344 180046344
chr6 30858857 30858857
chr6 32797876 32797876
chr6 36645696 36645696
chr6 117678083 117678083
chr6 117714346 117714346
chr7 2946461 2946461
chr7 2962753 2962753
chr7 87138645 87138645
chr7 151970931 151970931
chr8 90955624 90955624
chr8 90970935 90970935
chr8 90995019 90995019
chr8 68864728 68864728
chr8 38322346 38322346
chr8 145742879 145742879
chr9 5081780 5081780
chr9 8389364 8389364
chr9 93657761 93657761
chr9 98209594 98209594
chr9 21991923 21991923
chr9 37020622 37020622
chr9 21975017 21975017
chr10 43600689 43600689
chr10 89720907 89720907
chr10 104596981 104596981
chr10 104849468 104849468
chr10 104855656 104855656
chr10 123239112 123239112
chr11 32410774 32410774
chr11 69462910 69462910
chr11 94197260 94197260
chr11 94225807 94225807
chr11 94225920 94225920
chr11 125497466 125497466
chr12 1040373 1040373
chr12 4553383 4553383
chr12 6711147 6711147
chr12 21331625 21331625
chr12 49427652 49427652
chr12 49444545 49444545
chr12 121416622 121416622
chr12 133202215 133202215
chr13 21562948 21562948
chr13 28609825 28609825
chr13 32936646 32936646
chr13 113889474 113889474
chr13 113907391 113907391
chr14 105239894 105239894
chr14 105246325 105246325
chr14 38064215 38064215
chr14 20820537 20820537
chr14 35871217 35871217
chr15 41865488 41865488
chr15 51502986 51502986
chr15 51529112 51529112
chr15 99456253 99456253
chr15 99478225 99478225
chr15 51558731 51558731
chr16 68857441 68857441
chr16 359953 359953
chr16 89805977 89805977
chr16 89838078 89838078
chr16 89857964 89857964
chr17 7983969 7983969
chr17 29486152 29486152
chr17 29508775 29508775
chr17 29546175 29546175
chr17 29559932 29559932
chr17 29653293 29653293
chr17 37879762 37879762
chr17 62007498 62007498
chr17 78919558 78919558
chr19 3110349 3110349
chr19 3119239 3119239
chr19 4101062 4101062
chr19 11136215 11136215
chr19 15289613 15289613
chr19 15295134 15295134
chr19 41725271 41725271
chr20 43956527 43956527
chr20 43956636 43956636
chr20 43958850 43958850
chr20 43958872 43958872
chr20 54959296 54959296
chr20 54961541 54961541
chr21 37518706 37518706
chr21 39752673 39752673
chr21 39753375 39753375
chr22 41568480 41568480
chr22 30038152 30038152
chr22 30079213 30079213
chrX 76937963 76937963
chrX 39922359 39922359
chrX 39932907 39932907
chrX 44938563 44938563
chrX 100608191 100608191
chrX 100611285 100611285
chrX 76940534 76940534

View File

@ -5,6 +5,15 @@ import os
run_wdl_path = os.path.join(os.path.dirname(__file__), 'run_wdl.py')
def environ_test():
    """Verify that the pipeline's directory environment variables are set.

    Reads WORKFLOW, PUBLIC and DATABASE from the process environment.

    Raises:
        UserWarning: if any of the three variables is unset or empty. The
            message ends with the names of the variables that are missing.
    """
    required = ('WORKFLOW', 'PUBLIC', 'DATABASE')
    # An empty value counts as unset, matching a plain truthiness check.
    missing = [key for key in required if not os.environ.get(key)]
    if missing:
        raise UserWarning(
            '未设置WORKFLOW, PUBLIC, DATABASE环境: ' + ', '.join(missing))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="JM to run pipeline")
@ -20,10 +29,13 @@ if __name__ == '__main__':
parser.add_argument('-node', '--start_node',
help="node begain to run; 'addQc', 'addAlignment', "
"'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo',"
" 'addHcs, addTmb, addAutoReport' and also run more than one node ,like this 'addTarget,addFusion'",
" 'addHcs, addTmb, addAutoReport' and also run more than one "
"node ,like this 'addTarget,addFusion'",
default='addQc')
args = parser.parse_args()
environ_test()
res_path = os.path.realpath(os.path.join(args.output_dir, args.barcode))
if not os.path.exists(res_path):
@ -36,6 +48,6 @@ if __name__ == '__main__':
f'-o {res_path} -b {args.probe} -p {args.project} -c {args.cancer} -w {args.wdl} ' \
f'> {res_path}/{args.barcode}_run.log ' \
f'2>> {res_path}/{args.barcode}_run.log &'
with open(os.path.join(res_path, 'exec'), 'w') as execfile:
execfile.write(cmd + '\n')
# with open(os.path.join(res_path, 'exec'), 'w') as execfile:
# execfile.write(cmd + '\n')
os.system(cmd)

View File

@ -39,7 +39,7 @@ def get_catecode(project, start_node='addQc'):
}
runcode = get_branch_nodes(dag, start_node)
public = os.environ.get('PUBLIC', '/home/zhangchao/project/pipeline/workflow/script/public')
public = os.environ.get('DATABASE')
path = os.path.join(public, 'info.csv')
info = pd.read_csv(path)
project = info[info['project'] == project]['CateCode']
@ -83,15 +83,16 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl
jsfile.write(json.dumps(arg, indent=4, ensure_ascii=False))
# run pipeline
cmd1 = 'export PATH=/home/zhangchao/project/pipeline/workflow/script:$PATH'
cmd2 = 'export PUBLIC=/home/zhangchao/project/pipeline/workflow/script/public'
# cmd1 = 'export PATH=/home/zhangchao/project/pipeline/workflow/script:$PATH'
# cmd2 = 'export PUBLIC=/home/zhangchao/project/pipeline/workflow/script/public'
cmd3 = f'cd {output_dir}'
# f'{"-Dcall-caching.enabled=false " if uncache else ""}'
# f'-Dconfig.file=/home/zhangchao/project/pipeline/workflow/script/cromwell.examples.conf ' \
cmd4 = f'/usr/bin/java -jar /home/zhangchao/project/pipeline/workflow/software/cromwell-51.jar run --inputs {jsfile_path} {wdl}'
cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}'
cmd4 = f'/usr/bin/java -jar $WORKFLOW/software/cromwell-51.jar run --inputs {jsfile_path} {wdl}'
# cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}'
cmd = f'{cmd3}; {cmd4}'
# 记录开始时间
start_time = time.time()
@ -128,7 +129,8 @@ if __name__ == '__main__':
parser.add_argument('-node', '--start_node',
help="node begain to run; 'addQc', 'addAlignment', "
"'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo',"
" 'addHcs, 'addTmb', addAutoReport' and also run more than one node ,like this 'addTarget,addFusion'",
" 'addHcs, 'addTmb', addAutoReport' and also run more than one node ,"
"like this 'addTarget,addFusion'",
default='addQc')
args = parser.parse_args()

13
script/soft_bash 100755
View File

@ -0,0 +1,13 @@
# Pipeline directory layout: workflow root, shared script resources, databases.
export WORKFLOW=/home/zhangchao/project/pipeline/workflow
export PUBLIC="${WORKFLOW}/script/public"
export DATABASE="${WORKFLOW}/database"
# Executable search path, highest priority first: bundled software binaries,
# pipeline scripts, then the Python-3.8.11 bin directory, then the caller's PATH.
export PATH="${WORKFLOW}/software/bin:${WORKFLOW}/script:/dataseq/product/workflow/software/Python-3.8.11/bin:${PATH}"

View File

@ -9,7 +9,7 @@ my ($input, $output, $cancer_type) = @ARGV;
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "Cnv药物注释使用路径$database_path\n";
open THERAPY, "/dataseq/jmdna/codes/reportbase/targetTherapy.txt";
open THERAPY, "$database_path/targetTherapy.txt";
my $h1 = <THERAPY>;
chomp $h1;
my %therapy;
@ -21,7 +21,7 @@ while (<THERAPY>) {
##药物翻译信息
open DRUG, "/dataseq/jmdna/codes/reportbase/target_drug.txt";
open DRUG, "$database_path/target_drug.txt";
my %drug;
<DRUG>;
while (<DRUG>) {
@ -55,7 +55,7 @@ sub drug {
}
open DIS, "/dataseq/jmdna/codes/reportbase/oncotree.cancertype.20230801.txt";
open DIS, "$database_path/oncotree.cancertype.20230801.txt";
<DIS>;
my (%dis, @id, %dis2);
while (<DIS>) {

View File

@ -114,7 +114,7 @@ while (<IN>) {
($codon =~ /dup/) ? ("Exon $exon insertion") :
("Exon $exon mutation");
}
elsif ($record{'AAChange_refGene'} =~ /splicing/) {
elsif ($record{'Func_refGene'} =~ /splicing/) {
$protein = 'Truncating Mutations';
$mut_type = '';
}
@ -123,7 +123,7 @@ while (<IN>) {
$mut_type = '';
}
else {
print $_;
print "药物注释未匹配到正确的protein或者mut_type";
}
##若突变不存在于%mut,写入@vus,若突变存在于%mut且neutral写入@neg;若基因不存在于%therapy写入@vus;

0
wdl/alignment.wdl 100644 → 100755
View File

0
wdl/call_mutation.wdl 100644 → 100755
View File

2
wdl/chemo.wdl 100644 → 100755
View File

@ -10,7 +10,7 @@ task run_chemo {
if [ ! -d ${output_dir}/chemo ];then
mkdir ${output_dir}/chemo
fi
chemo.py -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project}
chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project}
>>>
output {

0
wdl/cnv.wdl 100644 → 100755
View File

0
wdl/fusion.wdl 100644 → 100755
View File

2
wdl/hereditary.wdl 100644 → 100755
View File

@ -8,7 +8,7 @@ task run_hereditary {
if [ ! -d ${output_dir}/hereditary ];then
mkdir ${output_dir}/hereditary
fi
hereditary.py -p ${project} -n ${name} -f ${filter_txt} -o ${output_dir}/hereditary
hereditary.py -d $DATABASE/hereditary_database.xlsx -p ${project} -n ${name} -f ${filter_txt} -o ${output_dir}/hereditary
>>>
output {

0
wdl/msi.wdl 100644 → 100755
View File

65
wdl/pollution.wdl 100755
View File

@ -0,0 +1,65 @@
# Runs pollution.py for one sample and reports where its result CSV is expected.
#
# Inputs:
#   name        sample name; also used as the prefix of the result file name
#   output_dir  sample output directory; results go to <output_dir>/pollution
#   probe       probe name; selects the BED files under $PUBLIC/pollution
#   vcf         VCF passed to pollution.py via -v
#   vcf2        optional second VCF; the -v2 flag is emitted only when supplied
#
# Output:
#   pollution_res  path string <output_dir>/pollution/<name>_pollution.csv
task run_pollution {
    String name
    String output_dir
    String probe
    String vcf
    String? vcf2

    command <<<
        mkdir -p ${output_dir}/pollution
        pollution.py -n ${name} \
            -v ${vcf} \
            ${"-v2 " + vcf2} \
            -o ${output_dir}/pollution \
            -p ${probe} \
            -b $PUBLIC/pollution/${probe}_contaminate_ref.bed \
            -c $PUBLIC/pollution/${probe}_contaminate_cnvkit.bed
    >>>

    output {
        String pollution_res = "${output_dir}/pollution/${name}_pollution.csv"
    }
}
# Sub-workflow that dispatches run_pollution according to whether a normal
# sample was supplied.
#
# Inputs:
#   run           when false, no run_pollution call is made (default true)
#   name          sample name forwarded to run_pollution and used in the
#                 result path
#   normal        optional; only its presence is tested here
#   output_dir    sample output directory
#   probe         probe name forwarded to run_pollution
#   raw_vcf       VCF used when normal is not defined
#   somatic_vcf   first VCF used when normal is defined
#   germline_vcf  second VCF used when normal is defined
#
# Output:
#   pollution_res  path string <output_dir>/pollution/<name>_pollution.csv,
#                  built from the inputs rather than taken from a call output
workflow call_pollution {
    Boolean run=true
    String name
    String? normal
    String output_dir
    String probe
    String raw_vcf
    String somatic_vcf
    String germline_vcf

    if (run) {
        # Normal supplied: pass both the somatic and the germline VCF.
        if (defined(normal)) {
            call run_pollution as run_pollution_paired {
                input:
                    name=name,
                    output_dir=output_dir,
                    probe=probe,
                    vcf=somatic_vcf,
                    vcf2=germline_vcf
            }
        }
        # No normal: pass only the raw VCF; vcf2 stays unset.
        if (!defined(normal)) {
            call run_pollution as run_pollution_single {
                input:
                    name=name,
                    output_dir=output_dir,
                    probe=probe,
                    vcf=raw_vcf
            }
        }
    }

    output {
        String pollution_res = "${output_dir}/pollution/${name}_pollution.csv"
    }
}

3
wdl/postprocess.wdl 100644 → 100755
View File

@ -33,6 +33,7 @@ workflow call_postprocess {
String? cnv
String? msi
String? hereditary
String? pollution
String? chemo
String name
String? normal
@ -48,9 +49,9 @@ workflow call_postprocess {
msi=msi,
hereditary=hereditary,
chemo=chemo,
pollution=pollution,
name=name,
normal=normal,
normal=normal,
output_dir=output_dir,
cancer=cancer
}

0
wdl/qc.wdl 100644 → 100755
View File

0
wdl/statistics.wdl 100644 → 100755
View File

0
wdl/tmb.wdl 100644 → 100755
View File