污染处理
parent
73e90f8a5b
commit
1922d14b76
|
|
@ -4,51 +4,82 @@ import argparse
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import pysam
|
||||||
|
|
||||||
|
|
||||||
def single_monitoring(name, vcf, bed, freq_range, output_dir):
|
def load_bed_regions(bed_file):
|
||||||
vcf_header = [] # 用于存储VCF文件头部分
|
bed_regions = set()
|
||||||
vcf_data = [] # 用于存储筛选后的VCF数据
|
if bed_file:
|
||||||
p_value_list = [] # 用于存储符合条件的 p-value
|
with open(bed_file, 'r') as file:
|
||||||
|
for line in file:
|
||||||
|
parts = line.strip().split('\t')
|
||||||
|
if len(parts) >= 3:
|
||||||
|
chrom, start, end = parts[0], int(parts[1]), int(parts[2])
|
||||||
|
bed_regions.add((chrom, start, end))
|
||||||
|
return bed_regions
|
||||||
|
|
||||||
# 按行处理,保存为列表
|
|
||||||
with open(vcf, 'r') as vcf_file:
|
def build_record_dict(vcf_file):
|
||||||
for line in vcf_file:
|
records = {}
|
||||||
if line.startswith("#"):
|
for record in vcf_file:
|
||||||
vcf_header.append(line)
|
key = (record.contig, record.pos)
|
||||||
else:
|
records[key] = record
|
||||||
vcf_data.append(line)
|
return records
|
||||||
|
|
||||||
|
|
||||||
|
def simplify_genotype(freq):
|
||||||
|
if freq < 0.1:
|
||||||
|
return '0/0'
|
||||||
|
elif 0.1 <= freq <= 0.9:
|
||||||
|
return '0/1'
|
||||||
|
else:
|
||||||
|
return '1/1'
|
||||||
|
|
||||||
|
|
||||||
|
def single_monitor(name, vcf_file, bed_file, freq_range, output_dir):
|
||||||
|
"""
|
||||||
|
17, 160 单样本处理
|
||||||
|
rawvcf 对应bed 挑选出 位点,计算位点是否在给定范围内,计算比例得到z_socre
|
||||||
|
"""
|
||||||
|
|
||||||
# bed 处理
|
# bed 处理
|
||||||
df_position = pd.read_csv(bed, sep='\t', header=None, names=range(4))
|
bed_regions = load_bed_regions(bed_file)
|
||||||
position_list = list(df_position[2])
|
|
||||||
chr_list = list(df_position[0])
|
|
||||||
result_data = []
|
|
||||||
|
|
||||||
for i in range(len(position_list)):
|
|
||||||
filtered_lines = [line for line in vcf_data if
|
|
||||||
line.split()[1] == str(position_list[i]) and line.split()[0] == str(chr_list[i]) and len(
|
|
||||||
line.split()[3]) < 2 and len(line.split()[4]) < 2]
|
|
||||||
for line in filtered_lines:
|
|
||||||
p_value_str = line.split()[9].split(":")[4]
|
|
||||||
p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str)
|
|
||||||
if 0.1 <= p_value <= 0.9:
|
|
||||||
result_data.append(line)
|
|
||||||
p_value_list.append(p_value)
|
|
||||||
select_vcf = os.path.join(output_dir, f'{name}_select.vcf')
|
|
||||||
with open(select_vcf, 'w') as output:
|
|
||||||
for header_line in vcf_header:
|
|
||||||
output.write(header_line)
|
|
||||||
for data_line in result_data:
|
|
||||||
output.write(data_line)
|
|
||||||
|
|
||||||
|
vcf = pysam.VariantFile(vcf_file)
|
||||||
|
res_pos = list()
|
||||||
count_normal = 0
|
count_normal = 0
|
||||||
count_exception = 0
|
count_exception = 0
|
||||||
for p_value in p_value_list:
|
|
||||||
if freq_range[0] <= p_value <= freq_range[1]:
|
for record in vcf:
|
||||||
count_normal += 1
|
contig = record.contig
|
||||||
else:
|
pos = record.pos
|
||||||
count_exception += 1
|
ref = record.ref
|
||||||
|
alt = record.alts[0]
|
||||||
|
freq = record.samples.get(record.samples.keys()[0]).get("AF")[0]
|
||||||
|
|
||||||
|
if freq < 0.1 or freq > 0.9:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 检查记录是否是点突变
|
||||||
|
if len(record.ref) == 1 and len(record.alts[0]) == 1:
|
||||||
|
# 在bed 区间内
|
||||||
|
for (bed_chrom, bed_start, bed_end) in bed_regions:
|
||||||
|
if contig == bed_chrom and bed_start < pos <= bed_end:
|
||||||
|
if freq_range[0] <= freq <= freq_range[1]:
|
||||||
|
count_normal += 1
|
||||||
|
res = 'yes'
|
||||||
|
else:
|
||||||
|
count_exception += 1
|
||||||
|
res = 'no'
|
||||||
|
res_pos.append(dict(
|
||||||
|
chr=contig,
|
||||||
|
pos=pos,
|
||||||
|
ref=ref,
|
||||||
|
alt=alt,
|
||||||
|
freq=freq,
|
||||||
|
res=res
|
||||||
|
))
|
||||||
|
break
|
||||||
count_all = count_exception + count_normal
|
count_all = count_exception + count_normal
|
||||||
if count_all == 0:
|
if count_all == 0:
|
||||||
z_score = 0
|
z_score = 0
|
||||||
|
|
@ -61,212 +92,160 @@ def single_monitoring(name, vcf, bed, freq_range, output_dir):
|
||||||
count_exception=count_exception,
|
count_exception=count_exception,
|
||||||
z_score=z_score
|
z_score=z_score
|
||||||
)
|
)
|
||||||
pd.DataFrame([res]).to_csv(os.path.join(output_dir, f'{name}_pollution.csv'), sep="\t", index=False)
|
pd.DataFrame([res]).to_csv(os.path.join(output_dir, f'{name}_pollution_sum.txt'), sep="\t", index=False)
|
||||||
|
|
||||||
|
res_pos_df = pd.DataFrame(res_pos)
|
||||||
|
if not res_pos_df.empty:
|
||||||
|
res_pos_df.rename(columns={
|
||||||
|
'res': f'res{freq_range}',
|
||||||
|
}, inplace=True)
|
||||||
|
res_pos_df.to_csv(os.path.join(output_dir, f'{name}_pollution_pos.txt'), sep="\t", index=False)
|
||||||
|
|
||||||
|
|
||||||
# 根据小bed筛选vcf
|
def paired_monitor(name, vcf_file, bed_file, cnvkit_bed, output_dir):
|
||||||
def select_position(vcf, bed, matched_file, unmatched_file):
|
"""
|
||||||
vcf_header = [] # 用于存储VCF文件头部分
|
682双样本处理
|
||||||
vcf_data = [] # 用于存储筛选后的VCF数据
|
"""
|
||||||
|
bed_regions = load_bed_regions(bed_file)
|
||||||
|
cnvkit_bed_regions = load_bed_regions(cnvkit_bed)
|
||||||
|
vcf = pysam.VariantFile(vcf_file)
|
||||||
|
records_dict = build_record_dict(vcf)
|
||||||
|
res_pos = list()
|
||||||
|
|
||||||
# 按行处理,保存为列表
|
# 遍历 bed
|
||||||
with open(vcf, 'r') as vcf_file:
|
for (bed_chrom, bed_start, bed_end) in bed_regions:
|
||||||
for line in vcf_file:
|
key = (bed_chrom, bed_end)
|
||||||
if line.startswith("#"):
|
ref = '.'
|
||||||
vcf_header.append(line)
|
alt = '.'
|
||||||
else:
|
freq = '.'
|
||||||
vcf_data.append(line)
|
genotype = '.'
|
||||||
|
ref_normal = '.'
|
||||||
|
alt_normal = '.'
|
||||||
|
freq_normal = '.'
|
||||||
|
genotype_normal = '.'
|
||||||
|
res = 'no'
|
||||||
|
if key in records_dict:
|
||||||
|
record = records_dict[key]
|
||||||
|
ref = record.ref
|
||||||
|
alt = record.alts[0]
|
||||||
|
freq = record.samples.get(record.samples.keys()[0]).get("AF")[0]
|
||||||
|
freq_normal = record.samples.get(record.samples.keys()[1]).get("AF")[0]
|
||||||
|
ref_normal = ref
|
||||||
|
alt_normal = alt
|
||||||
|
|
||||||
df_position = pd.read_csv(bed, sep='\t', header=None, names=range(4))
|
genotype = simplify_genotype(freq)
|
||||||
position_list = list(df_position[2])
|
genotype_normal = simplify_genotype(freq_normal)
|
||||||
chr_list = list(df_position[0])
|
|
||||||
result_data = []
|
|
||||||
unmatched_data = [] # 用于存储未匹配的数据
|
|
||||||
|
|
||||||
for i in range(len(position_list)):
|
if genotype == genotype_normal:
|
||||||
filtered_lines = [line for line in vcf_data if
|
res = 'yes'
|
||||||
line.split()[1] == str(position_list[i]) and line.split()[0] == str(chr_list[i]) and len(
|
res_pos.append(dict(
|
||||||
line.split()[3]) < 2 and len(line.split()[4]) < 2]
|
chr=bed_chrom,
|
||||||
if not filtered_lines: # 如果没有匹配的点,添加到未匹配数据列表
|
pos=bed_end,
|
||||||
unmatched_data.append(f"{chr_list[i]}\t{position_list[i]}\t.\t.\t.\t.\t.\n")
|
ref=ref,
|
||||||
result_data.extend(filtered_lines)
|
alt=alt,
|
||||||
|
freq=freq,
|
||||||
|
genotype=genotype,
|
||||||
|
ref_normal=ref_normal,
|
||||||
|
alt_normal=alt_normal,
|
||||||
|
freq_normal=freq_normal,
|
||||||
|
genotype_normal=genotype_normal,
|
||||||
|
res=res
|
||||||
|
))
|
||||||
|
res_pos_df = pd.DataFrame(res_pos)
|
||||||
|
res_pos_df_sort = res_pos_df.sort_values(by='chr')
|
||||||
|
res_pos_df_sort.to_csv(os.path.join(output_dir, f'{name}_pollution_pos.txt'), sep="\t", index=False)
|
||||||
|
|
||||||
with open(matched_file, 'w') as output:
|
output_tumor_vcf = open(os.path.join(output_dir, f'{name}_cnvkit_tumor.vcf'), 'w')
|
||||||
for header_line in vcf_header:
|
output_normal_vcf = open(os.path.join(output_dir, f'{name}_cnvkit_normal.vcf'), 'w')
|
||||||
output.write(header_line)
|
for line in str(vcf.header).strip().split('\n'):
|
||||||
for data_line in result_data:
|
output_tumor_vcf.write(line + '\n') if line.startswith('##') else output_tumor_vcf.write(
|
||||||
output.write(data_line)
|
'\t'.join(line.split('\t')[:-1]) + '\n')
|
||||||
|
output_normal_vcf.write(line + '\n') if line.startswith('##') else output_normal_vcf.write(
|
||||||
|
'\t'.join(line.split('\t')[:-2] + line.split('\t')[-1:]) + '\n')
|
||||||
|
|
||||||
with open(unmatched_file, 'w') as unmatched_output:
|
for (bed_chrom, bed_start, bed_end) in cnvkit_bed_regions:
|
||||||
unmatched_output.writelines(unmatched_data)
|
key = (bed_chrom, bed_end)
|
||||||
|
if key in records_dict:
|
||||||
|
record = records_dict[key]
|
||||||
|
output_tumor_vcf.write('\t'.join(str(record).split('\t')[:-1]) + '\n')
|
||||||
|
output_normal_vcf.write('\t'.join(str(record).split('\t')[:-2] + str(record).split('\t')[-1:]) + '\n')
|
||||||
|
|
||||||
return matched_file, unmatched_file
|
output_tumor_vcf.close()
|
||||||
|
output_normal_vcf.close()
|
||||||
|
|
||||||
|
|
||||||
# 处理体系、胚系的vcf,得到目标信息
|
def paired_monitor_vcf(name, tumor_vcf_file, normal_vcf_file, bed_file, cnvkit_bed, output_dir):
|
||||||
def process_judge_vcf(input_vcf, output_vcf):
|
"""
|
||||||
with open(input_vcf, 'r') as input_file, open(output_vcf, 'w') as output_file:
|
624双样本处理
|
||||||
for line in input_file:
|
"""
|
||||||
if not line.startswith("#"):
|
bed_regions = load_bed_regions(bed_file)
|
||||||
fields = line.strip().split('\t')
|
tumor_vcf = pysam.VariantFile(tumor_vcf_file)
|
||||||
|
normal_vcf = pysam.VariantFile(normal_vcf_file)
|
||||||
|
|
||||||
vcf_format = fields[8].split(":")
|
tumor_records_dict = build_record_dict(tumor_vcf)
|
||||||
vcf_normal = fields[-1].split(":")
|
normal_records_dict = build_record_dict(normal_vcf)
|
||||||
vcf_info = dict(zip(vcf_format, vcf_normal))
|
|
||||||
af = vcf_info['AF']
|
|
||||||
vd = vcf_info['VD']
|
|
||||||
# info = fields[9].split(":")
|
|
||||||
# percentage = float(info[6])
|
|
||||||
percentage = float(af)
|
|
||||||
if 0.1 <= percentage <= 0.9:
|
|
||||||
b = 0.5
|
|
||||||
elif percentage < 0.1:
|
|
||||||
b = 0
|
|
||||||
elif percentage > 0.9:
|
|
||||||
b = 1
|
|
||||||
|
|
||||||
# 构建新的行数据
|
res_pos = list()
|
||||||
new_line = '\t'.join([fields[0], fields[1], fields[3], fields[4], af, str(b), vd])
|
|
||||||
output_file.write(new_line + '\n')
|
|
||||||
return output_vcf
|
|
||||||
|
|
||||||
|
# 遍历 bed
|
||||||
|
for (bed_chrom, bed_start, bed_end) in bed_regions:
|
||||||
|
key = (bed_chrom, bed_end)
|
||||||
|
ref = '.'
|
||||||
|
alt = '.'
|
||||||
|
freq = '.'
|
||||||
|
genotype = '.'
|
||||||
|
ref_normal = '.'
|
||||||
|
alt_normal = '.'
|
||||||
|
freq_normal = '.'
|
||||||
|
genotype_normal = '.'
|
||||||
|
res = 'no'
|
||||||
|
|
||||||
def merge_and_sort_files(matched_file, unmatched_file, output_file):
|
if key in tumor_records_dict:
|
||||||
# 检查 unmatched_file 是否为空
|
record = tumor_records_dict[key]
|
||||||
if os.stat(unmatched_file).st_size == 0:
|
ref = record.ref
|
||||||
# 对 matched_file 进行排序并写入 output_file
|
alt = record.alts[0]
|
||||||
matched_df = pd.read_csv(matched_file, sep='\t', header=None)
|
freq = record.samples.get(record.samples.keys()[0]).get("AF")[0]
|
||||||
sorted_df = matched_df.sort_values(by=[0, 1])
|
genotype = simplify_genotype(freq)
|
||||||
sorted_df.to_csv(output_file, sep='\t', header=False, index=False)
|
if key in normal_records_dict:
|
||||||
return output_file
|
record = normal_records_dict[key]
|
||||||
|
ref_normal = record.ref
|
||||||
|
alt_normal = record.alts[0]
|
||||||
|
freq_normal = record.samples.get(record.samples.keys()[0]).get("AF")[0]
|
||||||
|
genotype_normal = simplify_genotype(freq_normal)
|
||||||
|
if genotype == genotype_normal:
|
||||||
|
res = 'yes'
|
||||||
|
res_pos.append(dict(
|
||||||
|
chr=bed_chrom,
|
||||||
|
pos=bed_end,
|
||||||
|
ref=ref,
|
||||||
|
alt=alt,
|
||||||
|
freq=freq,
|
||||||
|
genotype=genotype,
|
||||||
|
ref_normal=ref_normal,
|
||||||
|
alt_normal=alt_normal,
|
||||||
|
freq_normal=freq_normal,
|
||||||
|
genotype_normal=genotype_normal,
|
||||||
|
res=res
|
||||||
|
))
|
||||||
|
|
||||||
# 如果 unmatched_file 不为空,继续合并和排序操作
|
res_pos_df = pd.DataFrame(res_pos)
|
||||||
if os.stat(matched_file).st_size == 0:
|
res_pos_df_sort = res_pos_df.sort_values(by='chr')
|
||||||
matched_df = pd.DataFrame()
|
res_pos_df_sort.to_csv(os.path.join(output_dir, f'{name}_pollution_pos.txt'), sep="\t", index=False)
|
||||||
else:
|
|
||||||
matched_df = pd.read_csv(matched_file, sep='\t', header=None)
|
|
||||||
unmatched_df = pd.read_csv(unmatched_file, sep='\t', header=None)
|
|
||||||
|
|
||||||
# 合并数据帧
|
output_tumor_vcf = pysam.VariantFile(os.path.join(output_dir, f'{name}_cnvkit_tumor.vcf'), 'w',
|
||||||
combined_df = pd.concat([matched_df, unmatched_df])
|
header=tumor_vcf.header)
|
||||||
|
output_normal_vcf = pysam.VariantFile(os.path.join(output_dir, f'{name}_cnvkit_normal.vcf'), 'w',
|
||||||
|
header=normal_vcf.header)
|
||||||
|
|
||||||
# 根据第一列和第二列排序
|
cnvkit_bed_regions = load_bed_regions(cnvkit_bed)
|
||||||
sorted_df = combined_df.sort_values(by=[0, 1])
|
for (bed_chrom, bed_start, bed_end) in cnvkit_bed_regions:
|
||||||
|
key = (bed_chrom, bed_end)
|
||||||
# 将排序后的数据写入输出文件
|
if key in normal_records_dict:
|
||||||
sorted_df.to_csv(output_file, sep='\t', header=False, index=False)
|
output_normal_vcf.write(normal_records_dict[key])
|
||||||
return output_file
|
if key in tumor_records_dict:
|
||||||
|
output_tumor_vcf.write(tumor_records_dict[key])
|
||||||
|
output_tumor_vcf.close()
|
||||||
# 合并体系,胚系
|
output_normal_vcf.close()
|
||||||
def merge_and_compare_files(somatic_file, germline_file, output_merged_file, output_final_file):
|
|
||||||
# 合并两个文件
|
|
||||||
with open(somatic_file, 'r') as somatic, open(germline_file, 'r') as germline:
|
|
||||||
merged_lines = [f"{somatic_line.strip()}\t{germline_line.strip()}" for somatic_line, germline_line in
|
|
||||||
zip(somatic, germline)]
|
|
||||||
|
|
||||||
# 将合并后的数据写入输出文件
|
|
||||||
with open(output_merged_file, 'w') as output_file:
|
|
||||||
output_file.write('\n'.join(merged_lines))
|
|
||||||
|
|
||||||
# 比较两列数据并添加比较结果列
|
|
||||||
with open(output_merged_file, 'r') as merged, open(output_final_file, 'w') as final_output:
|
|
||||||
for line in merged:
|
|
||||||
fields = line.strip().split('\t')
|
|
||||||
if fields[5] == fields[12]:
|
|
||||||
comparison_result = "yes"
|
|
||||||
else:
|
|
||||||
comparison_result = "no"
|
|
||||||
final_output.write(f"{line.strip()}\t{comparison_result}\n")
|
|
||||||
return output_merged_file, output_final_file
|
|
||||||
|
|
||||||
|
|
||||||
# 根据大bed筛选vcf,作cnvkit的图
|
|
||||||
def select_cnvkit_vcf(vcf, bed, output_file):
|
|
||||||
vcf_header = [] # 用于存储VCF文件头部分
|
|
||||||
vcf_data = [] # 用于存储筛选后的VCF数据
|
|
||||||
p_value_list = [] # 用于存储符合条件的 p-value
|
|
||||||
|
|
||||||
# 按行处理,保存为列表
|
|
||||||
with open(vcf, 'r') as vcf_file:
|
|
||||||
for line in vcf_file:
|
|
||||||
if line.startswith("#"):
|
|
||||||
vcf_header.append(line)
|
|
||||||
else:
|
|
||||||
vcf_data.append(line)
|
|
||||||
|
|
||||||
df_position = pd.read_csv(bed, sep='\t', header=None, names=range(4))
|
|
||||||
position_list = list(df_position[2])
|
|
||||||
chr_list = list(df_position[0])
|
|
||||||
result_data = []
|
|
||||||
for i in range(len(position_list)):
|
|
||||||
filtered_lines = [line for line in vcf_data if
|
|
||||||
line.split()[1] == str(position_list[i]) and line.split()[0] == str(chr_list[i]) and len(
|
|
||||||
line.split()[3]) < 2 and len(line.split()[4]) < 2]
|
|
||||||
for line in filtered_lines:
|
|
||||||
# p_value_str = line.split()[9].split(":")[6]
|
|
||||||
# p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str)
|
|
||||||
|
|
||||||
vcf_format = line.split('\t')[8].split(":")
|
|
||||||
vcf_normal = line.split('\t')[-1].split(":")
|
|
||||||
vcf_info = dict(zip(vcf_format, vcf_normal))
|
|
||||||
p_value = float(vcf_info['AF'])
|
|
||||||
|
|
||||||
if 0.1 <= p_value <= 0.9:
|
|
||||||
result_data.append(line)
|
|
||||||
|
|
||||||
with open(output_file, 'w') as output:
|
|
||||||
for header_line in vcf_header:
|
|
||||||
output.write(header_line)
|
|
||||||
for data_line in result_data:
|
|
||||||
output.write(data_line)
|
|
||||||
|
|
||||||
return output_file
|
|
||||||
|
|
||||||
|
|
||||||
def paired_monitoring(name, somatic_vcf, germline_vcf, ref_bed, cnvkit_ref_bed, output_dir):
|
|
||||||
# 处理体系,根据bed筛选
|
|
||||||
select_position_output_file1 = os.path.join(output_dir, f'{name}_somatic_matched.vcf')
|
|
||||||
select_position_output_file2 = os.path.join(output_dir, f'{name}_somatic_unmatched.vcf')
|
|
||||||
somatic_matched_file, somatic_unmatched_file = select_position(somatic_vcf, ref_bed, select_position_output_file1,
|
|
||||||
select_position_output_file2)
|
|
||||||
# 处理胚系,根据bed筛选
|
|
||||||
select_position_output_file3 = os.path.join(output_dir, f'{name}_germline_matched.vcf')
|
|
||||||
select_position_output_file4 = os.path.join(output_dir, f'{name}_germline_unmatched.vcf')
|
|
||||||
germline_matched_file, germline_unmatched_file = select_position(germline_vcf, ref_bed,
|
|
||||||
select_position_output_file3,
|
|
||||||
select_position_output_file4)
|
|
||||||
# 处理体系,数值转换
|
|
||||||
process_judge_vcf_file1 = os.path.join(output_dir, f'{name}_somatic_matched_add_judge.vcf')
|
|
||||||
somatic_matched_add_judge_file = process_judge_vcf(somatic_matched_file, process_judge_vcf_file1)
|
|
||||||
# 处理胚系,数值转换
|
|
||||||
process_judge_vcf_file2 = os.path.join(output_dir, f'{name}_germline_matched_add_judge.vcf')
|
|
||||||
germline_matched_add_judge_file = process_judge_vcf(germline_matched_file, process_judge_vcf_file2)
|
|
||||||
# 合并体系,将匹配到的和未匹配到bed的的合并
|
|
||||||
merge_and_sort_files_file1 = os.path.join(output_dir, f'{name}_somatic_merged.vcf')
|
|
||||||
somatic_merged_file = merge_and_sort_files(somatic_matched_add_judge_file, somatic_unmatched_file,
|
|
||||||
merge_and_sort_files_file1)
|
|
||||||
# 合并胚系,将匹配到的和未匹配到bed的的合并
|
|
||||||
merge_and_sort_files_file2 = os.path.join(output_dir, f'{name}_germline__merged.vcf')
|
|
||||||
Germline_merged_file = merge_and_sort_files(germline_matched_add_judge_file, germline_unmatched_file,
|
|
||||||
merge_and_sort_files_file2)
|
|
||||||
# 合并胚系,体系,将体系,胚系两个合并文件再合并
|
|
||||||
result_pro_file = os.path.join(output_dir, f'{name}_result_pro.txt')
|
|
||||||
result_file = os.path.join(output_dir, f'{name}_contaminate_result.txt')
|
|
||||||
merge_and_compare_files(somatic_merged_file, Germline_merged_file, result_pro_file, result_file)
|
|
||||||
##筛选作图vcf
|
|
||||||
cnvkit_output_file = os.path.join(output_dir, f'{name}_select_cnvkit.vcf')
|
|
||||||
select_cnvkit_vcf(germline_vcf, cnvkit_ref_bed, cnvkit_output_file)
|
|
||||||
##删除中间文件
|
|
||||||
os.remove(select_position_output_file1)
|
|
||||||
os.remove(select_position_output_file2)
|
|
||||||
os.remove(select_position_output_file3)
|
|
||||||
os.remove(select_position_output_file4)
|
|
||||||
os.remove(process_judge_vcf_file1)
|
|
||||||
os.remove(process_judge_vcf_file2)
|
|
||||||
os.remove(merge_and_sort_files_file1)
|
|
||||||
os.remove(merge_and_sort_files_file2)
|
|
||||||
os.remove(result_pro_file)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
@ -276,7 +255,8 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('-b', '--ref_bed', help="ref_bed", required=True)
|
parser.add_argument('-b', '--ref_bed', help="ref_bed", required=True)
|
||||||
parser.add_argument('-v', '--vcf', help="raw vcf for prbe 160 or 17 ; somatic vcf for prbe 682 or 624",
|
parser.add_argument('-v', '--vcf', help="raw vcf for prbe 160 or 17 ; somatic vcf for prbe 682 or 624",
|
||||||
required=True)
|
required=True)
|
||||||
parser.add_argument('-v2', '--vcf2', help="germline vcf; required when prbe 682 or 624", default='', required=False, nargs='?')
|
parser.add_argument('-v2', '--vcf2', help="germline vcf; required when prbe 682 or 624", default='', required=False,
|
||||||
|
nargs='?')
|
||||||
parser.add_argument('-c', '--cnvkit_bed', help="cnvkit_bed; required when prbe 682 or 624")
|
parser.add_argument('-c', '--cnvkit_bed', help="cnvkit_bed; required when prbe 682 or 624")
|
||||||
parser.add_argument('-p', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True)
|
parser.add_argument('-p', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True)
|
||||||
parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='')
|
parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='')
|
||||||
|
|
@ -288,15 +268,18 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
if probe == '160' or probe == '17':
|
if probe == '160' or probe == '17':
|
||||||
freq_range = {"17": [0.3452, 0.6512], "160": [0.2930, 0.6753]}.get(probe)
|
freq_range = {"17": [0.3452, 0.6512], "160": [0.2930, 0.6753]}.get(probe)
|
||||||
single_monitoring(args.name, args.vcf, bed_path, freq_range, args.output_dir)
|
single_monitor(args.name, args.vcf, bed_path, freq_range, args.output_dir)
|
||||||
elif probe == '682' or probe == '624':
|
elif probe == '682':
|
||||||
if not args.vcf2:
|
|
||||||
parser.error('--vcf2 is required in prbe 682 or 624')
|
|
||||||
if not args.cnvkit_bed:
|
if not args.cnvkit_bed:
|
||||||
parser.error('--cnvkit_bed is required in prbe 682 or 624')
|
parser.error('--cnvkit_bed is required in probe 624')
|
||||||
cnvkit_bed_path = os.path.realpath(args.cnvkit_bed)
|
cnvkit_bed_path = os.path.realpath(args.cnvkit_bed)
|
||||||
print(f'污染检测使用cnvkit_bed: {cnvkit_bed_path}')
|
print(f'污染检测使用cnvkit_bed: {cnvkit_bed_path}')
|
||||||
paired_monitoring(args.name, args.vcf, args.vcf2, bed_path, args.cnvkit_bed, args.output_dir)
|
paired_monitor(args.name, args.vcf, bed_path, args.cnvkit_bed, args.output_dir)
|
||||||
|
elif probe == '624':
|
||||||
|
if not args.vcf2:
|
||||||
|
parser.error('--vcf2 is required in probe 624')
|
||||||
|
if not args.cnvkit_bed:
|
||||||
|
parser.error('--cnvkit_bed is required in probe 624')
|
||||||
|
paired_monitor_vcf(args.name, args.vcf, args.vcf2, bed_path, args.cnvkit_bed, args.output_dir)
|
||||||
else:
|
else:
|
||||||
parser.error('probe error. 682, 624, 160, 17 for now')
|
parser.error('probe error. 682, 624, 160, 17 for now')
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -1,128 +1,128 @@
|
||||||
chr1 2488153 2488153
|
chr1 2488152 2488153
|
||||||
chr1 23885599 23885599
|
chr1 23885598 23885599
|
||||||
chr1 65321388 65321388
|
chr1 65321387 65321388
|
||||||
chr1 120612006 120612006
|
chr1 120612005 120612006
|
||||||
chr1 193094375 193094375
|
chr1 193094374 193094375
|
||||||
chr1 226570840 226570840
|
chr1 226570839 226570840
|
||||||
chr2 47693959 47693959
|
chr2 47693958 47693959
|
||||||
chr2 48030838 48030838
|
chr2 48030837 48030838
|
||||||
chr2 61749716 61749716
|
chr2 61749715 61749716
|
||||||
chr2 141032088 141032088
|
chr2 141032087 141032088
|
||||||
chr2 215645464 215645464
|
chr2 215645463 215645464
|
||||||
chr3 30686414 30686414
|
chr3 30686413 30686414
|
||||||
chr3 47125385 47125385
|
chr3 47125384 47125385
|
||||||
chr3 134644636 134644636
|
chr3 134644635 134644636
|
||||||
chr3 142281612 142281612
|
chr3 142281611 142281612
|
||||||
chr3 192053274 192053274
|
chr3 192053273 192053274
|
||||||
chr4 55599436 55599436
|
chr4 55599435 55599436
|
||||||
chr4 143007419 143007419
|
chr4 143007418 143007419
|
||||||
chr4 153252061 153252061
|
chr4 153252060 153252061
|
||||||
chr4 185310218 185310218
|
chr4 185310217 185310218
|
||||||
chr4 25666099 25666099
|
chr4 25666098 25666099
|
||||||
chr5 35857177 35857177
|
chr5 35857176 35857177
|
||||||
chr5 38955796 38955796
|
chr5 38955795 38955796
|
||||||
chr5 39074296 39074296
|
chr5 39074295 39074296
|
||||||
chr5 67522722 67522722
|
chr5 67522721 67522722
|
||||||
chr5 112162854 112162854
|
chr5 112162853 112162854
|
||||||
chr5 149435759 149435759
|
chr5 149435758 149435759
|
||||||
chr5 161119125 161119125
|
chr5 161119124 161119125
|
||||||
chr5 170819887 170819887
|
chr5 170819886 170819887
|
||||||
chr5 180046344 180046344
|
chr5 180046343 180046344
|
||||||
chr6 30858857 30858857
|
chr6 30858856 30858857
|
||||||
chr6 32797876 32797876
|
chr6 32797875 32797876
|
||||||
chr6 36645696 36645696
|
chr6 36645695 36645696
|
||||||
chr6 117678083 117678083
|
chr6 117678082 117678083
|
||||||
chr6 117714346 117714346
|
chr6 117714345 117714346
|
||||||
chr7 2946461 2946461
|
chr7 2946460 2946461
|
||||||
chr7 2962753 2962753
|
chr7 2962752 2962753
|
||||||
chr7 87138645 87138645
|
chr7 87138644 87138645
|
||||||
chr7 151970931 151970931
|
chr7 151970930 151970931
|
||||||
chr8 90955624 90955624
|
chr8 90955623 90955624
|
||||||
chr8 90970935 90970935
|
chr8 90970934 90970935
|
||||||
chr8 90995019 90995019
|
chr8 90995018 90995019
|
||||||
chr8 68864728 68864728
|
chr8 68864727 68864728
|
||||||
chr8 38322346 38322346
|
chr8 38322345 38322346
|
||||||
chr8 145742879 145742879
|
chr8 145742878 145742879
|
||||||
chr9 5081780 5081780
|
chr9 5081779 5081780
|
||||||
chr9 8389364 8389364
|
chr9 8389363 8389364
|
||||||
chr9 93657761 93657761
|
chr9 93657760 93657761
|
||||||
chr9 98209594 98209594
|
chr9 98209593 98209594
|
||||||
chr9 21991923 21991923
|
chr9 21991922 21991923
|
||||||
chr9 37020622 37020622
|
chr9 37020621 37020622
|
||||||
chr9 21975017 21975017
|
chr9 21975016 21975017
|
||||||
chr10 43600689 43600689
|
chr10 43600688 43600689
|
||||||
chr10 89720907 89720907
|
chr10 89720906 89720907
|
||||||
chr10 104596981 104596981
|
chr10 104596980 104596981
|
||||||
chr10 104849468 104849468
|
chr10 104849467 104849468
|
||||||
chr10 104855656 104855656
|
chr10 104855655 104855656
|
||||||
chr10 123239112 123239112
|
chr10 123239111 123239112
|
||||||
chr11 32410774 32410774
|
chr11 32410773 32410774
|
||||||
chr11 69462910 69462910
|
chr11 69462909 69462910
|
||||||
chr11 94197260 94197260
|
chr11 94197259 94197260
|
||||||
chr11 94225807 94225807
|
chr11 94225806 94225807
|
||||||
chr11 94225920 94225920
|
chr11 94225919 94225920
|
||||||
chr11 125497466 125497466
|
chr11 125497465 125497466
|
||||||
chr12 1040373 1040373
|
chr12 1040372 1040373
|
||||||
chr12 4553383 4553383
|
chr12 4553382 4553383
|
||||||
chr12 6711147 6711147
|
chr12 6711146 6711147
|
||||||
chr12 21331625 21331625
|
chr12 21331624 21331625
|
||||||
chr12 49427652 49427652
|
chr12 49427651 49427652
|
||||||
chr12 49444545 49444545
|
chr12 49444544 49444545
|
||||||
chr12 121416622 121416622
|
chr12 121416621 121416622
|
||||||
chr12 133202215 133202215
|
chr12 133202214 133202215
|
||||||
chr13 21562948 21562948
|
chr13 21562947 21562948
|
||||||
chr13 28609825 28609825
|
chr13 28609824 28609825
|
||||||
chr13 32936646 32936646
|
chr13 32936645 32936646
|
||||||
chr13 113889474 113889474
|
chr13 113889473 113889474
|
||||||
chr13 113907391 113907391
|
chr13 113907390 113907391
|
||||||
chr14 105239894 105239894
|
chr14 105239893 105239894
|
||||||
chr14 105246325 105246325
|
chr14 105246324 105246325
|
||||||
chr14 38064215 38064215
|
chr14 38064214 38064215
|
||||||
chr14 20820537 20820537
|
chr14 20820536 20820537
|
||||||
chr14 35871217 35871217
|
chr14 35871216 35871217
|
||||||
chr15 41865488 41865488
|
chr15 41865487 41865488
|
||||||
chr15 51502986 51502986
|
chr15 51502985 51502986
|
||||||
chr15 51529112 51529112
|
chr15 51529111 51529112
|
||||||
chr15 99456253 99456253
|
chr15 99456252 99456253
|
||||||
chr15 99478225 99478225
|
chr15 99478224 99478225
|
||||||
chr15 51558731 51558731
|
chr15 51558730 51558731
|
||||||
chr16 68857441 68857441
|
chr16 68857440 68857441
|
||||||
chr16 359953 359953
|
chr16 359952 359953
|
||||||
chr16 89805977 89805977
|
chr16 89805976 89805977
|
||||||
chr16 89838078 89838078
|
chr16 89838077 89838078
|
||||||
chr16 89857964 89857964
|
chr16 89857963 89857964
|
||||||
chr17 7983969 7983969
|
chr17 7983968 7983969
|
||||||
chr17 29486152 29486152
|
chr17 29486151 29486152
|
||||||
chr17 29508775 29508775
|
chr17 29508774 29508775
|
||||||
chr17 29546175 29546175
|
chr17 29546174 29546175
|
||||||
chr17 29559932 29559932
|
chr17 29559931 29559932
|
||||||
chr17 29653293 29653293
|
chr17 29653292 29653293
|
||||||
chr17 37879762 37879762
|
chr17 37879761 37879762
|
||||||
chr17 62007498 62007498
|
chr17 62007497 62007498
|
||||||
chr17 78919558 78919558
|
chr17 78919557 78919558
|
||||||
chr19 3110349 3110349
|
chr19 3110348 3110349
|
||||||
chr19 3119239 3119239
|
chr19 3119238 3119239
|
||||||
chr19 4101062 4101062
|
chr19 4101061 4101062
|
||||||
chr19 11136215 11136215
|
chr19 11136214 11136215
|
||||||
chr19 15289613 15289613
|
chr19 15289612 15289613
|
||||||
chr19 15295134 15295134
|
chr19 15295133 15295134
|
||||||
chr19 41725271 41725271
|
chr19 41725270 41725271
|
||||||
chr20 43956527 43956527
|
chr20 43956526 43956527
|
||||||
chr20 43956636 43956636
|
chr20 43956635 43956636
|
||||||
chr20 43958850 43958850
|
chr20 43958849 43958850
|
||||||
chr20 43958872 43958872
|
chr20 43958871 43958872
|
||||||
chr20 54959296 54959296
|
chr20 54959295 54959296
|
||||||
chr20 54961541 54961541
|
chr20 54961540 54961541
|
||||||
chr21 37518706 37518706
|
chr21 37518705 37518706
|
||||||
chr21 39752673 39752673
|
chr21 39752672 39752673
|
||||||
chr21 39753375 39753375
|
chr21 39753374 39753375
|
||||||
chr22 41568480 41568480
|
chr22 41568479 41568480
|
||||||
chr22 30038152 30038152
|
chr22 30038151 30038152
|
||||||
chr22 30079213 30079213
|
chr22 30079212 30079213
|
||||||
chrX 76937963 76937963
|
chrX 76937962 76937963
|
||||||
chrX 39922359 39922359
|
chrX 39922358 39922359
|
||||||
chrX 39932907 39932907
|
chrX 39932906 39932907
|
||||||
chrX 44938563 44938563
|
chrX 44938562 44938563
|
||||||
chrX 100608191 100608191
|
chrX 100608190 100608191
|
||||||
chrX 100611285 100611285
|
chrX 100611284 100611285
|
||||||
chrX 76940534 76940534
|
chrX 76940533 76940534
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,128 +1,128 @@
|
||||||
chr1 2488153 2488153
|
chr1 2488152 2488153
|
||||||
chr1 23885599 23885599
|
chr1 23885598 23885599
|
||||||
chr1 65321388 65321388
|
chr1 65321387 65321388
|
||||||
chr1 120612006 120612006
|
chr1 120612005 120612006
|
||||||
chr1 193094375 193094375
|
chr1 193094374 193094375
|
||||||
chr1 226570840 226570840
|
chr1 226570839 226570840
|
||||||
chr2 47693959 47693959
|
chr2 47693958 47693959
|
||||||
chr2 48030838 48030838
|
chr2 48030837 48030838
|
||||||
chr2 61749716 61749716
|
chr2 61749715 61749716
|
||||||
chr2 141032088 141032088
|
chr2 141032087 141032088
|
||||||
chr2 215645464 215645464
|
chr2 215645463 215645464
|
||||||
chr3 30686414 30686414
|
chr3 30686413 30686414
|
||||||
chr3 47125385 47125385
|
chr3 47125384 47125385
|
||||||
chr3 134644636 134644636
|
chr3 134644635 134644636
|
||||||
chr3 142281612 142281612
|
chr3 142281611 142281612
|
||||||
chr3 192053274 192053274
|
chr3 192053273 192053274
|
||||||
chr4 55599436 55599436
|
chr4 55599435 55599436
|
||||||
chr4 143007419 143007419
|
chr4 143007418 143007419
|
||||||
chr4 153252061 153252061
|
chr4 153252060 153252061
|
||||||
chr4 185310218 185310218
|
chr4 185310217 185310218
|
||||||
chr4 25666099 25666099
|
chr4 25666098 25666099
|
||||||
chr5 35857177 35857177
|
chr5 35857176 35857177
|
||||||
chr5 38955796 38955796
|
chr5 38955795 38955796
|
||||||
chr5 39074296 39074296
|
chr5 39074295 39074296
|
||||||
chr5 67522722 67522722
|
chr5 67522721 67522722
|
||||||
chr5 112162854 112162854
|
chr5 112162853 112162854
|
||||||
chr5 149435759 149435759
|
chr5 149435758 149435759
|
||||||
chr5 161119125 161119125
|
chr5 161119124 161119125
|
||||||
chr5 170819887 170819887
|
chr5 170819886 170819887
|
||||||
chr5 180046344 180046344
|
chr5 180046343 180046344
|
||||||
chr6 30858857 30858857
|
chr6 30858856 30858857
|
||||||
chr6 32797876 32797876
|
chr6 32797875 32797876
|
||||||
chr6 36645696 36645696
|
chr6 36645695 36645696
|
||||||
chr6 117678083 117678083
|
chr6 117678082 117678083
|
||||||
chr6 117714346 117714346
|
chr6 117714345 117714346
|
||||||
chr7 2946461 2946461
|
chr7 2946460 2946461
|
||||||
chr7 2962753 2962753
|
chr7 2962752 2962753
|
||||||
chr7 87138645 87138645
|
chr7 87138644 87138645
|
||||||
chr7 151970931 151970931
|
chr7 151970930 151970931
|
||||||
chr8 90955624 90955624
|
chr8 90955623 90955624
|
||||||
chr8 90970935 90970935
|
chr8 90970934 90970935
|
||||||
chr8 90995019 90995019
|
chr8 90995018 90995019
|
||||||
chr8 68864728 68864728
|
chr8 68864727 68864728
|
||||||
chr8 38322346 38322346
|
chr8 38322345 38322346
|
||||||
chr8 145742879 145742879
|
chr8 145742878 145742879
|
||||||
chr9 5081780 5081780
|
chr9 5081779 5081780
|
||||||
chr9 8389364 8389364
|
chr9 8389363 8389364
|
||||||
chr9 93657761 93657761
|
chr9 93657760 93657761
|
||||||
chr9 98209594 98209594
|
chr9 98209593 98209594
|
||||||
chr9 21991923 21991923
|
chr9 21991922 21991923
|
||||||
chr9 37020622 37020622
|
chr9 37020621 37020622
|
||||||
chr9 21975017 21975017
|
chr9 21975016 21975017
|
||||||
chr10 43600689 43600689
|
chr10 43600688 43600689
|
||||||
chr10 89720907 89720907
|
chr10 89720906 89720907
|
||||||
chr10 104596981 104596981
|
chr10 104596980 104596981
|
||||||
chr10 104849468 104849468
|
chr10 104849467 104849468
|
||||||
chr10 104855656 104855656
|
chr10 104855655 104855656
|
||||||
chr10 123239112 123239112
|
chr10 123239111 123239112
|
||||||
chr11 32410774 32410774
|
chr11 32410773 32410774
|
||||||
chr11 69462910 69462910
|
chr11 69462909 69462910
|
||||||
chr11 94197260 94197260
|
chr11 94197259 94197260
|
||||||
chr11 94225807 94225807
|
chr11 94225806 94225807
|
||||||
chr11 94225920 94225920
|
chr11 94225919 94225920
|
||||||
chr11 125497466 125497466
|
chr11 125497465 125497466
|
||||||
chr12 1040373 1040373
|
chr12 1040372 1040373
|
||||||
chr12 4553383 4553383
|
chr12 4553382 4553383
|
||||||
chr12 6711147 6711147
|
chr12 6711146 6711147
|
||||||
chr12 21331625 21331625
|
chr12 21331624 21331625
|
||||||
chr12 49427652 49427652
|
chr12 49427651 49427652
|
||||||
chr12 49444545 49444545
|
chr12 49444544 49444545
|
||||||
chr12 121416622 121416622
|
chr12 121416621 121416622
|
||||||
chr12 133202215 133202215
|
chr12 133202214 133202215
|
||||||
chr13 21562948 21562948
|
chr13 21562947 21562948
|
||||||
chr13 28609825 28609825
|
chr13 28609824 28609825
|
||||||
chr13 32936646 32936646
|
chr13 32936645 32936646
|
||||||
chr13 113889474 113889474
|
chr13 113889473 113889474
|
||||||
chr13 113907391 113907391
|
chr13 113907390 113907391
|
||||||
chr14 105239894 105239894
|
chr14 105239893 105239894
|
||||||
chr14 105246325 105246325
|
chr14 105246324 105246325
|
||||||
chr14 38064215 38064215
|
chr14 38064214 38064215
|
||||||
chr14 20820537 20820537
|
chr14 20820536 20820537
|
||||||
chr14 35871217 35871217
|
chr14 35871216 35871217
|
||||||
chr15 41865488 41865488
|
chr15 41865487 41865488
|
||||||
chr15 51502986 51502986
|
chr15 51502985 51502986
|
||||||
chr15 51529112 51529112
|
chr15 51529111 51529112
|
||||||
chr15 99456253 99456253
|
chr15 99456252 99456253
|
||||||
chr15 99478225 99478225
|
chr15 99478224 99478225
|
||||||
chr15 51558731 51558731
|
chr15 51558730 51558731
|
||||||
chr16 68857441 68857441
|
chr16 68857440 68857441
|
||||||
chr16 359953 359953
|
chr16 359952 359953
|
||||||
chr16 89805977 89805977
|
chr16 89805976 89805977
|
||||||
chr16 89838078 89838078
|
chr16 89838077 89838078
|
||||||
chr16 89857964 89857964
|
chr16 89857963 89857964
|
||||||
chr17 7983969 7983969
|
chr17 7983968 7983969
|
||||||
chr17 29486152 29486152
|
chr17 29486151 29486152
|
||||||
chr17 29508775 29508775
|
chr17 29508774 29508775
|
||||||
chr17 29546175 29546175
|
chr17 29546174 29546175
|
||||||
chr17 29559932 29559932
|
chr17 29559931 29559932
|
||||||
chr17 29653293 29653293
|
chr17 29653292 29653293
|
||||||
chr17 37879762 37879762
|
chr17 37879761 37879762
|
||||||
chr17 62007498 62007498
|
chr17 62007497 62007498
|
||||||
chr17 78919558 78919558
|
chr17 78919557 78919558
|
||||||
chr19 3110349 3110349
|
chr19 3110348 3110349
|
||||||
chr19 3119239 3119239
|
chr19 3119238 3119239
|
||||||
chr19 4101062 4101062
|
chr19 4101061 4101062
|
||||||
chr19 11136215 11136215
|
chr19 11136214 11136215
|
||||||
chr19 15289613 15289613
|
chr19 15289612 15289613
|
||||||
chr19 15295134 15295134
|
chr19 15295133 15295134
|
||||||
chr19 41725271 41725271
|
chr19 41725270 41725271
|
||||||
chr20 43956527 43956527
|
chr20 43956526 43956527
|
||||||
chr20 43956636 43956636
|
chr20 43956635 43956636
|
||||||
chr20 43958850 43958850
|
chr20 43958849 43958850
|
||||||
chr20 43958872 43958872
|
chr20 43958871 43958872
|
||||||
chr20 54959296 54959296
|
chr20 54959295 54959296
|
||||||
chr20 54961541 54961541
|
chr20 54961540 54961541
|
||||||
chr21 37518706 37518706
|
chr21 37518705 37518706
|
||||||
chr21 39752673 39752673
|
chr21 39752672 39752673
|
||||||
chr21 39753375 39753375
|
chr21 39753374 39753375
|
||||||
chr22 41568480 41568480
|
chr22 41568479 41568480
|
||||||
chr22 30038152 30038152
|
chr22 30038151 30038152
|
||||||
chr22 30079213 30079213
|
chr22 30079212 30079213
|
||||||
chrX 76937963 76937963
|
chrX 76937962 76937963
|
||||||
chrX 39922359 39922359
|
chrX 39922358 39922359
|
||||||
chrX 39932907 39932907
|
chrX 39932906 39932907
|
||||||
chrX 44938563 44938563
|
chrX 44938562 44938563
|
||||||
chrX 100608191 100608191
|
chrX 100608190 100608191
|
||||||
chrX 100611285 100611285
|
chrX 100611284 100611285
|
||||||
chrX 76940534 76940534
|
chrX 76940533 76940534
|
||||||
|
|
|
||||||
|
|
@ -1,128 +1,128 @@
|
||||||
chr1 2488153 2488153
|
chr1 2488152 2488153
|
||||||
chr1 23885599 23885599
|
chr1 23885598 23885599
|
||||||
chr1 65321388 65321388
|
chr1 65321387 65321388
|
||||||
chr1 120612006 120612006
|
chr1 120612005 120612006
|
||||||
chr1 193094375 193094375
|
chr1 193094374 193094375
|
||||||
chr1 226570840 226570840
|
chr1 226570839 226570840
|
||||||
chr2 47693959 47693959
|
chr2 47693958 47693959
|
||||||
chr2 48030838 48030838
|
chr2 48030837 48030838
|
||||||
chr2 61749716 61749716
|
chr2 61749715 61749716
|
||||||
chr2 141032088 141032088
|
chr2 141032087 141032088
|
||||||
chr2 215645464 215645464
|
chr2 215645463 215645464
|
||||||
chr3 30686414 30686414
|
chr3 30686413 30686414
|
||||||
chr3 47125385 47125385
|
chr3 47125384 47125385
|
||||||
chr3 134644636 134644636
|
chr3 134644635 134644636
|
||||||
chr3 142281612 142281612
|
chr3 142281611 142281612
|
||||||
chr3 192053274 192053274
|
chr3 192053273 192053274
|
||||||
chr4 55599436 55599436
|
chr4 55599435 55599436
|
||||||
chr4 143007419 143007419
|
chr4 143007418 143007419
|
||||||
chr4 153252061 153252061
|
chr4 153252060 153252061
|
||||||
chr4 185310218 185310218
|
chr4 185310217 185310218
|
||||||
chr4 25666099 25666099
|
chr4 25666098 25666099
|
||||||
chr5 35857177 35857177
|
chr5 35857176 35857177
|
||||||
chr5 38955796 38955796
|
chr5 38955795 38955796
|
||||||
chr5 39074296 39074296
|
chr5 39074295 39074296
|
||||||
chr5 67522722 67522722
|
chr5 67522721 67522722
|
||||||
chr5 112162854 112162854
|
chr5 112162853 112162854
|
||||||
chr5 149435759 149435759
|
chr5 149435758 149435759
|
||||||
chr5 161119125 161119125
|
chr5 161119124 161119125
|
||||||
chr5 170819887 170819887
|
chr5 170819886 170819887
|
||||||
chr5 180046344 180046344
|
chr5 180046343 180046344
|
||||||
chr6 30858857 30858857
|
chr6 30858856 30858857
|
||||||
chr6 32797876 32797876
|
chr6 32797875 32797876
|
||||||
chr6 36645696 36645696
|
chr6 36645695 36645696
|
||||||
chr6 117678083 117678083
|
chr6 117678082 117678083
|
||||||
chr6 117714346 117714346
|
chr6 117714345 117714346
|
||||||
chr7 2946461 2946461
|
chr7 2946460 2946461
|
||||||
chr7 2962753 2962753
|
chr7 2962752 2962753
|
||||||
chr7 87138645 87138645
|
chr7 87138644 87138645
|
||||||
chr7 151970931 151970931
|
chr7 151970930 151970931
|
||||||
chr8 90955624 90955624
|
chr8 90955623 90955624
|
||||||
chr8 90970935 90970935
|
chr8 90970934 90970935
|
||||||
chr8 90995019 90995019
|
chr8 90995018 90995019
|
||||||
chr8 68864728 68864728
|
chr8 68864727 68864728
|
||||||
chr8 38322346 38322346
|
chr8 38322345 38322346
|
||||||
chr8 145742879 145742879
|
chr8 145742878 145742879
|
||||||
chr9 5081780 5081780
|
chr9 5081779 5081780
|
||||||
chr9 8389364 8389364
|
chr9 8389363 8389364
|
||||||
chr9 93657761 93657761
|
chr9 93657760 93657761
|
||||||
chr9 98209594 98209594
|
chr9 98209593 98209594
|
||||||
chr9 21991923 21991923
|
chr9 21991922 21991923
|
||||||
chr9 37020622 37020622
|
chr9 37020621 37020622
|
||||||
chr9 21975017 21975017
|
chr9 21975016 21975017
|
||||||
chr10 43600689 43600689
|
chr10 43600688 43600689
|
||||||
chr10 89720907 89720907
|
chr10 89720906 89720907
|
||||||
chr10 104596981 104596981
|
chr10 104596980 104596981
|
||||||
chr10 104849468 104849468
|
chr10 104849467 104849468
|
||||||
chr10 104855656 104855656
|
chr10 104855655 104855656
|
||||||
chr10 123239112 123239112
|
chr10 123239111 123239112
|
||||||
chr11 32410774 32410774
|
chr11 32410773 32410774
|
||||||
chr11 69462910 69462910
|
chr11 69462909 69462910
|
||||||
chr11 94197260 94197260
|
chr11 94197259 94197260
|
||||||
chr11 94225807 94225807
|
chr11 94225806 94225807
|
||||||
chr11 94225920 94225920
|
chr11 94225919 94225920
|
||||||
chr11 125497466 125497466
|
chr11 125497465 125497466
|
||||||
chr12 1040373 1040373
|
chr12 1040372 1040373
|
||||||
chr12 4553383 4553383
|
chr12 4553382 4553383
|
||||||
chr12 6711147 6711147
|
chr12 6711146 6711147
|
||||||
chr12 21331625 21331625
|
chr12 21331624 21331625
|
||||||
chr12 49427652 49427652
|
chr12 49427651 49427652
|
||||||
chr12 49444545 49444545
|
chr12 49444544 49444545
|
||||||
chr12 121416622 121416622
|
chr12 121416621 121416622
|
||||||
chr12 133202215 133202215
|
chr12 133202214 133202215
|
||||||
chr13 21562948 21562948
|
chr13 21562947 21562948
|
||||||
chr13 28609825 28609825
|
chr13 28609824 28609825
|
||||||
chr13 32936646 32936646
|
chr13 32936645 32936646
|
||||||
chr13 113889474 113889474
|
chr13 113889473 113889474
|
||||||
chr13 113907391 113907391
|
chr13 113907390 113907391
|
||||||
chr14 105239894 105239894
|
chr14 105239893 105239894
|
||||||
chr14 105246325 105246325
|
chr14 105246324 105246325
|
||||||
chr14 38064215 38064215
|
chr14 38064214 38064215
|
||||||
chr14 20820537 20820537
|
chr14 20820536 20820537
|
||||||
chr14 35871217 35871217
|
chr14 35871216 35871217
|
||||||
chr15 41865488 41865488
|
chr15 41865487 41865488
|
||||||
chr15 51502986 51502986
|
chr15 51502985 51502986
|
||||||
chr15 51529112 51529112
|
chr15 51529111 51529112
|
||||||
chr15 99456253 99456253
|
chr15 99456252 99456253
|
||||||
chr15 99478225 99478225
|
chr15 99478224 99478225
|
||||||
chr15 51558731 51558731
|
chr15 51558730 51558731
|
||||||
chr16 68857441 68857441
|
chr16 68857440 68857441
|
||||||
chr16 359953 359953
|
chr16 359952 359953
|
||||||
chr16 89805977 89805977
|
chr16 89805976 89805977
|
||||||
chr16 89838078 89838078
|
chr16 89838077 89838078
|
||||||
chr16 89857964 89857964
|
chr16 89857963 89857964
|
||||||
chr17 7983969 7983969
|
chr17 7983968 7983969
|
||||||
chr17 29486152 29486152
|
chr17 29486151 29486152
|
||||||
chr17 29508775 29508775
|
chr17 29508774 29508775
|
||||||
chr17 29546175 29546175
|
chr17 29546174 29546175
|
||||||
chr17 29559932 29559932
|
chr17 29559931 29559932
|
||||||
chr17 29653293 29653293
|
chr17 29653292 29653293
|
||||||
chr17 37879762 37879762
|
chr17 37879761 37879762
|
||||||
chr17 62007498 62007498
|
chr17 62007497 62007498
|
||||||
chr17 78919558 78919558
|
chr17 78919557 78919558
|
||||||
chr19 3110349 3110349
|
chr19 3110348 3110349
|
||||||
chr19 3119239 3119239
|
chr19 3119238 3119239
|
||||||
chr19 4101062 4101062
|
chr19 4101061 4101062
|
||||||
chr19 11136215 11136215
|
chr19 11136214 11136215
|
||||||
chr19 15289613 15289613
|
chr19 15289612 15289613
|
||||||
chr19 15295134 15295134
|
chr19 15295133 15295134
|
||||||
chr19 41725271 41725271
|
chr19 41725270 41725271
|
||||||
chr20 43956527 43956527
|
chr20 43956526 43956527
|
||||||
chr20 43956636 43956636
|
chr20 43956635 43956636
|
||||||
chr20 43958850 43958850
|
chr20 43958849 43958850
|
||||||
chr20 43958872 43958872
|
chr20 43958871 43958872
|
||||||
chr20 54959296 54959296
|
chr20 54959295 54959296
|
||||||
chr20 54961541 54961541
|
chr20 54961540 54961541
|
||||||
chr21 37518706 37518706
|
chr21 37518705 37518706
|
||||||
chr21 39752673 39752673
|
chr21 39752672 39752673
|
||||||
chr21 39753375 39753375
|
chr21 39753374 39753375
|
||||||
chr22 41568480 41568480
|
chr22 41568479 41568480
|
||||||
chr22 30038152 30038152
|
chr22 30038151 30038152
|
||||||
chr22 30079213 30079213
|
chr22 30079212 30079213
|
||||||
chrX 76937963 76937963
|
chrX 76937962 76937963
|
||||||
chrX 39922359 39922359
|
chrX 39922358 39922359
|
||||||
chrX 39932907 39932907
|
chrX 39932906 39932907
|
||||||
chrX 44938563 44938563
|
chrX 44938562 44938563
|
||||||
chrX 100608191 100608191
|
chrX 100608190 100608191
|
||||||
chrX 100611285 100611285
|
chrX 100611284 100611285
|
||||||
chrX 76940534 76940534
|
chrX 76940533 76940534
|
||||||
|
|
|
||||||
|
|
@ -190,8 +190,10 @@ workflow pipeline {
|
||||||
output_dir=workdir,
|
output_dir=workdir,
|
||||||
probe=probe,
|
probe=probe,
|
||||||
raw_vcf=call_mutation.raw_vcf,
|
raw_vcf=call_mutation.raw_vcf,
|
||||||
somatic_vcf=call_mutation.somatic_vcf,
|
initial_vcf=call_mutation.initial_vcf,
|
||||||
germline_vcf=call_mutation.germline_vcf
|
normal_vcf=call_mutation.normal_vcf,
|
||||||
|
cnv_cnr=call_cnv.cnv_cnr,
|
||||||
|
cnv_cns=call_cnv.cnv_cns
|
||||||
}
|
}
|
||||||
|
|
||||||
call postprocess.call_postprocess as call_postprocess {
|
call postprocess.call_postprocess as call_postprocess {
|
||||||
|
|
|
||||||
|
|
@ -75,6 +75,8 @@ task mutation_calling_umi {
|
||||||
-e 'INFO/AF[0] >= 0.1' \
|
-e 'INFO/AF[0] >= 0.1' \
|
||||||
-o ${output_dir}/mutation/${name}.snp_indel.germline.vcf
|
-o ${output_dir}/mutation/${name}.snp_indel.germline.vcf
|
||||||
|
|
||||||
|
cp ${output_dir}/mutation/${name}.1r.vcf ${output_dir}/mutation/${name}.initial.vcf
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
|
|
@ -165,8 +167,13 @@ task mutation_calling_umi_control {
|
||||||
${output_dir}/mutation/${name}.raw.addtagmsi.addmutalt.snp_indel.vcf
|
${output_dir}/mutation/${name}.raw.addtagmsi.addmutalt.snp_indel.vcf
|
||||||
|
|
||||||
cp ${output_dir}/mutation/${name}.raw.addtagmsi.addmutalt.snp_indel.vcf ${output_dir}/mutation/${name}.snp_indel.somatic.vcf
|
cp ${output_dir}/mutation/${name}.raw.addtagmsi.addmutalt.snp_indel.vcf ${output_dir}/mutation/${name}.snp_indel.somatic.vcf
|
||||||
cp ${output_dir}/mutation/${name}.normal.vcf ${output_dir}/mutation/${name}.snp_indel.germline.vcf
|
# cp ${output_dir}/mutation/${name}.normal.vcf ${output_dir}/mutation/${name}.snp_indel.germline.vcf
|
||||||
|
vcf_filter.py \
|
||||||
|
-i ${output_dir}/mutation/${name}.normal.vcf \
|
||||||
|
-e 'INFO/AF[0] >= 0.1' \
|
||||||
|
-o ${output_dir}/mutation/${name}.snp_indel.germline.vcf
|
||||||
|
|
||||||
|
cp ${output_dir}/mutation/${name}.1r.vcf ${output_dir}/mutation/${name}.initial.vcf
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
|
|
@ -221,6 +228,8 @@ task mutation_calling_tissue {
|
||||||
-e 'INFO/AF[0] > 0.1' \
|
-e 'INFO/AF[0] > 0.1' \
|
||||||
-o ${output_dir}/mutation/${name}.snp_indel.germline.vcf
|
-o ${output_dir}/mutation/${name}.snp_indel.germline.vcf
|
||||||
|
|
||||||
|
cp ${output_dir}/mutation/${name}.raw.vcf ${output_dir}/mutation/${name}.initial.vcf
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
|
|
@ -274,6 +283,8 @@ task mutation_calling_tissue_control {
|
||||||
-o ${output_dir}/mutation/${name}.snp_indel.germline.vcf \
|
-o ${output_dir}/mutation/${name}.snp_indel.germline.vcf \
|
||||||
-e 'INFO/STATUS="Germline" && FORMAT/AF[0] > 0.1 && FORMAT/AF[1] > 0.1'
|
-e 'INFO/STATUS="Germline" && FORMAT/AF[0] > 0.1 && FORMAT/AF[1] > 0.1'
|
||||||
|
|
||||||
|
cp ${output_dir}/mutation/${name}.raw.vcf ${output_dir}/mutation/${name}.initial.vcf
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
|
|
@ -824,5 +835,7 @@ workflow call_mutation {
|
||||||
String somatic_filter = "${output_dir}/mutation/${tumor}.snp_indel.somatic.hg19_multianno.filter.txt"
|
String somatic_filter = "${output_dir}/mutation/${tumor}.snp_indel.somatic.hg19_multianno.filter.txt"
|
||||||
String germline_filter = "${output_dir}/mutation/${tumor}.snp_indel.germline.hg19_multianno.filter.txt"
|
String germline_filter = "${output_dir}/mutation/${tumor}.snp_indel.germline.hg19_multianno.filter.txt"
|
||||||
String hotspot_filter = "${output_dir}/mutation/${tumor}.snp_indel.hotspot.hg19_multianno.filter.txt"
|
String hotspot_filter = "${output_dir}/mutation/${tumor}.snp_indel.hotspot.hg19_multianno.filter.txt"
|
||||||
|
String initial_vcf = "${output_dir}/mutation/${tumor}.initial.vcf"
|
||||||
|
String normal_vcf = "${output_dir}/mutation/${tumor}.normal.vcf"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -151,6 +151,8 @@ workflow call_cnv {
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String cnv_filter = "${output_dir}/cnv/${tumor}.rmdup.cns.filter.txt"
|
String cnv_filter = "${output_dir}/cnv/${tumor}.rmdup.cns.filter.txt"
|
||||||
|
String cnv_cnr = "${output_dir}/cnv/${tumor}.rmdup.cnr"
|
||||||
|
String cnv_cns = "${output_dir}/cnv/${tumor}.rmdup.cns"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,25 @@ task run_pollution {
|
||||||
-c $PUBLIC/pollution/${probe}_contaminate_cnvkit.bed
|
-c $PUBLIC/pollution/${probe}_contaminate_cnvkit.bed
|
||||||
|
|
||||||
}
|
}
|
||||||
|
output {
|
||||||
|
String cnvkit_tumor_vcf = "${output_dir}/pollution/${name}_cnvkit_tumor.vcf"
|
||||||
|
String cnvkit_normal_vcf = "${output_dir}/pollution/${name}_cnvkit_normal.vcf"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
task run_generate_png {
|
||||||
|
String name
|
||||||
|
String probe
|
||||||
|
String cnvkit_tumor_vcf
|
||||||
|
String cnvkit_normal_vcf
|
||||||
|
String cnv_cnr
|
||||||
|
String cnv_cns
|
||||||
|
String output_dir
|
||||||
|
|
||||||
|
command {
|
||||||
|
cnvkit.py scatter ${cnv_cnr} -s ${cnv_cns} -v ${cnvkit_tumor_vcf} -o ${output_dir}/pollution/${name}_pollution_cnvkit_tumor.png
|
||||||
|
cnvkit.py scatter ${cnv_cnr} -s ${cnv_cns} -v ${cnvkit_normal_vcf} -o ${output_dir}/pollution/${name}_pollution_cnvkit_normal.png
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
workflow call_pollution {
|
workflow call_pollution {
|
||||||
|
|
@ -32,8 +51,10 @@ workflow call_pollution {
|
||||||
String output_dir
|
String output_dir
|
||||||
String probe
|
String probe
|
||||||
String raw_vcf
|
String raw_vcf
|
||||||
String somatic_vcf
|
String initial_vcf
|
||||||
String germline_vcf
|
String normal_vcf
|
||||||
|
String cnv_cnr
|
||||||
|
String cnv_cns
|
||||||
|
|
||||||
if (run) {
|
if (run) {
|
||||||
if (defined(normal)) {
|
if (defined(normal)) {
|
||||||
|
|
@ -42,8 +63,19 @@ workflow call_pollution {
|
||||||
name=tumor,
|
name=tumor,
|
||||||
output_dir=output_dir,
|
output_dir=output_dir,
|
||||||
probe=probe,
|
probe=probe,
|
||||||
vcf=somatic_vcf,
|
vcf=initial_vcf,
|
||||||
vcf2=germline_vcf
|
vcf2=normal_vcf
|
||||||
|
}
|
||||||
|
call run_generate_png {
|
||||||
|
input:
|
||||||
|
name=tumor,
|
||||||
|
probe=probe,
|
||||||
|
cnvkit_tumor_vcf=run_pollution_paired.cnvkit_tumor_vcf,
|
||||||
|
cnvkit_normal_vcf=run_pollution_paired.cnvkit_normal_vcf,
|
||||||
|
cnv_cnr=cnv_cnr,
|
||||||
|
cnv_cns=cnv_cns,
|
||||||
|
output_dir=output_dir
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -53,12 +85,12 @@ workflow call_pollution {
|
||||||
name=tumor,
|
name=tumor,
|
||||||
output_dir=output_dir,
|
output_dir=output_dir,
|
||||||
probe=probe,
|
probe=probe,
|
||||||
vcf=raw_vcf
|
vcf=initial_vcf
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String pollution_res = "${output_dir}/pollution/${tumor}_pollution.csv"
|
String pollution_res = "${output_dir}/pollution/${tumor}_pollution.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue