From def064acf4f28c32ae68f6888ef040678e4ca0f5 Mon Sep 17 00:00:00 2001 From: chaopower Date: Wed, 10 Jan 2024 09:46:04 +0800 Subject: [PATCH] bug --- codes/pollution.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/codes/pollution.py b/codes/pollution.py index 67879c5..03cc256 100755 --- a/codes/pollution.py +++ b/codes/pollution.py @@ -109,9 +109,15 @@ def process_judge_vcf(input_vcf, output_vcf): for line in input_file: if not line.startswith("#"): fields = line.strip().split('\t') - info = fields[9].split(":") - percentage = float(info[6]) + vcf_format = fields[8].split(":") + vcf_normal = fields[-1].split(":") + vcf_info = dict(zip(vcf_format, vcf_normal)) + af = vcf_info['AF'] + vd = vcf_info['VD'] + # info = fields[9].split(":") + # percentage = float(info[6]) + percentage = float(af) if 0.1 <= percentage <= 0.9: b = 0.5 elif percentage < 0.1: @@ -120,7 +126,7 @@ def process_judge_vcf(input_vcf, output_vcf): b = 1 # 构建新的行数据 - new_line = '\t'.join([fields[0], fields[1], fields[3], fields[4], info[4], str(b), info[2]]) + new_line = '\t'.join([fields[0], fields[1], fields[3], fields[4], af, str(b), vd]) output_file.write(new_line + '\n') return output_vcf @@ -198,8 +204,14 @@ def select_cnvkit_vcf(vcf, bed, output_file): line.split()[1] == str(position_list[i]) and line.split()[0] == str(chr_list[i]) and len( line.split()[3]) < 2 and len(line.split()[4]) < 2] for line in filtered_lines: - p_value_str = line.split()[9].split(":")[6] - p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str) + # p_value_str = line.split()[9].split(":")[6] + # p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str) + + vcf_format = line.split('\t')[8].split(":") + vcf_normal = line.split('\t')[-1].split(":") + vcf_info = dict(zip(vcf_format, vcf_normal)) + p_value = float(vcf_info['AF']) + if 0.1 <= p_value <= 0.9: result_data.append(line)