pipeline/script/split_vcf.py

22 lines
775 B
Python
Raw Normal View History

2023-10-10 11:09:16 +08:00
import vcf
# Filter a paired (experimental / control) VCF down to somatic calls whose
# allele frequency (AF) is higher in the experimental sample than in the
# control sample.
#
# Reads 'your_input.vcf' and writes the surviving records to 'output.vcf',
# using the input reader as the template for the output header.
# A record is kept when both of the following hold:
#   * INFO/STATUS is present and contains 'StrongSomatic' or 'LikelySomatic';
#   * the AF of 'experimental_sample' is greater than the AF of
#     'control_sample'.
SOMATIC_STATUSES = ('StrongSomatic', 'LikelySomatic')

# Open the input VCF; the context manager closes it even if a later step raises.
with open('your_input.vcf', 'r') as vcf_in:
    # The header is parsed here, before the output file is created, so a
    # broken input does not truncate an existing 'output.vcf'.
    vcf_reader = vcf.Reader(vcf_in)

    # Open the output VCF; leaving the block flushes and closes it.
    with open('output.vcf', 'w') as vcf_out:
        vcf_writer = vcf.Writer(vcf_out, vcf_reader)

        # Walk every record of the input VCF.
        for record in vcf_reader:
            # Records without a STATUS annotation are never somatic candidates.
            if 'STATUS' not in record.INFO:
                continue
            status = record.INFO['STATUS']
            if not any(label in status for label in SOMATIC_STATUSES):
                continue

            # Fetch the AF of the experimental and the control sample.
            experimental_af = record.genotype('experimental_sample')['AF']
            control_af = record.genotype('control_sample')['AF']

            # NOTE(review): PyVCF is expected to report a missing value ('.')
            # as None - confirm. Ordering None against a number raises
            # TypeError on Python 3, so such records are skipped instead of
            # aborting the whole run.
            if experimental_af is None or control_af is None:
                continue

            # Keep the record only when the experimental AF exceeds the control AF.
            if experimental_af > control_af:
                vcf_writer.write_record(record)