pipeline/script/filter_snpindel.py

25 lines
882 B
Python
Raw Normal View History

2023-10-10 11:09:16 +08:00
#!/usr/bin/python
"""
组织双样本过滤
"""
import pandas as pd
def main(path, max_pop_freq=0.01):
    """Load a tab-separated mutation table and tag every row.

    The table is read with pandas and three columns are added in memory:

    * ``freq`` - the text captured after ``AF=`` in the ``Otherinfo11``
      column (left as a string, exactly as extracted).
    * ``Tag`` - ``'pass'`` by default, then overwritten in this order:
      ``'synonymous'`` where ``ExonicFunc.refGene`` is ``'synonymous SNV'``,
      ``'unknown_snp'`` where it is ``'unknown'``, and finally
      ``'common_snp'`` where ``freq_high`` exceeds ``max_pop_freq``. Because
      the population-frequency rule is applied last, it wins over the other
      two tags.
    * ``freq_high`` - the row-wise maximum over the seven population
      frequency columns listed below.

    Args:
        path: Location of the tab-separated input table. The column names
            look like annotation-tool output (presumably ANNOVAR - confirm
            against the upstream pipeline step).
        max_pop_freq: Rows whose highest population frequency is strictly
            greater than this value are tagged ``'common_snp'``. Defaults
            to ``0.01``, the cutoff that was previously hard-coded.

    Raises:
        KeyError: If a required column is absent from the table.
        ValueError: If a population-frequency cell holds text that is
            neither a number nor the ``'.'`` placeholder.

    NOTE(review): the visible body neither returns nor writes ``muts``;
    confirm whether output handling is expected to follow.
    """
    muts = pd.read_csv(path, sep='\t')

    # Allele frequency as written in the "AF=" field of Otherinfo11.
    # The original author also left disabled extractions of "DP=" and "VD="
    # from the same column (total_reads / mutant_reads); they stay disabled.
    muts['freq'] = muts['Otherinfo11'].str.extract(r'AF=([\d.]+)')

    muts['Tag'] = 'pass'
    muts.loc[muts['ExonicFunc.refGene'] == 'synonymous SNV', 'Tag'] = 'synonymous'
    muts.loc[muts['ExonicFunc.refGene'] == 'unknown', 'Tag'] = 'unknown_snp'

    population_cols = [
        '1000g2015aug_all',
        '1000g2015aug_eas',
        'esp6500siv2_all',
        'ExAC_nontcga_ALL',
        'ExAC_nontcga_EAS',
        'gnomAD_genome_ALL',
        'gnomAD_genome_EAS',
    ]
    # '.' is the placeholder for "no value" and counts as frequency 0.
    # pd.to_numeric replaces the former eval(str(x)) call: it never executes
    # file content as Python, and it tolerates empty cells (NaN), which used
    # to crash with NameError. NaN is likewise treated as 0.
    pop_freqs = (
        muts[population_cols]
        .apply(lambda col: pd.to_numeric(col.replace('.', '0')))
        .fillna(0)
    )
    muts['freq_high'] = pop_freqs.max(axis=1)

    # Applied last on purpose: a common polymorphism overrides earlier tags.
    muts.loc[muts['freq_high'] > max_pop_freq, 'Tag'] = 'common_snp'