#!/usr/bin/python
"""Tissue paired-sample filtering."""
import pandas as pd


def main(path):
    """Tag every variant in a tab-separated, annotated mutation table.

    The table at ``path`` is read with pandas and gains three columns:

    * ``freq``      -- the text captured after ``AF=`` in ``Otherinfo11``
      (left as a string, NaN when the pattern is absent).
    * ``freq_high`` -- the largest value found across the population
      frequency columns for that row.
    * ``Tag``       -- ``'pass'`` by default, overridden in this order by
      ``'synonymous'``, ``'unknown_snp'`` and finally ``'common_snp'``
      (``freq_high`` above 0.01), so later rules win over earlier ones.

    Args:
        path: Location of the tab-separated input file.

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If a population frequency cell holds something that is
            neither ``'.'``, blank, nor a number.
    """
    muts = pd.read_csv(path, sep='\t')
    muts['freq'] = muts['Otherinfo11'].str.extract(r'AF=([\d.]+)')
    # muts['total_reads'] = muts['Otherinfo11'].str.extract(r'DP=(\d+)')
    # muts['mutant_reads'] = muts['Otherinfo11'].str.extract(r'VD=(\d+)')

    muts['Tag'] = 'pass'
    muts.loc[muts['ExonicFunc.refGene'] == 'synonymous SNV', 'Tag'] = 'synonymous'
    muts.loc[muts['ExonicFunc.refGene'] == 'unknown', 'Tag'] = 'unknown_snp'

    # '.' is the placeholder for "no frequency recorded" and counts as 0.
    # pd.to_numeric replaces the former eval() call: it never executes file
    # contents, and blank cells stay NaN (ignored by max) instead of crashing
    # with NameError on the string 'nan'.
    tmdf1 = muts[
        ['1000g2015aug_all', '1000g2015aug_eas', 'esp6500siv2_all',
         'ExAC_nontcga_ALL', 'ExAC_nontcga_EAS', 'gnomAD_genome_ALL',
         'gnomAD_genome_EAS']
    ].replace('.', '0').apply(pd.to_numeric)
    muts['freq_high'] = tmdf1.max(axis=1)
    muts.loc[muts['freq_high'] > 0.01, 'Tag'] = 'common_snp'