25 lines
882 B
Python
25 lines
882 B
Python
|
|
#!/usr/bin/python
|
||
|
|
"""
|
||
|
|
组织双样本过滤
|
||
|
|
"""
|
||
|
|
|
||
|
|
import pandas as pd
|
||
|
|
|
||
|
|
|
||
|
|
def main(path):
    """Load an annotated mutation table and tag every row.

    The file at ``path`` is read as tab-separated text.  Two helper columns
    are derived and one classification column is filled in:

    * ``freq``      - the text captured after ``AF=`` in ``Otherinfo11``
                      (kept as a string, NaN when the pattern is absent).
    * ``freq_high`` - the row-wise maximum over the population-frequency
                      columns listed in ``population_cols``.
    * ``Tag``       - ``'pass'`` by default, then overwritten in this order
                      (later rules win): ``'synonymous'``, ``'unknown_snp'``,
                      ``'common_snp'``.

    :param path: path of the tab-separated annotation file to read.
    :raises ValueError: if a population-frequency cell is neither ``'.'``
        nor a number.

    NOTE(review): nothing is returned or written in the part of this
    function that is visible here - confirm how the caller consumes the
    tagged table.
    """
    population_cols = [
        '1000g2015aug_all', '1000g2015aug_eas', 'esp6500siv2_all',
        'ExAC_nontcga_ALL', 'ExAC_nontcga_EAS',
        'gnomAD_genome_ALL', 'gnomAD_genome_EAS',
    ]

    muts = pd.read_csv(path, sep='\t')

    # expand=False yields a Series for the single capture group.
    muts['freq'] = muts['Otherinfo11'].str.extract(r'AF=([\d.]+)', expand=False)
    # Disabled extractions, kept for reference:
    # muts['total_reads'] = muts['Otherinfo11'].str.extract(r'DP=(\d+)')
    # muts['mutant_reads'] = muts['Otherinfo11'].str.extract(r'VD=(\d+)')

    muts['Tag'] = 'pass'
    muts.loc[muts['ExonicFunc.refGene'] == 'synonymous SNV', 'Tag'] = 'synonymous'
    muts.loc[muts['ExonicFunc.refGene'] == 'unknown', 'Tag'] = 'unknown_snp'

    # A lone '.' is treated as frequency 0.  Values are parsed with
    # pd.to_numeric instead of eval(): eval() would execute arbitrary
    # expressions found in the input file and fails on empty (NaN) cells.
    population_freqs = (
        muts[population_cols]
        .replace('.', '0')
        .apply(pd.to_numeric)
    )
    # max() skips NaN, so a row is judged on the frequencies it does have.
    muts['freq_high'] = population_freqs.max(axis=1)
    muts.loc[muts['freq_high'] > 0.01, 'Tag'] = 'common_snp'
|
||
|
|
|
||
|
|
|