遗传分解hgvs,没有p. 采用c.
parent
778d55ed5b
commit
fb36b97329
|
|
@ -6,6 +6,32 @@ import re
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def split_hgvs(hgvs):
    """Split a colon-delimited HGVS annotation string into named fields.

    The string must contain exactly four or five ``:``-separated fields.
    They are returned, in order, under the keys ``'gene'``,
    ``'transcript'``, ``'exon'``, ``'nacid'`` and ``'aacid'``.

    Args:
        hgvs: The annotation string to split, for example
            ``'GENE:NM_000001:exon2:c.10A>G:p.K4E'``.

    Returns:
        A dict with the keys ``'gene'``, ``'transcript'``, ``'exon'``,
        ``'nacid'`` and ``'aacid'``. ``'aacid'`` is ``None`` when the
        input has only four fields.

    Raises:
        ValueError: If ``hgvs`` is not a string, or if it does not contain
            exactly four or five ``:``-separated fields.
    """
    # Missing cells read from a table are typically not strings (e.g. NaN);
    # report them as a format error rather than failing on ``.split``.
    if not isinstance(hgvs, str):
        raise ValueError(f'Invalid HGVS format: {hgvs!r}')

    fields = hgvs.split(':')
    if len(fields) == 4:
        # No fifth field present: expose it as None so callers can fall
        # back to the 'nacid' value.
        fields.append(None)
    elif len(fields) != 5:
        raise ValueError(f'Invalid HGVS format: {hgvs}')

    gene, transcript, exon, nacid, aacid = fields
    return {
        'gene': gene,
        'transcript': transcript,
        'exon': exon,
        'nacid': nacid,
        'aacid': aacid,
    }
|
||||||
|
|
||||||
|
|
||||||
class HereditaryRun:
|
class HereditaryRun:
|
||||||
|
|
||||||
def __init__(self, database, project, output_dir, name, file):
|
def __init__(self, database, project, output_dir, name, file):
|
||||||
|
|
@ -30,21 +56,19 @@ class HereditaryRun:
|
||||||
|
|
||||||
result_df = pd.DataFrame(columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation'])
|
result_df = pd.DataFrame(columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation'])
|
||||||
for _, rows in data.iterrows():
|
for _, rows in data.iterrows():
|
||||||
matches = re.match(r"([A-Za-z0-9]+):.*:(p\..*)", rows['AAChange_refGene'])
|
# matches = re.match(r"([A-Za-z0-9]+):.*:(p\..*)", rows['AAChange_refGene'])
|
||||||
row_df = pd.DataFrame(columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation', 'ClinicalSign'])
|
matches = split_hgvs(rows['AAChange_refGene'])
|
||||||
gene, mutation = '', ''
|
gene = matches['gene']
|
||||||
if matches:
|
aacid = matches['aacid'] if matches['aacid'] else matches['nacid']
|
||||||
gene = matches.group(1)
|
row_df = pd.DataFrame(
|
||||||
mutation = matches.group(2)
|
columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation', 'ClinicalSign'])
|
||||||
else:
|
|
||||||
raise UserWarning('HGVS 解析错误!')
|
|
||||||
|
|
||||||
selected_rows = expanded_database[expanded_database['Gene'].str.split(';').apply(lambda x: gene in x)]
|
selected_rows = expanded_database[expanded_database['Gene'].str.split(';').apply(lambda x: gene in x)]
|
||||||
|
|
||||||
row_df['Syndrome_Cn'] = selected_rows['Syndrome_Cn']
|
row_df['Syndrome_Cn'] = selected_rows['Syndrome_Cn']
|
||||||
row_df['inheritance'] = selected_rows['inheritance']
|
row_df['inheritance'] = selected_rows['inheritance']
|
||||||
row_df['Gene'] = gene
|
row_df['Gene'] = gene
|
||||||
row_df['mutation'] = mutation
|
row_df['mutation'] = aacid
|
||||||
row_df['genotype'] = '纯合' if rows['Freq'] > 0.9 else '杂合'
|
row_df['genotype'] = '纯合' if rows['Freq'] > 0.9 else '杂合'
|
||||||
row_df['ClinicalSign'] = str(rows['ClinicalSign'])
|
row_df['ClinicalSign'] = str(rows['ClinicalSign'])
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue