更新bug

master
chaopower 2023-12-12 10:59:08 +08:00
parent 388296bffa
commit 2844ff3592
3 changed files with 28 additions and 4 deletions

File diff suppressed because one or more lines are too long

View File

@ -31,7 +31,7 @@ class HereditaryRun:
result_df = pd.DataFrame(columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation'])
for _, rows in data.iterrows():
matches = re.match(r"([A-Za-z0-9]+):.*:(p\..*)", rows['AAChange_refGene'])
row_df = pd.DataFrame(columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation'])
row_df = pd.DataFrame(columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation', 'ClinicalSign'])
gene, mutation = '', ''
if matches:
gene = matches.group(1)
@ -46,6 +46,8 @@ class HereditaryRun:
row_df['Gene'] = gene
row_df['mutation'] = mutation
row_df['genotype'] = '纯合' if rows['Freq'] > 0.8 else '杂合'
row_df['ClinicalSign'] = str(rows['ClinicalSign'])
result_df = pd.concat([result_df, row_df])
hereditaryfile = os.path.join(self.output_dir, f'{self.name}.hereditary.txt')
result_df.to_csv(hereditaryfile, sep='\t', index=False)

View File

@ -251,8 +251,19 @@ class PostProcess:
self.drug_parse(filter_sum_pos_df['DrugCn'].to_list())
filter_sum_pos_df['Validated'] = 1
filter_sum_pos_df = filter_sum_pos_df.fillna('.')
filter_sum_pos_res = filter_sum_pos_df.to_dict('records')
grouped_df = filter_sum_pos_df.groupby(['POS', 'REF', 'ALT'])
# For each (POS, REF, ALT) group: if any row has AMP_mut_level 'I', set every row of that group to 'I'
for group_name, group_data in grouped_df:
pos, ref, alt = group_name
if any(group_data['AMP_mut_level'] == 'I'):
filter_condition = (filter_sum_pos_df['POS'] == pos) & \
(filter_sum_pos_df['REF'] == ref) & \
(filter_sum_pos_df['ALT'] == alt)
filter_sum_pos_df.loc[filter_condition, 'AMP_mut_level'] = 'I'
pos_dict = filter_sum_pos_df.set_index(['POS', 'REF', 'ALT'])['AMP_mut_level'].to_dict()
filter_sum_pos_res = filter_sum_pos_df.to_dict('records')
filter_sum = os.path.join(self.path, 'fusion',
f'{self.sample_name}.fusion.hg19_multianno.filter.txt')
@ -288,6 +299,17 @@ class PostProcess:
self.drug_parse(filter_sum_pos_df['DrugCn'].to_list())
filter_sum_pos_df = filter_sum_pos_df.fillna('.')
filter_sum_pos_df['Validated'] = 1
grouped_df = filter_sum_pos_df.groupby(['chromosome', 'start', 'end', 'ref_gene'])
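# For each (chromosome, start, end, ref_gene) group: if any row has AMP_mut_level 'I', set every row of that group to 'I'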
for group_name, group_data in grouped_df:
chromosome, start, end, ref_gene = group_name
if any(group_data['AMP_mut_level'] == 'I'):
filter_condition = (filter_sum_pos_df['chromosome'] == chromosome) & \
(filter_sum_pos_df['start'] == start) & \
(filter_sum_pos_df['end'] == end) & \
(filter_sum_pos_df['ref_gene'] == ref_gene)
filter_sum_pos_df.loc[filter_condition, 'AMP_mut_level'] = 'I'
filter_sum_pos_res = filter_sum_pos_df.to_dict('records')
pos_dict = filter_sum_pos_df.set_index(['chromosome', 'start', 'end', 'ref_gene'])[
'AMP_mut_level'].to_dict()