diff --git a/codes/postprocess.py b/codes/postprocess.py index a56427b..909ac9a 100755 --- a/codes/postprocess.py +++ b/codes/postprocess.py @@ -234,16 +234,23 @@ class PostProcess: cols = list(filter_sum_df.columns) tmb_file_check = check_file_exist_and_empty(tmb_file) + filter_sum_df = filter_sum_df.sort_index() if not tmb_file_check: tmb_df = pd.read_csv(tmb_file, sep='\t') key_cols = ['Chr', 'Start', 'End'] filter_sum_df = filter_sum_df.set_index(key_cols) + filter_sum_df = filter_sum_df.sort_index() tmb_df = tmb_df.set_index(key_cols) # 在filter_sum_df中的process列中追加字符串";tmb",对应tmb_df中的行 并且 是非 1,2类突变 - filter_sum_df['process'] = filter_sum_df.index.map( - lambda x: filter_sum_df.at[x, 'process'] + ';tmb' if x in tmb_df.index and filter_sum_df.at[ - x, 'AMP_mut_level'] not in ['I', 'II'] else filter_sum_df.at[x, 'process']) + # filter_sum_df['process'] = filter_sum_df.index.map( + # lambda x: filter_sum_df.at[x, 'process'] + ';tmb' if (x in tmb_df.index) and (filter_sum_df.at[ + # x, 'AMP_mut_level'] not in ['I', 'II']) else filter_sum_df.at[x, 'process']) + for index, row in filter_sum_df.iterrows(): + amp_mut_level = row['AMP_mut_level'] + process = row['process'] + if (index in tmb_df.index) and (amp_mut_level not in ['I', 'II']): + filter_sum_df.loc[index, 'process'] = process + ';tmb' # 找到tmb_df中不在filter_sum_df中的行,并将这些新的行添加到filter_sum_df中 new_rows = tmb_df[~tmb_df.index.isin(filter_sum_df.index)]