更新nextera

main
chaopower 2024-01-16 18:02:24 +08:00
parent 21ba86b6e2
commit dbd9c852a8
1 changed files with 52 additions and 11 deletions

View File

@ -38,6 +38,10 @@ class AutoLayout:
self.rule = self.read_rule()
# 甲基化文库不大于200,WGBS文库不大于200G
self.chip_speciallib_size = dict()
# Nextera 文库大小
self.chip_speciallib_nextera_size = dict()
self.logger = log(os.path.basename(f'{path}.txt'))
self.return_log = list()
self.no_assign_data = list()
@ -50,7 +54,7 @@ class AutoLayout:
merge = pd.read_excel(self.path, None)
ori_data = dict()
for name, sheet in merge.items():
sheet.fillna('.', inplace=True)
sheet.fillna('', inplace=True)
ori_data[name] = sheet.to_dict('records')
return ori_data
@ -65,6 +69,9 @@ class AutoLayout:
self.index_assignments[chipname].extend(library_data['data'])
self.chip_barcode_recode[chipname].update({item['barcode'] for item in library_data['data']})
self.chip_customer[chipname].add(library_data['customer'])
self.chip_classification[chipname].add(library_data['classification'])
if newer:
self.chip_size[chipname] = library_data['size']
# if library_data['classification'] in ['扩增子', '不平衡文库', '单细胞文库以及甲基化']:
@ -72,13 +79,17 @@ class AutoLayout:
self.chip_speciallib_size[chipname] = library_data['size']
else:
self.chip_speciallib_size[chipname] = 0
if 'Nextera' in library_data['classification'].lower():
self.chip_speciallib_nextera_size[chipname] = library_data['size']
else:
self.chip_speciallib_nextera_size[chipname] = 0
else:
self.chip_size[chipname] += library_data['size']
if library_data['is_balance_lib'] == '':
self.chip_speciallib_size[chipname] += library_data['size']
self.chip_customer[chipname].add(library_data['customer'])
self.chip_classification[chipname].add(library_data['classification'])
if 'Nextera' in library_data['classification'].lower():
self.chip_speciallib_nextera_size[chipname] += library_data['size']
def count_barcode_radio(self, data):
df = pd.DataFrame(data)
@ -146,10 +157,14 @@ class AutoLayout:
# 将时间字符串转换为 datetime 对象
# mytime = datetime.strptime(row['time'], "%Y-%m-%d")
# mytime = row['time'].strftime("%Y-%m-%d")
mytime = row['time']
if 'nextera' in row['classification'].lower():
return 1
if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
return 2
mytime = row['time']
# 判断日期是之前的还是之后的
if mytime < today_date:
return 3
@ -219,6 +234,25 @@ class AutoLayout:
return True
return False
def add_loc_num(self):
    """Advance to the next chip, or evict an under-filled Nextera batch.

    Reads the accumulated Nextera size recorded for the current chip
    (``chip{loc_chip_num}``):

    * If the chip holds no Nextera data, or more than 50 (the original
      comment states the requirement as "more than 50G"), the chip is
      accepted and ``self.loc_chip_num`` is incremented.
    * Otherwise every record whose classification contains ``nextera``
      (case-insensitive) is moved from the chip to ``self.no_assign_data``,
      its barcode is released from the chip's barcode set, and the chip's
      size counters are reduced accordingly.  ``self.loc_chip_num`` is left
      unchanged in this case, as in the original code.

    All per-chip containers are keyed by the chip name string, not by the
    bare integer counter.
    """
    chipname = f'chip{self.loc_chip_num}'
    # A chip with no recorded entry has no Nextera data on it.
    nextera_size = self.chip_speciallib_nextera_size.get(chipname, 0)

    if nextera_size > 50 or nextera_size == 0:
        self.loc_chip_num += 1
        return

    # Nextera data is present but does not exceed 50: remove it from the chip.
    kept_records = []
    nextera_barcodes = set()
    for record in self.index_assignments[chipname]:
        # Entries are flat sample records (each carries its own 'barcode'
        # and 'classification'), not libraries with a nested 'data' list.
        if 'nextera' in record['classification'].lower():
            self.no_assign_data.append(record)
            nextera_barcodes.add(record['barcode'])
        else:
            kept_records.append(record)

    self.index_assignments[chipname] = kept_records
    self.chip_barcode_recode[chipname] -= nextera_barcodes

    # Keep the size bookkeeping in step with the removal; without the reset
    # every later call would take this branch again and never advance.
    # NOTE(review): chip_speciallib_size is not adjusted here because the
    # per-record contribution to it cannot be derived from this block.
    self.chip_size[chipname] -= nextera_size
    self.chip_speciallib_nextera_size[chipname] = 0
def assign_samples(self):
ori_library_data = list()
@ -226,8 +260,8 @@ class AutoLayout:
raise UserWarning('提供excel没有 未测 sheet ,请核查!')
ori_library_df = pd.DataFrame(self.ori_data['未测'])
need_col = ['#library', 'sublibrary', 'i5', 'i7', 'data_needed', 'real_data', 'customer',
'classification', 'priority', 'time', '拆分方式', 'barcode', 'is_balance_lib'
need_col = ['status', '#library', 'sublibrary', 'i5', 'i7', 'data_needed', 'real_data', 'customer',
'classification', 'priority', 'time', '拆分方式', 'barcode', 'is_balance_lib', '备注'
]
get_col = set(ori_library_df.columns)
unhave_col = set(need_col) - get_col
@ -239,17 +273,22 @@ class AutoLayout:
numeric_mask = pd.to_numeric(ori_library_df['data_needed'], errors='coerce').notna()
time_mask = pd.to_datetime(ori_library_df['time'], errors='coerce').notna()
# 添加处理status列的逻辑
status_mask = ori_library_df['status'] == '暂不排样'
ori_library_df['note'] = ''
ori_library_df.loc[~numeric_mask, 'note'] = 'data_needed 列非数字'
ori_library_df.loc[~time_mask, 'note'] = 'time 列非日期'
ori_library_df.loc[status_mask, 'note'] = '暂不排样'
need_col.append('note')
no_ori_data = ori_library_df[~(numeric_mask & time_mask)]
no_ori_data = ori_library_df[~(numeric_mask & time_mask) | status_mask]
self.no_assign_data.extend(no_ori_data.to_dict('records'))
# 使用布尔索引筛选出不是数字和非日期的行
ori_library_df = ori_library_df[(numeric_mask & time_mask)]
ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~status_mask]
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
# # 极致客户有重复的把等级调到0防止放到了最后到了未测里
@ -320,9 +359,11 @@ class AutoLayout:
break
j += 1
else:
self.loc_chip_num += 1
# self.loc_chip_num += 1
self.add_loc_num()
if self.chip_size[chipname] > self.data_limit:
self.loc_chip_num += 1
# self.loc_chip_num += 1
self.add_loc_num()
def assign_again(self):
pass