new_sanwei
parent
70355323d1
commit
62ca5126be
276
tools/t7.py
276
tools/t7.py
|
|
@ -26,6 +26,8 @@ class AutoLayout:
|
||||||
self.librarynum = int(librarynum)
|
self.librarynum = int(librarynum)
|
||||||
self.data_limit = data_limit
|
self.data_limit = data_limit
|
||||||
self.data_lower = data_lower
|
self.data_lower = data_lower
|
||||||
|
self.get_col = list()
|
||||||
|
self.items = list()
|
||||||
|
|
||||||
# 芯片原始数据读取
|
# 芯片原始数据读取
|
||||||
self.ori_data = self.read_excel()
|
self.ori_data = self.read_excel()
|
||||||
|
|
@ -54,7 +56,7 @@ class AutoLayout:
|
||||||
self.rule = self.read_rule()
|
self.rule = self.read_rule()
|
||||||
self.rule_exclusive_customer = self.read_rule_exclusive_customer()
|
self.rule_exclusive_customer = self.read_rule_exclusive_customer()
|
||||||
|
|
||||||
# 子文库名称
|
# subsamplename
|
||||||
self.chip_sublib = defaultdict(set)
|
self.chip_sublib = defaultdict(set)
|
||||||
|
|
||||||
# 不平衡文库
|
# 不平衡文库
|
||||||
|
|
@ -93,11 +95,12 @@ class AutoLayout:
|
||||||
原始数据处理
|
原始数据处理
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
merge = pd.read_excel(self.path, None)
|
# 获取表头备注
|
||||||
ori_data = dict()
|
nrow = pd.read_excel(self.path, nrows=1)
|
||||||
for name, sheet in merge.items():
|
self.items = nrow.to_dict('records')
|
||||||
sheet.fillna('', inplace=True)
|
merge = pd.read_excel(self.path, skiprows=[1])
|
||||||
ori_data[name] = sheet.to_dict('records')
|
merge.fillna('', inplace=True)
|
||||||
|
ori_data = merge.to_dict('records')
|
||||||
return ori_data
|
return ori_data
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
@ -125,26 +128,26 @@ class AutoLayout:
|
||||||
if df.empty:
|
if df.empty:
|
||||||
return ratio_sites, is_not_balance_list
|
return ratio_sites, is_not_balance_list
|
||||||
s, e = 0, 16
|
s, e = 0, 16
|
||||||
if maxt == 'i7':
|
if maxt == 'indexi7':
|
||||||
s, e = 8, 16
|
s, e = 8, 16
|
||||||
if maxt == 'i5':
|
if maxt == 'indexi5':
|
||||||
s, e = 0, 8
|
s, e = 0, 8
|
||||||
num = e - s
|
num = e - s
|
||||||
df['barcode'] = df['barcode'].str.slice(s, e)
|
df['indexi5i7'] = df['indexi5i7'].str.slice(s, e)
|
||||||
barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values,
|
barcode_df = pd.DataFrame(df['indexi5i7'].str.split('', expand=True).iloc[:, 1:-1].values,
|
||||||
columns=['T' + str(x) for x in range(num)]).join(df['data_needed'])
|
columns=['T' + str(x) for x in range(num)]).join(df['orderdatavolume'])
|
||||||
total = barcode_df['data_needed'].sum()
|
total = barcode_df['orderdatavolume'].sum()
|
||||||
|
|
||||||
for i in range(num):
|
for i in range(num):
|
||||||
column = 'T' + str(i)
|
column = 'T' + str(i)
|
||||||
col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'})
|
col_df = barcode_df.groupby(column).agg({'orderdatavolume': 'sum'})
|
||||||
# 去掉N计数
|
# 去掉N计数
|
||||||
if 'N' in col_df.index:
|
if 'N' in col_df.index:
|
||||||
base_n_size = col_df.loc['N', 'data_needed']
|
base_n_size = col_df.loc['N', 'orderdatavolume']
|
||||||
col_df = col_df.drop('N')
|
col_df = col_df.drop('N')
|
||||||
else:
|
else:
|
||||||
base_n_size = 0
|
base_n_size = 0
|
||||||
col_df['ratio'] = (col_df['data_needed']) / (total - base_n_size)
|
col_df['ratio'] = (col_df['orderdatavolume']) / (total - base_n_size)
|
||||||
ratio = col_df['ratio'].to_dict()
|
ratio = col_df['ratio'].to_dict()
|
||||||
ratio_sites[i] = ratio
|
ratio_sites[i] = ratio
|
||||||
A, B, C, D, E, F, G = list(), list(), list(), list(), list(), list(), list()
|
A, B, C, D, E, F, G = list(), list(), list(), list(), list(), list(), list()
|
||||||
|
|
@ -191,25 +194,19 @@ class AutoLayout:
|
||||||
|
|
||||||
today_date = datetime.now()
|
today_date = datetime.now()
|
||||||
|
|
||||||
if 'nextera' in row['classification'].lower():
|
if 'nextera' in row['librarystructure'].lower():
|
||||||
return 1000
|
return 1000
|
||||||
|
|
||||||
if '华大' in row['classification']:
|
if '华大' in row['librarystructure']:
|
||||||
return 1100
|
return 1100
|
||||||
|
|
||||||
if '超加急' in row['priority']:
|
if row['cycletype'] == '极致周期' or '极致' in row['cycletype']:
|
||||||
return 1500
|
|
||||||
|
|
||||||
if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
|
|
||||||
return 2000
|
return 2000
|
||||||
|
|
||||||
if '加急' in row['priority']:
|
if row['retestflag'] == '是':
|
||||||
return 3000
|
return 3000
|
||||||
|
|
||||||
if '补测' in row['priority']:
|
mytime = row['receivedtime']
|
||||||
return 4000
|
|
||||||
|
|
||||||
mytime = row['time']
|
|
||||||
# 判断日期是之前的还是之后的
|
# 判断日期是之前的还是之后的
|
||||||
if mytime < today_date:
|
if mytime < today_date:
|
||||||
return 5000
|
return 5000
|
||||||
|
|
@ -224,17 +221,17 @@ class AutoLayout:
|
||||||
same_barcode_df = pd.DataFrame(
|
same_barcode_df = pd.DataFrame(
|
||||||
[spdata for data in self.ori_lib_data if data['level'] == 1900 for spdata in data['data']])
|
[spdata for data in self.ori_lib_data if data['level'] == 1900 for spdata in data['data']])
|
||||||
|
|
||||||
# 按照 'barcode' 列进行分组
|
# 按照 'indexi5i7' 列进行分组
|
||||||
if same_barcode_df.empty:
|
if same_barcode_df.empty:
|
||||||
return
|
return
|
||||||
grouped = same_barcode_df.groupby('barcode')
|
grouped = same_barcode_df.groupby('indexi5i7')
|
||||||
|
|
||||||
# 获取具有重复的 'barcode' 分组
|
# 获取具有重复的 'indexi5i7' 分组
|
||||||
duplicate_groups = grouped.filter(lambda x: len(x) > 1)
|
duplicate_groups = grouped.filter(lambda x: len(x) > 1)
|
||||||
|
|
||||||
# 提取这些分组,计算文库重复次数
|
# 提取这些分组,计算文库重复次数
|
||||||
grouped_names = duplicate_groups.groupby('barcode')['#library'].apply(list).reset_index()
|
grouped_names = duplicate_groups.groupby('indexi5i7')['samplename'].apply(list).reset_index()
|
||||||
random_list = list(set(tuple(sublst) for sublst in list(grouped_names['#library'])))
|
random_list = list(set(tuple(sublst) for sublst in list(grouped_names['samplename'])))
|
||||||
new_lst = [spdata for data in random_list for spdata in data]
|
new_lst = [spdata for data in random_list for spdata in data]
|
||||||
counts = Counter(new_lst)
|
counts = Counter(new_lst)
|
||||||
|
|
||||||
|
|
@ -255,17 +252,17 @@ class AutoLayout:
|
||||||
"""
|
"""
|
||||||
self.index_assignments[chipname].extend(library_data['data'])
|
self.index_assignments[chipname].extend(library_data['data'])
|
||||||
|
|
||||||
self.chip_barcode_recode[chipname].update({item['barcode'] for item in library_data['data']})
|
self.chip_barcode_recode[chipname].update({item['indexi5i7'] for item in library_data['data']})
|
||||||
self.chip_barcodei7_recode[chipname].update({item['i7'] for item in library_data['data']})
|
self.chip_barcodei7_recode[chipname].update({item['indexi7'] for item in library_data['data']})
|
||||||
self.chip_barcodei5_recode[chipname].update({item['i5'] for item in library_data['data']})
|
self.chip_barcodei5_recode[chipname].update({item['indexi5'] for item in library_data['data']})
|
||||||
|
|
||||||
# 华大的 文库 i7 不能重复,添加N+i7
|
# 华大的 文库 i7 不能重复,添加N+i7
|
||||||
if '华大' in library_data['classification']:
|
if '华大' in library_data['classification']:
|
||||||
self.chip_barcode_recode[chipname].update({'N' * 8 + item['i7'] for item in library_data['data']})
|
self.chip_barcode_recode[chipname].update({'N' * 8 + item['indexi7'] for item in library_data['data']})
|
||||||
# self.chip_barcode_recode[chipname].update({item['i5'] + 'N' * 8 for item in library_data['data']})
|
# self.chip_barcode_recode[chipname].update({item['indexi5'] + 'N' * 8 for item in library_data['data']})
|
||||||
|
|
||||||
# 子文库
|
# 子文库
|
||||||
self.chip_sublib[chipname].update({item['sublibrary'] for item in library_data['data']})
|
self.chip_sublib[chipname].update({item['subsamplename'] for item in library_data['data']})
|
||||||
|
|
||||||
self.chip_customer[chipname].add(library_data['customer'])
|
self.chip_customer[chipname].add(library_data['customer'])
|
||||||
self.chip_classification[chipname].add(library_data['classification'])
|
self.chip_classification[chipname].add(library_data['classification'])
|
||||||
|
|
@ -273,13 +270,11 @@ class AutoLayout:
|
||||||
if newer:
|
if newer:
|
||||||
self.chip_size[chipname] = library_data['size']
|
self.chip_size[chipname] = library_data['size']
|
||||||
self.chip_size_N[chipname] = 0
|
self.chip_size_N[chipname] = 0
|
||||||
if 'N' in library_data['data'][0]['barcode']:
|
if 'N' in library_data['data'][0]['indexi5i7']:
|
||||||
# print(library_data['data'][0]['barcode'])
|
|
||||||
self.chip_size_N[chipname] = library_data['size']
|
self.chip_size_N[chipname] = library_data['size']
|
||||||
# if library_data['classification'] in ['扩增子', '不平衡文库', '单细胞文库以及甲基化']:
|
|
||||||
if library_data['is_balance_lib'] == '否':
|
if library_data['is_balance_lib'] == '否':
|
||||||
self.chip_speciallib_size[chipname] = library_data['size']
|
self.chip_speciallib_size[chipname] = library_data['size']
|
||||||
elif library_data['is_balance_lib'] == '甲基化':
|
elif '甲基化' in library_data['classification']:
|
||||||
self.chip_methylib_size[chipname] = library_data['size']
|
self.chip_methylib_size[chipname] = library_data['size']
|
||||||
else:
|
else:
|
||||||
self.chip_speciallib_size[chipname] = 0
|
self.chip_speciallib_size[chipname] = 0
|
||||||
|
|
@ -297,15 +292,13 @@ class AutoLayout:
|
||||||
self.chip_size[chipname] += library_data['size']
|
self.chip_size[chipname] += library_data['size']
|
||||||
if library_data['is_balance_lib'] == '否':
|
if library_data['is_balance_lib'] == '否':
|
||||||
self.chip_speciallib_size[chipname] += library_data['size']
|
self.chip_speciallib_size[chipname] += library_data['size']
|
||||||
if library_data['is_balance_lib'] == '甲基化':
|
if '甲基化' in library_data['classification']:
|
||||||
self.chip_methylib_size[chipname] += library_data['size']
|
self.chip_methylib_size[chipname] += library_data['size']
|
||||||
if 'nextera' in library_data['classification'].lower():
|
if 'nextera' in library_data['classification'].lower():
|
||||||
self.chip_speciallib_nextera_size[chipname] += library_data['size']
|
self.chip_speciallib_nextera_size[chipname] += library_data['size']
|
||||||
if '华大' in library_data['classification']:
|
if '华大' in library_data['classification']:
|
||||||
self.chip_speciallib_huada_size[chipname] += library_data['size']
|
self.chip_speciallib_huada_size[chipname] += library_data['size']
|
||||||
|
if 'N' in library_data['data'][0]['indexi5i7']:
|
||||||
if 'N' in library_data['data'][0]['barcode']:
|
|
||||||
# print(library_data['data'][0]['barcode'])
|
|
||||||
self.chip_size_N[chipname] += library_data['size']
|
self.chip_size_N[chipname] += library_data['size']
|
||||||
|
|
||||||
def use_rule_exclusive_classfication(self, chipname, classfication):
|
def use_rule_exclusive_classfication(self, chipname, classfication):
|
||||||
|
|
@ -331,7 +324,7 @@ class AutoLayout:
|
||||||
"""
|
"""
|
||||||
size = library_data['size']
|
size = library_data['size']
|
||||||
size_N = 0
|
size_N = 0
|
||||||
if 'N' in library_data['data'][0]['barcode']:
|
if 'N' in library_data['data'][0]['indexi5i7']:
|
||||||
size_N = library_data['size']
|
size_N = library_data['size']
|
||||||
classification = library_data['classification']
|
classification = library_data['classification']
|
||||||
customer = library_data['customer']
|
customer = library_data['customer']
|
||||||
|
|
@ -346,11 +339,11 @@ class AutoLayout:
|
||||||
|
|
||||||
# barcode有重复
|
# barcode有重复
|
||||||
notrepeatbarcode = True
|
notrepeatbarcode = True
|
||||||
if self.chip_barcode_recode[chipname].intersection({item['barcode'] for item in library_data['data']}) or \
|
if self.chip_barcode_recode[chipname].intersection({item['indexi5i7'] for item in library_data['data']}) or \
|
||||||
self.chip_barcode_recode[chipname].intersection(
|
self.chip_barcode_recode[chipname].intersection(
|
||||||
{'N' * 8 + item['i7'] for item in library_data['data']}) or \
|
{'N' * 8 + item['indexi7'] for item in library_data['data']}) or \
|
||||||
self.chip_barcode_recode[chipname].intersection(
|
self.chip_barcode_recode[chipname].intersection(
|
||||||
{item['i5'] + 'N' * 8 for item in library_data['data']}):
|
{item['indexi5'] + 'N' * 8 for item in library_data['data']}):
|
||||||
notrepeatbarcode = False
|
notrepeatbarcode = False
|
||||||
# print(chipname, library, 'barcode有重复')
|
# print(chipname, library, 'barcode有重复')
|
||||||
|
|
||||||
|
|
@ -411,15 +404,15 @@ class AutoLayout:
|
||||||
base_balance = True
|
base_balance = True
|
||||||
notrepeatbarcode = True
|
notrepeatbarcode = True
|
||||||
if self.chip_barcodei7_recode[chipname].intersection(
|
if self.chip_barcodei7_recode[chipname].intersection(
|
||||||
{item['i7'] for item in library_data['data']}) and max_barcode == 'i7':
|
{item['indexi7'] for item in library_data['data']}) and max_barcode == 'indexi7':
|
||||||
notrepeatbarcode = False
|
notrepeatbarcode = False
|
||||||
if self.chip_barcodei5_recode[chipname].intersection(
|
if self.chip_barcodei5_recode[chipname].intersection(
|
||||||
{item['i5'] for item in library_data['data']}) and max_barcode == 'i5':
|
{item['indexi5'] for item in library_data['data']}) and max_barcode == 'indexi5':
|
||||||
notrepeatbarcode = False
|
notrepeatbarcode = False
|
||||||
# 是个N的取消
|
# 是个N的取消
|
||||||
if ('N' * 8 in {item['i5'] for item in library_data['data']}) and max_barcode == 'i5':
|
if ('N' * 8 in {item['indexi5'] for item in library_data['data']}) and max_barcode == 'indexi5':
|
||||||
notrepeatbarcode = False
|
notrepeatbarcode = False
|
||||||
if ('N' * 8 in {item['i7'] for item in library_data['data']}) and max_barcode == 'i7':
|
if ('N' * 8 in {item['indexi7'] for item in library_data['data']}) and max_barcode == 'indexi7':
|
||||||
notrepeatbarcode = False
|
notrepeatbarcode = False
|
||||||
if self.chip_size[chipname] > 900:
|
if self.chip_size[chipname] > 900:
|
||||||
current_data = copy.deepcopy(self.index_assignments[chipname])
|
current_data = copy.deepcopy(self.index_assignments[chipname])
|
||||||
|
|
@ -431,7 +424,7 @@ class AutoLayout:
|
||||||
|
|
||||||
# 子文库名不能重复
|
# 子文库名不能重复
|
||||||
notrepeatsublib = True
|
notrepeatsublib = True
|
||||||
if self.chip_sublib[chipname].intersection({item['sublibrary'] for item in library_data['data']}):
|
if self.chip_sublib[chipname].intersection({item['subsamplename'] for item in library_data['data']}):
|
||||||
notrepeatsublib = False
|
notrepeatsublib = False
|
||||||
|
|
||||||
if sizelimit and notrepeatbarcode and \
|
if sizelimit and notrepeatbarcode and \
|
||||||
|
|
@ -464,7 +457,7 @@ class AutoLayout:
|
||||||
no_nextary_data.append(libdata)
|
no_nextary_data.append(libdata)
|
||||||
else:
|
else:
|
||||||
self.no_assign_data.append(libdata)
|
self.no_assign_data.append(libdata)
|
||||||
nextary_barcode.update(libdata['barcode'])
|
nextary_barcode.update(libdata['indexi5i7'])
|
||||||
self.index_assignments[chipname] = no_nextary_data
|
self.index_assignments[chipname] = no_nextary_data
|
||||||
self.chip_barcode_recode[chipname] -= nextary_barcode
|
self.chip_barcode_recode[chipname] -= nextary_barcode
|
||||||
self.chip_speciallib_nextera_size[chipname] = 0
|
self.chip_speciallib_nextera_size[chipname] = 0
|
||||||
|
|
@ -479,7 +472,7 @@ class AutoLayout:
|
||||||
no_huada_data.append(libdata)
|
no_huada_data.append(libdata)
|
||||||
else:
|
else:
|
||||||
self.no_assign_data.append(libdata)
|
self.no_assign_data.append(libdata)
|
||||||
huada_barcode.update(libdata['barcode'])
|
huada_barcode.update(libdata['indexi5i7'])
|
||||||
self.index_assignments[chipname] = no_huada_data
|
self.index_assignments[chipname] = no_huada_data
|
||||||
self.chip_barcode_recode[chipname] -= huada_barcode
|
self.chip_barcode_recode[chipname] -= huada_barcode
|
||||||
self.chip_speciallib_huada_size[chipname] = 0
|
self.chip_speciallib_huada_size[chipname] = 0
|
||||||
|
|
@ -490,61 +483,52 @@ class AutoLayout:
|
||||||
|
|
||||||
def assign_samples(self):
|
def assign_samples(self):
|
||||||
|
|
||||||
if '未测' not in self.ori_data.keys():
|
# if '未测' not in self.ori_data.keys():
|
||||||
raise UserWarning('提供excel没有 未测 sheet ,请核查!')
|
# raise UserWarning('提供excel没有 未测 sheet ,请核查!')
|
||||||
ori_library_df = pd.DataFrame(self.ori_data['未测'])
|
ori_library_df = pd.DataFrame(self.ori_data)
|
||||||
|
|
||||||
# 检查提供excel 是否有必须表头
|
# # 检查提供excel 是否有必须表头
|
||||||
get_col = set(ori_library_df.columns)
|
# get_col = set(ori_library_df.columns)
|
||||||
unhave_col = set(self.need_cols) - get_col
|
# unhave_col = set(self.need_cols) - get_col
|
||||||
|
#
|
||||||
if unhave_col:
|
# if unhave_col:
|
||||||
unhave_from = '; '.join(unhave_col)
|
# unhave_from = '; '.join(unhave_col)
|
||||||
raise UserWarning(f'未测表里没有 {unhave_from} 表头,请核查!')
|
# raise UserWarning(f'未测表里没有 {unhave_from} 表头,请核查!')
|
||||||
|
|
||||||
# 数据标准格式
|
# 数据标准格式
|
||||||
numeric_mask = pd.to_numeric(ori_library_df['data_needed'], errors='coerce').notna()
|
numeric_mask = pd.to_numeric(ori_library_df['orderdatavolume'], errors='coerce').notna()
|
||||||
time_mask = pd.to_datetime(ori_library_df['time'], errors='coerce').notna()
|
time_mask = pd.to_datetime(ori_library_df['receivedtime'], errors='coerce').notna()
|
||||||
|
|
||||||
# 添加处理status列的逻辑
|
|
||||||
status_mask = ori_library_df['status'] == '暂不排样'
|
|
||||||
|
|
||||||
# 非正常barcode
|
# 非正常barcode
|
||||||
barcode_mask = ori_library_df['barcode'].str.len() != 16
|
barcode_mask = ori_library_df['indexi5i7'].str.len() != 16
|
||||||
|
|
||||||
ori_library_df['note'] = ''
|
ori_library_df['note'] = ''
|
||||||
ori_library_df.loc[~numeric_mask, 'note'] = 'data_needed 列非数字'
|
ori_library_df.loc[~numeric_mask, 'note'] = 'data_needed 列非数字'
|
||||||
ori_library_df.loc[~time_mask, 'note'] = 'time 列非日期'
|
ori_library_df.loc[~time_mask, 'note'] = 'time 列非日期'
|
||||||
ori_library_df.loc[status_mask, 'note'] = '暂不排样'
|
|
||||||
ori_library_df.loc[barcode_mask, 'note'] = '非16位barcode'
|
ori_library_df.loc[barcode_mask, 'note'] = '非16位barcode'
|
||||||
|
no_ori_data = ori_library_df[~(numeric_mask & time_mask) | barcode_mask]
|
||||||
no_ori_data = ori_library_df[~(numeric_mask & time_mask) | status_mask | barcode_mask]
|
|
||||||
|
|
||||||
# 某个客户的检测的数据超过1个T就单独处理
|
|
||||||
# summary = ori_library_df.groupby('customer').agg({'data_needed': 'sum'})
|
|
||||||
# print(summary)
|
|
||||||
|
|
||||||
self.no_assign_data.extend(no_ori_data.to_dict('records'))
|
self.no_assign_data.extend(no_ori_data.to_dict('records'))
|
||||||
|
|
||||||
# 使用布尔索引筛选出不是数字和非日期的行,并且不是暂不排样的行, 以及非16位置barcode
|
# 使用布尔索引筛选出不是数字和非日期的行,并且不是暂不排样的行, 以及非16位置barcode
|
||||||
ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~status_mask & ~barcode_mask]
|
ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~barcode_mask]
|
||||||
|
|
||||||
# 时间格式化
|
# 时间格式化
|
||||||
ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce')
|
ori_library_df['receivedtime'] = pd.to_datetime(ori_library_df['receivedtime'], errors='coerce')
|
||||||
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
|
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
|
||||||
|
|
||||||
# 极致客户有重复的,把等级调到1900,防止放到了最后,到了未测里
|
# 极致客户有重复的,把等级调到1900,防止放到了最后,到了未测里
|
||||||
must_lib_df = ori_library_df[ori_library_df['level'] == 2000]
|
must_lib_df = ori_library_df[ori_library_df['level'] == 2000]
|
||||||
must_lib = set(must_lib_df[must_lib_df.duplicated(subset='barcode', keep=False)]['#library'].to_list())
|
must_lib = set(must_lib_df[must_lib_df.duplicated(subset='indexi5i7', keep=False)]['samplename'].to_list())
|
||||||
ori_library_df.loc[ori_library_df['#library'].isin(must_lib), 'level'] = 1900
|
ori_library_df.loc[ori_library_df['samplename'].isin(must_lib), 'level'] = 1900
|
||||||
|
|
||||||
for library, library_df in ori_library_df.groupby('#library'):
|
for library, library_df in ori_library_df.groupby('samplename'):
|
||||||
|
|
||||||
size = library_df['data_needed'].sum()
|
size = library_df['orderdatavolume'].sum()
|
||||||
is_balance_lib = library_df['is_balance_lib'].values[0]
|
is_balance_lib = library_df['librarybalancedflag'].values[0]
|
||||||
|
|
||||||
# 文库内部有重复
|
# 文库内部有重复
|
||||||
if len(library_df['barcode'].values) > len(set(library_df['barcode'].values)):
|
if len(library_df['indexi5i7'].values) > len(set(library_df['indexi5i7'].values)):
|
||||||
library_df['note'] = '文库内部有重复'
|
library_df['note'] = '文库内部有重复'
|
||||||
self.no_assign_data.extend(library_df.to_dict('records'))
|
self.no_assign_data.extend(library_df.to_dict('records'))
|
||||||
continue
|
continue
|
||||||
|
|
@ -552,53 +536,53 @@ class AutoLayout:
|
||||||
# 不平衡文库 大于250G 的数据 先进行拆分
|
# 不平衡文库 大于250G 的数据 先进行拆分
|
||||||
if is_balance_lib == '否' and size > 250:
|
if is_balance_lib == '否' and size > 250:
|
||||||
self.return_log.append(f'文库{library} 是不平衡文库, 数据为{size}, 大于250G, 已做拆分处理, 请注意!!! ')
|
self.return_log.append(f'文库{library} 是不平衡文库, 数据为{size}, 大于250G, 已做拆分处理, 请注意!!! ')
|
||||||
data_needed = library_df['data_needed'].copy()
|
data_needed = library_df['orderdatavolume'].copy()
|
||||||
for num in range(int(size), 0, -200):
|
for num in range(int(size), 0, -200):
|
||||||
addnum = 200
|
addnum = 200
|
||||||
if num <= 200:
|
if num <= 200:
|
||||||
addnum = num
|
addnum = num
|
||||||
library_df['data_needed'] = (addnum / size) * data_needed
|
library_df['orderdatavolume'] = (addnum / size) * data_needed
|
||||||
|
|
||||||
self.ori_lib_data.append(dict(
|
self.ori_lib_data.append(dict(
|
||||||
library=library,
|
library=library,
|
||||||
is_balance_lib=library_df['is_balance_lib'].values[0],
|
is_balance_lib=library_df['librarybalancedflag'].values[0],
|
||||||
size=library_df['data_needed'].sum(),
|
size=library_df['orderdatavolume'].sum(),
|
||||||
split_method=library_df['拆分方式'].values[0],
|
split_method=library_df['cycletype'].values[0],
|
||||||
time=library_df['time'].values[0],
|
time=library_df['receivedtime'].values[0],
|
||||||
level=1950,
|
level=1950,
|
||||||
customer=library_df['customer'].values[0],
|
customer=library_df['companynamea'].values[0],
|
||||||
classification=library_df['classification'].values[0],
|
classification=library_df['librarystructure'].values[0],
|
||||||
data=library_df[self.need_cols].to_dict('records')
|
data=library_df.to_dict('records')
|
||||||
))
|
))
|
||||||
self.split_lib.add(library)
|
self.split_lib.add(library)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 拆分处理 分为了2个大文库
|
# 拆分处理 分为了2个大文库
|
||||||
if size > self.data_limit / 2:
|
if size > self.data_limit / 2:
|
||||||
library_df['data_needed'] = library_df['data_needed'] / 2
|
library_df['orderdatavolume'] = library_df['orderdatavolume'] / 2
|
||||||
self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
|
self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
|
||||||
self.ori_lib_data.append(dict(
|
self.ori_lib_data.append(dict(
|
||||||
library=library,
|
library=library,
|
||||||
is_balance_lib=library_df['is_balance_lib'].values[0],
|
is_balance_lib=library_df['librarybalancedflag'].values[0],
|
||||||
size=library_df['data_needed'].sum(),
|
size=library_df['orderdatavolume'].sum(),
|
||||||
split_method=library_df['拆分方式'].values[0],
|
split_method=library_df['cycletype'].values[0],
|
||||||
time=library_df['time'].values[0],
|
time=library_df['receivedtime'].values[0],
|
||||||
level=library_df['level'].values[0],
|
level=library_df['level'].values[0],
|
||||||
customer=library_df['customer'].values[0],
|
customer=library_df['companynamea'].values[0],
|
||||||
classification=library_df['classification'].values[0],
|
classification=library_df['librarystructure'].values[0],
|
||||||
data=library_df[self.need_cols].to_dict('records')
|
data=library_df.to_dict('records')
|
||||||
))
|
))
|
||||||
|
|
||||||
self.ori_lib_data.append(dict(
|
self.ori_lib_data.append(dict(
|
||||||
library=library,
|
library=library,
|
||||||
is_balance_lib=library_df['is_balance_lib'].values[0],
|
is_balance_lib=library_df['librarybalancedflag'].values[0],
|
||||||
size=library_df['data_needed'].sum(),
|
size=library_df['orderdatavolume'].sum(),
|
||||||
split_method=library_df['拆分方式'].values[0],
|
split_method=library_df['cycletype'].values[0],
|
||||||
time=library_df['time'].values[0],
|
time=library_df['receivedtime'].values[0],
|
||||||
level=library_df['level'].values[0],
|
level=library_df['level'].values[0],
|
||||||
customer=library_df['customer'].values[0],
|
customer=library_df['companynamea'].values[0],
|
||||||
classification=library_df['classification'].values[0],
|
classification=library_df['librarystructure'].values[0],
|
||||||
data=library_df[self.need_cols].to_dict('records')
|
data=library_df.to_dict('records')
|
||||||
))
|
))
|
||||||
|
|
||||||
self.combinations_same_barcode()
|
self.combinations_same_barcode()
|
||||||
|
|
@ -643,30 +627,31 @@ class AutoLayout:
|
||||||
if not chip_assignments:
|
if not chip_assignments:
|
||||||
continue
|
continue
|
||||||
df = pd.DataFrame(chip_assignments)
|
df = pd.DataFrame(chip_assignments)
|
||||||
if df['data_needed'].sum() < self.data_lower:
|
if df['orderdatavolume'].sum() < self.data_lower:
|
||||||
left_data.extend(chip_assignments)
|
left_data.extend(chip_assignments)
|
||||||
no_need_chipname.append(chip_idx)
|
no_need_chipname.append(chip_idx)
|
||||||
for chip_idx in no_need_chipname:
|
for chip_idx in no_need_chipname:
|
||||||
del self.index_assignments[chip_idx]
|
del self.index_assignments[chip_idx]
|
||||||
|
if not left_data:
|
||||||
|
return
|
||||||
ori_library_df = pd.DataFrame(left_data)
|
ori_library_df = pd.DataFrame(left_data)
|
||||||
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
|
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
|
||||||
ori_lib_data = list()
|
ori_lib_data = list()
|
||||||
for library, library_df in ori_library_df.groupby('#library'):
|
for library, library_df in ori_library_df.groupby('samplename'):
|
||||||
level = library_df['level'].values[0]
|
level = library_df['level'].values[0]
|
||||||
if library in self.split_lib:
|
if library in self.split_lib:
|
||||||
level = 1950
|
level = 1950
|
||||||
|
|
||||||
ori_lib_data.append(dict(
|
ori_lib_data.append(dict(
|
||||||
library=library,
|
library=library,
|
||||||
is_balance_lib=library_df['is_balance_lib'].values[0],
|
is_balance_lib=library_df['librarybalancedflag'].values[0],
|
||||||
size=library_df['data_needed'].sum(),
|
size=library_df['orderdatavolume'].sum(),
|
||||||
split_method=library_df['拆分方式'].values[0],
|
split_method=library_df['cycletype'].values[0],
|
||||||
time=library_df['time'].values[0],
|
time=library_df['receivedtime'].values[0],
|
||||||
level=level,
|
level=level,
|
||||||
customer=library_df['customer'].values[0],
|
customer=library_df['companynamea'].values[0],
|
||||||
classification=library_df['classification'].values[0],
|
classification=library_df['librarystructure'].values[0],
|
||||||
data=library_df[self.need_cols].to_dict('records')
|
data=library_df.to_dict('records')
|
||||||
))
|
))
|
||||||
|
|
||||||
ori_lib_data = sorted(ori_lib_data, key=lambda x: (x['level'], x['time'], -x['size']))
|
ori_lib_data = sorted(ori_lib_data, key=lambda x: (x['level'], x['time'], -x['size']))
|
||||||
|
|
@ -700,14 +685,14 @@ class AutoLayout:
|
||||||
self.add_loc_num(chipname)
|
self.add_loc_num(chipname)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
# print('# 测试代码')
|
print('# 测试代码')
|
||||||
# self.assign_samples()
|
self.assign_samples()
|
||||||
# self.assign_again()
|
self.assign_again_size()
|
||||||
try:
|
try:
|
||||||
self.assign_samples()
|
self.assign_samples()
|
||||||
self.assign_again_size()
|
self.assign_again_size()
|
||||||
# self.assign_again_size(max_barcode='i7')
|
# self.assign_again_size(max_barcode='indexi7')
|
||||||
# self.assign_again_size(max_barcode='i5')
|
# self.assign_again_size(max_barcode='indexi5')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.return_log.append(f'T7排样出错, 请联系!{e}')
|
self.return_log.append(f'T7排样出错, 请联系!{e}')
|
||||||
self.index_assignments = {}
|
self.index_assignments = {}
|
||||||
|
|
@ -721,18 +706,15 @@ class AutoLayout:
|
||||||
if not chip_assignments:
|
if not chip_assignments:
|
||||||
continue
|
continue
|
||||||
df = pd.DataFrame(chip_assignments)
|
df = pd.DataFrame(chip_assignments)
|
||||||
df['time'] = df['time'].dt.strftime('%Y-%m-%d')
|
df['receivedtime'] = df['receivedtime'].dt.strftime('%Y-%m-%d')
|
||||||
|
|
||||||
if [method for method in df['拆分方式'].values if '极致' in method]:
|
if [method for method in df['cycletype'].values if '极致' in method]:
|
||||||
addname = 'X'
|
addname = 'X'
|
||||||
else:
|
else:
|
||||||
addname = ''
|
addname = ''
|
||||||
|
|
||||||
other_name = ''
|
other_name = ''
|
||||||
# if 'chipB' in chip_idx and df['barcode'].duplicated().any():
|
|
||||||
# other_name = '_i7'
|
|
||||||
|
|
||||||
if df['data_needed'].sum() < (self.data_lower - 50) and not addname:
|
if df['orderdatavolume'].sum() < (self.data_lower - 50) and not addname:
|
||||||
df['note'] = f'排样数据量不足{self.data_lower - 50}G'
|
df['note'] = f'排样数据量不足{self.data_lower - 50}G'
|
||||||
self.no_assign_data.extend(df.to_dict('records'))
|
self.no_assign_data.extend(df.to_dict('records'))
|
||||||
continue
|
continue
|
||||||
|
|
@ -740,32 +722,26 @@ class AutoLayout:
|
||||||
df['note'] = '排样管数超标'
|
df['note'] = '排样管数超标'
|
||||||
self.no_assign_data.extend(df.to_dict('records'))
|
self.no_assign_data.extend(df.to_dict('records'))
|
||||||
continue
|
continue
|
||||||
librarynum += len(set(df['#library'].values))
|
librarynum += len(set(df['samplename'].values))
|
||||||
|
|
||||||
self.dec_barcode_radio(chip_idx)
|
self.dec_barcode_radio(chip_idx)
|
||||||
chipname = addname + chip_idx + other_name
|
chipname = addname + chip_idx + other_name
|
||||||
|
|
||||||
sum_list = list()
|
df = pd.concat([pd.DataFrame(self.items), df]).reset_index(drop=True)
|
||||||
for library, library_df in df.groupby('#library'):
|
|
||||||
sum_list.append(dict(
|
df.to_excel(writer, sheet_name=chipname, index=False)
|
||||||
二次拆分=library,
|
|
||||||
客户=library_df['customer'].values[0],
|
|
||||||
类型=library_df['classification'].values[0],
|
|
||||||
打折前=library_df['data_needed'].sum()
|
|
||||||
))
|
|
||||||
df_sum = pd.DataFrame(sum_list)
|
|
||||||
res_df = pd.concat([df, df_sum], axis=1)
|
|
||||||
res_df.to_excel(writer, sheet_name=chipname, index=False)
|
|
||||||
chip_loc += 1
|
chip_loc += 1
|
||||||
|
|
||||||
no_assign_df = pd.DataFrame(self.no_assign_data)
|
no_assign_df = pd.DataFrame(self.no_assign_data)
|
||||||
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
|
|
||||||
no_assign_df_not_balance = ','.join(set([lib for lib in no_assign_df['#library'] if lib in self.split_lib]))
|
|
||||||
if no_assign_df_not_balance:
|
|
||||||
self.return_log.append(f'文库{no_assign_df_not_balance}有做不平衡文库拆分处理,并且没有排完,请核查!')
|
|
||||||
if not no_assign_df.empty:
|
if not no_assign_df.empty:
|
||||||
no_assign_df = no_assign_df[self.need_cols]
|
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
|
||||||
no_assign_df.to_excel(writer, sheet_name='未测', index=False)
|
no_assign_df_not_balance = ','.join(set([lib for lib in no_assign_df['samplename'] if lib in self.split_lib]))
|
||||||
|
if no_assign_df_not_balance:
|
||||||
|
self.return_log.append(f'文库{no_assign_df_not_balance}有做不平衡文库拆分处理,并且没有排完,请核查!')
|
||||||
|
# if not no_assign_df.empty:
|
||||||
|
# no_assign_df = no_assign_df[self.need_cols]
|
||||||
|
no_assign_df = pd.concat([pd.DataFrame(self.items), no_assign_df]).reset_index(drop=True)
|
||||||
|
no_assign_df.to_excel(writer, sheet_name='未测', index=False)
|
||||||
if self.return_log:
|
if self.return_log:
|
||||||
pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False)
|
pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False)
|
||||||
writer.close()
|
writer.close()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue