main
chaopower 2024-02-29 15:43:48 +08:00
parent 6ba6b137dd
commit d1a3778f1c
1 changed files with 89 additions and 43 deletions

View File

@ -1,7 +1,7 @@
import copy import copy
import os import os
import time import time
from collections import defaultdict from collections import defaultdict, Counter
from datetime import datetime from datetime import datetime
import pandas as pd import pandas as pd
@ -132,27 +132,27 @@ class AutoLayout:
today_date = datetime.now() today_date = datetime.now()
if 'nextera' in row['classification'].lower(): if 'nextera' in row['classification'].lower():
return 10 return 1000
if '华大' in row['classification']: if '华大' in row['classification']:
return 11 return 1100
if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']: if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
return 20 return 2000
if '加急' in row['priority']:
return 3000
if '补测' in row['priority']:
return 4000
mytime = row['time'] mytime = row['time']
# 判断日期是之前的还是之后的 # 判断日期是之前的还是之后的
if mytime < today_date: if mytime < today_date:
return 30 return 5000
if '加急' in row['priority']:
return 40
if '补测' in row['priority']:
return 50
else: else:
return 1000 return 100000
@staticmethod @staticmethod
def read_rule(): def read_rule():
@ -190,6 +190,49 @@ class AutoLayout:
ori_data[name] = sheet.to_dict('records') ori_data[name] = sheet.to_dict('records')
return ori_data return ori_data
def combinations_same_barcode(self):
    """
    For 'ultimate cycle' (极致) libraries whose barcode is duplicated,
    combine the duplicate groupings and raise those libraries' priority.

    Gathers the per-sample rows of every library whose ``level`` is 1900
    (presumably the duplicate-barcode ultimate-cycle tag set upstream —
    TODO confirm against the caller), finds barcodes shared by more than
    one library, and decreases each involved library's ``level`` by the
    number of distinct duplicate groupings it appears in.  A lower level
    appears to sort earlier/higher priority — verify against the caller's
    sort.  Rebinds ``self.ori_lib_data`` to a new list containing the
    same (possibly mutated) library dicts.
    """
    # Select the rows with duplicated barcodes.
    # Previous size-based re-ranking approach, kept for reference:
    # same_barcode_data = [data for data in self.ori_lib_data if data['level'] == 1900]
    # same_barcode_sorted = sorted(same_barcode_data, key=lambda x: (-x['size']))
    #
    # same_barcode_dict = dict()
    # for index, data in enumerate(same_barcode_sorted):
    #     same_barcode_dict[data['library']] = data['level'] + index + 1
    # correct_data = list()
    # for data in self.ori_lib_data:
    #     if data in same_barcode_sorted:
    #         data['level'] = same_barcode_dict[data['library']]
    #     correct_data.append(data)
    # self.ori_lib_data = correct_data

    # Flatten the per-sample rows of all level-1900 libraries into one frame.
    same_barcode_df = pd.DataFrame(
        [spdata for data in self.ori_lib_data if data['level'] == 1900 for spdata in data['data']])
    # Nothing to adjust when there are no level-1900 libraries at all.
    if same_barcode_df.empty:
        return
    # Group by the 'barcode' column.
    grouped = same_barcode_df.groupby('barcode')
    # Keep only the groups whose 'barcode' occurs more than once.
    duplicate_groups = grouped.filter(lambda x: len(x) > 1)
    # For each duplicated barcode, collect the list of libraries sharing it.
    grouped_names = duplicate_groups.groupby('barcode')['#library'].apply(list).reset_index()
    # Deduplicate identical library groupings (as tuples); set order is
    # arbitrary, but only membership counts are used below.
    random_list = list(set(tuple(sublst) for sublst in list(grouped_names['#library'])))
    # Count how many duplicate groupings each library participates in.
    new_lst = [spdata for data in random_list for spdata in data]
    counts = Counter(new_lst)
    correct_data = list()
    for data in self.ori_lib_data:
        if data['library'] in counts:
            # Subtract the participation count so the library ranks earlier.
            data['level'] -= counts[data['library']]
        correct_data.append(data)
    self.ori_lib_data = correct_data
def add_new_data(self, chipname, library_data, newer=True): def add_new_data(self, chipname, library_data, newer=True):
""" """
增加新数据到已知芯片上 增加新数据到已知芯片上
@ -241,7 +284,8 @@ class AutoLayout:
return False return False
def use_rule_exclusive_customer(self, chipname, customer): def use_rule_exclusive_customer(self, chipname, customer):
may_classfic = set(self.rule_exclusive_customer[self.rule_exclusive_customer['customer1'] == customer]['customer2']) may_classfic = set(
self.rule_exclusive_customer[self.rule_exclusive_customer['customer1'] == customer]['customer2'])
if self.chip_customer[chipname].intersection(may_classfic): if self.chip_customer[chipname].intersection(may_classfic):
return True return True
return False return False
@ -285,8 +329,9 @@ class AutoLayout:
splibrary = False splibrary = False
# 甲基化文库不能大于250G # 甲基化文库不能大于250G
# 甲基化更改成100G
spmethylibrary = True spmethylibrary = True
if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 250: if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 100:
spmethylibrary = False spmethylibrary = False
# 不使用平衡文库 # 不使用平衡文库
@ -304,7 +349,12 @@ class AutoLayout:
if is_not_balance_list: if is_not_balance_list:
base_balance = False base_balance = False
if sizelimit and notrepeatbarcode and exclusive_classific and exclusive_customer and splibrary and base_balance and spmethylibrary: # 华大的文库不能超过限制的一半
use_huada = True
if self.chip_speciallib_huada_size[chipname] > self.data_limit / 2:
use_huada = False
if sizelimit and notrepeatbarcode and exclusive_classific and exclusive_customer and splibrary and base_balance and spmethylibrary and use_huada:
return True return True
return False return False
@ -312,7 +362,7 @@ class AutoLayout:
""" """
锚定芯片号增加 锚定芯片号增加
""" """
# 有nextera, 华大文库 必须满足大于50G # 有nextera, 华大文库 必须满足大于50G 到了芯片结算
chipname = f'chip{self.loc_chip_num}' chipname = f'chip{self.loc_chip_num}'
nextera_size = self.chip_speciallib_nextera_size[chipname] nextera_size = self.chip_speciallib_nextera_size[chipname]
huada_size = self.chip_speciallib_huada_size[chipname] huada_size = self.chip_speciallib_huada_size[chipname]
@ -351,7 +401,7 @@ class AutoLayout:
self.loc_chip_num += 1 self.loc_chip_num += 1
def assign_samples(self): def assign_samples(self):
ori_library_data = list() # ori_library_data = list()
if '未测' not in self.ori_data.keys(): if '未测' not in self.ori_data.keys():
raise UserWarning('提供excel没有 未测 sheet ,请核查!') raise UserWarning('提供excel没有 未测 sheet ,请核查!')
@ -396,9 +446,10 @@ class AutoLayout:
ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce') ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce')
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1) ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
# 极致客户有重复的把等级调到0防止放到了最后到了未测里 # 极致客户有重复的把等级调到19防止放到了最后到了未测里
ori_library_df.loc[ must_lib_df = ori_library_df[ori_library_df['level'] == 2000]
(ori_library_df.duplicated(subset='barcode')) & (ori_library_df['level'] == 20), 'level'] = 19 must_lib = set(must_lib_df[must_lib_df.duplicated(subset='barcode', keep=False)]['#library'].to_list())
ori_library_df.loc[ori_library_df['#library'].isin(must_lib), 'level'] = 1900
for library, library_df in ori_library_df.groupby('#library'): for library, library_df in ori_library_df.groupby('#library'):
@ -410,28 +461,11 @@ class AutoLayout:
self.no_assign_data.extend(library_df.to_dict('records')) self.no_assign_data.extend(library_df.to_dict('records'))
continue continue
# 拆分处理 # 拆分处理 分为了2个大文库
flag = False
if size > (self.data_limit) / 2: if size > (self.data_limit) / 2:
library_df['data_needed'] = library_df['data_needed'] / 2 library_df['data_needed'] = library_df['data_needed'] / 2
flag = True
ori_library_data.append(dict(
library=library,
is_balance_lib=library_df['is_balance_lib'].values[0],
size=library_df['data_needed'].sum(),
split_method=library_df['拆分方式'].values[0],
time=library_df['time'].values[0],
level=library_df['level'].values[0],
customer=library_df['customer'].values[0],
classification=library_df['classification'].values[0],
data=library_df[self.need_cols].to_dict('records')
))
# 拆分对半
if flag:
self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ') self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
ori_library_data.append(dict( self.ori_lib_data.append(dict(
library=library, library=library,
is_balance_lib=library_df['is_balance_lib'].values[0], is_balance_lib=library_df['is_balance_lib'].values[0],
size=library_df['data_needed'].sum(), size=library_df['data_needed'].sum(),
@ -442,9 +476,22 @@ class AutoLayout:
classification=library_df['classification'].values[0], classification=library_df['classification'].values[0],
data=library_df[self.need_cols].to_dict('records') data=library_df[self.need_cols].to_dict('records')
)) ))
self.ori_lib_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time']))
# self.ori_lib_data = ori_sort_data self.ori_lib_data.append(dict(
library=library,
is_balance_lib=library_df['is_balance_lib'].values[0],
size=library_df['data_needed'].sum(),
split_method=library_df['拆分方式'].values[0],
time=library_df['time'].values[0],
level=library_df['level'].values[0],
customer=library_df['customer'].values[0],
classification=library_df['classification'].values[0],
data=library_df[self.need_cols].to_dict('records')
))
self.combinations_same_barcode()
self.ori_lib_data = sorted(self.ori_lib_data, key=lambda x: (x['level'], x['time']))
while self.ori_lib_data: while self.ori_lib_data:
library_data = self.ori_lib_data[0] library_data = self.ori_lib_data[0]
chipname = f'chip{self.loc_chip_num}' chipname = f'chip{self.loc_chip_num}'
@ -474,6 +521,7 @@ class AutoLayout:
self.add_loc_num() self.add_loc_num()
def run(self): def run(self):
# print('# 测试代码')
# self.assign_samples() # self.assign_samples()
try: try:
self.assign_samples() self.assign_samples()
@ -522,7 +570,7 @@ class AutoLayout:
res_df = pd.concat([df, df_sum], axis=1) res_df = pd.concat([df, df_sum], axis=1)
res_df.to_excel(writer, sheet_name=chipname, index=False) res_df.to_excel(writer, sheet_name=chipname, index=False)
chip_loc += 1 chip_loc += 1
# self.no_assign_data.extend(self.diffic_assign_data)
no_assign_df = pd.DataFrame(self.no_assign_data) no_assign_df = pd.DataFrame(self.no_assign_data)
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x) no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
if not no_assign_df.empty: if not no_assign_df.empty:
@ -543,5 +591,3 @@ if __name__ == '__main__':
end_time = time.time() end_time = time.time()
execution_time = end_time - start_time execution_time = end_time - start_time
print(f"代码执行时间为:{execution_time}") print(f"代码执行时间为:{execution_time}")
# server()