Split handling for unbalanced libraries larger than 250G

main
chaopower 2024-03-12 14:58:32 +08:00
parent 29a651f874
commit 6225342e33
1 changed file with 58 additions and 12 deletions
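The change: an unbalanced library (empty `is_balance_lib`) requesting more than 250G is now pre-split into chunks of at most 200G before layout, each chunk carrying a proportional share of every row's `data_needed`, a fixed level of 1950, and an entry in `self.split_lib`. A minimal standalone sketch of the chunking arithmetic, assuming sizes are in G; the helper `chunk_unbalanced_library` is illustrative and not part of the committed code:

# Illustrative only: mirrors the loop `for num in range(int(size), 0, -200)`
# introduced in this commit; not a function that exists in AutoLayout.
def chunk_unbalanced_library(size, chunk=200):
    """Split `size` G into pieces of at most `chunk` G; the last piece keeps the remainder."""
    pieces = []
    for num in range(int(size), 0, -chunk):
        addnum = chunk if num > chunk else num
        pieces.append(addnum)
    return pieces

pieces = chunk_unbalanced_library(450)
print(pieces)                               # [200, 200, 50]
print(sum(pieces))                          # 450 -- the chunks add back up to the whole
print([round(p / 450, 4) for p in pieces])  # per-row scale factors: addnum / size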


@@ -25,21 +25,26 @@ class AutoLayout:
self.librarynum = int(librarynum)
self.data_limit = data_limit
# read the raw chip data
self.ori_data = self.read_excel()
# record all laid-out chip data
self.index_assignments = defaultdict(list)
# chip data size
# record the data size of each chip
self.chip_size = dict()
# whether the chip is extreme
self.chip_type = dict()
# record chip barcode, i7 and i5 barcode information
# chip barcode
self.chip_barcode_recode = defaultdict(set)
self.chip_barcodei7_recode = defaultdict(set)
self.chip_barcodei5_recode = defaultdict(set)
# read the raw chip data
self.ori_data = self.read_excel()
# current anchor chip
self.loc_chip_num = 1
# chip customers
self.chip_customer = defaultdict(set)
# libraries
self.chip_classification = defaultdict(set)
self.rule = self.read_rule()
@@ -55,16 +60,21 @@ class AutoLayout:
self.chip_speciallib_nextera_size = dict()
# Huada libraries
self.chip_speciallib_huada_size = dict()
# sorted library data
self.ori_lib_data = list()
self.logger = log(os.path.basename(f'{path}.txt'))
self.return_log = list()
self.no_assign_data = list()
self.ori_lib_data = list()
self.need_cols = self.read_cols()
self.is_use_balance = is_use_balance
self.is_use_max = is_use_max
# record the unbalanced libraries that were split
self.split_lib = set()
@staticmethod
def read_cols():
df = pd.read_excel(os.path.join(basedir, 'rule', 'columns.xlsx'))
@@ -252,6 +262,7 @@ class AutoLayout:
:return:
"""
self.index_assignments[chipname].extend(library_data['data'])
self.chip_barcode_recode[chipname].update({item['barcode'] for item in library_data['data']})
self.chip_barcodei7_recode[chipname].update({item['i7'] for item in library_data['data']})
self.chip_barcodei5_recode[chipname].update({item['i5'] for item in library_data['data']})
@@ -318,6 +329,7 @@ class AutoLayout:
# duplicate barcodes
notrepeatbarcode = True
if self.chip_barcode_recode[chipname].intersection({item['barcode'] for item in library_data['data']}) or \
self.chip_barcode_recode[chipname].intersection(
{'N' * 8 + item['i7'] for item in library_data['data']}) or \
@@ -370,11 +382,13 @@ class AutoLayout:
if max_barcode != 'all':
base_balance = True
notrepeatbarcode = True
if self.chip_barcodei7_recode[chipname].intersection(
{item['i7'] for item in library_data['data']}) and max_barcode == 'i7':
notrepeatbarcode = False
if self.chip_barcodei5_recode[chipname].intersection(
{item['i5'] for item in library_data['data']}) and max_barcode == 'i5':
notrepeatbarcode = False
# skip when the index is the all-N placeholder
if ('N' * 8 in {item['i5'] for item in library_data['data']}) and max_barcode == 'i5':
notrepeatbarcode = False
if ('N' * 8 in {item['i7'] for item in library_data['data']}) and max_barcode == 'i7':
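The checks above only treat an index clash as fatal on the side named by `max_barcode`, and additionally reject a candidate whose index on that side is the all-N placeholder (`'N' * 8`). A hedged sketch of that decision; `chip_i7`, `chip_i5` and `no_index_conflict` are illustrative names, not attributes or methods of `AutoLayout`:

# Illustrative sketch of the max_barcode-specific conflict check.
def no_index_conflict(chip_i7, chip_i5, lib_rows, max_barcode):
    """Return True when the library can join the chip without an index clash."""
    lib_i7 = {row['i7'] for row in lib_rows}
    lib_i5 = {row['i5'] for row in lib_rows}
    if max_barcode == 'i7':
        # clash on i7, or i7 is the all-N placeholder -> reject
        if chip_i7 & lib_i7 or 'N' * 8 in lib_i7:
            return False
    if max_barcode == 'i5':
        if chip_i5 & lib_i5 or 'N' * 8 in lib_i5:
            return False
    return True

# example: an i7 collision is only fatal when max_barcode == 'i7'
rows = [{'i7': 'AACCGGTT', 'i5': 'TTGGCCAA'}]
print(no_index_conflict({'AACCGGTT'}, set(), rows, 'i7'))  # False
print(no_index_conflict({'AACCGGTT'}, set(), rows, 'i5'))  # True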
@@ -487,6 +501,7 @@ class AutoLayout:
for library, library_df in ori_library_df.groupby('#library'):
size = library_df['data_needed'].sum()
is_balance_lib = library_df['is_balance_lib'].values[0]
# duplicate barcodes within the library
if len(library_df['barcode'].values) > len(set(library_df['barcode'].values)):
@@ -494,6 +509,30 @@ class AutoLayout:
self.no_assign_data.extend(library_df.to_dict('records'))
continue
# unbalanced libraries with more than 250G of data are split first
if is_balance_lib == '' and size > 250:
self.return_log.append(f'Library {library} is an unbalanced library with {size}G of data, more than 250G; it has been split, please take note!!!')
data_needed = library_df['data_needed'].copy()
for num in range(int(size), 0, -200):
addnum = 200
if num <= 200:
addnum = num
library_df['data_needed'] = (addnum / size) * data_needed
self.ori_lib_data.append(dict(
library=library,
is_balance_lib=library_df['is_balance_lib'].values[0],
size=library_df['data_needed'].sum(),
split_method=library_df['拆分方式'].values[0],
time=library_df['time'].values[0],
level=1950,
customer=library_df['customer'].values[0],
classification=library_df['classification'].values[0],
data=library_df[self.need_cols].to_dict('records')
))
self.split_lib.add(library)
continue
# split handling: divide into 2 large libraries
if size > (self.data_limit) / 2:
library_df['data_needed'] = library_df['data_needed'] / 2
@@ -574,13 +613,17 @@ class AutoLayout:
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
ori_lib_data = list()
for library, library_df in ori_library_df.groupby('#library'):
level = library_df['level'].values[0]
if library in self.split_lib:
level = 1950
ori_lib_data.append(dict(
library=library,
is_balance_lib=library_df['is_balance_lib'].values[0],
size=library_df['data_needed'].sum(),
split_method=library_df['拆分方式'].values[0],
time=library_df['time'].values[0],
level=level,
customer=library_df['customer'].values[0],
classification=library_df['classification'].values[0],
data=library_df[self.need_cols].to_dict('records')
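Libraries recorded in `self.split_lib` bypass the computed `self.level` and are pinned to 1950, so all chunks of one split library are prioritised together. A one-function sketch of the same override; the name `effective_level` and the constant are illustrative, not part of the committed code:

# Illustrative sketch of the level override for split libraries.
SPLIT_LIB_LEVEL = 1950

def effective_level(library, computed_level, split_lib):
    """Libraries that were split as unbalanced always sort at level 1950."""
    return SPLIT_LIB_LEVEL if library in split_lib else computed_level

print(effective_level('LIB_A', 1200, {'LIB_A'}))  # 1950
print(effective_level('LIB_B', 1200, {'LIB_A'}))  # 1200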
@@ -677,6 +720,9 @@ class AutoLayout:
no_assign_df = pd.DataFrame(self.no_assign_data)
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
no_assign_df_not_balance = ','.join(set([lib for lib in no_assign_df['#library'] if lib in self.split_lib]))
if no_assign_df_not_balance:
self.return_log.append(f'Library {no_assign_df_not_balance} was split as an unbalanced library and was not fully laid out, please check!')
if not no_assign_df.empty:
no_assign_df = no_assign_df[self.need_cols]
no_assign_df.to_excel(writer, sheet_name='未测', index=False)