不平衡文库大于250G 拆分处理
parent
29a651f874
commit
6225342e33
66
tools/t7.py
66
tools/t7.py
|
|
@ -25,21 +25,26 @@ class AutoLayout:
|
|||
self.librarynum = int(librarynum)
|
||||
self.data_limit = data_limit
|
||||
|
||||
# 芯片原始数据读取
|
||||
self.ori_data = self.read_excel()
|
||||
|
||||
# 记录所有的排好的芯片数据
|
||||
self.index_assignments = defaultdict(list)
|
||||
# 芯片数量量大小
|
||||
|
||||
# 记录每个芯片数量大小
|
||||
self.chip_size = dict()
|
||||
# 芯片是否极致
|
||||
self.chip_type = dict()
|
||||
# 芯片barcode
|
||||
|
||||
# 记录芯片barcode, i7, i5 barcode信息
|
||||
self.chip_barcode_recode = defaultdict(set)
|
||||
self.chip_barcodei7_recode = defaultdict(set)
|
||||
self.chip_barcodei5_recode = defaultdict(set)
|
||||
# 芯片原始数据读取
|
||||
self.ori_data = self.read_excel()
|
||||
|
||||
# 当前锚芯片
|
||||
self.loc_chip_num = 1
|
||||
|
||||
# 芯片客户
|
||||
self.chip_customer = defaultdict(set)
|
||||
|
||||
# 文库
|
||||
self.chip_classification = defaultdict(set)
|
||||
self.rule = self.read_rule()
|
||||
|
|
@ -55,16 +60,21 @@ class AutoLayout:
|
|||
self.chip_speciallib_nextera_size = dict()
|
||||
# 华大 文库
|
||||
self.chip_speciallib_huada_size = dict()
|
||||
# 排序好的文库数据
|
||||
self.ori_lib_data = list()
|
||||
|
||||
self.logger = log(os.path.basename(f'{path}.txt'))
|
||||
self.return_log = list()
|
||||
self.no_assign_data = list()
|
||||
self.ori_lib_data = list()
|
||||
|
||||
self.need_cols = self.read_cols()
|
||||
|
||||
self.is_use_balance = is_use_balance
|
||||
self.is_use_max = is_use_max
|
||||
|
||||
# 记录拆分的不平衡文库
|
||||
self.split_lib = set()
|
||||
|
||||
@staticmethod
|
||||
def read_cols():
|
||||
df = pd.read_excel(os.path.join(basedir, 'rule', 'columns.xlsx'))
|
||||
|
|
@ -252,6 +262,7 @@ class AutoLayout:
|
|||
:return:
|
||||
"""
|
||||
self.index_assignments[chipname].extend(library_data['data'])
|
||||
|
||||
self.chip_barcode_recode[chipname].update({item['barcode'] for item in library_data['data']})
|
||||
self.chip_barcodei7_recode[chipname].update({item['i7'] for item in library_data['data']})
|
||||
self.chip_barcodei5_recode[chipname].update({item['i5'] for item in library_data['data']})
|
||||
|
|
@ -318,6 +329,7 @@ class AutoLayout:
|
|||
|
||||
# barcode有重复
|
||||
notrepeatbarcode = True
|
||||
|
||||
if self.chip_barcode_recode[chipname].intersection({item['barcode'] for item in library_data['data']}) or \
|
||||
self.chip_barcode_recode[chipname].intersection(
|
||||
{'N' * 8 + item['i7'] for item in library_data['data']}) or \
|
||||
|
|
@ -370,9 +382,11 @@ class AutoLayout:
|
|||
if max_barcode != 'all':
|
||||
base_balance = True
|
||||
notrepeatbarcode = True
|
||||
if self.chip_barcodei7_recode[chipname].intersection({item['i7'] for item in library_data['data']}) and max_barcode == 'i7':
|
||||
if self.chip_barcodei7_recode[chipname].intersection(
|
||||
{item['i7'] for item in library_data['data']}) and max_barcode == 'i7':
|
||||
notrepeatbarcode = False
|
||||
if self.chip_barcodei5_recode[chipname].intersection({item['i5'] for item in library_data['data']}) and max_barcode == 'i5':
|
||||
if self.chip_barcodei5_recode[chipname].intersection(
|
||||
{item['i5'] for item in library_data['data']}) and max_barcode == 'i5':
|
||||
notrepeatbarcode = False
|
||||
# 是个N的取消
|
||||
if ('N' * 8 in {item['i5'] for item in library_data['data']}) and max_barcode == 'i5':
|
||||
|
|
@ -487,6 +501,7 @@ class AutoLayout:
|
|||
for library, library_df in ori_library_df.groupby('#library'):
|
||||
|
||||
size = library_df['data_needed'].sum()
|
||||
is_balance_lib = library_df['is_balance_lib'].values[0]
|
||||
|
||||
# 文库内部有重复
|
||||
if len(library_df['barcode'].values) > len(set(library_df['barcode'].values)):
|
||||
|
|
@ -494,6 +509,30 @@ class AutoLayout:
|
|||
self.no_assign_data.extend(library_df.to_dict('records'))
|
||||
continue
|
||||
|
||||
# 不平衡文库 大于250G 的数据 先进行拆分
|
||||
if is_balance_lib == '否' and size > 250:
|
||||
self.return_log.append(f'文库{library} 是不平衡文库, 数据为{size}, 大于250G, 已做拆分处理, 请注意!!! ')
|
||||
data_needed = library_df['data_needed'].copy()
|
||||
for num in range(int(size), 0, -200):
|
||||
addnum = 200
|
||||
if num <= 200:
|
||||
addnum = num
|
||||
library_df['data_needed'] = (addnum / size) * data_needed
|
||||
|
||||
self.ori_lib_data.append(dict(
|
||||
library=library,
|
||||
is_balance_lib=library_df['is_balance_lib'].values[0],
|
||||
size=library_df['data_needed'].sum(),
|
||||
split_method=library_df['拆分方式'].values[0],
|
||||
time=library_df['time'].values[0],
|
||||
level=1950,
|
||||
customer=library_df['customer'].values[0],
|
||||
classification=library_df['classification'].values[0],
|
||||
data=library_df[self.need_cols].to_dict('records')
|
||||
))
|
||||
self.split_lib.add(library)
|
||||
continue
|
||||
|
||||
# 拆分处理 分为了2个大文库
|
||||
if size > (self.data_limit) / 2:
|
||||
library_df['data_needed'] = library_df['data_needed'] / 2
|
||||
|
|
@ -574,13 +613,17 @@ class AutoLayout:
|
|||
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
|
||||
ori_lib_data = list()
|
||||
for library, library_df in ori_library_df.groupby('#library'):
|
||||
level = library_df['level'].values[0]
|
||||
if library in self.split_lib:
|
||||
level = 1950
|
||||
|
||||
ori_lib_data.append(dict(
|
||||
library=library,
|
||||
is_balance_lib=library_df['is_balance_lib'].values[0],
|
||||
size=library_df['data_needed'].sum(),
|
||||
split_method=library_df['拆分方式'].values[0],
|
||||
time=library_df['time'].values[0],
|
||||
level=library_df['level'].values[0],
|
||||
level=level,
|
||||
customer=library_df['customer'].values[0],
|
||||
classification=library_df['classification'].values[0],
|
||||
data=library_df[self.need_cols].to_dict('records')
|
||||
|
|
@ -677,6 +720,9 @@ class AutoLayout:
|
|||
|
||||
no_assign_df = pd.DataFrame(self.no_assign_data)
|
||||
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
|
||||
no_assign_df_not_balance = ','.join(set([lib for lib in no_assign_df['#library'] if lib in self.split_lib]))
|
||||
if no_assign_df_not_balance:
|
||||
self.return_log.append(f'文库{no_assign_df_not_balance}有做不平衡文库拆分处理,并且没有排完,请核查!')
|
||||
if not no_assign_df.empty:
|
||||
no_assign_df = no_assign_df[self.need_cols]
|
||||
no_assign_df.to_excel(writer, sheet_name='未测', index=False)
|
||||
|
|
|
|||
Loading…
Reference in New Issue