main
chaopower 2024-02-29 15:43:48 +08:00
parent 6ba6b137dd
commit d1a3778f1c
1 changed files with 89 additions and 43 deletions

View File

@ -1,7 +1,7 @@
import copy
import os
import time
from collections import defaultdict
from collections import defaultdict, Counter
from datetime import datetime
import pandas as pd
@ -132,27 +132,27 @@ class AutoLayout:
today_date = datetime.now()
if 'nextera' in row['classification'].lower():
return 10
return 1000
if '华大' in row['classification']:
return 11
return 1100
if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
return 20
return 2000
if '加急' in row['priority']:
return 3000
if '补测' in row['priority']:
return 4000
mytime = row['time']
# 判断日期是之前的还是之后的
if mytime < today_date:
return 30
if '加急' in row['priority']:
return 40
if '补测' in row['priority']:
return 50
return 5000
else:
return 1000
return 100000
@staticmethod
def read_rule():
@ -190,6 +190,49 @@ class AutoLayout:
ori_data[name] = sheet.to_dict('records')
return ori_data
def combinations_same_barcode(self):
    """
    Raise the priority of level-1900 libraries whose barcodes collide.

    Every sample record of every entry in ``self.ori_lib_data`` whose
    ``level`` equals 1900 is collected, and the records are grouped by
    ``barcode``.  For each barcode shared by more than one record, the list
    of ``#library`` names using it forms one "collision group"; identical
    groups are counted once.  Each library's ``level`` is then lowered by
    the number of times it appears across the distinct groups.

    NOTE(review): level 1900 appears to be the marker given to rush
    ("极致") libraries with duplicated barcodes, and a lower level appears
    to sort earlier during assignment -- confirm against ``assign_samples``.

    The entries of ``self.ori_lib_data`` are modified in place; nothing is
    returned.
    """
    sample_rows = [
        sample
        for lib in self.ori_lib_data
        if lib['level'] == 1900
        for sample in lib['data']
    ]
    same_barcode_df = pd.DataFrame(sample_rows)
    if same_barcode_df.empty:
        return

    # Keep only the records whose barcode occurs more than once.
    duplicate_rows = same_barcode_df.groupby('barcode').filter(lambda grp: len(grp) > 1)
    if duplicate_rows.empty:
        # No barcode is actually shared, so there is nothing to re-rank.
        return

    # One list of library names per shared barcode.
    libraries_per_barcode = duplicate_rows.groupby('barcode')['#library'].apply(list)

    # Several barcodes may be shared by exactly the same libraries; such a
    # group must only be counted once.
    distinct_groups = {tuple(libs) for libs in libraries_per_barcode}
    counts = Counter(name for group in distinct_groups for name in group)

    for lib in self.ori_lib_data:
        if lib['library'] in counts:
            lib['level'] -= counts[lib['library']]
def add_new_data(self, chipname, library_data, newer=True):
"""
增加新数据到已知芯片上
@ -241,7 +284,8 @@ class AutoLayout:
return False
def use_rule_exclusive_customer(self, chipname, customer):
    """
    Tell whether ``customer`` is mutually exclusive with the chip's customers.

    Looks up every ``customer2`` value paired with ``customer`` in the
    ``customer1`` column of ``self.rule_exclusive_customer`` and checks
    whether any of them is already present in
    ``self.chip_customer[chipname]``.

    :param chipname: key into ``self.chip_customer``
        (presumably a set of customers per chip -- TODO confirm).
    :param customer: value matched against the ``customer1`` column.
    :return: ``True`` if at least one excluded customer is on the chip,
        otherwise ``False``.
    """
    rules = self.rule_exclusive_customer
    excluded_customers = set(rules[rules['customer1'] == customer]['customer2'])
    return bool(self.chip_customer[chipname].intersection(excluded_customers))
@ -285,8 +329,9 @@ class AutoLayout:
splibrary = False
# 甲基化文库不能大于250G
# 甲基化更改成100G
spmethylibrary = True
if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 250:
if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 100:
spmethylibrary = False
# 不使用平衡文库
@ -304,7 +349,12 @@ class AutoLayout:
if is_not_balance_list:
base_balance = False
if sizelimit and notrepeatbarcode and exclusive_classific and exclusive_customer and splibrary and base_balance and spmethylibrary:
# 华大的文库不能超过限制的一半
use_huada = True
if self.chip_speciallib_huada_size[chipname] > self.data_limit / 2:
use_huada = False
if sizelimit and notrepeatbarcode and exclusive_classific and exclusive_customer and splibrary and base_balance and spmethylibrary and use_huada:
return True
return False
@ -312,7 +362,7 @@ class AutoLayout:
"""
锚定芯片号增加
"""
# 有nextera, 华大文库 必须满足大于50G
# 有nextera, 华大文库 必须满足大于50G 到了芯片结算
chipname = f'chip{self.loc_chip_num}'
nextera_size = self.chip_speciallib_nextera_size[chipname]
huada_size = self.chip_speciallib_huada_size[chipname]
@ -351,7 +401,7 @@ class AutoLayout:
self.loc_chip_num += 1
def assign_samples(self):
ori_library_data = list()
# ori_library_data = list()
if '未测' not in self.ori_data.keys():
raise UserWarning('提供excel没有 未测 sheet ,请核查!')
@ -396,9 +446,10 @@ class AutoLayout:
ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce')
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
# 极致客户有重复的把等级调到0防止放到了最后到了未测里
ori_library_df.loc[
(ori_library_df.duplicated(subset='barcode')) & (ori_library_df['level'] == 20), 'level'] = 19
# 极致客户有重复的把等级调到19防止放到了最后到了未测里
must_lib_df = ori_library_df[ori_library_df['level'] == 2000]
must_lib = set(must_lib_df[must_lib_df.duplicated(subset='barcode', keep=False)]['#library'].to_list())
ori_library_df.loc[ori_library_df['#library'].isin(must_lib), 'level'] = 1900
for library, library_df in ori_library_df.groupby('#library'):
@ -410,13 +461,23 @@ class AutoLayout:
self.no_assign_data.extend(library_df.to_dict('records'))
continue
# 拆分处理
flag = False
# 拆分处理 分为了2个大文库
if size > (self.data_limit) / 2:
library_df['data_needed'] = library_df['data_needed'] / 2
flag = True
self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
self.ori_lib_data.append(dict(
library=library,
is_balance_lib=library_df['is_balance_lib'].values[0],
size=library_df['data_needed'].sum(),
split_method=library_df['拆分方式'].values[0],
time=library_df['time'].values[0],
level=library_df['level'].values[0],
customer=library_df['customer'].values[0],
classification=library_df['classification'].values[0],
data=library_df[self.need_cols].to_dict('records')
))
ori_library_data.append(dict(
self.ori_lib_data.append(dict(
library=library,
is_balance_lib=library_df['is_balance_lib'].values[0],
size=library_df['data_needed'].sum(),
@ -428,23 +489,9 @@ class AutoLayout:
data=library_df[self.need_cols].to_dict('records')
))
# 拆分对半
if flag:
self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
ori_library_data.append(dict(
library=library,
is_balance_lib=library_df['is_balance_lib'].values[0],
size=library_df['data_needed'].sum(),
split_method=library_df['拆分方式'].values[0],
time=library_df['time'].values[0],
level=library_df['level'].values[0],
customer=library_df['customer'].values[0],
classification=library_df['classification'].values[0],
data=library_df[self.need_cols].to_dict('records')
))
self.ori_lib_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time']))
self.combinations_same_barcode()
self.ori_lib_data = sorted(self.ori_lib_data, key=lambda x: (x['level'], x['time']))
# self.ori_lib_data = ori_sort_data
while self.ori_lib_data:
library_data = self.ori_lib_data[0]
chipname = f'chip{self.loc_chip_num}'
@ -474,6 +521,7 @@ class AutoLayout:
self.add_loc_num()
def run(self):
# print('# 测试代码')
# self.assign_samples()
try:
self.assign_samples()
@ -522,7 +570,7 @@ class AutoLayout:
res_df = pd.concat([df, df_sum], axis=1)
res_df.to_excel(writer, sheet_name=chipname, index=False)
chip_loc += 1
# self.no_assign_data.extend(self.diffic_assign_data)
no_assign_df = pd.DataFrame(self.no_assign_data)
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
if not no_assign_df.empty:
@ -543,5 +591,3 @@ if __name__ == '__main__':
end_time = time.time()
execution_time = end_time - start_time
print(f"代码执行时间为:{execution_time}")
# server()