branch: main
parent: 6ba6b137dd
commit: d1a3778f1c

tools/t7.py (134 lines changed)
@@ -1,7 +1,7 @@
 import copy
 import os
 import time
-from collections import defaultdict
+from collections import defaultdict, Counter
 from datetime import datetime

 import pandas as pd
@@ -132,27 +132,27 @@ class AutoLayout:
         today_date = datetime.now()

         if 'nextera' in row['classification'].lower():
-            return 10
+            return 1000

         if '华大' in row['classification']:
-            return 11
+            return 1100

         if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
-            return 20
+            return 2000
+
+        if '加急' in row['priority']:
+            return 3000
+
+        if '补测' in row['priority']:
+            return 4000

         mytime = row['time']
         # 判断日期是之前的还是之后的
         if mytime < today_date:
-            return 30
-
-        if '加急' in row['priority']:
-            return 40
-
-        if '补测' in row['priority']:
-            return 50
+            return 5000

         else:
-            return 1000
+            return 100000

     @staticmethod
     def read_rule():
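The hunk above rescales every priority level by two orders of magnitude (10 → 1000, 20 → 2000, …) and moves the 加急 (rush) and 补测 (re-test) checks ahead of the date comparison, with 100000 as the fallback. A minimal sketch of the new ordering, using invented row values and the column names the diff relies on:

from datetime import datetime

import pandas as pd


def level(row):
    # Rescaled priorities: a smaller value means the library is scheduled earlier.
    if 'nextera' in row['classification'].lower():
        return 1000
    if '华大' in row['classification']:
        return 1100
    if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
        return 2000
    if '加急' in row['priority']:
        return 3000
    if '补测' in row['priority']:
        return 4000
    if row['time'] < datetime.now():
        return 5000
    return 100000


df = pd.DataFrame([
    {'classification': 'WGS', '拆分方式': '常规', 'priority': '加急', 'time': datetime(2030, 1, 1)},
    {'classification': 'Nextera', '拆分方式': '常规', 'priority': '正常', 'time': datetime(2030, 1, 1)},
])
df['level'] = df.apply(level, axis=1)
print(df['level'].tolist())  # [3000, 1000]

The wider spacing leaves room for intermediate values such as the 1900 assigned later to duplicate-barcode libraries.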
@@ -190,6 +190,49 @@ class AutoLayout:
             ori_data[name] = sheet.to_dict('records')
         return ori_data

+    def combinations_same_barcode(self):
+        """
+        barcode 有重复的极致样本 进行排列组合,汇集成新的可能性
+        """
+        # 筛选有重复的行
+
+        # same_barcode_data = [data for data in self.ori_lib_data if data['level'] == 1900]
+        # same_barcode_sorted = sorted(same_barcode_data, key=lambda x: (-x['size']))
+        #
+        # same_barcode_dict = dict()
+        # for index, data in enumerate(same_barcode_sorted):
+        #     same_barcode_dict[data['library']] = data['level'] + index + 1
+        # correct_data = list()
+        # for data in self.ori_lib_data:
+        #     if data in same_barcode_sorted:
+        #         data['level'] = same_barcode_dict[data['library']]
+        #     correct_data.append(data)
+        # self.ori_lib_data = correct_data
+
+        same_barcode_df = pd.DataFrame(
+            [spdata for data in self.ori_lib_data if data['level'] == 1900 for spdata in data['data']])
+
+        # 按照 'barcode' 列进行分组
+        if same_barcode_df.empty:
+            return
+        grouped = same_barcode_df.groupby('barcode')
+
+        # 获取具有重复的 'barcode' 分组
+        duplicate_groups = grouped.filter(lambda x: len(x) > 1)
+
+        # 提取这些分组,计算文库重复次数
+        grouped_names = duplicate_groups.groupby('barcode')['#library'].apply(list).reset_index()
+        random_list = list(set(tuple(sublst) for sublst in list(grouped_names['#library'])))
+        new_lst = [spdata for data in random_list for spdata in data]
+        counts = Counter(new_lst)
+
+        correct_data = list()
+        for data in self.ori_lib_data:
+            if data['library'] in counts:
+                data['level'] -= counts[data['library']]
+            correct_data.append(data)
+        self.ori_lib_data = correct_data
+
     def add_new_data(self, chipname, library_data, newer=True):
         """
         增加新数据到已知芯片上
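The new combinations_same_barcode method flattens the per-library records of every level-1900 entry, groups them by barcode, and uses Counter to tally how many duplicate-barcode groups each library appears in; that tally is then subtracted from the library's level so conflicted libraries are pulled forward in the later (level, time) sort. A self-contained sketch of the same groupby/Counter idea on made-up records:

from collections import Counter

import pandas as pd

lib_data = [
    {'library': 'L1', 'level': 1900, 'data': [{'#library': 'L1', 'barcode': 'B1'}]},
    {'library': 'L2', 'level': 1900, 'data': [{'#library': 'L2', 'barcode': 'B1'}]},
    {'library': 'L3', 'level': 1900, 'data': [{'#library': 'L3', 'barcode': 'B2'}]},
]

# Flatten the nested records and keep only barcodes that occur more than once.
df = pd.DataFrame([rec for d in lib_data if d['level'] == 1900 for rec in d['data']])
dups = df.groupby('barcode').filter(lambda g: len(g) > 1)

# One list of library names per conflicting barcode, then count appearances.
groups = dups.groupby('barcode')['#library'].apply(list).reset_index()
unique_groups = set(tuple(libs) for libs in groups['#library'])
counts = Counter(lib for libs in unique_groups for lib in libs)

for d in lib_data:
    if d['library'] in counts:
        d['level'] -= counts[d['library']]  # conflicted libraries sort earlier

print([(d['library'], d['level']) for d in lib_data])
# [('L1', 1899), ('L2', 1899), ('L3', 1900)]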
@@ -241,7 +284,8 @@ class AutoLayout:
             return False

     def use_rule_exclusive_customer(self, chipname, customer):
-        may_classfic = set(self.rule_exclusive_customer[self.rule_exclusive_customer['customer1'] == customer]['customer2'])
+        may_classfic = set(
+            self.rule_exclusive_customer[self.rule_exclusive_customer['customer1'] == customer]['customer2'])
         if self.chip_customer[chipname].intersection(may_classfic):
             return True
         return False
@@ -285,8 +329,9 @@ class AutoLayout:
             splibrary = False

         # 甲基化文库不能大于250G
+        # 甲基化更改成100G
         spmethylibrary = True
-        if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 250:
+        if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 100:
             spmethylibrary = False

         # 不使用平衡文库
@@ -304,7 +349,12 @@ class AutoLayout:
         if is_not_balance_list:
             base_balance = False

-        if sizelimit and notrepeatbarcode and exclusive_classific and exclusive_customer and splibrary and base_balance and spmethylibrary:
+        # 华大的文库不能超过限制的一半
+        use_huada = True
+        if self.chip_speciallib_huada_size[chipname] > self.data_limit / 2:
+            use_huada = False
+
+        if sizelimit and notrepeatbarcode and exclusive_classific and exclusive_customer and splibrary and base_balance and spmethylibrary and use_huada:
             return True
         return False

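The added use_huada flag is one more veto alongside the size, barcode, exclusivity and methylation checks: once the 华大 libraries already placed on a chip exceed half of self.data_limit, the combined check fails for any further candidate. A trimmed illustration of just that gate, with hypothetical sizes in G:

data_limit = 1000  # hypothetical per-chip limit
chip_speciallib_huada_size = {'chip1': 600, 'chip2': 300}


def huada_ok(chipname):
    # False once 华大 libraries already occupy more than half of the chip's limit.
    return chip_speciallib_huada_size[chipname] <= data_limit / 2


print(huada_ok('chip1'), huada_ok('chip2'))  # False True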
@@ -312,7 +362,7 @@ class AutoLayout:
         """
         锚定芯片号增加
         """
-        # 有nextera, 华大文库 必须满足大于50G
+        # 有nextera, 华大文库 必须满足大于50G 到了芯片结算
         chipname = f'chip{self.loc_chip_num}'
         nextera_size = self.chip_speciallib_nextera_size[chipname]
         huada_size = self.chip_speciallib_huada_size[chipname]
@@ -351,7 +401,7 @@ class AutoLayout:
         self.loc_chip_num += 1

     def assign_samples(self):
-        ori_library_data = list()
+        # ori_library_data = list()

         if '未测' not in self.ori_data.keys():
             raise UserWarning('提供excel没有 未测 sheet ,请核查!')
@@ -396,9 +446,10 @@ class AutoLayout:
         ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce')
         ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)

-        # 极致客户有重复的,把等级调到0,防止放到了最后,到了未测里
-        ori_library_df.loc[
-            (ori_library_df.duplicated(subset='barcode')) & (ori_library_df['level'] == 20), 'level'] = 19
+        # 极致客户有重复的,把等级调到19,防止放到了最后,到了未测里
+        must_lib_df = ori_library_df[ori_library_df['level'] == 2000]
+        must_lib = set(must_lib_df[must_lib_df.duplicated(subset='barcode', keep=False)]['#library'].to_list())
+        ori_library_df.loc[ori_library_df['#library'].isin(must_lib), 'level'] = 1900

         for library, library_df in ori_library_df.groupby('#library'):

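Instead of nudging duplicate level-20 rows down to 19, the new code marks every 极致 (level 2000) library whose barcode collides with another: duplicated(subset='barcode', keep=False) flags all members of a clashing group, and isin drops the whole library to 1900. A small standalone version on invented rows:

import pandas as pd

ori_library_df = pd.DataFrame([
    {'#library': 'L1', 'barcode': 'B1', 'level': 2000},
    {'#library': 'L2', 'barcode': 'B1', 'level': 2000},
    {'#library': 'L3', 'barcode': 'B2', 'level': 2000},
    {'#library': 'L4', 'barcode': 'B3', 'level': 3000},
])

# keep=False marks every member of a duplicated barcode group, not just the later ones.
must_lib_df = ori_library_df[ori_library_df['level'] == 2000]
must_lib = set(must_lib_df[must_lib_df.duplicated(subset='barcode', keep=False)]['#library'])
ori_library_df.loc[ori_library_df['#library'].isin(must_lib), 'level'] = 1900

print(ori_library_df['level'].tolist())  # [1900, 1900, 2000, 3000]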
@@ -410,28 +461,11 @@ class AutoLayout:
                 self.no_assign_data.extend(library_df.to_dict('records'))
                 continue

-            # 拆分处理
-            flag = False
+            # 拆分处理 分为了2个大文库
             if size > (self.data_limit) / 2:
                 library_df['data_needed'] = library_df['data_needed'] / 2
-                flag = True
-
-            ori_library_data.append(dict(
-                library=library,
-                is_balance_lib=library_df['is_balance_lib'].values[0],
-                size=library_df['data_needed'].sum(),
-                split_method=library_df['拆分方式'].values[0],
-                time=library_df['time'].values[0],
-                level=library_df['level'].values[0],
-                customer=library_df['customer'].values[0],
-                classification=library_df['classification'].values[0],
-                data=library_df[self.need_cols].to_dict('records')
-            ))
-
-            # 拆分对半
-            if flag:
                 self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
-                ori_library_data.append(dict(
+                self.ori_lib_data.append(dict(
                     library=library,
                     is_balance_lib=library_df['is_balance_lib'].values[0],
                     size=library_df['data_needed'].sum(),
@@ -442,9 +476,22 @@ class AutoLayout:
                     classification=library_df['classification'].values[0],
                     data=library_df[self.need_cols].to_dict('records')
                 ))
-        self.ori_lib_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time']))

-        # self.ori_lib_data = ori_sort_data
+            self.ori_lib_data.append(dict(
+                library=library,
+                is_balance_lib=library_df['is_balance_lib'].values[0],
+                size=library_df['data_needed'].sum(),
+                split_method=library_df['拆分方式'].values[0],
+                time=library_df['time'].values[0],
+                level=library_df['level'].values[0],
+                customer=library_df['customer'].values[0],
+                classification=library_df['classification'].values[0],
+                data=library_df[self.need_cols].to_dict('records')
+            ))
+
+        self.combinations_same_barcode()
+        self.ori_lib_data = sorted(self.ori_lib_data, key=lambda x: (x['level'], x['time']))
+
         while self.ori_lib_data:
             library_data = self.ori_lib_data[0]
             chipname = f'chip{self.loc_chip_num}'
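In the reorganised tail of assign_samples, a library whose demand exceeds half of self.data_limit has data_needed halved and, as the hunk reads, ends up appended to self.ori_lib_data twice (one dict per half, with a warning pushed to return_log); then combinations_same_barcode() runs and the queue is sorted by (level, time) before the while-loop consumes it. A compact sketch of that split-and-sort flow, with assumed sizes and without the full per-library record:

from datetime import datetime

data_limit = 800  # hypothetical chip capacity in G

libraries = [
    {'library': 'L1', 'size': 900, 'level': 2000, 'time': datetime(2024, 5, 1)},
    {'library': 'L2', 'size': 200, 'level': 1000, 'time': datetime(2024, 6, 1)},
]

ori_lib_data = []
for lib in libraries:
    if lib['size'] > data_limit / 2:
        # Oversized library: halve the demand and queue it as two entries.
        half = dict(lib, size=lib['size'] / 2)
        ori_lib_data.append(half)
        ori_lib_data.append(dict(half))
    else:
        ori_lib_data.append(dict(lib))

ori_lib_data = sorted(ori_lib_data, key=lambda x: (x['level'], x['time']))
print([(d['library'], d['size']) for d in ori_lib_data])
# [('L2', 200), ('L1', 450.0), ('L1', 450.0)]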
@@ -474,6 +521,7 @@ class AutoLayout:
             self.add_loc_num()

     def run(self):
         # print('# 测试代码')
+        # self.assign_samples()
         try:
             self.assign_samples()
@@ -522,7 +570,7 @@ class AutoLayout:
             res_df = pd.concat([df, df_sum], axis=1)
             res_df.to_excel(writer, sheet_name=chipname, index=False)
             chip_loc += 1
-        # self.no_assign_data.extend(self.diffic_assign_data)

         no_assign_df = pd.DataFrame(self.no_assign_data)
+        no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
         if not no_assign_df.empty:
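The added applymap line normalises any pandas Timestamp cells in the unassigned table via format_date before the Excel export. format_date comes from elsewhere in t7.py and is not shown in this diff, so the sketch below substitutes a hypothetical strftime-based stand-in:

import pandas as pd


def format_date(value):
    # Hypothetical stand-in for the project's own format_date helper.
    return value.strftime('%Y-%m-%d')


no_assign_df = pd.DataFrame({'library': ['L1'], 'time': [pd.Timestamp('2024-05-01 08:30')]})
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
print(no_assign_df.loc[0, 'time'])  # 2024-05-01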
@@ -543,5 +591,3 @@ if __name__ == '__main__':
     end_time = time.time()
     execution_time = end_time - start_time
     print(f"代码执行时间为:{execution_time} 秒")
-
-    # server()