2023-12-20 17:06:48 +08:00
|
|
|
|
import copy
|
2023-12-07 17:47:53 +08:00
|
|
|
|
import os
|
|
|
|
|
|
import time
|
2024-02-29 15:43:48 +08:00
|
|
|
|
from collections import defaultdict, Counter
|
2023-06-27 13:01:44 +08:00
|
|
|
|
from datetime import datetime
|
2024-06-20 11:12:59 +08:00
|
|
|
|
from io import BytesIO
|
2023-06-27 13:01:44 +08:00
|
|
|
|
|
2024-06-20 11:12:59 +08:00
|
|
|
|
import openpyxl
|
2023-12-07 17:47:53 +08:00
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
2024-06-20 11:12:59 +08:00
|
|
|
|
from tools.common import basedir
|
2023-06-27 13:01:44 +08:00
|
|
|
|
|
|
|
|
|
|
|
2024-06-20 11:12:59 +08:00
|
|
|
|
# 复制样式函数
|
|
|
|
|
|
def copy_cell_style(src_cell, dest_cell):
    """Copy the style attributes of one worksheet cell onto another.

    Every attribute is duplicated with ``copy.copy`` so the destination
    receives its own shallow copy rather than a reference to the source's
    style object.

    :param src_cell: cell whose style attributes are read
    :param dest_cell: cell whose style attributes are overwritten
    """
    style_attributes = (
        'font',
        'border',
        'fill',
        'number_format',
        'protection',
        'alignment',
    )
    for attribute in style_attributes:
        setattr(dest_cell, attribute, copy.copy(getattr(src_cell, attribute)))
|
2024-01-30 14:31:18 +08:00
|
|
|
|
|
|
|
|
|
|
|
2023-06-27 13:01:44 +08:00
|
|
|
|
class AutoLayout:
|
|
|
|
|
|
"""
|
|
|
|
|
|
自动化派样
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
2024-06-20 11:12:59 +08:00
|
|
|
|
def __init__(self, path, librarynum, is_use_balance=1, is_use_max=0, output=basedir, data_limit=1650,
             data_lower=1600):
    """Load the input workbook and rule tables and reset all bookkeeping.

    :param path: location of the input workbook, handed to ``read_excel``
    :param librarynum: library count; stored as ``int``
    :param is_use_balance: when falsy, ``judge_data`` ignores the unbalanced
        and methylation volume caps
    :param is_use_max: enables the extra relaxed base-balance rule in
        ``count_barcode_radio``
    :param output: output directory (defaults to the project ``basedir``)
    :param data_limit: upper bound on the data volume of one chip
    :param data_lower: volume below which ``assign_again_size`` dissolves a chip
    """
    # --- configuration ---
    self.path = path
    self.output = output
    self.librarynum = int(librarynum)
    self.data_limit = data_limit
    self.data_lower = data_lower
    self.is_use_balance = is_use_balance
    self.is_use_max = is_use_max

    self.get_col = []
    # Must exist before read_excel(), which overwrites it with the row
    # found directly below the header.
    self.items = []

    # --- input data and rule tables (read from disk) ---
    # Raw rows of the input workbook.
    self.ori_data = self.read_excel()
    # Pairs of library structures that may not share a chip.
    self.rule = self.read_rule()
    # Pairs of customers that may not share a chip.
    self.rule_exclusive_customer = self.read_rule_exclusive_customer()

    # --- per-chip bookkeeping, keyed by chip name ---
    # Every row already placed on a chip.
    self.index_assignments = defaultdict(list)
    # Total volume per chip, and the part of it whose barcode contains 'N'.
    self.chip_size = {}
    self.chip_size_N = {}
    # Barcodes seen per chip: combined i5+i7, i7 only, i5 only.
    self.chip_barcode_recode = defaultdict(set)
    self.chip_barcodei7_recode = defaultdict(set)
    self.chip_barcodei5_recode = defaultdict(set)
    # Customers and library structures present on each chip.
    self.chip_customer = defaultdict(set)
    self.chip_classification = defaultdict(set)
    # Sub-library (subsamplename) values present on each chip.
    self.chip_sublib = defaultdict(set)
    # Volume per chip of unbalanced, methylation, Nextera and Huada libraries.
    self.chip_speciallib_size = {}
    self.chip_methylib_size = {}
    self.chip_speciallib_nextera_size = {}
    self.chip_speciallib_huada_size = {}

    # Number of the chip currently being filled ("anchor" chip).
    self.loc_chip_num = 1

    # --- work lists ---
    # Library entries, sorted, waiting to be placed.
    self.ori_lib_data = []
    # Messages collected for the caller.
    self.return_log = []
    # Rows that could not be placed.
    self.no_assign_data = []
    # Rows of whole-lane ("包lane") orders, handled separately.
    self.order_assign_data = []
    # Names of unbalanced libraries that were split into chunks.
    self.split_lib = set()
|
|
|
|
|
|
|
2024-03-04 17:10:22 +08:00
|
|
|
|
@staticmethod
def read_cols():
    """Return the values of the ``cols`` column of ``rule/columns.xlsx``.

    :return: list built from the column's underlying array
    """
    columns_file = os.path.join(basedir, 'rule', 'columns.xlsx')
    return list(pd.read_excel(columns_file)['cols'].values)
|
|
|
|
|
|
|
|
|
|
|
|
def read_excel(self):
    """Read the input workbook at ``self.path``.

    The workbook is parsed twice. The first pass keeps only the row directly
    below the header and stores it (as a list of record dicts) in
    ``self.items``. The second pass skips that row, replaces missing values
    with empty strings and returns the remaining rows.

    :return: list of row dicts, one per data row
    """
    # Row under the header (the original comment calls it the header remarks).
    self.items = pd.read_excel(self.path, nrows=1).to_dict('records')

    table = pd.read_excel(self.path, skiprows=[1]).fillna('')
    return table.to_dict('records')
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
def read_rule():
    """Load the mutually exclusive library-structure pairs.

    Reads ``rule/exclusive_classfication.xlsx`` and appends a mirrored copy
    of its ``c1``/``c2`` columns, so each pair can be looked up from either
    side through ``c1``.

    :return: the combined table after ``reset_index()``
    """
    pairs = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive_classfication.xlsx'))
    mirrored = pd.DataFrame({'c1': pairs['c2'], 'c2': pairs['c1']})
    return pd.concat([pairs, mirrored]).reset_index()
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
def read_rule_exclusive_customer():
    """Load the mutually exclusive customer pairs.

    Reads ``rule/exclusive_customer.xlsx`` and appends a mirrored copy of
    its ``customer1``/``customer2`` columns, so each pair can be looked up
    from either side through ``customer1``.

    :return: the combined table after ``reset_index()``
    """
    pairs = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive_customer.xlsx'))
    mirrored = pd.DataFrame({'customer1': pairs['customer2'], 'customer2': pairs['customer1']})
    return pd.concat([pairs, mirrored]).reset_index()
|
|
|
|
|
|
|
|
|
|
|
|
def count_barcode_radio(self, data, maxt=''):
    """Compute the per-position base composition of a set of barcodes.

    Each row contributes its ``orderdatavolume`` to the base found at every
    position of its ``indexi5i7`` barcode. Volume carried by ``N`` is left
    out of both numerator and denominator.

    :param data: list of row dicts with ``indexi5i7`` and ``orderdatavolume``
    :param maxt: ``'indexi5'`` looks at characters 0-7 only, ``'indexi7'`` at
        characters 8-15 only; any other value uses all 16
    :return: ``(ratio_sites, is_not_balance_list)`` where ``ratio_sites`` maps
        the position (counted from the start of the inspected slice) to a
        ``{base: ratio}`` dict, and ``is_not_balance_list`` holds one message
        per position that fails every balance rule
    """
    frame = pd.DataFrame(data)
    ratio_sites = {}
    unbalanced = []
    if frame.empty:
        return ratio_sites, unbalanced

    if maxt == 'indexi5':
        start, stop = 0, 8
    elif maxt == 'indexi7':
        start, stop = 8, 16
    else:
        start, stop = 0, 16
    width = stop - start

    frame['indexi5i7'] = frame['indexi5i7'].str.slice(start, stop)
    # Splitting on the empty pattern yields one column per character plus an
    # empty column at each end; the iloc drops those two.
    letters = frame['indexi5i7'].str.split('', expand=True).iloc[:, 1:-1].values
    position_names = ['T' + str(pos) for pos in range(width)]
    per_base = pd.DataFrame(letters, columns=position_names).join(frame['orderdatavolume'])
    total_volume = per_base['orderdatavolume'].sum()

    for pos, name in enumerate(position_names):
        volumes = per_base.groupby(name).agg({'orderdatavolume': 'sum'})

        # Leave N out of the count.
        n_volume = 0
        if 'N' in volumes.index:
            n_volume = volumes.loc['N', 'orderdatavolume']
            volumes = volumes.drop('N')

        volumes['ratio'] = volumes['orderdatavolume'] / (total_volume - n_volume)
        ratio = volumes['ratio'].to_dict()
        ratio_sites[pos] = ratio

        # Number of bases falling in each ratio band:
        #   A: >= 0.6        B: [0.2, 0.6)    C: [0.15, 0.2)
        #   D: [0.1, 0.15)   E: [0.08, 0.1)   F: < 0.08
        #   G: [0.125, 0.625] (the additional relaxed rule)
        bands = dict.fromkeys('ABCDEFG', 0)
        for base in ('A', 'T', 'C', 'G'):
            # Bases that never occur are written back into the result as 0.
            share = ratio.setdefault(base, 0)
            if share >= 0.6:
                bands['A'] += 1
            elif share >= 0.2:
                bands['B'] += 1
            elif share >= 0.15:
                bands['C'] += 1
            elif share >= 0.1:
                bands['D'] += 1
            elif share >= 0.08:
                bands['E'] += 1
            elif share < 0.08:
                bands['F'] += 1
            if 0.125 <= share <= 0.625:
                bands['G'] += 1

        a, b, c, d, e, f, g = (bands[key] for key in 'ABCDEFG')
        balanced = (
            b + c + d == 4
            or (f == 1 and a + b == 3)
            or (e == 1 and d == 1 and a + b + c == 2)
            or (e == 1 and a + b + c == 3)
            or (f == 1 and g == 3 and self.is_use_max)
        )
        if not balanced:
            unbalanced.append('第%s位置,算出结果为 %s' % (pos, ratio))

    return ratio_sites, unbalanced
|
2023-06-27 13:01:44 +08:00
|
|
|
|
|
2023-12-20 17:06:48 +08:00
|
|
|
|
def dec_barcode_radio(self, chipname):
    """Report base imbalance for one chip.

    Runs ``count_barcode_radio`` over the rows placed on ``chipname``. When
    any position is unbalanced, one combined message is appended to
    ``self.return_log`` and printed.

    :param chipname: key into ``self.index_assignments``
    """
    _, problems = self.count_barcode_radio(self.index_assignments[chipname])
    if not problems:
        return
    message = '芯片{}有碱基不平衡:\n{}'.format(chipname, '\n'.join(problems))
    self.return_log.append(message)
    print(message)
|
2023-06-27 13:01:44 +08:00
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def level(row):
|
2023-12-07 17:47:53 +08:00
|
|
|
|
|
|
|
|
|
|
today_date = datetime.now()
|
|
|
|
|
|
|
2024-06-20 11:12:59 +08:00
|
|
|
|
if '贞固' in row['companynamea'].lower():
|
|
|
|
|
|
return 999
|
|
|
|
|
|
|
2024-03-30 21:42:23 +08:00
|
|
|
|
if 'nextera' in row['librarystructure'].lower():
|
2024-02-29 15:43:48 +08:00
|
|
|
|
return 1000
|
2024-02-05 17:13:32 +08:00
|
|
|
|
|
2024-03-30 21:42:23 +08:00
|
|
|
|
if '华大' in row['librarystructure']:
|
2024-02-29 15:43:48 +08:00
|
|
|
|
return 1100
|
2024-01-16 18:02:24 +08:00
|
|
|
|
|
2024-03-30 21:42:23 +08:00
|
|
|
|
if row['cycletype'] == '极致周期' or '极致' in row['cycletype']:
|
2024-02-29 15:43:48 +08:00
|
|
|
|
return 2000
|
2023-06-27 13:01:44 +08:00
|
|
|
|
|
2024-06-20 11:12:59 +08:00
|
|
|
|
mytime = row['createdtime']
|
2024-02-29 15:43:48 +08:00
|
|
|
|
# 判断日期是之前的还是之后的
|
|
|
|
|
|
if mytime < today_date:
|
|
|
|
|
|
return 5000
|
2023-06-27 13:01:44 +08:00
|
|
|
|
|
|
|
|
|
|
else:
|
2024-02-29 15:43:48 +08:00
|
|
|
|
return 100000
|
2023-06-27 13:01:44 +08:00
|
|
|
|
|
2024-02-29 15:43:48 +08:00
|
|
|
|
def combinations_same_barcode(self):
    """Raise the priority of level-1900 libraries that share barcodes.

    Rows of every entry whose level is 1900 are grouped by ``indexi5i7``.
    For each barcode carried by more than one row, the list of library
    names is collected; identical lists are counted once. Each entry in
    ``self.ori_lib_data`` whose ``library`` appears in those lists has its
    ``level`` lowered by the number of appearances.
    """
    rows = [record
            for entry in self.ori_lib_data if entry['level'] == 1900
            for record in entry['data']]
    frame = pd.DataFrame(rows)
    if frame.empty:
        return

    # Keep only rows whose barcode occurs more than once.
    shared = frame.groupby('indexi5i7').filter(lambda group: len(group) > 1)

    # One list of library names per shared barcode; duplicates of the same
    # list collapse into a single tuple.
    names_per_barcode = shared.groupby('indexi5i7')['presamplename'].apply(list).reset_index()
    distinct_groups = {tuple(names) for names in names_per_barcode['presamplename']}
    counts = Counter(name for group in distinct_groups for name in group)

    for entry in self.ori_lib_data:
        if entry['library'] in counts:
            entry['level'] -= counts[entry['library']]
    self.ori_lib_data = list(self.ori_lib_data)
|
|
|
|
|
|
|
2024-02-05 17:13:32 +08:00
|
|
|
|
def add_new_data(self, chipname, library_data, newer=True):
    """Place one library entry on a chip and update every per-chip record.

    Fixes over the previous version:

    * On a new chip every volume counter is now initialised. Previously the
      ``if / elif / else`` chain left ``chip_speciallib_size`` or
      ``chip_methylib_size`` without an entry, so a later accumulation (or
      the read in ``judge_data``) raised ``KeyError``.
    * A new chip and an existing chip now use the same counting rules. An
      unbalanced library whose structure contains ``甲基化`` is counted in
      both counters in either case, as the existing-chip branch already did.

    :param chipname: chip receiving the library
    :param library_data: entry dict with ``data`` (row dicts), ``size``,
        ``customer``, ``classification`` and ``is_balance_lib``
    :param newer: ``True`` when this is the first library on the chip, which
        resets the chip's volume counters before counting
    :return: None
    """
    records = library_data['data']
    size = library_data['size']
    classification = library_data['classification']
    is_huada = '华大' in classification

    self.index_assignments[chipname].extend(records)

    full_barcodes = self.chip_barcode_recode[chipname]
    full_barcodes.update(item['indexi5i7'] for item in records)
    self.chip_barcodei7_recode[chipname].update(item['indexi7'] for item in records)
    self.chip_barcodei5_recode[chipname].update(item['indexi5'] for item in records)
    if is_huada:
        # Huada libraries must not repeat an i7, so N*8 + i7 is reserved too.
        full_barcodes.update('N' * 8 + item['indexi7'] for item in records)

    # Sub-library names present on the chip.
    self.chip_sublib[chipname].update(item['subsamplename'] for item in records)

    self.chip_customer[chipname].add(library_data['customer'])
    self.chip_classification[chipname].add(classification)

    # Each counter paired with whether this library counts towards it.
    counters = (
        (self.chip_size, True),
        (self.chip_size_N, 'N' in records[0]['indexi5i7']),
        (self.chip_speciallib_size, library_data['is_balance_lib'] == '否'),
        (self.chip_methylib_size, '甲基化' in classification),
        (self.chip_speciallib_nextera_size, 'nextera' in classification.lower()),
        (self.chip_speciallib_huada_size, is_huada),
    )
    for counter, applies in counters:
        previous = 0 if newer else counter.get(chipname, 0)
        counter[chipname] = previous + size if applies else previous
|
|
|
|
|
|
|
2024-03-22 15:43:06 +08:00
|
|
|
|
def use_rule_exclusive_classfication(self, chipname, classfication):
    """Tell whether a library structure conflicts with a chip's content.

    Looks up, in ``self.rule``, every structure listed as exclusive with
    ``classfication`` and checks whether one of them is already on the chip.

    Fix: the lookup result is now compared with the chip's recorded library
    structures (``self.chip_classification``). It used to be compared with
    the chip's customers, so structure rules were matched against customer
    names.

    :param chipname: chip being filled
    :param classfication: library structure of the candidate library
    :return: ``True`` when the candidate must not be placed on this chip
    """
    blocked = set(self.rule.loc[self.rule['c1'] == classfication, 'c2'])
    return not self.chip_classification[chipname].isdisjoint(blocked)
|
|
|
|
|
|
|
2024-02-23 16:45:58 +08:00
|
|
|
|
def use_rule_exclusive_customer(self, chipname, customer):
    """Tell whether a customer conflicts with the customers on a chip.

    :param chipname: chip being filled
    :param customer: customer of the candidate library
    :return: ``True`` when a customer listed in ``self.rule_exclusive_customer``
        as exclusive with ``customer`` is already on the chip
    """
    table = self.rule_exclusive_customer
    blocked = set(table[table['customer1'] == customer]['customer2'])
    return bool(self.chip_customer[chipname].intersection(blocked))
|
|
|
|
|
|
|
2024-03-04 17:10:22 +08:00
|
|
|
|
def judge_data(self, chipname, library_data, max_barcode='all'):
    """Decide whether a library entry may be added to a chip.

    The same constraints as before are applied. They are now evaluated
    cheapest-first and the function returns ``False`` at the first violated
    one, so the expensive base-balance computation runs only when every
    other constraint holds. Further changes:

    * the chip's rows are no longer deep-copied for the balance check; a
      new list is built instead, which leaves the stored rows untouched
    * in i5/i7 mode the full-barcode balance is no longer computed and then
      discarded
    * the unbalanced and methylation counters are read with a default of 0,
      so a chip that has no entry in them does not raise ``KeyError``

    Constraints:

    1. chip volume plus the library must not exceed ``self.data_limit``
    2. volume whose barcode contains ``N`` must not exceed 450
    3. a library whose structure contains ``华大`` is refused once the chip's
       Huada volume exceeds half of ``self.data_limit``
    4. unbalanced libraries (``is_balance_lib == '否'``) are refused while
       ``self.loc_chip_num == 1``
    5. when ``self.is_use_balance`` is truthy: unbalanced volume <= 250 and
       methylation volume <= 100
    6. barcodes must not clash (see ``max_barcode``)
    7. sub-library names must not repeat on the chip
    8. exclusive structure and exclusive customer rules
    9. once the chip holds more than 900, adding the library must keep
       every barcode position balanced

    NOTE(review): constraint 5 tests ``is_balance_lib == '甲基化'`` while the
    methylation counter is fed by the library structure in ``add_new_data``;
    confirm which field is meant to carry the methylation flag.

    :param chipname: chip being filled
    :param library_data: entry dict with ``data``, ``size``, ``customer``,
        ``classification`` and ``is_balance_lib``
    :param max_barcode: ``'all'`` compares full barcodes (plus the N-padded
        i7 and i5 forms); ``'indexi7'`` / ``'indexi5'`` compare only that
        index, refuse an all-``N`` index, and run the balance check on that
        half only
    :return: ``True`` when the library may be added
    """
    records = library_data['data']
    size = library_data['size']
    classification = library_data['classification']
    is_balance_lib = library_data['is_balance_lib']
    chip_total = self.chip_size[chipname]

    # 1. Chip volume limit.
    if chip_total + size > self.data_limit:
        return False

    # 2. Volume carrying N in the barcode: half of the 900 balance threshold.
    size_N = size if 'N' in records[0]['indexi5i7'] else 0
    if self.chip_size_N[chipname] + size_N > 450:
        return False

    # 3. No more Huada libraries once they exceed half of the limit.
    if '华大' in classification and self.chip_speciallib_huada_size[chipname] > self.data_limit / 2:
        return False

    # 4. Unbalanced libraries never go on the first chip.
    if is_balance_lib == '否' and self.loc_chip_num == 1:
        return False

    # 5. Volume caps, skipped entirely when the switch is off.
    if self.is_use_balance:
        if is_balance_lib == '否' and self.chip_speciallib_size.get(chipname, 0) + size > 250:
            return False
        if is_balance_lib == '甲基化' and self.chip_methylib_size.get(chipname, 0) + size > 100:
            return False

    # 6. Barcode clashes.
    if max_barcode == 'all':
        candidates = set()
        for item in records:
            candidates.add(item['indexi5i7'])
            candidates.add('N' * 8 + item['indexi7'])
            candidates.add(item['indexi5'] + 'N' * 8)
        if not self.chip_barcode_recode[chipname].isdisjoint(candidates):
            return False
    elif max_barcode in ('indexi7', 'indexi5'):
        recorded = self.chip_barcodei7_recode if max_barcode == 'indexi7' else self.chip_barcodei5_recode
        single_index = {item[max_barcode] for item in records}
        # An index made only of N cannot be told apart, so it is refused.
        if 'N' * 8 in single_index or not recorded[chipname].isdisjoint(single_index):
            return False

    # 7. Sub-library names must be unique on the chip.
    if not self.chip_sublib[chipname].isdisjoint(item['subsamplename'] for item in records):
        return False

    # 8. Exclusion tables.
    if self.use_rule_exclusive_classfication(chipname, classification):
        return False
    if self.use_rule_exclusive_customer(chipname, library_data['customer']):
        return False

    # 9. Base balance, enforced only once the chip already holds more than 900.
    if chip_total > 900:
        combined = list(self.index_assignments[chipname]) + list(records)
        if max_barcode == 'all':
            _, problems = self.count_barcode_radio(combined)
        else:
            _, problems = self.count_barcode_radio(combined, maxt=max_barcode)
        if problems:
            return False

    return True
|
|
|
|
|
|
|
2024-03-04 17:10:22 +08:00
|
|
|
|
def add_loc_num(self, chipname):
    """Close the current chip, or first strip under-filled special libraries.

    A chip that carries Nextera or Huada libraries must carry at least 50
    of that kind. When the recorded volume of a kind is between 0 and 50
    (exclusive), every row of that kind is moved to ``self.no_assign_data``,
    its barcodes are released, the volume is subtracted from the chip, and
    the chip stays open. Otherwise ``self.loc_chip_num`` is incremented.

    Fixes over the previous version:

    * removed barcodes were collected with ``set.update(barcode)``, which
      adds the barcode's individual characters, so nothing was ever removed
      from ``chip_barcode_recode``; whole barcodes are now collected
    * rows are inspected through ``librarystructure`` when they have no
      ``classification`` key (the rows built in this file carry the former)
    * Nextera rows are matched by substring, the same test that accumulates
      ``chip_speciallib_nextera_size``; an exact match could leave rows on
      the chip while their volume was subtracted
    * for Huada rows the reserved ``N*8 + i7`` entry is released as well

    NOTE(review): the i7-only, i5-only and sub-library records of removed
    rows are not rewound here; confirm whether they should be.

    :param chipname: chip to close or clean up
    """
    nextera_size = self.chip_speciallib_nextera_size[chipname]
    huada_size = self.chip_speciallib_huada_size[chipname]

    def structure_of(record):
        # Prefer the key the previous code read; fall back to the column
        # the rows actually carry.
        if 'classification' in record:
            return str(record['classification'])
        return str(record.get('librarystructure', ''))

    def evict(is_target, size_table, reserved_n_i7):
        """Move matching rows off the chip and release what they occupied."""
        kept, released = [], set()
        for record in self.index_assignments[chipname]:
            if is_target(structure_of(record)):
                self.no_assign_data.append(record)
                released.add(record['indexi5i7'])
                if reserved_n_i7:
                    released.add('N' * 8 + record['indexi7'])
            else:
                kept.append(record)
        # Never release a barcode that a remaining row still uses.
        released.difference_update(record['indexi5i7'] for record in kept)
        self.index_assignments[chipname] = kept
        self.chip_barcode_recode[chipname] -= released
        self.chip_size[chipname] -= size_table[chipname]
        size_table[chipname] = 0

    chip_complete = True
    if 0 < nextera_size < 50:
        # Nextera present but below 50: take it off the chip.
        evict(lambda structure: 'nextera' in structure.lower(),
              self.chip_speciallib_nextera_size, reserved_n_i7=False)
        chip_complete = False
    if 0 < huada_size < 50:
        # Huada present but below 50: take it off the chip.
        evict(lambda structure: '华大' in structure,
              self.chip_speciallib_huada_size, reserved_n_i7=True)
        chip_complete = False

    if chip_complete:
        self.loc_chip_num += 1
|
2024-01-16 18:02:24 +08:00
|
|
|
|
|
2023-06-27 13:01:44 +08:00
|
|
|
|
def assign_samples(self):
    """Validate the raw rows, group them into libraries and pack the chips.

    Steps:

    1. Rows whose ``orderdatavolume`` is not numeric or whose
       ``createdtime`` is not a date go to ``self.no_assign_data``. Barcodes
       that are not 16 characters long are rebuilt from the last 8
       characters of ``indexi5`` and ``indexi7``. Rows whose ``productname``
       contains ``包lane`` go to ``self.order_assign_data``.
    2. Each remaining row gets a priority from ``self.level``. Libraries at
       level 2000 that share a barcode with another level-2000 row are moved
       to 1900.
    3. Rows are grouped by ``presamplename``. A library with repeated
       barcodes is rejected. An unbalanced library above 250 is split into
       chunks of at most 200 at level 1950. Any other library above half of
       ``self.data_limit`` is split into two halves.
    4. Entries are sorted by ``(level, time)`` and placed greedily: the
       first entry that ``judge_data`` accepts goes on the current chip;
       when none fits, ``add_loc_num`` closes the chip.

    Changes over the previous version:

    * splitting an unbalanced library keeps its whole volume; iterating
      over ``range(int(size), 0, -200)`` dropped the fractional part
    * ``orderdatavolume`` is converted to a numeric dtype after validation,
      so numeric text is summed rather than concatenated
    * ``str.contains`` is called with ``na=False`` so non-text product names
      cannot poison the boolean mask
    * filtered frames and groups are copied before being modified
    * queue entries are removed by position instead of by dict equality,
      and the head of the queue is no longer judged twice
    * ``add_loc_num`` is called at most once per iteration, so a chip number
      can no longer be skipped
    * empty input returns early instead of failing on missing columns
    """

    def library_entry(name, frame, level=None):
        """Summarise one library's rows as the dict used by the packing loop."""
        return dict(
            library=name,
            sample_code=frame['sampleCode'].values[0],
            is_balance_lib=frame['librarybalancedflag'].values[0],
            size=frame['orderdatavolume'].sum(),
            split_method=frame['cycletype'].values[0],
            time=frame['receivedtime'].values[0],
            level=frame['level'].values[0] if level is None else level,
            customer=frame['companynamea'].values[0],
            classification=frame['librarystructure'].values[0],
            data=frame.to_dict('records'),
        )

    table = pd.DataFrame(self.ori_data)
    if table.empty:
        return

    # --- 1. validation ---------------------------------------------------
    numeric_mask = pd.to_numeric(table['orderdatavolume'], errors='coerce').notna()
    time_mask = pd.to_datetime(table['createdtime'], errors='coerce').notna()
    valid_mask = numeric_mask & time_mask

    # Barcodes that are not 16 characters long are rebuilt from i5 + i7.
    barcode_mask = table['indexi5i7'].str.len() != 16
    table.loc[barcode_mask, 'indexi5i7'] = (
        table.loc[barcode_mask, 'indexi5'].str[-8:]
        + table.loc[barcode_mask, 'indexi7'].str[-8:]
    )

    table['note'] = ''
    table.loc[~numeric_mask, 'note'] = 'data_needed 列非数字'
    table.loc[~time_mask, 'note'] = 'time 列非日期'
    table.loc[barcode_mask, 'note'] = '非16位barcode,已修改'

    self.no_assign_data.extend(table[~valid_mask].to_dict('records'))

    # Whole-lane orders are set aside.
    orderlane_mask = table['productname'].str.contains('包lane', na=False)
    self.order_assign_data = table[orderlane_mask].to_dict('records')

    table = table[valid_mask & ~orderlane_mask].copy()
    if table.empty:
        return

    # --- 2. priorities ---------------------------------------------------
    table['orderdatavolume'] = pd.to_numeric(table['orderdatavolume'])
    table['createdtime'] = pd.to_datetime(table['createdtime'], errors='coerce')
    table['level'] = table.apply(self.level, axis=1)

    # Level-2000 libraries that share a barcode are moved to 1900 so they
    # are not left until last and end up unassigned.
    urgent = table[table['level'] == 2000]
    clashing = set(urgent.loc[urgent.duplicated(subset='indexi5i7', keep=False), 'presamplename'])
    table.loc[table['presamplename'].isin(clashing), 'level'] = 1900

    # --- 3. one entry (or several chunks) per library --------------------
    for library, group in table.groupby('presamplename'):
        library_df = group.copy()
        size = library_df['orderdatavolume'].sum()
        is_balance_lib = library_df['librarybalancedflag'].values[0]

        # Repeated barcodes inside one library: reject the whole library.
        if not library_df['indexi5i7'].is_unique:
            library_df['note'] = '文库内部有重复'
            self.no_assign_data.extend(library_df.to_dict('records'))
            continue

        # Unbalanced library above 250: split into chunks of at most 200.
        if is_balance_lib == '否' and size > 250:
            self.return_log.append(f'文库{library} 是不平衡文库, 数据为{size}, 大于250G, 已做拆分处理, 请注意!!! ')
            original_volumes = library_df['orderdatavolume'].copy()
            remaining = float(size)
            while remaining > 1e-9:
                chunk = min(200.0, remaining)
                # Scale every row so the chunk sums to ``chunk``.
                library_df['orderdatavolume'] = (chunk / size) * original_volumes
                self.ori_lib_data.append(library_entry(library, library_df, level=1950))
                remaining -= chunk
            self.split_lib.add(library)
            continue

        # Library above half a chip: split into two equal halves.
        if size > self.data_limit / 2:
            library_df['orderdatavolume'] = library_df['orderdatavolume'] / 2
            self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
            self.ori_lib_data.append(library_entry(library, library_df))

        self.ori_lib_data.append(library_entry(library, library_df))

    self.combinations_same_barcode()
    self.ori_lib_data = sorted(self.ori_lib_data, key=lambda entry: (entry['level'], entry['time']))

    # --- 4. greedy packing -----------------------------------------------
    while self.ori_lib_data:
        chipname = f'chip{self.loc_chip_num}'

        # An untouched chip takes the head of the queue unconditionally.
        if chipname not in self.index_assignments:
            self.add_new_data(chipname, self.ori_lib_data.pop(0))
            continue

        # First entry, in priority order, that satisfies every constraint.
        for position, candidate in enumerate(self.ori_lib_data):
            if self.judge_data(chipname, candidate):
                del self.ori_lib_data[position]
                self.add_new_data(chipname, candidate, newer=False)
                break
        else:
            # Nothing fits: close the chip (or strip under-filled libraries).
            self.add_loc_num(chipname)
            continue

        if self.chip_size[chipname] > self.data_limit:
            self.add_loc_num(chipname)
|
2023-06-27 13:01:44 +08:00
|
|
|
|
|
2024-03-04 17:10:22 +08:00
|
|
|
|
def assign_again_size(self, max_barcode='all'):
    """
    Re-pack the libraries of under-filled chips onto new chips.

    Every chip in ``self.index_assignments`` whose summed ``orderdatavolume``
    is below ``self.data_lower`` is removed from the mapping. Its rows are
    regrouped by ``presamplename`` and placed again through
    ``self.add_new_data``, with chip numbering restarted at 100.

    :param max_barcode: when it is not ``'all'`` it is appended to the new
        chip names (``chip<N>_<max_barcode>``); it is always forwarded to
        ``self.judge_data``.
    :return: None.
    """
    left_data = []
    no_need_chipname = []
    for chip_idx, chip_assignments in self.index_assignments.items():
        if not chip_assignments:
            continue
        df = pd.DataFrame(chip_assignments)
        if df['orderdatavolume'].sum() < self.data_lower:
            left_data.extend(chip_assignments)
            no_need_chipname.append(chip_idx)
    # Deletion is deferred so the dict does not change size while it is iterated.
    for chip_idx in no_need_chipname:
        del self.index_assignments[chip_idx]
    if not left_data:
        return

    ori_library_df = pd.DataFrame(left_data)
    ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
    ori_lib_data = []
    for library, library_df in ori_library_df.groupby('presamplename'):
        level = library_df['level'].values[0]
        if library in self.split_lib:
            # Libraries listed in split_lib get a fixed level of 1950, which
            # places them after any lower level in the ascending sort below.
            level = 1950

        ori_lib_data.append(dict(
            library=library,
            sample_code=library_df['sampleCode'].values[0],
            is_balance_lib=library_df['librarybalancedflag'].values[0],
            size=library_df['orderdatavolume'].sum(),
            split_method=library_df['cycletype'].values[0],
            time=library_df['receivedtime'].values[0],
            level=level,
            customer=library_df['companynamea'].values[0],
            classification=library_df['librarystructure'].values[0],
            data=library_df.to_dict('records'),
        ))

    # Ascending level, then ascending received time, then largest size first.
    ori_lib_data.sort(key=lambda x: (x['level'], x['time'], -x['size']))
    self.loc_chip_num = 100
    while ori_lib_data:
        library_data = ori_lib_data[0]
        if max_barcode != 'all':
            chipname = f'chip{self.loc_chip_num}_{max_barcode}'
        else:
            chipname = f'chip{self.loc_chip_num}'

        # A chip that has no entry yet takes the head library unconditionally.
        if chipname not in self.index_assignments:
            self.add_new_data(chipname, library_data)
            ori_lib_data.pop(0)
            continue

        # Otherwise the library has to pass the placement check.
        if self.judge_data(chipname, library_data, max_barcode=max_barcode):
            self.add_new_data(chipname, library_data, newer=False)
            ori_lib_data.pop(0)
        else:
            # Head library does not fit: take the first pending library that
            # does. The scan starts at index 0 again, so the head is judged a
            # second time, which keeps the call sequence on judge_data as before.
            for idx, newlibrary_data in enumerate(ori_lib_data):
                if self.judge_data(chipname, newlibrary_data, max_barcode=max_barcode):
                    # Remove by position; the loop is left immediately, so
                    # mutating the list here is safe.
                    del ori_lib_data[idx]
                    self.add_new_data(chipname, newlibrary_data, newer=False)
                    break
            else:
                # Nothing pending fits on this chip.
                self.add_loc_num(chipname)

        if self.chip_size[chipname] > self.data_limit:
            self.add_loc_num(chipname)
|
2024-03-01 18:05:46 +08:00
|
|
|
|
|
2023-06-27 13:01:44 +08:00
|
|
|
|
def run(self):
    """
    Run the layout and export the result to an Excel workbook.

    ``assign_samples`` and ``assign_again_size`` are executed first. If either
    raises, the message is appended to ``self.return_log`` and
    ``self.index_assignments`` is reset to an empty dict, so no chip sheets
    are written. One sheet is written per accepted chip, followed by the
    optional sheets ``未测`` (rows that were not placed), ``包lane``
    (``self.order_assign_data``) and ``log`` (``self.return_log``).

    :return: path of the written workbook:
        ``<self.output>/result/assignments_<mmddHHMM>_<basename of self.path>``.
    """
    try:
        self.assign_samples()
        self.assign_again_size()
    except Exception as e:
        # Top-level boundary: the failure is reported through the log sheet
        # instead of aborting the export.
        self.return_log.append(f'T7排样出错, 请联系!{e}')
        self.index_assignments = {}

    outputname = 'assignments_%s_%s' % (datetime.now().strftime("%m%d%H%M"), os.path.basename(self.path))
    outputpath = os.path.join(self.output, 'result', outputname)
    # The result directory may not exist yet on a fresh output location.
    os.makedirs(os.path.dirname(outputpath), exist_ok=True)

    # The context manager closes the workbook even when a sheet export raises.
    with pd.ExcelWriter(outputpath) as writer:
        librarynum = 0
        for chip_idx, chip_assignments in self.index_assignments.items():
            if not chip_assignments:
                continue
            df = pd.DataFrame(chip_assignments)

            # Chips holding a cycletype that contains '极致' get an 'X' prefix.
            # Non-string cells (e.g. NaN) are ignored instead of raising.
            has_extreme = any(
                isinstance(method, str) and '极致' in method
                for method in df['cycletype'].values
            )
            addname = 'X' if has_extreme else ''

            # Under-filled chips are moved to the unassigned sheet, unless
            # they carry the 'X' prefix.
            if df['orderdatavolume'].sum() < (self.data_lower - 50) and not addname:
                df['note'] = f'排样数据量不足{self.data_lower - 50}G'
                self.no_assign_data.extend(df.to_dict('records'))
                continue
            # Once the running library count exceeds the configured cap, all
            # further chips are moved to the unassigned sheet.
            if librarynum > self.librarynum:
                df['note'] = '排样管数超标'
                self.no_assign_data.extend(df.to_dict('records'))
                continue
            librarynum += len(set(df['presamplename'].values))

            self.dec_barcode_radio(chip_idx)
            chipname = addname + chip_idx

            # Per-library summary, placed to the right of the detail rows.
            sum_list = [
                dict(
                    预排文库编号=library_df['sampleCode'].values[0],
                    预排样本名称=library_df['presamplename'].values[0],
                    二次拆分=library,
                    客户=library_df['companynamea'].values[0],
                    类型=library_df['librarystructure'].values[0],
                    打折前=library_df['orderdatavolume'].sum(),
                )
                for library, library_df in df.groupby('presamplename')
            ]
            df_sum = pd.DataFrame(sum_list)
            res_df = pd.concat([df, df_sum], axis=1)
            res_df = pd.concat([pd.DataFrame(self.items), res_df]).reset_index(drop=True)
            res_df.to_excel(writer, sheet_name=chipname, index=False)

        no_assign_df = pd.DataFrame(self.no_assign_data)
        if not no_assign_df.empty:
            # Sorted so the log message is identical from run to run.
            no_assign_df_not_balance = ','.join(
                sorted({lib for lib in no_assign_df['presamplename'] if lib in self.split_lib}))
            if no_assign_df_not_balance:
                self.return_log.append(f'文库{no_assign_df_not_balance}有做不平衡文库拆分处理,并且没有排完,请核查!')
            no_assign_df = pd.concat([pd.DataFrame(self.items), no_assign_df]).reset_index(drop=True)
            no_assign_df.to_excel(writer, sheet_name='未测', index=False)

        order_assign_df = pd.DataFrame(self.order_assign_data)
        if not order_assign_df.empty:
            order_assign_df = pd.concat([pd.DataFrame(self.items), order_assign_df]).reset_index(drop=True)
            order_assign_df.to_excel(writer, sheet_name='包lane', index=False)

        if self.return_log:
            pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False)

    return outputpath
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual run against the bundled example workbook, with wall-clock timing.
    start_time = time.time()
    filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'example', 'input排样表.xlsx')
    output_file = ''
    # NOTE(review): placeholder library cap for the demo run; confirm the real limit.
    library_limit = 1000
    # AutoLayout's second positional parameter is `librarynum`, which is
    # converted with int(). Passing output_file ('') there raises ValueError,
    # so the output directory is passed by keyword.
    layout = AutoLayout(filepath, library_limit, output=output_file)
    layout.run()
    end_time = time.time()
    execution_time = end_time - start_time
    print(f"代码执行时间为:{execution_time} 秒")
|