main
parent
6ba6b137dd
commit
d1a3778f1c
134
tools/t7.py
134
tools/t7.py
|
|
@ -1,7 +1,7 @@
|
||||||
import copy
|
import copy
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from collections import defaultdict
|
from collections import defaultdict, Counter
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
@ -132,27 +132,27 @@ class AutoLayout:
|
||||||
today_date = datetime.now()
|
today_date = datetime.now()
|
||||||
|
|
||||||
if 'nextera' in row['classification'].lower():
|
if 'nextera' in row['classification'].lower():
|
||||||
return 10
|
return 1000
|
||||||
|
|
||||||
if '华大' in row['classification']:
|
if '华大' in row['classification']:
|
||||||
return 11
|
return 1100
|
||||||
|
|
||||||
if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
|
if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
|
||||||
return 20
|
return 2000
|
||||||
|
|
||||||
|
if '加急' in row['priority']:
|
||||||
|
return 3000
|
||||||
|
|
||||||
|
if '补测' in row['priority']:
|
||||||
|
return 4000
|
||||||
|
|
||||||
mytime = row['time']
|
mytime = row['time']
|
||||||
# 判断日期是之前的还是之后的
|
# 判断日期是之前的还是之后的
|
||||||
if mytime < today_date:
|
if mytime < today_date:
|
||||||
return 30
|
return 5000
|
||||||
|
|
||||||
if '加急' in row['priority']:
|
|
||||||
return 40
|
|
||||||
|
|
||||||
if '补测' in row['priority']:
|
|
||||||
return 50
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return 1000
|
return 100000
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def read_rule():
|
def read_rule():
|
||||||
|
|
@ -190,6 +190,49 @@ class AutoLayout:
|
||||||
ori_data[name] = sheet.to_dict('records')
|
ori_data[name] = sheet.to_dict('records')
|
||||||
return ori_data
|
return ori_data
|
||||||
|
|
||||||
|
def combinations_same_barcode(self):
    """Lower the level of level-1900 libraries according to their barcode clashes.

    Collects the per-sample rows (``data``) of every entry in
    ``self.ori_lib_data`` whose ``level`` is 1900, finds the barcodes that
    occur on more than one of those rows, and for each such barcode records
    the ordered tuple of ``#library`` values carrying it.  Identical tuples
    (the same libraries clashing on several barcodes) are counted once.
    Each library's ``level`` is then decreased by the number of distinct
    clash tuples it appears in.

    The entries of ``self.ori_lib_data`` are modified in place; nothing is
    returned.  When there is no level-1900 row, or no duplicated barcode
    among them, the data is left untouched.

    NOTE(review): ``assign_samples`` sorts by ``(level, time)`` right after
    calling this method, so a lower level presumably means "place earlier"
    - confirm against the scheduling rules.
    """
    level_1900_rows = [
        row
        for entry in self.ori_lib_data
        if entry['level'] == 1900
        for row in entry['data']
    ]
    same_barcode_df = pd.DataFrame(level_1900_rows)
    if same_barcode_df.empty:
        return

    # Keep only the rows whose barcode appears more than once.
    clashing_rows = same_barcode_df[
        same_barcode_df.duplicated(subset='barcode', keep=False)
    ]
    if clashing_rows.empty:
        return

    # One list of library names per duplicated barcode, in row order.
    libraries_per_barcode = clashing_rows.groupby('barcode')['#library'].apply(list)

    # The same set of libraries clashing on several barcodes counts once.
    distinct_clashes = {tuple(libraries) for libraries in libraries_per_barcode}
    counts = Counter(
        library for clash in distinct_clashes for library in clash
    )

    for entry in self.ori_lib_data:
        if entry['library'] in counts:
            entry['level'] -= counts[entry['library']]
|
||||||
|
|
||||||
def add_new_data(self, chipname, library_data, newer=True):
|
def add_new_data(self, chipname, library_data, newer=True):
|
||||||
"""
|
"""
|
||||||
增加新数据到已知芯片上
|
增加新数据到已知芯片上
|
||||||
|
|
@ -241,7 +284,8 @@ class AutoLayout:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def use_rule_exclusive_customer(self, chipname, customer):
    """Tell whether a chip already holds a customer paired with ``customer``.

    Looks up every row of ``self.rule_exclusive_customer`` whose
    ``customer1`` equals ``customer`` and gathers the matching
    ``customer2`` values.

    :param chipname: key into ``self.chip_customer``.
    :param customer: customer name to look up in the ``customer1`` column.
    :return: ``True`` when at least one of the gathered ``customer2``
        values is already present in ``self.chip_customer[chipname]``,
        ``False`` otherwise.

    NOTE(review): the table name suggests these pairs are mutually
    exclusive customers - confirm how callers interpret the result.
    """
    rules = self.rule_exclusive_customer
    paired_customers = set(rules.loc[rules['customer1'] == customer, 'customer2'])
    # isdisjoint answers the overlap question without building the intersection.
    return not self.chip_customer[chipname].isdisjoint(paired_customers)
|
||||||
|
|
@ -285,8 +329,9 @@ class AutoLayout:
|
||||||
splibrary = False
|
splibrary = False
|
||||||
|
|
||||||
# 甲基化文库不能大于250G
|
# 甲基化文库不能大于250G
|
||||||
|
# 甲基化更改成100G
|
||||||
spmethylibrary = True
|
spmethylibrary = True
|
||||||
if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 250:
|
if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 100:
|
||||||
spmethylibrary = False
|
spmethylibrary = False
|
||||||
|
|
||||||
# 不使用平衡文库
|
# 不使用平衡文库
|
||||||
|
|
@ -304,7 +349,12 @@ class AutoLayout:
|
||||||
if is_not_balance_list:
|
if is_not_balance_list:
|
||||||
base_balance = False
|
base_balance = False
|
||||||
|
|
||||||
if sizelimit and notrepeatbarcode and exclusive_classific and exclusive_customer and splibrary and base_balance and spmethylibrary:
|
# 华大的文库不能超过限制的一半
|
||||||
|
use_huada = True
|
||||||
|
if self.chip_speciallib_huada_size[chipname] > self.data_limit / 2:
|
||||||
|
use_huada = False
|
||||||
|
|
||||||
|
if sizelimit and notrepeatbarcode and exclusive_classific and exclusive_customer and splibrary and base_balance and spmethylibrary and use_huada:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -312,7 +362,7 @@ class AutoLayout:
|
||||||
"""
|
"""
|
||||||
锚定芯片号增加
|
锚定芯片号增加
|
||||||
"""
|
"""
|
||||||
# 有nextera, 华大文库 必须满足大于50G
|
# 有nextera, 华大文库 必须满足大于50G 到了芯片结算
|
||||||
chipname = f'chip{self.loc_chip_num}'
|
chipname = f'chip{self.loc_chip_num}'
|
||||||
nextera_size = self.chip_speciallib_nextera_size[chipname]
|
nextera_size = self.chip_speciallib_nextera_size[chipname]
|
||||||
huada_size = self.chip_speciallib_huada_size[chipname]
|
huada_size = self.chip_speciallib_huada_size[chipname]
|
||||||
|
|
@ -351,7 +401,7 @@ class AutoLayout:
|
||||||
self.loc_chip_num += 1
|
self.loc_chip_num += 1
|
||||||
|
|
||||||
def assign_samples(self):
|
def assign_samples(self):
|
||||||
ori_library_data = list()
|
# ori_library_data = list()
|
||||||
|
|
||||||
if '未测' not in self.ori_data.keys():
|
if '未测' not in self.ori_data.keys():
|
||||||
raise UserWarning('提供excel没有 未测 sheet ,请核查!')
|
raise UserWarning('提供excel没有 未测 sheet ,请核查!')
|
||||||
|
|
@ -396,9 +446,10 @@ class AutoLayout:
|
||||||
ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce')
|
ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce')
|
||||||
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
|
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
|
||||||
|
|
||||||
# 极致客户有重复的,把等级调到0,防止放到了最后,到了未测里
|
# 极致客户有重复的,把等级调到19,防止放到了最后,到了未测里
|
||||||
ori_library_df.loc[
|
must_lib_df = ori_library_df[ori_library_df['level'] == 2000]
|
||||||
(ori_library_df.duplicated(subset='barcode')) & (ori_library_df['level'] == 20), 'level'] = 19
|
must_lib = set(must_lib_df[must_lib_df.duplicated(subset='barcode', keep=False)]['#library'].to_list())
|
||||||
|
ori_library_df.loc[ori_library_df['#library'].isin(must_lib), 'level'] = 1900
|
||||||
|
|
||||||
for library, library_df in ori_library_df.groupby('#library'):
|
for library, library_df in ori_library_df.groupby('#library'):
|
||||||
|
|
||||||
|
|
@ -410,28 +461,11 @@ class AutoLayout:
|
||||||
self.no_assign_data.extend(library_df.to_dict('records'))
|
self.no_assign_data.extend(library_df.to_dict('records'))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 拆分处理
|
# 拆分处理 分为了2个大文库
|
||||||
flag = False
|
|
||||||
if size > (self.data_limit) / 2:
|
if size > (self.data_limit) / 2:
|
||||||
library_df['data_needed'] = library_df['data_needed'] / 2
|
library_df['data_needed'] = library_df['data_needed'] / 2
|
||||||
flag = True
|
|
||||||
|
|
||||||
ori_library_data.append(dict(
|
|
||||||
library=library,
|
|
||||||
is_balance_lib=library_df['is_balance_lib'].values[0],
|
|
||||||
size=library_df['data_needed'].sum(),
|
|
||||||
split_method=library_df['拆分方式'].values[0],
|
|
||||||
time=library_df['time'].values[0],
|
|
||||||
level=library_df['level'].values[0],
|
|
||||||
customer=library_df['customer'].values[0],
|
|
||||||
classification=library_df['classification'].values[0],
|
|
||||||
data=library_df[self.need_cols].to_dict('records')
|
|
||||||
))
|
|
||||||
|
|
||||||
# 拆分对半
|
|
||||||
if flag:
|
|
||||||
self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
|
self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
|
||||||
ori_library_data.append(dict(
|
self.ori_lib_data.append(dict(
|
||||||
library=library,
|
library=library,
|
||||||
is_balance_lib=library_df['is_balance_lib'].values[0],
|
is_balance_lib=library_df['is_balance_lib'].values[0],
|
||||||
size=library_df['data_needed'].sum(),
|
size=library_df['data_needed'].sum(),
|
||||||
|
|
@ -442,9 +476,22 @@ class AutoLayout:
|
||||||
classification=library_df['classification'].values[0],
|
classification=library_df['classification'].values[0],
|
||||||
data=library_df[self.need_cols].to_dict('records')
|
data=library_df[self.need_cols].to_dict('records')
|
||||||
))
|
))
|
||||||
self.ori_lib_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time']))
|
|
||||||
|
|
||||||
# self.ori_lib_data = ori_sort_data
|
self.ori_lib_data.append(dict(
|
||||||
|
library=library,
|
||||||
|
is_balance_lib=library_df['is_balance_lib'].values[0],
|
||||||
|
size=library_df['data_needed'].sum(),
|
||||||
|
split_method=library_df['拆分方式'].values[0],
|
||||||
|
time=library_df['time'].values[0],
|
||||||
|
level=library_df['level'].values[0],
|
||||||
|
customer=library_df['customer'].values[0],
|
||||||
|
classification=library_df['classification'].values[0],
|
||||||
|
data=library_df[self.need_cols].to_dict('records')
|
||||||
|
))
|
||||||
|
|
||||||
|
self.combinations_same_barcode()
|
||||||
|
self.ori_lib_data = sorted(self.ori_lib_data, key=lambda x: (x['level'], x['time']))
|
||||||
|
|
||||||
while self.ori_lib_data:
|
while self.ori_lib_data:
|
||||||
library_data = self.ori_lib_data[0]
|
library_data = self.ori_lib_data[0]
|
||||||
chipname = f'chip{self.loc_chip_num}'
|
chipname = f'chip{self.loc_chip_num}'
|
||||||
|
|
@ -474,6 +521,7 @@ class AutoLayout:
|
||||||
self.add_loc_num()
|
self.add_loc_num()
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
# print('# 测试代码')
|
||||||
# self.assign_samples()
|
# self.assign_samples()
|
||||||
try:
|
try:
|
||||||
self.assign_samples()
|
self.assign_samples()
|
||||||
|
|
@ -522,7 +570,7 @@ class AutoLayout:
|
||||||
res_df = pd.concat([df, df_sum], axis=1)
|
res_df = pd.concat([df, df_sum], axis=1)
|
||||||
res_df.to_excel(writer, sheet_name=chipname, index=False)
|
res_df.to_excel(writer, sheet_name=chipname, index=False)
|
||||||
chip_loc += 1
|
chip_loc += 1
|
||||||
# self.no_assign_data.extend(self.diffic_assign_data)
|
|
||||||
no_assign_df = pd.DataFrame(self.no_assign_data)
|
no_assign_df = pd.DataFrame(self.no_assign_data)
|
||||||
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
|
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
|
||||||
if not no_assign_df.empty:
|
if not no_assign_df.empty:
|
||||||
|
|
@ -543,5 +591,3 @@ if __name__ == '__main__':
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
execution_time = end_time - start_time
|
execution_time = end_time - start_time
|
||||||
print(f"代码执行时间为:{execution_time} 秒")
|
print(f"代码执行时间为:{execution_time} 秒")
|
||||||
|
|
||||||
# server()
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue