branch main
chaopower, 2024-02-05 17:13:32 +08:00
parent c203913bd4
commit 2f07383922
2 changed files with 230 additions and 313 deletions

Changed file 1 of 2:

@@ -23,6 +23,8 @@ def recvdata(conn, path):
     content_len = header_dic['contentlen']
     content_name = header_dic['contentname']
     librarynum = header_dic['librarynum']
+    is_use_balance = header_dic['is_use_balance']
+    is_use_max = header_dic['is_use_max']
     recv_len = 0
     fielpath = os.path.join(path, '%s_%s' % (datetime.now().strftime("%m%d%H%M"), content_name))
     file = open(fielpath, 'wb')
@@ -31,7 +33,7 @@ def recvdata(conn, path):
         file.write(correntrecv)
         recv_len += len(correntrecv)
     file.close()
-    return fielpath, librarynum
+    return fielpath, librarynum, is_use_balance, is_use_max


def senddata(conn, path, message=None):
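recvdata() now reads two extra flags, is_use_balance and is_use_max, out of the request header and returns them alongside the file path and librarynum. A minimal sketch of a matching client request is below; only the header field names come from this diff, while the JSON encoding, the 4-byte length prefix, and the function name send_layout_request are assumptions about framing that is not shown in the commit.

import json
import os
import socket
import struct


def send_layout_request(host, port, filepath, librarynum, is_use_balance=1, is_use_max=0):
    """Send one Excel file plus the header fields recvdata() now expects."""
    header = {
        'contentlen': os.path.getsize(filepath),
        'contentname': os.path.basename(filepath),
        'librarynum': librarynum,
        # new fields added in this commit
        'is_use_balance': is_use_balance,
        'is_use_max': is_use_max,
    }
    header_bytes = json.dumps(header).encode('utf-8')
    with socket.create_connection((host, port)) as conn:
        # hypothetical framing: 4-byte header length, then the header, then the file body
        conn.sendall(struct.pack('i', len(header_bytes)))
        conn.sendall(header_bytes)
        with open(filepath, 'rb') as f:
            conn.sendall(f.read())
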
@@ -77,8 +79,8 @@ def server():
     while True:
         try:
             myclient, adddr = myserver.accept()
-            recv_content, librarynum = recvdata(myclient, os.path.join(basedir, 'example'))
-            layout = T7(recv_content, librarynum)
+            recv_content, librarynum, is_use_balance, is_use_max = recvdata(myclient, os.path.join(basedir, 'example'))
+            layout = T7(recv_content, librarynum, is_use_balance, is_use_max)
             outputpath = layout.run()
             senddata(myclient, outputpath)
         except Exception as e:
@@ -88,7 +90,7 @@ def server():

if __name__ == '__main__':
    if len(sys.argv) > 1:
-        layout = T7(sys.argv[1], sys.argv[2])
+        layout = T7(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
        outputpath = layout.run()
    else:
        server()

Changed file 2 of 2:

@@ -5,7 +5,6 @@ from collections import defaultdict
 from datetime import datetime

 import pandas as pd
-from deap import base, creator, tools, algorithms

 from tools.common import basedir, log
@@ -15,7 +14,55 @@ def format_date(date):
     return date.strftime('%Y-%m-%d')


-def count_barcode_radio(data):
+class AutoLayout:
+    """
+    Automated sample layout (自动化派样)
+    """
+
+    def __init__(self, path, librarynum, is_use_balance=1, is_use_max=0, output=basedir, data_limit=1750):
+        self.path = path
+        self.output = output
+        self.librarynum = int(librarynum)
+        self.data_limit = data_limit
+        self.index_assignments = defaultdict(list)
+        # data volume per chip
+        self.chip_size = dict()
+        # whether the chip is an "ultimate" (极致) chip
+        self.chip_type = dict()
+        # barcodes already placed on each chip
+        self.chip_barcode_recode = defaultdict(set)
+        # raw input data read from the Excel file
+        self.ori_data = self.read_excel()
+        # current anchor chip number
+        self.loc_chip_num = 1
+        # customers per chip
+        self.chip_customer = defaultdict(set)
+        # library classifications per chip
+        self.chip_classification = defaultdict(set)
+        self.rule = self.read_rule()
+        # unbalanced libraries
+        self.chip_speciallib_size = dict()
+        # methylation libraries
+        self.chip_methylib_size = dict()
+        # Nextera library size
+        self.chip_speciallib_nextera_size = dict()
+        # 华大 (BGI) libraries
+        self.chip_speciallib_huada_size = dict()
+        self.logger = log(os.path.basename(f'{path}.txt'))
+        self.return_log = list()
+        self.no_assign_data = list()
+        self.ori_lib_data = list()
+        self.need_cols = self.read_cols()
+        self.is_use_balance = is_use_balance
+        self.is_use_max = is_use_max
+
+    def count_barcode_radio(self, data):
         df = pd.DataFrame(data)
         ratio_sites = dict()
         is_not_balance_list = []
@@ -39,7 +86,7 @@ def count_barcode_radio(data):
             col_df['ratio'] = (col_df['data_needed']) / (total - base_n_size)
             ratio = col_df['ratio'].to_dict()
             ratio_sites[i] = ratio
-            A, B, C, D, E, F = list(), list(), list(), list(), list(), list()
+            A, B, C, D, E, F, G = list(), list(), list(), list(), list(), list(), list()
             for decbase in ['A', 'T', 'C', 'G']:
                 if decbase not in ratio:
                     ratio[decbase] = 0
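count_barcode_radio() weights each barcode position by the library's data_needed and computes the per-base share at every one of the 16 positions, which is what the ratio dictionaries above hold. Below is a self-contained sketch of that computation on made-up data; it builds the position columns with apply(list) instead of the str.split('') trick used in the method, and leaves out the N-base exclusion.

import pandas as pd

libs = pd.DataFrame({
    'barcode': ['ACGTACGTACGTACGT', 'TGCATGCATGCATGCA', 'ACGTACGTACGTTGCA'],
    'data_needed': [400, 300, 300],
})

# one column per barcode position, as in count_barcode_radio()
bases = pd.DataFrame(libs['barcode'].apply(list).tolist(),
                     columns=['T' + str(x) for x in range(16)]).join(libs['data_needed'])

total = bases['data_needed'].sum()
for i in range(16):
    col = bases.groupby('T' + str(i)).agg({'data_needed': 'sum'})
    # the real method would also drop 'N' bases from the denominator here
    col['ratio'] = col['data_needed'] / total
    print(i, col['ratio'].to_dict())
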
@@ -56,151 +103,70 @@ def count_barcode_radio(data):
                 if ratio[decbase] < 0.08:
                     F.append(decbase)
-            A_num, B_num, C_num, D_num, E_num, F_num = len(A), len(B), len(C), len(D), len(E), len(F)
+                # new rule: one extra acceptable band per base
+                if 0.125 <= ratio[decbase] <= 0.625:
+                    G.append(decbase)
+            A_num, B_num, C_num, D_num, E_num, F_num, G_num = len(A), len(B), len(C), len(D), len(E), len(F), len(G)
             if not ((B_num + C_num + D_num == 4) or (F_num == 1 and (A_num + B_num) == 3) or (
                     E_num == 1 and D_num == 1 and (A_num + B_num + C_num) == 2) or (
-                    E_num == 1 and (A_num + B_num + C_num) == 3)):
+                    E_num == 1 and (A_num + B_num + C_num) == 3) or (
+                    F_num == 1 and G_num == 3 and self.is_use_max)):
                 is_not_balance_list.append(
                     '%s位置,算出结果为 %s' % (i, ratio)
                 )
         return ratio_sites, is_not_balance_list

-
-# define the genetic algorithm
-class Ga:
-    """
-    Genetic algorithm definition
-    """
-
-    def __init__(self, sheets):
-        self.sheets = sheets
-
-    # how an individual is generated
-    def generate_individual(self):
-        individual = copy.deepcopy(self.sheets)  # the initial solution is used as the individual
-        return [individual]
-
-    # the evaluation function
-    @staticmethod
-    def evaluate(individual):
-        total_data_needed_sum = 0
-        xchip = 0
-        try:
-            for sheetname, data in individual[0][0].items():
-                library_data = pd.DataFrame(data)
-                size = library_data['data_needed'].sum()
-                # the chip size must not exceed the configured limit
-                if size > 1700:
-                    return (0, 100000, 100000)
-                # duplicated barcodes
-                if len(library_data['barcode'].values) < len(set(library_data['barcode'].values)):
-                    return (0, 100000, 100000)
-                # unbalanced libraries above 250G cannot be added
-                if library_data[library_data['is_balance_lib'] == '']['data_needed'].sum() > 250:
-                    return (0, 100000, 100000)
-                # skip anything that breaks base balance to protect the data already placed; the check starts around 1200G
-                ratio_sites, is_not_balance_list = count_barcode_radio(library_data)
-                if is_not_balance_list:
-                    return (0, 100000, 100000)
-                if library_data[library_data['classification'].str.lower() == 'nextera']['data_needed'].sum() <= 50:
-                    return (0, 100000, 100000)
-                # sum data_needed over each sheet
-                total_data_needed_sum += library_data['data_needed'].sum()
-                # count the sheets that contain 极致
-                if any('极致' in value for value in library_data['split']):
-                    xchip += 1
-        except Exception:
-            return (0, 100000, 100000)
-        # fitness: maximise total data_needed, minimise the number of sheets and of 极致 chips
-        total_data_needed_sum, num_sheets, num_xchip = total_data_needed_sum, len(individual[0]), xchip
-        return total_data_needed_sum, num_sheets, num_xchip
-
-    def run(self):
-        # genetic-algorithm parameters
-        pop_size = 50
-        cxpb = 0.7  # crossover probability
-        mutpb = 0.2  # mutation probability
-        ngen = 100  # number of generations
-
-        # initialise the DEAP toolbox
-        creator.create("FitnessMax", base.Fitness, weights=(1.0, -1.0, -1.0,))  # three objectives: one maximised, two minimised
-        creator.create("Individual", list, fitness=creator.FitnessMax)
-
-        toolbox = base.Toolbox()
-
-        # structure initialisers
-        toolbox.register("individual", tools.initRepeat, creator.Individual, self.generate_individual, n=3)
-        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
-        toolbox.register("evaluate", self.evaluate)
-
-        # register the genetic operators
-        toolbox.register("mate", tools.cxTwoPoint)
-        toolbox.register("mutate", tools.mutUniformInt, low=1, up=100, indpb=0.2)
-        toolbox.register("select", tools.selTournament, tournsize=3)
-
-        # initialise the population
-        population = toolbox.population(n=pop_size)
-
-        # run the genetic algorithm
-        algorithms.eaMuPlusLambda(population, toolbox, mu=pop_size, lambda_=pop_size * 2, cxpb=cxpb, mutpb=mutpb,
-                                  ngen=ngen, stats=None, halloffame=None)
-
-        # output the result
-        best_individual = tools.selBest(population, k=1)
-        print(best_individual)
-        optimized_sheets = best_individual[0]  # the best solution found
-
-        # write the optimised result out
-        # for i, sheet in enumerate(optimized_sheets):
-        #     sheet.to_excel(f'optimized_sheet_{i + 1}.xlsx', index=False)
-        return optimized_sheets
-
-
-class AutoLayout:
-    """
-    Automated sample layout (自动化派样)
-    """
-
-    def __init__(self, path, librarynum, output=basedir, data_limit=1750):
-        self.path = path
-        self.output = output
-        self.librarynum = int(librarynum)
-        self.data_limit = data_limit
-        self.index_assignments = defaultdict(list)
-        # data volume per chip
-        self.chip_size = dict()
-        # whether the chip is an "ultimate" (极致) chip
-        self.chip_type = dict()
-        # barcodes already placed on each chip
-        self.chip_barcode_recode = defaultdict(set)
-        # raw input data read from the Excel file
-        self.ori_data = self.read_excel()
-        # current anchor chip number
-        self.loc_chip_num = 1
-        # customers per chip
-        self.chip_customer = defaultdict(set)
-        # library classifications per chip
-        self.chip_classification = defaultdict(set)
-        self.rule = self.read_rule()
-        # methylation libraries capped at 200G, WGBS libraries capped at 200G
-        self.chip_speciallib_size = dict()
-        # Nextera library size
-        self.chip_speciallib_nextera_size = dict()
-        self.logger = log(os.path.basename(f'{path}.txt'))
-        self.return_log = list()
-        self.no_assign_data = list()
-        self.need_cols = self.read_cols()
+    def dec_barcode_radio(self, chipname):
+        data = self.index_assignments[chipname]
+        ratio_sites, is_not_balance_list = self.count_barcode_radio(data)
+        if is_not_balance_list:
+            desc = '\n'.join(is_not_balance_list)
+            self.return_log.append(f'芯片{chipname}有碱基不平衡:\n{desc}')
+            print(f'芯片{chipname}有碱基不平衡:\n{desc}')
+
+    @staticmethod
+    def level(row):
+        today_date = datetime.now()
+
+        if 'nextera' in row['classification'].lower():
+            return 10
+
+        if '华大' in row['classification']:
+            return 11
+
+        if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
+            return 20
+
+        mytime = row['time']
+        # is the date already in the past?
+        if mytime < today_date:
+            return 30
+        if '加急' in row['priority']:
+            return 40
+        if '补测' in row['priority']:
+            return 50
+        else:
+            return 1000
+
+    @staticmethod
+    def read_rule():
+        df = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive_classfication.xlsx'))
+        newdf = pd.DataFrame()
+        newdf['c1'] = df['c2']
+        newdf['c2'] = df['c1']
+        res = pd.concat([df, newdf])
+        return res.reset_index()
+
+    @staticmethod
+    def read_cols():
+        df = pd.read_excel(os.path.join(basedir, 'rule', 'columns.xlsx'))
+        cols = list(df['cols'].values)
+        return cols

     def read_excel(self):
         """
@@ -233,122 +199,30 @@ class AutoLayout:
             # if library_data['classification'] in ['扩增子', '不平衡文库', '单细胞文库以及甲基化']:
             if library_data['is_balance_lib'] == '':
                 self.chip_speciallib_size[chipname] = library_data['size']
+            elif library_data['is_balance_lib'] == '甲基化':
+                self.chip_methylib_size[chipname] = library_data['size']
             else:
                 self.chip_speciallib_size[chipname] = 0
+                self.chip_methylib_size[chipname] = 0
             if 'nextera' in library_data['classification'].lower():
                 self.chip_speciallib_nextera_size[chipname] = library_data['size']
             else:
                 self.chip_speciallib_nextera_size[chipname] = 0
+            if '华大' in library_data['classification']:
+                self.chip_speciallib_huada_size[chipname] = library_data['size']
+            else:
+                self.chip_speciallib_huada_size[chipname] = 0
         else:
             self.chip_size[chipname] += library_data['size']
             if library_data['is_balance_lib'] == '':
                 self.chip_speciallib_size[chipname] += library_data['size']
+            if library_data['is_balance_lib'] == '甲基化':
+                self.chip_methylib_size[chipname] += library_data['size']
             if 'nextera' in library_data['classification'].lower():
-                self.chip_speciallib_nextera_size[chipname] += library_data['size']
-
-    @staticmethod
-    def count_barcode_radio(data):
-        df = pd.DataFrame(data)
-        ratio_sites = dict()
-        is_not_balance_list = []
-        if df.empty:
-            return ratio_sites, is_not_balance_list
-        df['barcode'] = df['barcode'].str.slice(0, 16)
-        barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values,
-                                  columns=['T' + str(x) for x in range(16)]).join(df['data_needed'])
-        total = barcode_df['data_needed'].sum()
-        for i in range(16):
-            column = 'T' + str(i)
-            col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'})
-            # exclude the N count
-            if 'N' in col_df.index:
-                base_n_size = col_df.loc['N', 'data_needed']
-                col_df = col_df.drop('N')
-            else:
-                base_n_size = 0
-            col_df['ratio'] = (col_df['data_needed']) / (total - base_n_size)
-            ratio = col_df['ratio'].to_dict()
-            ratio_sites[i] = ratio
-            A, B, C, D, E, F = list(), list(), list(), list(), list(), list()
-            for decbase in ['A', 'T', 'C', 'G']:
-                if decbase not in ratio:
-                    ratio[decbase] = 0
-                if ratio[decbase] >= 0.6:
-                    A.append(decbase)
-                if 0.2 <= ratio[decbase] < 0.6:
-                    B.append(decbase)
-                if 0.15 <= ratio[decbase] < 0.2:
-                    C.append(decbase)
-                if 0.1 <= ratio[decbase] < 0.15:
-                    D.append(decbase)
-                if 0.08 <= ratio[decbase] < 0.1:
-                    E.append(decbase)
-                if ratio[decbase] < 0.08:
-                    F.append(decbase)
-            A_num, B_num, C_num, D_num, E_num, F_num = len(A), len(B), len(C), len(D), len(E), len(F)
-            if not ((B_num + C_num + D_num == 4) or (F_num == 1 and (A_num + B_num) == 3) or (
-                    E_num == 1 and D_num == 1 and (A_num + B_num + C_num) == 2) or (
-                    E_num == 1 and (A_num + B_num + C_num) == 3)):
-                is_not_balance_list.append(
-                    '%s位置,算出结果为 %s' % (i, ratio)
-                )
-        return ratio_sites, is_not_balance_list
-
-    def dec_barcode_radio(self, chipname):
-        data = self.index_assignments[chipname]
-        ratio_sites, is_not_balance_list = self.count_barcode_radio(data)
-        if is_not_balance_list:
-            desc = '\n'.join(is_not_balance_list)
-            self.return_log.append(f'芯片{chipname}有碱基不平衡:\n{desc}')
-            print(f'芯片{chipname}有碱基不平衡:\n{desc}')
-
-    @staticmethod
-    def level(row):
-        today_date = datetime.now()
-        # convert the time string to a datetime object
-        # mytime = datetime.strptime(row['time'], "%Y-%m-%d")
-        # mytime = row['time'].strftime("%Y-%m-%d")
-        if 'nextera' in row['classification'].lower():
-            return 1
-        if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
-            return 2
-        mytime = row['time']
-        # is the date already in the past?
-        if mytime < today_date:
-            return 3
-        if '加急' in row['priority']:
-            return 4
-        if '补测' in row['priority']:
-            return 5
-        else:
-            return 100
-
-    @staticmethod
-    def read_rule():
-        df = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive_classfication.xlsx'))
-        newdf = pd.DataFrame()
-        newdf['c1'] = df['c2']
-        newdf['c2'] = df['c1']
-        res = pd.concat([df, newdf])
-        return res.reset_index()
-
-    @staticmethod
-    def read_cols():
-        df = pd.read_excel(os.path.join(basedir, 'rule', 'columns.xlsx'))
-        cols = list(df['cols'].values)
-        return cols
+                self.chip_speciallib_huada_size[chipname] += library_data['size']
+            if '华大' in library_data['classification']:
+                self.chip_speciallib_huada_size[chipname] += library_data['size']

     def use_rule(self, chipname, classfication):
         may_classfic = set(self.rule[self.rule['c1'] == classfication]['c2'])
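The hunk above extends the per-chip bookkeeping with running totals for methylation (甲基化) and 华大 libraries, initialising the counter on the first library placed on a chip and incrementing it afterwards. A small sketch of the same pattern follows, assuming a defaultdict is acceptable so the first-write and else-zero branches collapse into a plain +=; the dictionary and function names are illustrative, not the project's API.

from collections import defaultdict

# hypothetical running totals, one bucket per chip, mirroring the new
# chip_methylib_size / chip_speciallib_huada_size bookkeeping
chip_methylib_size = defaultdict(float)
chip_huada_size = defaultdict(float)


def record(chipname, library):
    if library.get('is_balance_lib') == '甲基化':
        chip_methylib_size[chipname] += library['size']
    if '华大' in library.get('classification', ''):
        chip_huada_size[chipname] += library['size']


record('chip1', {'is_balance_lib': '甲基化', 'classification': 'WGBS', 'size': 120})
record('chip1', {'is_balance_lib': '', 'classification': '华大文库', 'size': 60})
print(dict(chip_methylib_size), dict(chip_huada_size))
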
@@ -357,9 +231,10 @@ class AutoLayout:
         return False

     def judge_data(self, chipname, library_data):
+        """
+        Constraint checks (约束条件)
+        """
         size = library_data['size']
-        # customer = library_data['customer']
-        # library = library_data['library']
         classification = library_data['classification']
         is_balance_lib = library_data['is_balance_lib']
@@ -383,9 +258,19 @@ class AutoLayout:
         if is_balance_lib == '' and self.chip_speciallib_size[chipname] + size > 250:
             splibrary = False

+        # methylation libraries must not exceed 250G per chip
+        spmethylibrary = True
+        if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 250:
+            spmethylibrary = False
+
+        # balance-library limits disabled
+        if not self.is_use_balance:
+            splibrary = True
+            spmethylibrary = True
+
         # do not add data that breaks base balance, to protect what is already placed; the check starts once the chip is large enough
         base_balance = True
-        if self.chip_size[chipname] > 800:
+        if self.chip_size[chipname] > 900:
             current_data = copy.deepcopy(self.index_assignments[chipname])
             new_data = library_data['data']
             current_data.extend(new_data)
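judge_data() now keeps two separate 250G caps, one for unbalanced libraries (is_balance_lib == '') and a new one for methylation libraries, and both caps are bypassed when is_use_balance is off. A sketch of just that flag logic as a pure function, with parameter names of my own choosing:

def passes_special_caps(is_balance_lib, size, unbalanced_on_chip, methyl_on_chip,
                        is_use_balance=True, cap=250):
    """Sketch of the two 250G caps judge_data() now checks, plus the new bypass."""
    splibrary = not (is_balance_lib == '' and unbalanced_on_chip + size > cap)
    spmethylibrary = not (is_balance_lib == '甲基化' and methyl_on_chip + size > cap)
    if not is_use_balance:          # is_use_balance = 0 disables both caps
        splibrary = True
        spmethylibrary = True
    return splibrary and spmethylibrary


print(passes_special_caps('甲基化', 80, 0, 200))                        # False: would exceed 250G
print(passes_special_caps('甲基化', 80, 0, 200, is_use_balance=False))  # True: caps bypassed
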
@@ -393,17 +278,21 @@ class AutoLayout:
             if is_not_balance_list:
                 base_balance = False

-        if sizelimit and notrepeatbarcode and exclusive_classific and splibrary and base_balance:
+        if sizelimit and notrepeatbarcode and exclusive_classific and splibrary and base_balance and spmethylibrary:
             return True
         return False

     def add_loc_num(self):
-        # nextera libraries must total more than 50G
+        """
+        Advance the anchor chip number (锚定芯片号增加)
+        """
+        # nextera / 华大 libraries must total more than 50G
         chipname = f'chip{self.loc_chip_num}'
         nextera_size = self.chip_speciallib_nextera_size[chipname]
-        if nextera_size > 50 or nextera_size == 0:
-            self.loc_chip_num += 1
-        else:
+        huada_size = self.chip_speciallib_huada_size[chipname]
+        print(chipname, huada_size, nextera_size)
+        flag = True
+        if 0 < nextera_size < 50:
             # the chip has a nextera library but it is below 50G: remove it
             nextary_barcode = set()
             no_nextary_data = list()
@@ -416,6 +305,26 @@ class AutoLayout:
             self.index_assignments[chipname] = no_nextary_data
             self.chip_barcode_recode[chipname] -= nextary_barcode
             self.chip_speciallib_nextera_size[chipname] = 0
+            self.chip_size[chipname] -= nextera_size
+            flag = False
+        if 0 < huada_size < 50:
+            # the chip has a 华大 library but it is below 50G: remove it
+            huada_barcode = set()
+            no_huada_data = list()
+            for libdata in self.index_assignments[chipname]:
+                if libdata['classification'] != '华大':
+                    no_huada_data.append(libdata)
+                else:
+                    self.no_assign_data.append(libdata)
+                    huada_barcode.update(libdata['barcode'])
+            self.index_assignments[chipname] = no_huada_data
+            self.chip_barcode_recode[chipname] -= huada_barcode
+            self.chip_speciallib_huada_size[chipname] = 0
+            self.chip_size[chipname] -= huada_size
+            flag = False
+        if flag:
+            print(self.loc_chip_num)
+            self.loc_chip_num += 1

     def assign_samples(self):
         ori_library_data = list()
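add_loc_num() now applies the same rule to both nextera and 华大 libraries: if a chip carries such a group but its total is under 50G, the group is evicted back to the unassigned pool before the anchor moves on. A generic sketch of that partition step, assuming a plain equality match on classification (the real code matches nextera case-insensitively); names are illustrative.

def evict_small_group(assigned, classification, group_size, threshold=50):
    """Return (kept, evicted) when a library group is present but under the threshold."""
    if not (0 < group_size < threshold):
        return assigned, []
    kept, evicted = [], []
    for lib in assigned:
        if lib['classification'] == classification:
            evicted.append(lib)
        else:
            kept.append(lib)
    return kept, evicted


chip = [{'classification': '华大', 'size': 30}, {'classification': 'WGS', 'size': 900}]
kept, evicted = evict_small_group(chip, '华大', group_size=30)
print(len(kept), len(evicted))   # 1 1
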
@@ -424,44 +333,48 @@ class AutoLayout:
             raise UserWarning('提供excel没有 未测 sheet ,请核查!')
         ori_library_df = pd.DataFrame(self.ori_data['未测'])
-        # need_col = ['status', '#library', 'sublibrary', 'i5', 'i7', 'data_needed', 'real_data', 'customer',
-        #             'classification', 'priority', 'time', '拆分方式', 'barcode', 'is_balance_lib', '备注',
-        #             'TIPS1', 'TIPS2', 'TIPS3'
-        #             ]
-        self.need_cols = self.read_cols()
+        # check that the provided Excel sheet has the required headers
         get_col = set(ori_library_df.columns)
         unhave_col = set(self.need_cols) - get_col
         if unhave_col:
-            unhave_fom = '; '.join(unhave_col)
-            raise UserWarning(f'未测表里没有{unhave_fom} 表头,请核查!')
+            unhave_from = '; '.join(unhave_col)
+            raise UserWarning(f'未测表里没有{unhave_from} 表头,请核查!')
+        # validate the data formats
         numeric_mask = pd.to_numeric(ori_library_df['data_needed'], errors='coerce').notna()
         time_mask = pd.to_datetime(ori_library_df['time'], errors='coerce').notna()
         # handle the status column
         status_mask = ori_library_df['status'] == '暂不排样'
+        # malformed barcodes
+        barcode_mask = ori_library_df['barcode'].str.len() != 16
         ori_library_df['note'] = ''
         ori_library_df.loc[~numeric_mask, 'note'] = 'data_needed 列非数字'
         ori_library_df.loc[~time_mask, 'note'] = 'time 列非日期'
         ori_library_df.loc[status_mask, 'note'] = '暂不排样'
-        # need_col.append('note')
-        no_ori_data = ori_library_df[~(numeric_mask & time_mask) | status_mask]
+        # ori_library_df.loc[barcode_mask, 'note'] = '非16位barcode'
+        no_ori_data = ori_library_df[~(numeric_mask & time_mask) | status_mask | barcode_mask]
         self.no_assign_data.extend(no_ori_data.to_dict('records'))
-        # boolean-index out the rows that are non-numeric or non-date
-        ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~status_mask]
-        # a customer with more than 1T of data would be handled separately
-        # summary = ori_library_df.groupby('customer').agg({'data_needed': 'sum'})
-        # print(summary)
+        # boolean-index out the rows that are non-numeric, non-date or marked 暂不排样
+        ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~status_mask & ~barcode_mask]
+        # normalise the time column
+        ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce')
         ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
-        # # 极致 customers with duplicates get level 0 so they are not pushed to the end (into 未测)
-        # duplicate_name = ori_library_df[ori_library_df['level'] == 2].duplicated(subset='barcode', keep=False)
-        # # set the 'level' column to 0
-        # ori_library_df.loc[duplicate_name, 'level'] = 0
+        # 极致 customers with duplicates get a lower level so they are not pushed to the end (into 未测)
+        ori_library_df.loc[
+            (ori_library_df.duplicated(subset='barcode')) & (ori_library_df['level'] == 20), 'level'] = 19

         for library, library_df in ori_library_df.groupby('#library'):
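assign_samples() now also rejects rows whose barcode is not exactly 16 characters long and, after computing level(), demotes duplicated barcodes in the top 极致 tier from 20 to 19 so they are not pushed to the end. Below is a toy reproduction of those masks and the demotion on invented rows; the column names follow the 未测 sheet, the data itself is made up.

import pandas as pd

df = pd.DataFrame({
    '#library': ['L1', 'L2', 'L3', 'L4'],
    'barcode': ['ACGTACGTACGTACGT', 'ACGTACGTACGTACGT', 'ACGT', 'TTTTACGTACGTACGT'],
    'data_needed': [100, 100, 'x', 50],
    'time': ['2024-02-01', '2024-02-01', '2024-02-02', 'not a date'],
    'status': ['', '', '暂不排样', ''],
    'level': [20, 20, 50, 40],   # assume level() has already been applied
})

numeric_mask = pd.to_numeric(df['data_needed'], errors='coerce').notna()
time_mask = pd.to_datetime(df['time'], errors='coerce').notna()
status_mask = df['status'] == '暂不排样'
barcode_mask = df['barcode'].str.len() != 16          # new: malformed barcodes are set aside

rejected = df[~(numeric_mask & time_mask) | status_mask | barcode_mask]
kept = df[(numeric_mask & time_mask) & ~status_mask & ~barcode_mask].copy()

# new: duplicated barcodes in the top (极致) tier are demoted from 20 to 19
kept.loc[kept.duplicated(subset='barcode') & (kept['level'] == 20), 'level'] = 19
print(len(rejected), kept['level'].tolist())   # 2 [20, 19]
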
@@ -470,10 +383,10 @@ class AutoLayout:
             # duplicated barcodes inside the library
             if len(library_df['barcode'].values) > len(set(library_df['barcode'].values)):
                 library_df['note'] = '文库内部有重复'
+                library_df.loc[:, 'time'] = library_df['time'].apply(format_date)
                 self.no_assign_data.extend(library_df.to_dict('records'))
                 continue
-            # split handling
             flag = False
             if size > (self.data_limit) / 2:
                 library_df['data_needed'] = library_df['data_needed'] / 2
@@ -491,6 +404,7 @@ class AutoLayout:
                     data=library_df[self.need_cols].to_dict('records')
                 ))
+            # split in half
             if flag:
                 self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
                 ori_library_data.append(dict(
@@ -504,43 +418,40 @@ class AutoLayout:
                     classification=library_df['classification'].values[0],
                     data=library_df[self.need_cols].to_dict('records')
                 ))
-        ori_sort_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time'], -x['size']))
-        i = 0
-        while ori_sort_data:
-            library_data = ori_sort_data[0]
+        self.ori_lib_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time']))
+        # self.ori_lib_data = ori_sort_data
+
+        n = 1
+        while self.ori_lib_data:
+            n += 1
+            print(n)
+            library_data = self.ori_lib_data[0]
             chipname = f'chip{self.loc_chip_num}'
             # an empty chip is filled directly
             if chipname not in self.index_assignments:
                 self.add_new_data(chipname, library_data)
-                ori_sort_data.remove(library_data)
-                i += 1
+                self.ori_lib_data.remove(library_data)
                 continue
             # apply the constraints
             if self.judge_data(chipname, library_data):
                 self.add_new_data(chipname, library_data, newer=False)
-                ori_sort_data.remove(library_data)
-                i += 1
+                self.ori_lib_data.remove(library_data)
             else:
-                for j in range(len(ori_sort_data)):
-                    newlibrary_data = ori_sort_data[j]
+                for j in range(len(self.ori_lib_data)):
+                    newlibrary_data = self.ori_lib_data[j]
                     if self.judge_data(chipname, newlibrary_data):
-                        ori_sort_data.remove(newlibrary_data)
-                        i += 1
+                        self.ori_lib_data.remove(newlibrary_data)
                         self.add_new_data(chipname, newlibrary_data, newer=False)
                         break
                     j += 1
                 else:
-                    # self.loc_chip_num += 1
                     self.add_loc_num()
             if self.chip_size[chipname] > self.data_limit:
-                # self.loc_chip_num += 1
                 self.add_loc_num()

-    def assign_again(self):
-        pass
-
     def run(self):
@@ -553,9 +464,6 @@ class AutoLayout:
         outputpath = os.path.join(self.output, 'result', outputname)
         writer = pd.ExcelWriter(outputpath)

-        # ga = Ga(sheets=self.index_assignments)
-        # self.index_assignments = ga.run()
-
         chip_loc = 1
         librarynum = 0
         for chip_idx, chip_assignments in self.index_assignments.items():
@@ -564,15 +472,21 @@ class AutoLayout:
             df = pd.DataFrame(chip_assignments)
             df['time'] = df['time'].dt.strftime('%Y-%m-%d')
-            if df['data_needed'].sum() < 1600 or librarynum > self.librarynum:
-                df['note'] = '排样数据量不足1600或者排样管数超标'
-                self.no_assign_data.extend(df.to_dict('records'))
-                continue
-            librarynum += len(set(df['#library'].values))
             if [method for method in df['拆分方式'].values if '极致' in method]:
                 addname = 'X'
             else:
                 addname = ''
+            if df['data_needed'].sum() < 1600 and not addname:
+                df['note'] = '排样数据量不足1600G'
+                self.no_assign_data.extend(df.to_dict('records'))
+                continue
+            if librarynum > self.librarynum:
+                df['note'] = '排样管数超标'
+                self.no_assign_data.extend(df.to_dict('records'))
+                continue
+            librarynum += len(set(df['#library'].values))

             self.dec_barcode_radio(chip_idx)
             chipname = addname + chip_idx
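run() now applies its output gates in a new order: the X prefix for 极致 chips is decided first, the 1600G floor then only applies to non-X chips, and the tube-count cap is reported separately. A small sketch of that gating order; the function name and return convention are mine, the note strings are the ones written back into the 未测 sheet.

def gate_chip(df_total, split_methods, tubes_so_far, max_tubes):
    """Return the reason a finished chip is pushed back to 未测, or None if it is written out."""
    is_x_chip = any('极致' in m for m in split_methods)
    if df_total < 1600 and not is_x_chip:      # X (极致) chips are exempt from the 1600G floor
        return '排样数据量不足1600G'
    if tubes_so_far > max_tubes:
        return '排样管数超标'
    return None


print(gate_chip(1500, ['常规'], tubes_so_far=10, max_tubes=60))      # 排样数据量不足1600G
print(gate_chip(1500, ['极致周期'], tubes_so_far=10, max_tubes=60))  # None
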
@@ -588,8 +502,10 @@ class AutoLayout:
             res_df = pd.concat([df, df_sum], axis=1)
             res_df.to_excel(writer, sheet_name=chipname, index=False)
             chip_loc += 1
-        # self.no_assign_data.extend(self.diffic_assign_data)
         no_assign_df = pd.DataFrame(self.no_assign_data)
         no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
+        if not no_assign_df.empty:
             no_assign_df = no_assign_df[self.need_cols]
             no_assign_df.to_excel(writer, sheet_name='未测', index=False)
         if self.return_log:
@@ -601,7 +517,6 @@ class AutoLayout:

if __name__ == '__main__':
    start_time = time.time()
    filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'example', 'input排样表.xlsx')
-    # excel_file = 'example/input排样表.xlsx'
    output_file = ''
    layout = AutoLayout(filepath, output_file)
    layout.run()