diff --git a/T7_server.py b/T7_server.py index 12b87ca..070db30 100644 --- a/T7_server.py +++ b/T7_server.py @@ -23,6 +23,8 @@ def recvdata(conn, path): content_len = header_dic['contentlen'] content_name = header_dic['contentname'] librarynum = header_dic['librarynum'] + is_use_balance = header_dic['is_use_balance'] + is_use_max = header_dic['is_use_max'] recv_len = 0 fielpath = os.path.join(path, '%s_%s' % (datetime.now().strftime("%m%d%H%M"), content_name)) file = open(fielpath, 'wb') @@ -31,7 +33,7 @@ def recvdata(conn, path): file.write(correntrecv) recv_len += len(correntrecv) file.close() - return fielpath, librarynum + return fielpath, librarynum, is_use_balance, is_use_max def senddata(conn, path, message=None): @@ -77,8 +79,8 @@ def server(): while True: try: myclient, adddr = myserver.accept() - recv_content, librarynum = recvdata(myclient, os.path.join(basedir, 'example')) - layout = T7(recv_content, librarynum) + recv_content, librarynum, is_use_balance, is_use_max = recvdata(myclient, os.path.join(basedir, 'example')) + layout = T7(recv_content, librarynum, is_use_balance, is_use_max) outputpath = layout.run() senddata(myclient, outputpath) except Exception as e: @@ -88,7 +90,7 @@ def server(): if __name__ == '__main__': if len(sys.argv) > 1: - layout = T7(sys.argv[1], sys.argv[2]) + layout = T7(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) outputpath = layout.run() else: server() diff --git a/tools/t7.py b/tools/t7.py index bc199e4..e276bc6 100644 --- a/tools/t7.py +++ b/tools/t7.py @@ -5,7 +5,6 @@ from collections import defaultdict from datetime import datetime import pandas as pd -from deap import base, creator, tools, algorithms from tools.common import basedir, log @@ -15,161 +14,12 @@ def format_date(date): return date.strftime('%Y-%m-%d') -def count_barcode_radio(data): - df = pd.DataFrame(data) - ratio_sites = dict() - is_not_balance_list = [] - if df.empty: - return ratio_sites, is_not_balance_list - - df['barcode'] = df['barcode'].str.slice(0, 16) - barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values, - columns=['T' + str(x) for x in range(16)]).join(df['data_needed']) - total = barcode_df['data_needed'].sum() - - for i in range(16): - column = 'T' + str(i) - col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'}) - # 去掉N计数 - if 'N' in col_df.index: - base_n_size = col_df.loc['N', 'data_needed'] - col_df = col_df.drop('N') - else: - base_n_size = 0 - col_df['ratio'] = (col_df['data_needed']) / (total - base_n_size) - ratio = col_df['ratio'].to_dict() - ratio_sites[i] = ratio - A, B, C, D, E, F = list(), list(), list(), list(), list(), list() - for decbase in ['A', 'T', 'C', 'G']: - if decbase not in ratio: - ratio[decbase] = 0 - if ratio[decbase] >= 0.6: - A.append(decbase) - if 0.2 <= ratio[decbase] < 0.6: - B.append(decbase) - if 0.15 <= ratio[decbase] < 0.2: - C.append(decbase) - if 0.1 <= ratio[decbase] < 0.15: - D.append(decbase) - if 0.08 <= ratio[decbase] < 0.1: - E.append(decbase) - if ratio[decbase] < 0.08: - F.append(decbase) - - A_num, B_num, C_num, D_num, E_num, F_num = len(A), len(B), len(C), len(D), len(E), len(F) - if not ((B_num + C_num + D_num == 4) or (F_num == 1 and (A_num + B_num) == 3) or ( - E_num == 1 and D_num == 1 and (A_num + B_num + C_num) == 2) or ( - E_num == 1 and (A_num + B_num + C_num) == 3)): - is_not_balance_list.append( - '第%s位置,算出结果为 %s' % (i, ratio) - ) - return ratio_sites, is_not_balance_list - - -# 定义遗传算法 -class Ga: - """ - # 定义遗传算法 - """ - - def __init__(self, sheets): - self.sheets = sheets - - # 定义个体的生成方式 - def generate_individual(self): - individual = copy.deepcopy(self.sheets) # 初始解作为个体 - return [individual] - - # 定义评估函数 - @staticmethod - def evaluate(individual): - total_data_needed_sum = 0 - xchip = 0 - try: - for sheetname, data in individual[0][0].items(): - library_data = pd.DataFrame(data) - - size = library_data['data_needed'].sum() - - # 芯片大小不能超过设定限制 - if size > 1700: - return (0, 100000, 100000) - - # barcode有重复 - if len(library_data['barcode'].values) < len(set(library_data['barcode'].values)): - return (0, 100000, 100000) - - # 不平衡文库大于250G 不能添加 - if library_data[library_data['is_balance_lib'] == '否']['data_needed'].sum() > 250: - return (0, 100000, 100000) - - # 碱基不平衡不过不添加,保证前面的数据, 在数据达到1200G的时候开始 - ratio_sites, is_not_balance_list = count_barcode_radio(library_data) - if is_not_balance_list: - return (0, 100000, 100000) - - if library_data[library_data['classification'].str.lower() == 'nextera']['data_needed'].sum() <= 50: - return (0, 100000, 100000) - - # 计算每个sheet的data_needed之和 - total_data_needed_sum += library_data['data_needed'].sum() - - # 记录包含字母"A"的sheet数量 - if any('极致' in value for value in library_data['split']): - xchip += 1 - except Exception: - return (0, 100000, 100000) - - # 返回一个适应度值,目标是最大化总的data_needed之和,最小化sheet的数量, 最少的极致芯片 - total_data_needed_sum, num_sheets, num_xchip = total_data_needed_sum, len(individual[0]), xchip - return total_data_needed_sum, num_sheets, num_xchip - - def run(self): - # 定义遗传算法的参数 - pop_size = 50 - cxpb = 0.7 # 交叉概率 - mutpb = 0.2 # 变异概率 - ngen = 100 # 迭代次数 - - # 初始化遗传算法工具箱 - creator.create("FitnessMax", base.Fitness, weights=(1.0, -1.0, -1.0,)) # 三个目标,一个最大化两个最小化 - creator.create("Individual", list, fitness=creator.FitnessMax) - - toolbox = base.Toolbox() - - # 结构初始化器 - toolbox.register("individual", tools.initRepeat, creator.Individual, self.generate_individual, n=3) - toolbox.register("population", tools.initRepeat, list, toolbox.individual) - toolbox.register("evaluate", self.evaluate) - - # 注册遗传算法所需的操作 - toolbox.register("mate", tools.cxTwoPoint) - toolbox.register("mutate", tools.mutUniformInt, low=1, up=100, indpb=0.2) - toolbox.register("select", tools.selTournament, tournsize=3) - # 初始化种群 - population = toolbox.population(n=pop_size) - - # 运行遗传算法 - algorithms.eaMuPlusLambda(population, toolbox, mu=pop_size, lambda_=pop_size * 2, cxpb=cxpb, mutpb=mutpb, - ngen=ngen, stats=None, halloffame=None) - - # 输出结果 - best_individual = tools.selBest(population, k=1) - print(best_individual) - optimized_sheets = best_individual[0] # 获取最优解 - - # 将优化后的结果输出 - # for i, sheet in enumerate(optimized_sheets): - # sheet.to_excel(f'optimized_sheet_{i + 1}.xlsx', index=False) - return optimized_sheets - - class AutoLayout: """ 自动化派样 """ - def __init__(self, path, librarynum, output=basedir, data_limit=1750): + def __init__(self, path, librarynum, is_use_balance=1, is_use_max=0, output=basedir, data_limit=1750): self.path = path self.output = output self.librarynum = int(librarynum) @@ -191,17 +41,133 @@ class AutoLayout: # 文库 self.chip_classification = defaultdict(set) self.rule = self.read_rule() - # 甲基化文库不大于200,WGBS文库不大于200G + + # 不平衡文库 self.chip_speciallib_size = dict() + # 甲基化文库 + self.chip_methylib_size = dict() + # Nextera 文库大小 self.chip_speciallib_nextera_size = dict() + # 华大 文库 + self.chip_speciallib_huada_size = dict() self.logger = log(os.path.basename(f'{path}.txt')) self.return_log = list() self.no_assign_data = list() + self.ori_lib_data = list() self.need_cols = self.read_cols() + self.is_use_balance = is_use_balance + self.is_use_max = is_use_max + + def count_barcode_radio(self, data): + df = pd.DataFrame(data) + ratio_sites = dict() + is_not_balance_list = [] + if df.empty: + return ratio_sites, is_not_balance_list + + df['barcode'] = df['barcode'].str.slice(0, 16) + barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values, + columns=['T' + str(x) for x in range(16)]).join(df['data_needed']) + total = barcode_df['data_needed'].sum() + + for i in range(16): + column = 'T' + str(i) + col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'}) + # 去掉N计数 + if 'N' in col_df.index: + base_n_size = col_df.loc['N', 'data_needed'] + col_df = col_df.drop('N') + else: + base_n_size = 0 + col_df['ratio'] = (col_df['data_needed']) / (total - base_n_size) + ratio = col_df['ratio'].to_dict() + ratio_sites[i] = ratio + A, B, C, D, E, F, G = list(), list(), list(), list(), list(), list(), list() + for decbase in ['A', 'T', 'C', 'G']: + if decbase not in ratio: + ratio[decbase] = 0 + if ratio[decbase] >= 0.6: + A.append(decbase) + if 0.2 <= ratio[decbase] < 0.6: + B.append(decbase) + if 0.15 <= ratio[decbase] < 0.2: + C.append(decbase) + if 0.1 <= ratio[decbase] < 0.15: + D.append(decbase) + if 0.08 <= ratio[decbase] < 0.1: + E.append(decbase) + if ratio[decbase] < 0.08: + F.append(decbase) + + # 新增一个碱基可行规则 + if 0.125 <= ratio[decbase] <= 0.625: + G.append(decbase) + + A_num, B_num, C_num, D_num, E_num, F_num, G_num = len(A), len(B), len(C), len(D), len(E), len(F), len(G) + if not ((B_num + C_num + D_num == 4) or (F_num == 1 and (A_num + B_num) == 3) or ( + E_num == 1 and D_num == 1 and (A_num + B_num + C_num) == 2) or ( + E_num == 1 and (A_num + B_num + C_num) == 3) or ( + F_num == 1 and G_num == 3 and self.is_use_max)): + is_not_balance_list.append( + '第%s位置,算出结果为 %s' % (i, ratio) + ) + return ratio_sites, is_not_balance_list + + def dec_barcode_radio(self, chipname): + data = self.index_assignments[chipname] + ratio_sites, is_not_balance_list = self.count_barcode_radio(data) + if is_not_balance_list: + desc = '\n'.join(is_not_balance_list) + self.return_log.append(f'芯片{chipname}有碱基不平衡:\n{desc}') + print(f'芯片{chipname}有碱基不平衡:\n{desc}') + + @staticmethod + def level(row): + + today_date = datetime.now() + + if 'nextera' in row['classification'].lower(): + return 10 + + if '华大' in row['classification']: + return 11 + + if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']: + return 20 + + mytime = row['time'] + # 判断日期是之前的还是之后的 + if mytime < today_date: + return 30 + + if '加急' in row['priority']: + return 40 + + if '补测' in row['priority']: + return 50 + + else: + return 1000 + + @staticmethod + def read_rule(): + df = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive_classfication.xlsx')) + newdf = pd.DataFrame() + newdf['c1'] = df['c2'] + newdf['c2'] = df['c1'] + res = pd.concat([df, newdf]) + return res.reset_index() + + @staticmethod + def read_cols(): + df = pd.read_excel(os.path.join(basedir, 'rule', 'columns.xlsx')) + cols = list(df['cols'].values) + return cols + def read_excel(self): """ 原始数据处理 @@ -233,122 +199,30 @@ class AutoLayout: # if library_data['classification'] in ['扩增子', '不平衡文库', '单细胞文库以及甲基化']: if library_data['is_balance_lib'] == '否': self.chip_speciallib_size[chipname] = library_data['size'] + elif library_data['is_balance_lib'] == '甲基化': + self.chip_methylib_size[chipname] = library_data['size'] else: self.chip_speciallib_size[chipname] = 0 + self.chip_methylib_size[chipname] = 0 if 'nextera' in library_data['classification'].lower(): self.chip_speciallib_nextera_size[chipname] = library_data['size'] else: self.chip_speciallib_nextera_size[chipname] = 0 + if '华大' in library_data['classification']: + self.chip_speciallib_huada_size[chipname] = library_data['size'] + else: + self.chip_speciallib_huada_size[chipname] = 0 else: self.chip_size[chipname] += library_data['size'] if library_data['is_balance_lib'] == '否': self.chip_speciallib_size[chipname] += library_data['size'] + if library_data['is_balance_lib'] == '甲基化': + self.chip_methylib_size[chipname] += library_data['size'] if 'nextera' in library_data['classification'].lower(): - self.chip_speciallib_nextera_size[chipname] += library_data['size'] - - @staticmethod - def count_barcode_radio(data): - df = pd.DataFrame(data) - ratio_sites = dict() - is_not_balance_list = [] - if df.empty: - return ratio_sites, is_not_balance_list - - df['barcode'] = df['barcode'].str.slice(0, 16) - barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values, - columns=['T' + str(x) for x in range(16)]).join(df['data_needed']) - total = barcode_df['data_needed'].sum() - - for i in range(16): - column = 'T' + str(i) - col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'}) - # 去掉N计数 - if 'N' in col_df.index: - base_n_size = col_df.loc['N', 'data_needed'] - col_df = col_df.drop('N') - else: - base_n_size = 0 - col_df['ratio'] = (col_df['data_needed']) / (total - base_n_size) - ratio = col_df['ratio'].to_dict() - ratio_sites[i] = ratio - A, B, C, D, E, F = list(), list(), list(), list(), list(), list() - for decbase in ['A', 'T', 'C', 'G']: - if decbase not in ratio: - ratio[decbase] = 0 - if ratio[decbase] >= 0.6: - A.append(decbase) - if 0.2 <= ratio[decbase] < 0.6: - B.append(decbase) - if 0.15 <= ratio[decbase] < 0.2: - C.append(decbase) - if 0.1 <= ratio[decbase] < 0.15: - D.append(decbase) - if 0.08 <= ratio[decbase] < 0.1: - E.append(decbase) - if ratio[decbase] < 0.08: - F.append(decbase) - - A_num, B_num, C_num, D_num, E_num, F_num = len(A), len(B), len(C), len(D), len(E), len(F) - if not ((B_num + C_num + D_num == 4) or (F_num == 1 and (A_num + B_num) == 3) or ( - E_num == 1 and D_num == 1 and (A_num + B_num + C_num) == 2) or ( - E_num == 1 and (A_num + B_num + C_num) == 3)): - is_not_balance_list.append( - '第%s位置,算出结果为 %s' % (i, ratio) - ) - return ratio_sites, is_not_balance_list - - def dec_barcode_radio(self, chipname): - data = self.index_assignments[chipname] - ratio_sites, is_not_balance_list = self.count_barcode_radio(data) - if is_not_balance_list: - desc = '\n'.join(is_not_balance_list) - self.return_log.append(f'芯片{chipname}有碱基不平衡:\n{desc}') - print(f'芯片{chipname}有碱基不平衡:\n{desc}') - - @staticmethod - def level(row): - - today_date = datetime.now() - - # 将时间字符串转换为 datetime 对象 - # mytime = datetime.strptime(row['time'], "%Y-%m-%d") - # mytime = row['time'].strftime("%Y-%m-%d") - - if 'nextera' in row['classification'].lower(): - return 1 - - if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']: - return 2 - - mytime = row['time'] - # 判断日期是之前的还是之后的 - if mytime < today_date: - return 3 - - if '加急' in row['priority']: - return 4 - - if '补测' in row['priority']: - return 5 - - else: - return 100 - - @staticmethod - def read_rule(): - df = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive_classfication.xlsx')) - newdf = pd.DataFrame() - newdf['c1'] = df['c2'] - newdf['c2'] = df['c1'] - res = pd.concat([df, newdf]) - return res.reset_index() - - @staticmethod - def read_cols(): - df = pd.read_excel(os.path.join(basedir, 'rule', 'columns.xlsx')) - cols = list(df['cols'].values) - return cols + self.chip_speciallib_huada_size[chipname] += library_data['size'] + if '华大' in library_data['classification']: + self.chip_speciallib_huada_size[chipname] += library_data['size'] def use_rule(self, chipname, classfication): may_classfic = set(self.rule[self.rule['c1'] == classfication]['c2']) @@ -357,9 +231,10 @@ class AutoLayout: return False def judge_data(self, chipname, library_data): + """ + 约束条件 + """ size = library_data['size'] - # customer = library_data['customer'] - # library = library_data['library'] classification = library_data['classification'] is_balance_lib = library_data['is_balance_lib'] @@ -383,9 +258,19 @@ class AutoLayout: if is_balance_lib == '否' and self.chip_speciallib_size[chipname] + size > 250: splibrary = False + # 甲基化文库不能大于250G + spmethylibrary = True + if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 250: + spmethylibrary = False + + # 不使用平衡文库 + if not self.is_use_balance: + splibrary = True + spmethylibrary = True + # 碱基不平衡不过不添加,保证前面的数据, 在数据达到1200G的时候开始 base_balance = True - if self.chip_size[chipname] > 800: + if self.chip_size[chipname] > 900: current_data = copy.deepcopy(self.index_assignments[chipname]) new_data = library_data['data'] current_data.extend(new_data) @@ -393,17 +278,21 @@ class AutoLayout: if is_not_balance_list: base_balance = False - if sizelimit and notrepeatbarcode and exclusive_classific and splibrary and base_balance: + if sizelimit and notrepeatbarcode and exclusive_classific and splibrary and base_balance and spmethylibrary: return True return False def add_loc_num(self): - # 有nextera文库 必须满足大于50G + """ + 锚定芯片号增加 + """ + # 有nextera, 华大文库 必须满足大于50G chipname = f'chip{self.loc_chip_num}' nextera_size = self.chip_speciallib_nextera_size[chipname] - if nextera_size > 50 or nextera_size == 0: - self.loc_chip_num += 1 - else: + huada_size = self.chip_speciallib_huada_size[chipname] + print(chipname, huada_size, nextera_size) + flag = True + if 0 < nextera_size < 50: # 有nextera文库,但是不满足50G 去除 nextary_barcode = set() no_nextary_data = list() @@ -416,6 +305,26 @@ class AutoLayout: self.index_assignments[chipname] = no_nextary_data self.chip_barcode_recode[chipname] -= nextary_barcode self.chip_speciallib_nextera_size[chipname] = 0 + self.chip_size[chipname] -= nextera_size + flag = False + if 0 < huada_size < 50: + # 有华大文库,但是不满足50G 去除 + huada_barcode = set() + no_huada_data = list() + for libdata in self.index_assignments[chipname]: + if libdata['classification'] != '华大': + no_huada_data.append(libdata) + else: + self.no_assign_data.append(libdata) + huada_barcode.update(libdata['barcode']) + self.index_assignments[chipname] = no_huada_data + self.chip_barcode_recode[chipname] -= huada_barcode + self.chip_speciallib_huada_size[chipname] = 0 + self.chip_size[chipname] -= huada_size + flag = False + if flag: + print(self.loc_chip_num) + self.loc_chip_num += 1 def assign_samples(self): ori_library_data = list() @@ -424,44 +333,48 @@ class AutoLayout: raise UserWarning('提供excel没有 未测 sheet ,请核查!') ori_library_df = pd.DataFrame(self.ori_data['未测']) - # need_col = ['status', '#library', 'sublibrary', 'i5', 'i7', 'data_needed', 'real_data', 'customer', - # 'classification', 'priority', 'time', '拆分方式', 'barcode', 'is_balance_lib', '备注', - # 'TIPS1', 'TIPS2', 'TIPS3' - # ] - self.need_cols = self.read_cols() + # 检查提供excel 是否有必须表头 get_col = set(ori_library_df.columns) unhave_col = set(self.need_cols) - get_col if unhave_col: - unhave_fom = '; '.join(unhave_col) - raise UserWarning(f'未测表里没有{unhave_fom} 表头,请核查!') + unhave_from = '; '.join(unhave_col) + raise UserWarning(f'未测表里没有{unhave_from} 表头,请核查!') + # 数据标准格式 numeric_mask = pd.to_numeric(ori_library_df['data_needed'], errors='coerce').notna() time_mask = pd.to_datetime(ori_library_df['time'], errors='coerce').notna() # 添加处理status列的逻辑 status_mask = ori_library_df['status'] == '暂不排样' + # 非正常barcode + barcode_mask = ori_library_df['barcode'].str.len() != 16 + ori_library_df['note'] = '' ori_library_df.loc[~numeric_mask, 'note'] = 'data_needed 列非数字' ori_library_df.loc[~time_mask, 'note'] = 'time 列非日期' ori_library_df.loc[status_mask, 'note'] = '暂不排样' + # ori_library_df.loc[barcode_mask, 'note'] = '非16位barcode' - # need_col.append('note') - - no_ori_data = ori_library_df[~(numeric_mask & time_mask) | status_mask] + no_ori_data = ori_library_df[~(numeric_mask & time_mask) | status_mask | barcode_mask] self.no_assign_data.extend(no_ori_data.to_dict('records')) - # 使用布尔索引筛选出不是数字和非日期的行 - ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~status_mask] + # 使用布尔索引筛选出不是数字和非日期的行,并且不是暂不排样的行 + ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~status_mask & ~barcode_mask] + # 某个客户的检测的数据超过1个T就单独处理 + # summary = ori_library_df.groupby('customer').agg({'data_needed': 'sum'}) + # print(summary) + + # 时间格式化 + ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce') ori_library_df['level'] = ori_library_df.apply(self.level, axis=1) - # # 极致客户有重复的,把等级调到0,防止放到了最后,到了未测里 - # duplicate_name = ori_library_df[ori_library_df['level'] == 2].duplicated(subset='barcode', keep=False) - # # 将 'level' 列的值改为 0 - # ori_library_df.loc[duplicate_name, 'level'] = 0 + # 极致客户有重复的,把等级调到0,防止放到了最后,到了未测里 + ori_library_df.loc[ + (ori_library_df.duplicated(subset='barcode')) & (ori_library_df['level'] == 20), 'level'] = 19 for library, library_df in ori_library_df.groupby('#library'): @@ -470,10 +383,10 @@ class AutoLayout: # 文库内部有重复 if len(library_df['barcode'].values) > len(set(library_df['barcode'].values)): library_df['note'] = '文库内部有重复' - library_df.loc[:, 'time'] = library_df['time'].apply(format_date) self.no_assign_data.extend(library_df.to_dict('records')) continue + # 拆分处理 flag = False if size > (self.data_limit) / 2: library_df['data_needed'] = library_df['data_needed'] / 2 @@ -491,6 +404,7 @@ class AutoLayout: data=library_df[self.need_cols].to_dict('records') )) + # 拆分对半 if flag: self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ') ori_library_data.append(dict( @@ -504,43 +418,40 @@ class AutoLayout: classification=library_df['classification'].values[0], data=library_df[self.need_cols].to_dict('records') )) - ori_sort_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time'], -x['size'])) + self.ori_lib_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time'])) - i = 0 - while ori_sort_data: - library_data = ori_sort_data[0] + # self.ori_lib_data = ori_sort_data + + n = 1 + while self.ori_lib_data: + n += 1 + print(n) + library_data = self.ori_lib_data[0] chipname = f'chip{self.loc_chip_num}' # 空白芯片直接添加 if chipname not in self.index_assignments: self.add_new_data(chipname, library_data) - ori_sort_data.remove(library_data) - i += 1 + self.ori_lib_data.remove(library_data) continue # 判断条件 if self.judge_data(chipname, library_data): self.add_new_data(chipname, library_data, newer=False) - ori_sort_data.remove(library_data) - i += 1 + self.ori_lib_data.remove(library_data) else: - for j in range(len(ori_sort_data)): - newlibrary_data = ori_sort_data[j] + for j in range(len(self.ori_lib_data)): + newlibrary_data = self.ori_lib_data[j] if self.judge_data(chipname, newlibrary_data): - ori_sort_data.remove(newlibrary_data) - i += 1 + self.ori_lib_data.remove(newlibrary_data) self.add_new_data(chipname, newlibrary_data, newer=False) break j += 1 else: - # self.loc_chip_num += 1 self.add_loc_num() - if self.chip_size[chipname] > self.data_limit: - # self.loc_chip_num += 1 - self.add_loc_num() - def assign_again(self): - pass + if self.chip_size[chipname] > self.data_limit: + self.add_loc_num() def run(self): # self.assign_samples() @@ -553,9 +464,6 @@ class AutoLayout: outputpath = os.path.join(self.output, 'result', outputname) writer = pd.ExcelWriter(outputpath) - # ga = Ga(sheets=self.index_assignments) - # self.index_assignments = ga.run() - chip_loc = 1 librarynum = 0 for chip_idx, chip_assignments in self.index_assignments.items(): @@ -564,15 +472,21 @@ class AutoLayout: df = pd.DataFrame(chip_assignments) df['time'] = df['time'].dt.strftime('%Y-%m-%d') - if df['data_needed'].sum() < 1600 or librarynum > self.librarynum: - df['note'] = '排样数据量不足1600或者排样管数超标' - self.no_assign_data.extend(df.to_dict('records')) - continue - librarynum += len(set(df['#library'].values)) if [method for method in df['拆分方式'].values if '极致' in method]: addname = 'X' else: addname = '' + + if df['data_needed'].sum() < 1600 and not addname: + df['note'] = '排样数据量不足1600G' + self.no_assign_data.extend(df.to_dict('records')) + continue + if librarynum > self.librarynum: + df['note'] = '排样管数超标' + self.no_assign_data.extend(df.to_dict('records')) + continue + librarynum += len(set(df['#library'].values)) + self.dec_barcode_radio(chip_idx) chipname = addname + chip_idx @@ -588,9 +502,11 @@ class AutoLayout: res_df = pd.concat([df, df_sum], axis=1) res_df.to_excel(writer, sheet_name=chipname, index=False) chip_loc += 1 + # self.no_assign_data.extend(self.diffic_assign_data) no_assign_df = pd.DataFrame(self.no_assign_data) no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x) - no_assign_df = no_assign_df[self.need_cols] + if not no_assign_df.empty: + no_assign_df = no_assign_df[self.need_cols] no_assign_df.to_excel(writer, sheet_name='未测', index=False) if self.return_log: pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False) @@ -601,7 +517,6 @@ class AutoLayout: if __name__ == '__main__': start_time = time.time() filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'example', 'input排样表.xlsx') - # excel_file = 'example/input排样表.xlsx' output_file = '' layout = AutoLayout(filepath, output_file) layout.run()