main
parent c203913bd4
commit 2f07383922

T7_server.py (10 changed lines)
@@ -23,6 +23,8 @@ def recvdata(conn, path):
content_len = header_dic['contentlen']
content_name = header_dic['contentname']
librarynum = header_dic['librarynum']
is_use_balance = header_dic['is_use_balance']
is_use_max = header_dic['is_use_max']
recv_len = 0
fielpath = os.path.join(path, '%s_%s' % (datetime.now().strftime("%m%d%H%M"), content_name))
file = open(fielpath, 'wb')

@@ -31,7 +33,7 @@ def recvdata(conn, path):
file.write(correntrecv)
recv_len += len(correntrecv)
file.close()
return fielpath, librarynum
return fielpath, librarynum, is_use_balance, is_use_max

def senddata(conn, path, message=None):

@@ -77,8 +79,8 @@ def server():
while True:
try:
myclient, adddr = myserver.accept()
recv_content, librarynum = recvdata(myclient, os.path.join(basedir, 'example'))
layout = T7(recv_content, librarynum)
recv_content, librarynum, is_use_balance, is_use_max = recvdata(myclient, os.path.join(basedir, 'example'))
layout = T7(recv_content, librarynum, is_use_balance, is_use_max)
outputpath = layout.run()
senddata(myclient, outputpath)
except Exception as e:

@@ -88,7 +90,7 @@ def server():

if __name__ == '__main__':
if len(sys.argv) > 1:
layout = T7(sys.argv[1], sys.argv[2])
layout = T7(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
outputpath = layout.run()
else:
server()
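
The two new header fields ride along with the fields recvdata already reads. The following is an illustrative sketch, not part of the commit: the example values and the comments on what each flag does are assumptions inferred from the diff, and the transport/serialization details are not shown here.

# Hypothetical request header matching the keys recvdata() reads after this change:
header_dic = {
    'contentlen': 10240,          # assumed: byte length of the uploaded excel payload
    'contentname': 'input.xlsx',  # assumed: file name used to build the save path
    'librarynum': 200,            # assumed: maximum number of libraries per layout run
    'is_use_balance': 1,          # new field: apply the unbalanced/methylation library size limits
    'is_use_max': 0,              # new field: enable the extra F/G base-balance rule
}

# Server side after this commit (see the hunks above):
# recv_content, librarynum, is_use_balance, is_use_max = recvdata(myclient, os.path.join(basedir, 'example'))
# layout = T7(recv_content, librarynum, is_use_balance, is_use_max)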

tools/t7.py (533 changed lines)
@@ -5,7 +5,6 @@ from collections import defaultdict
from datetime import datetime

import pandas as pd
from deap import base, creator, tools, algorithms

from tools.common import basedir, log
@@ -15,161 +14,12 @@ def format_date(date):
return date.strftime('%Y-%m-%d')


def count_barcode_radio(data):
df = pd.DataFrame(data)
ratio_sites = dict()
is_not_balance_list = []
if df.empty:
return ratio_sites, is_not_balance_list

df['barcode'] = df['barcode'].str.slice(0, 16)
barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values,
columns=['T' + str(x) for x in range(16)]).join(df['data_needed'])
total = barcode_df['data_needed'].sum()

for i in range(16):
column = 'T' + str(i)
col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'})
# 去掉N计数
if 'N' in col_df.index:
base_n_size = col_df.loc['N', 'data_needed']
col_df = col_df.drop('N')
else:
base_n_size = 0
col_df['ratio'] = (col_df['data_needed']) / (total - base_n_size)
ratio = col_df['ratio'].to_dict()
ratio_sites[i] = ratio
A, B, C, D, E, F = list(), list(), list(), list(), list(), list()
for decbase in ['A', 'T', 'C', 'G']:
if decbase not in ratio:
ratio[decbase] = 0
if ratio[decbase] >= 0.6:
A.append(decbase)
if 0.2 <= ratio[decbase] < 0.6:
B.append(decbase)
if 0.15 <= ratio[decbase] < 0.2:
C.append(decbase)
if 0.1 <= ratio[decbase] < 0.15:
D.append(decbase)
if 0.08 <= ratio[decbase] < 0.1:
E.append(decbase)
if ratio[decbase] < 0.08:
F.append(decbase)

A_num, B_num, C_num, D_num, E_num, F_num = len(A), len(B), len(C), len(D), len(E), len(F)
if not ((B_num + C_num + D_num == 4) or (F_num == 1 and (A_num + B_num) == 3) or (
E_num == 1 and D_num == 1 and (A_num + B_num + C_num) == 2) or (
E_num == 1 and (A_num + B_num + C_num) == 3)):
is_not_balance_list.append(
'第%s位置,算出结果为 %s' % (i, ratio)
)
return ratio_sites, is_not_balance_list


# 定义遗传算法
class Ga:
"""
# 定义遗传算法
"""

def __init__(self, sheets):
self.sheets = sheets

# 定义个体的生成方式
def generate_individual(self):
individual = copy.deepcopy(self.sheets) # 初始解作为个体
return [individual]

# 定义评估函数
@staticmethod
def evaluate(individual):
total_data_needed_sum = 0
xchip = 0
try:
for sheetname, data in individual[0][0].items():
library_data = pd.DataFrame(data)

size = library_data['data_needed'].sum()

# 芯片大小不能超过设定限制
if size > 1700:
return (0, 100000, 100000)

# barcode有重复
if len(library_data['barcode'].values) < len(set(library_data['barcode'].values)):
return (0, 100000, 100000)

# 不平衡文库大于250G 不能添加
if library_data[library_data['is_balance_lib'] == '否']['data_needed'].sum() > 250:
return (0, 100000, 100000)

# 碱基不平衡不过不添加,保证前面的数据, 在数据达到1200G的时候开始
ratio_sites, is_not_balance_list = count_barcode_radio(library_data)
if is_not_balance_list:
return (0, 100000, 100000)

if library_data[library_data['classification'].str.lower() == 'nextera']['data_needed'].sum() <= 50:
return (0, 100000, 100000)

# 计算每个sheet的data_needed之和
total_data_needed_sum += library_data['data_needed'].sum()

# 记录包含字母"A"的sheet数量
if any('极致' in value for value in library_data['split']):
xchip += 1
except Exception:
return (0, 100000, 100000)

# 返回一个适应度值,目标是最大化总的data_needed之和,最小化sheet的数量, 最少的极致芯片
total_data_needed_sum, num_sheets, num_xchip = total_data_needed_sum, len(individual[0]), xchip
return total_data_needed_sum, num_sheets, num_xchip

def run(self):
# 定义遗传算法的参数
pop_size = 50
cxpb = 0.7 # 交叉概率
mutpb = 0.2 # 变异概率
ngen = 100 # 迭代次数

# 初始化遗传算法工具箱
creator.create("FitnessMax", base.Fitness, weights=(1.0, -1.0, -1.0,)) # 三个目标,一个最大化两个最小化
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# 结构初始化器
toolbox.register("individual", tools.initRepeat, creator.Individual, self.generate_individual, n=3)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", self.evaluate)

# 注册遗传算法所需的操作
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutUniformInt, low=1, up=100, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
# 初始化种群
population = toolbox.population(n=pop_size)

# 运行遗传算法
algorithms.eaMuPlusLambda(population, toolbox, mu=pop_size, lambda_=pop_size * 2, cxpb=cxpb, mutpb=mutpb,
ngen=ngen, stats=None, halloffame=None)

# 输出结果
best_individual = tools.selBest(population, k=1)
print(best_individual)
optimized_sheets = best_individual[0] # 获取最优解

# 将优化后的结果输出
# for i, sheet in enumerate(optimized_sheets):
# sheet.to_excel(f'optimized_sheet_{i + 1}.xlsx', index=False)
return optimized_sheets


class AutoLayout:
"""
自动化派样
"""

def __init__(self, path, librarynum, output=basedir, data_limit=1750):
def __init__(self, path, librarynum, is_use_balance=1, is_use_max=0, output=basedir, data_limit=1750):
self.path = path
self.output = output
self.librarynum = int(librarynum)
@@ -191,17 +41,133 @@ class AutoLayout:
# 文库
self.chip_classification = defaultdict(set)
self.rule = self.read_rule()
# 甲基化文库不大于200,WGBS文库不大于200G

# 不平衡文库
self.chip_speciallib_size = dict()

# 甲基化文库
self.chip_methylib_size = dict()

# Nextera 文库大小
self.chip_speciallib_nextera_size = dict()
# 华大 文库
self.chip_speciallib_huada_size = dict()

self.logger = log(os.path.basename(f'{path}.txt'))
self.return_log = list()
self.no_assign_data = list()
self.ori_lib_data = list()
self.need_cols = self.read_cols()

self.is_use_balance = is_use_balance
self.is_use_max = is_use_max

def count_barcode_radio(self, data):
df = pd.DataFrame(data)
ratio_sites = dict()
is_not_balance_list = []
if df.empty:
return ratio_sites, is_not_balance_list

df['barcode'] = df['barcode'].str.slice(0, 16)
barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values,
columns=['T' + str(x) for x in range(16)]).join(df['data_needed'])
total = barcode_df['data_needed'].sum()

for i in range(16):
column = 'T' + str(i)
col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'})
# 去掉N计数
if 'N' in col_df.index:
base_n_size = col_df.loc['N', 'data_needed']
col_df = col_df.drop('N')
else:
base_n_size = 0
col_df['ratio'] = (col_df['data_needed']) / (total - base_n_size)
ratio = col_df['ratio'].to_dict()
ratio_sites[i] = ratio
A, B, C, D, E, F, G = list(), list(), list(), list(), list(), list(), list()
for decbase in ['A', 'T', 'C', 'G']:
if decbase not in ratio:
ratio[decbase] = 0
if ratio[decbase] >= 0.6:
A.append(decbase)
if 0.2 <= ratio[decbase] < 0.6:
B.append(decbase)
if 0.15 <= ratio[decbase] < 0.2:
C.append(decbase)
if 0.1 <= ratio[decbase] < 0.15:
D.append(decbase)
if 0.08 <= ratio[decbase] < 0.1:
E.append(decbase)
if ratio[decbase] < 0.08:
F.append(decbase)

# 新增一个碱基可行规则
if 0.125 <= ratio[decbase] <= 0.625:
G.append(decbase)

A_num, B_num, C_num, D_num, E_num, F_num, G_num = len(A), len(B), len(C), len(D), len(E), len(F), len(G)
if not ((B_num + C_num + D_num == 4) or (F_num == 1 and (A_num + B_num) == 3) or (
E_num == 1 and D_num == 1 and (A_num + B_num + C_num) == 2) or (
E_num == 1 and (A_num + B_num + C_num) == 3) or (
F_num == 1 and G_num == 3 and self.is_use_max)):
is_not_balance_list.append(
'第%s位置,算出结果为 %s' % (i, ratio)
)
return ratio_sites, is_not_balance_list

def dec_barcode_radio(self, chipname):
data = self.index_assignments[chipname]
ratio_sites, is_not_balance_list = self.count_barcode_radio(data)
if is_not_balance_list:
desc = '\n'.join(is_not_balance_list)
self.return_log.append(f'芯片{chipname}有碱基不平衡:\n{desc}')
print(f'芯片{chipname}有碱基不平衡:\n{desc}')

@staticmethod
def level(row):

today_date = datetime.now()

if 'nextera' in row['classification'].lower():
return 10

if '华大' in row['classification']:
return 11

if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
return 20

mytime = row['time']
# 判断日期是之前的还是之后的
if mytime < today_date:
return 30

if '加急' in row['priority']:
return 40

if '补测' in row['priority']:
return 50

else:
return 1000

@staticmethod
def read_rule():
df = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive_classfication.xlsx'))
newdf = pd.DataFrame()
newdf['c1'] = df['c2']
newdf['c2'] = df['c1']
res = pd.concat([df, newdf])
return res.reset_index()

@staticmethod
def read_cols():
df = pd.read_excel(os.path.join(basedir, 'rule', 'columns.xlsx'))
cols = list(df['cols'].values)
return cols

def read_excel(self):
"""
原始数据处理
@@ -233,122 +199,30 @@ class AutoLayout:
# if library_data['classification'] in ['扩增子', '不平衡文库', '单细胞文库以及甲基化']:
if library_data['is_balance_lib'] == '否':
self.chip_speciallib_size[chipname] = library_data['size']
elif library_data['is_balance_lib'] == '甲基化':
self.chip_methylib_size[chipname] = library_data['size']
else:
self.chip_speciallib_size[chipname] = 0
self.chip_methylib_size[chipname] = 0
if 'nextera' in library_data['classification'].lower():
self.chip_speciallib_nextera_size[chipname] = library_data['size']
else:
self.chip_speciallib_nextera_size[chipname] = 0
if '华大' in library_data['classification']:
self.chip_speciallib_huada_size[chipname] = library_data['size']
else:
self.chip_speciallib_huada_size[chipname] = 0

else:
self.chip_size[chipname] += library_data['size']
if library_data['is_balance_lib'] == '否':
self.chip_speciallib_size[chipname] += library_data['size']
if library_data['is_balance_lib'] == '甲基化':
self.chip_methylib_size[chipname] += library_data['size']
if 'nextera' in library_data['classification'].lower():
self.chip_speciallib_nextera_size[chipname] += library_data['size']

@staticmethod
def count_barcode_radio(data):
df = pd.DataFrame(data)
ratio_sites = dict()
is_not_balance_list = []
if df.empty:
return ratio_sites, is_not_balance_list

df['barcode'] = df['barcode'].str.slice(0, 16)
barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values,
columns=['T' + str(x) for x in range(16)]).join(df['data_needed'])
total = barcode_df['data_needed'].sum()

for i in range(16):
column = 'T' + str(i)
col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'})
# 去掉N计数
if 'N' in col_df.index:
base_n_size = col_df.loc['N', 'data_needed']
col_df = col_df.drop('N')
else:
base_n_size = 0
col_df['ratio'] = (col_df['data_needed']) / (total - base_n_size)
ratio = col_df['ratio'].to_dict()
ratio_sites[i] = ratio
A, B, C, D, E, F = list(), list(), list(), list(), list(), list()
for decbase in ['A', 'T', 'C', 'G']:
if decbase not in ratio:
ratio[decbase] = 0
if ratio[decbase] >= 0.6:
A.append(decbase)
if 0.2 <= ratio[decbase] < 0.6:
B.append(decbase)
if 0.15 <= ratio[decbase] < 0.2:
C.append(decbase)
if 0.1 <= ratio[decbase] < 0.15:
D.append(decbase)
if 0.08 <= ratio[decbase] < 0.1:
E.append(decbase)
if ratio[decbase] < 0.08:
F.append(decbase)

A_num, B_num, C_num, D_num, E_num, F_num = len(A), len(B), len(C), len(D), len(E), len(F)
if not ((B_num + C_num + D_num == 4) or (F_num == 1 and (A_num + B_num) == 3) or (
E_num == 1 and D_num == 1 and (A_num + B_num + C_num) == 2) or (
E_num == 1 and (A_num + B_num + C_num) == 3)):
is_not_balance_list.append(
'第%s位置,算出结果为 %s' % (i, ratio)
)
return ratio_sites, is_not_balance_list

def dec_barcode_radio(self, chipname):
data = self.index_assignments[chipname]
ratio_sites, is_not_balance_list = self.count_barcode_radio(data)
if is_not_balance_list:
desc = '\n'.join(is_not_balance_list)
self.return_log.append(f'芯片{chipname}有碱基不平衡:\n{desc}')
print(f'芯片{chipname}有碱基不平衡:\n{desc}')

@staticmethod
def level(row):

today_date = datetime.now()

# 将时间字符串转换为 datetime 对象
# mytime = datetime.strptime(row['time'], "%Y-%m-%d")
# mytime = row['time'].strftime("%Y-%m-%d")

if 'nextera' in row['classification'].lower():
return 1

if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
return 2

mytime = row['time']
# 判断日期是之前的还是之后的
if mytime < today_date:
return 3

if '加急' in row['priority']:
return 4

if '补测' in row['priority']:
return 5

else:
return 100

@staticmethod
def read_rule():
df = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive_classfication.xlsx'))
newdf = pd.DataFrame()
newdf['c1'] = df['c2']
newdf['c2'] = df['c1']
res = pd.concat([df, newdf])
return res.reset_index()

@staticmethod
def read_cols():
df = pd.read_excel(os.path.join(basedir, 'rule', 'columns.xlsx'))
cols = list(df['cols'].values)
return cols
self.chip_speciallib_huada_size[chipname] += library_data['size']
if '华大' in library_data['classification']:
self.chip_speciallib_huada_size[chipname] += library_data['size']

def use_rule(self, chipname, classfication):
may_classfic = set(self.rule[self.rule['c1'] == classfication]['c2'])
@@ -357,9 +231,10 @@ class AutoLayout:
return False

def judge_data(self, chipname, library_data):
"""
约束条件
"""
size = library_data['size']
# customer = library_data['customer']
# library = library_data['library']
classification = library_data['classification']
is_balance_lib = library_data['is_balance_lib']
@@ -383,9 +258,19 @@ class AutoLayout:
if is_balance_lib == '否' and self.chip_speciallib_size[chipname] + size > 250:
splibrary = False

# 甲基化文库不能大于250G
spmethylibrary = True
if is_balance_lib == '甲基化' and self.chip_methylib_size[chipname] + size > 250:
spmethylibrary = False

# 不使用平衡文库
if not self.is_use_balance:
splibrary = True
spmethylibrary = True

# 碱基不平衡不过不添加,保证前面的数据, 在数据达到1200G的时候开始
base_balance = True
if self.chip_size[chipname] > 800:
if self.chip_size[chipname] > 900:
current_data = copy.deepcopy(self.index_assignments[chipname])
new_data = library_data['data']
current_data.extend(new_data)
@@ -393,17 +278,21 @@ class AutoLayout:
if is_not_balance_list:
base_balance = False

if sizelimit and notrepeatbarcode and exclusive_classific and splibrary and base_balance:
if sizelimit and notrepeatbarcode and exclusive_classific and splibrary and base_balance and spmethylibrary:
return True
return False

def add_loc_num(self):
# 有nextera文库 必须满足大于50G
"""
锚定芯片号增加
"""
# 有nextera, 华大文库 必须满足大于50G
chipname = f'chip{self.loc_chip_num}'
nextera_size = self.chip_speciallib_nextera_size[chipname]
if nextera_size > 50 or nextera_size == 0:
self.loc_chip_num += 1
else:
huada_size = self.chip_speciallib_huada_size[chipname]
print(chipname, huada_size, nextera_size)
flag = True
if 0 < nextera_size < 50:
# 有nextera文库,但是不满足50G 去除
nextary_barcode = set()
no_nextary_data = list()
@@ -416,6 +305,26 @@ class AutoLayout:
self.index_assignments[chipname] = no_nextary_data
self.chip_barcode_recode[chipname] -= nextary_barcode
self.chip_speciallib_nextera_size[chipname] = 0
self.chip_size[chipname] -= nextera_size
flag = False
if 0 < huada_size < 50:
# 有华大文库,但是不满足50G 去除
huada_barcode = set()
no_huada_data = list()
for libdata in self.index_assignments[chipname]:
if libdata['classification'] != '华大':
no_huada_data.append(libdata)
else:
self.no_assign_data.append(libdata)
huada_barcode.update(libdata['barcode'])
self.index_assignments[chipname] = no_huada_data
self.chip_barcode_recode[chipname] -= huada_barcode
self.chip_speciallib_huada_size[chipname] = 0
self.chip_size[chipname] -= huada_size
flag = False
if flag:
print(self.loc_chip_num)
self.loc_chip_num += 1

def assign_samples(self):
ori_library_data = list()
@@ -424,44 +333,48 @@ class AutoLayout:
raise UserWarning('提供excel没有 未测 sheet ,请核查!')
ori_library_df = pd.DataFrame(self.ori_data['未测'])

# need_col = ['status', '#library', 'sublibrary', 'i5', 'i7', 'data_needed', 'real_data', 'customer',
# 'classification', 'priority', 'time', '拆分方式', 'barcode', 'is_balance_lib', '备注',
# 'TIPS1', 'TIPS2', 'TIPS3'
# ]
self.need_cols = self.read_cols()
# 检查提供excel 是否有必须表头
get_col = set(ori_library_df.columns)
unhave_col = set(self.need_cols) - get_col

if unhave_col:
unhave_fom = '; '.join(unhave_col)
raise UserWarning(f'未测表里没有{unhave_fom} 表头,请核查!')
unhave_from = '; '.join(unhave_col)
raise UserWarning(f'未测表里没有{unhave_from} 表头,请核查!')

# 数据标准格式
numeric_mask = pd.to_numeric(ori_library_df['data_needed'], errors='coerce').notna()
time_mask = pd.to_datetime(ori_library_df['time'], errors='coerce').notna()

# 添加处理status列的逻辑
status_mask = ori_library_df['status'] == '暂不排样'

# 非正常barcode
barcode_mask = ori_library_df['barcode'].str.len() != 16

ori_library_df['note'] = ''
ori_library_df.loc[~numeric_mask, 'note'] = 'data_needed 列非数字'
ori_library_df.loc[~time_mask, 'note'] = 'time 列非日期'
ori_library_df.loc[status_mask, 'note'] = '暂不排样'
# ori_library_df.loc[barcode_mask, 'note'] = '非16位barcode'

# need_col.append('note')

no_ori_data = ori_library_df[~(numeric_mask & time_mask) | status_mask]
no_ori_data = ori_library_df[~(numeric_mask & time_mask) | status_mask | barcode_mask]

self.no_assign_data.extend(no_ori_data.to_dict('records'))

# 使用布尔索引筛选出不是数字和非日期的行
ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~status_mask]
# 使用布尔索引筛选出不是数字和非日期的行,并且不是暂不排样的行
ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~status_mask & ~barcode_mask]

# 某个客户的检测的数据超过1个T就单独处理
# summary = ori_library_df.groupby('customer').agg({'data_needed': 'sum'})
# print(summary)

# 时间格式化
ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce')
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)

# # 极致客户有重复的,把等级调到0,防止放到了最后,到了未测里
# duplicate_name = ori_library_df[ori_library_df['level'] == 2].duplicated(subset='barcode', keep=False)
# # 将 'level' 列的值改为 0
# ori_library_df.loc[duplicate_name, 'level'] = 0
# 极致客户有重复的,把等级调到0,防止放到了最后,到了未测里
ori_library_df.loc[
(ori_library_df.duplicated(subset='barcode')) & (ori_library_df['level'] == 20), 'level'] = 19

for library, library_df in ori_library_df.groupby('#library'):
@@ -470,10 +383,10 @@ class AutoLayout:
# 文库内部有重复
if len(library_df['barcode'].values) > len(set(library_df['barcode'].values)):
library_df['note'] = '文库内部有重复'
library_df.loc[:, 'time'] = library_df['time'].apply(format_date)
self.no_assign_data.extend(library_df.to_dict('records'))
continue

# 拆分处理
flag = False
if size > (self.data_limit) / 2:
library_df['data_needed'] = library_df['data_needed'] / 2
@@ -491,6 +404,7 @@ class AutoLayout:
data=library_df[self.need_cols].to_dict('records')
))

# 拆分对半
if flag:
self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
ori_library_data.append(dict(
@@ -504,43 +418,40 @@ class AutoLayout:
classification=library_df['classification'].values[0],
data=library_df[self.need_cols].to_dict('records')
))
ori_sort_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time'], -x['size']))
self.ori_lib_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time']))

i = 0
while ori_sort_data:
library_data = ori_sort_data[0]
# self.ori_lib_data = ori_sort_data

n = 1
while self.ori_lib_data:
n += 1
print(n)
library_data = self.ori_lib_data[0]
chipname = f'chip{self.loc_chip_num}'

# 空白芯片直接添加
if chipname not in self.index_assignments:
self.add_new_data(chipname, library_data)
ori_sort_data.remove(library_data)
i += 1
self.ori_lib_data.remove(library_data)
continue

# 判断条件
if self.judge_data(chipname, library_data):
self.add_new_data(chipname, library_data, newer=False)
ori_sort_data.remove(library_data)
i += 1
self.ori_lib_data.remove(library_data)
else:
for j in range(len(ori_sort_data)):
newlibrary_data = ori_sort_data[j]
for j in range(len(self.ori_lib_data)):
newlibrary_data = self.ori_lib_data[j]
if self.judge_data(chipname, newlibrary_data):
ori_sort_data.remove(newlibrary_data)
i += 1
self.ori_lib_data.remove(newlibrary_data)
self.add_new_data(chipname, newlibrary_data, newer=False)
break
j += 1
else:
# self.loc_chip_num += 1
self.add_loc_num()
if self.chip_size[chipname] > self.data_limit:
# self.loc_chip_num += 1
self.add_loc_num()

def assign_again(self):
pass
if self.chip_size[chipname] > self.data_limit:
self.add_loc_num()

def run(self):
# self.assign_samples()
@@ -553,9 +464,6 @@ class AutoLayout:
outputpath = os.path.join(self.output, 'result', outputname)
writer = pd.ExcelWriter(outputpath)

# ga = Ga(sheets=self.index_assignments)
# self.index_assignments = ga.run()

chip_loc = 1
librarynum = 0
for chip_idx, chip_assignments in self.index_assignments.items():
@@ -564,15 +472,21 @@ class AutoLayout:
df = pd.DataFrame(chip_assignments)
df['time'] = df['time'].dt.strftime('%Y-%m-%d')

if df['data_needed'].sum() < 1600 or librarynum > self.librarynum:
df['note'] = '排样数据量不足1600或者排样管数超标'
self.no_assign_data.extend(df.to_dict('records'))
continue
librarynum += len(set(df['#library'].values))
if [method for method in df['拆分方式'].values if '极致' in method]:
addname = 'X'
else:
addname = ''

if df['data_needed'].sum() < 1600 and not addname:
df['note'] = '排样数据量不足1600G'
self.no_assign_data.extend(df.to_dict('records'))
continue
if librarynum > self.librarynum:
df['note'] = '排样管数超标'
self.no_assign_data.extend(df.to_dict('records'))
continue
librarynum += len(set(df['#library'].values))

self.dec_barcode_radio(chip_idx)
chipname = addname + chip_idx
@@ -588,9 +502,11 @@ class AutoLayout:
res_df = pd.concat([df, df_sum], axis=1)
res_df.to_excel(writer, sheet_name=chipname, index=False)
chip_loc += 1
# self.no_assign_data.extend(self.diffic_assign_data)
no_assign_df = pd.DataFrame(self.no_assign_data)
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
no_assign_df = no_assign_df[self.need_cols]
if not no_assign_df.empty:
no_assign_df = no_assign_df[self.need_cols]
no_assign_df.to_excel(writer, sheet_name='未测', index=False)
if self.return_log:
pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False)
@@ -601,7 +517,6 @@ class AutoLayout:
if __name__ == '__main__':
start_time = time.time()
filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'example', 'input排样表.xlsx')
# excel_file = 'example/input排样表.xlsx'
output_file = ''
layout = AutoLayout(filepath, output_file)
layout.run()
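
For reference, the per-position base-balance test that the new count_barcode_radio method applies can be read in isolation. The following is a minimal standalone sketch, not the project's API: it only mirrors the thresholds and rule combinations visible in the diff above, and the helper name and dict-based interface are illustrative assumptions.

def position_is_balanced(ratio, is_use_max=0):
    """ratio: dict like {'A': 0.3, 'T': 0.3, 'C': 0.2, 'G': 0.2} for one barcode position."""
    bins = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0, 'G': 0}
    for base in ('A', 'T', 'C', 'G'):
        r = ratio.get(base, 0)
        if r >= 0.6:
            bins['A'] += 1
        if 0.2 <= r < 0.6:
            bins['B'] += 1
        if 0.15 <= r < 0.2:
            bins['C'] += 1
        if 0.1 <= r < 0.15:
            bins['D'] += 1
        if 0.08 <= r < 0.1:
            bins['E'] += 1
        if r < 0.08:
            bins['F'] += 1
        if 0.125 <= r <= 0.625:  # new G bin introduced by this commit
            bins['G'] += 1
    A, B, C, D, E, F, G = (bins[k] for k in 'ABCDEFG')
    return ((B + C + D == 4)
            or (F == 1 and A + B == 3)
            or (E == 1 and D == 1 and A + B + C == 2)
            or (E == 1 and A + B + C == 3)
            or (F == 1 and G == 3 and bool(is_use_max)))  # new rule, gated by is_use_max

# Example: position_is_balanced({'A': 0.30, 'T': 0.30, 'C': 0.25, 'G': 0.15}) -> True (B+C+D == 4)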