T7更新

2023-12-07 17:47:53 +08:00 · 2023-12-07 17:47:53 +08:00 · 1bffac2029
parent 3b36898ba2
commit 1bffac2029
45 changed files with 287 additions and 48 deletions
--- a/T7_client.py
+++ b/T7_client.py
@ -0,0 +1,96 @@
 import json
 import os
 import socket
 import struct
 import sys
 import PySimpleGUI as sg
 def recvdata(conn, filepath):
    """Receive one length-prefixed file from ``conn`` and save it in ``filepath``.

    Wire format: a 4-byte native ``int`` giving the header size, a UTF-8 JSON
    header containing ``contentlen`` and ``contentname``, then exactly
    ``contentlen`` bytes of file content.

    :param conn: connected stream socket to read from
    :param filepath: existing directory in which the received file is written
    :return: None
    :raises ConnectionError: if the peer closes the connection before the
        announced number of bytes has arrived
    :raises ValueError: if the announced file name is empty after sanitising
    """
    def _recv_exact(size):
        # recv() may legally return fewer bytes than requested, so keep
        # reading until the frame is complete or the peer hangs up.
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = conn.recv(remaining)
            if not chunk:
                raise ConnectionError('connection closed before %d bytes were received' % size)
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    header_size = struct.unpack('i', _recv_exact(4))[0]
    header_dic = json.loads(_recv_exact(header_size).decode('utf-8'))
    content_len = header_dic['contentlen']
    # The name comes from the peer: keep only its last component so it cannot
    # point outside ``filepath`` (both separator styles are handled).
    content_name = os.path.basename(str(header_dic['contentname']).replace('\\', '/'))
    if content_name in ('', '.', '..'):
        raise ValueError('invalid file name in header: %r' % (header_dic['contentname'],))
    pdf = os.path.join(filepath, content_name)
    remaining = content_len
    with open(pdf, 'wb') as file:
        while remaining > 0:
            # Never ask for more than is left so bytes that follow this file
            # on the stream are not swallowed.
            chunk = conn.recv(min(remaining, 1024 * 1000))
            if not chunk:
                raise ConnectionError('connection closed with %d bytes of content missing' % remaining)
            file.write(chunk)
            remaining -= len(chunk)
 def senddata(conn, path, chipum):
    """Send the file at ``path`` over ``conn`` with a length-prefixed JSON header.

    Wire format: a 4-byte native ``int`` giving the header size, a UTF-8 JSON
    header with ``contentlen``, ``contentname`` and ``chipnum``, then the raw
    file bytes.

    :param conn: connected stream socket to write to
    :param path: path of the file to upload
    :param chipum: number of chips requested; sent under the header key
        ``chipnum``, which is the key the server side reads
    :return: None
    :raises FileNotFoundError: if ``path`` does not exist (a notice is printed
        first); nothing is sent in that case
    """
    name = os.path.basename(os.path.realpath(path))
    try:
        with open(path, 'rb') as file:
            content = file.read()
    except FileNotFoundError:
        # Re-raise after the notice: continuing would leave the caller waiting
        # for a reply to a request that was never sent.
        print('不存在这个文件！')
        raise
    headerdic = dict(
        contentlen=len(content),
        contentname=name,
        chipnum=chipum,
    )
    headerbytes = json.dumps(headerdic).encode('utf-8')
    # sendall() instead of send(): send() may transmit only part of the buffer.
    conn.sendall(struct.pack('i', len(headerbytes)))
    conn.sendall(headerbytes)
    conn.sendall(content)
 def connect(dest_ip='192.168.11.121', dest_port=8191):
    """Open a TCP connection to the layout server.

    :param dest_ip: server address; defaults to the previously hard-coded host
    :param dest_port: server TCP port; defaults to the previously hard-coded port
    :return: the connected socket; the caller is responsible for closing it
    :raises OSError: if the connection cannot be established
    """
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        client.connect((dest_ip, int(dest_port)))
    except OSError:
        # Do not leak the descriptor when the server is unreachable.
        client.close()
        raise
    return client
 def transclient(sendfile, resfile, chipnum):
    """Upload ``sendfile`` to the server and store the reply file under ``resfile``.

    :param sendfile: path of the file to upload
    :param resfile: directory in which the file returned by the server is saved
    :param chipnum: number of chips, forwarded in the request header
    :return: None
    """
    conn = connect()
    try:
        senddata(conn, sendfile, chipnum)
        recvdata(conn, resfile)
    finally:
        # Always release the socket, including when the transfer fails.
        conn.close()
 def make_gui():
    """Show the request window and run one layout request through ``transclient``.

    The window collects a chip count, an input file and an output folder.
    Pressing the generate button with both paths filled in calls
    ``transclient``; on success a confirmation popup is shown and the window
    closes. Any other event (cancel, window closed) ends the loop.

    :return: None
    """
    sg.theme('DarkBlack1')
    layout = [
        [sg.Text('排样芯片数'),
         sg.Spin(list(range(20)), initial_value=5, size=(3, 1), key='_CHIPNUM_')],
        [sg.Text()],
        [sg.Text('导入排样excel')],
        [sg.Input(key='_FILE1_'), sg.FileBrowse('选择文件')],
        [sg.Text()],
        [sg.Text('生成排样位置')],
        [sg.Input(key='_FILE2_'), sg.FolderBrowse('选择文件夹')],
        [sg.Text()],
        [sg.OK('生成'), sg.Cancel('取消')],
    ]
    window = sg.Window('解码排样T7程序', layout, font='Helvetica 11')
    try:
        while True:
            event, values = window.Read()
            if event != '生成':
                break
            if not values['_FILE1_'] or not values['_FILE2_']:
                sg.popup_non_blocking('请正确提供参数')
                continue
            try:
                transclient(values['_FILE1_'], values['_FILE2_'], values['_CHIPNUM_'])
            except Exception as exc:
                # UI boundary: report the failure and keep the window open so
                # the user can correct the input and retry.
                sg.Popup('排样失败：%s' % exc)
                continue
            sg.Popup('排样成功！')
            # Leave the loop instead of reading from an already closed window.
            break
    finally:
        window.Close()
 # Script entry point: open the request window when run directly.
 if __name__ == '__main__':
    make_gui()
--- a/T7_server.py
+++ b/T7_server.py
@ -0,0 +1,87 @@
 import json
 import os
 import socket
 import struct
 import sys
 from datetime import datetime
 from tools.common import basedir
 from tools.t7 import AutoLayout as T7
 def recvdata(conn, path):
    """
    Receive one uploaded file from ``conn`` and store it under ``path``.

    Wire format: a 4-byte native ``int`` giving the header size, a UTF-8 JSON
    header with ``contentlen``, ``contentname`` and ``chipnum``, then exactly
    ``contentlen`` bytes of file content. The stored file name is the received
    name prefixed with the current ``%m%d%H%M`` timestamp.

    :param conn: connected stream socket to read from
    :param path: existing directory in which the upload is written
    :return: tuple ``(saved_file_path, chipnum)``
    :raises ConnectionError: if the peer closes the connection early
    :raises KeyError: if a required header field is missing
    :raises ValueError: if the announced file name is empty after sanitising
    """
    def _recv_exact(size):
        # recv() may return fewer bytes than requested; loop until complete.
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = conn.recv(remaining)
            if not chunk:
                raise ConnectionError('connection closed before %d bytes were received' % size)
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    header_size = struct.unpack('i', _recv_exact(4))[0]
    header_dic = json.loads(_recv_exact(header_size).decode('utf-8'))
    content_len = header_dic['contentlen']
    # Accept the legacy misspelling 'chipum' as well, which some clients send.
    chipnum = header_dic['chipnum'] if 'chipnum' in header_dic else header_dic['chipum']
    # The name is supplied by the remote side: keep only the last path
    # component so the upload cannot be written outside ``path``.
    content_name = os.path.basename(str(header_dic['contentname']).replace('\\', '/'))
    if content_name in ('', '.', '..'):
        raise ValueError('invalid file name in header: %r' % (header_dic['contentname'],))
    fielpath = os.path.join(path, '%s_%s' % (datetime.now().strftime("%m%d%H%M"), content_name))
    remaining = content_len
    # The context manager closes the file even when the transfer fails.
    with open(fielpath, 'wb') as file:
        while remaining > 0:
            chunk = conn.recv(min(remaining, 1024 * 1000))
            if not chunk:
                raise ConnectionError('connection closed with %d bytes of content missing' % remaining)
            file.write(chunk)
            remaining -= len(chunk)
    return fielpath, chipnum
 def senddata(conn, path, message=None):
    """
    Send either a file or a text message over ``conn``.

    Wire format: a 4-byte native ``int`` giving the header size, a UTF-8 JSON
    header with ``contentlen`` and ``contentname``, then the payload bytes.

    :param conn: connected stream socket to write to
    :param path: path of the file to send, or the message text itself when
        ``message`` is truthy
    :param message: when truthy, ``path`` is sent as UTF-8 text under the
        content name ``'message'`` instead of being opened as a file
    :return: None
    """
    if message:
        content = path.encode('utf-8')
        name = 'message'
    else:
        name = os.path.basename(os.path.realpath(path))
        with open(path, 'rb') as file:
            content = file.read()
    # contentlen must be the byte count of the payload: for non-ASCII text the
    # character count is smaller than the number of bytes actually sent.
    headerdic = dict(
        contentlen=len(content),
        contentname=name,
    )
    headerbytes = json.dumps(headerdic).encode('utf-8')
    # sendall() instead of send(): send() may transmit only part of the buffer.
    conn.sendall(struct.pack('i', len(headerbytes)))
    conn.sendall(headerbytes)
    conn.sendall(content)
 def server(host="", port=8191):
    """
    Accept layout requests forever, handling one client at a time.

    For each connection: receive the uploaded file into ``<basedir>/example``,
    run ``T7`` on it with the requested chip number and send the resulting
    file back. A failure while handling a client is printed and the loop moves
    on to the next connection.

    :param host: interface to bind to; the empty string binds all interfaces
    :param port: TCP port to listen on
    :return: never returns normally
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as myserver:
        myserver.bind((host, port))
        myserver.listen(5)
        while True:
            try:
                myclient, _addr = myserver.accept()
                # Close every accepted socket, also when handling fails, so
                # descriptors are not leaked and the client is not left
                # waiting on a dead connection.
                with myclient:
                    recv_content, chipnum = recvdata(myclient, os.path.join(basedir, 'example'))
                    layout = T7(recv_content, chipnum)
                    outputpath = layout.run()
                    senddata(myclient, outputpath)
            except Exception as e:
                print(e)
                continue
 if __name__ == '__main__':
    # With two arguments run a single layout locally; with none start the
    # network server.
    if len(sys.argv) > 2:
        layout = T7(sys.argv[1], sys.argv[2])
        outputpath = layout.run()
    elif len(sys.argv) == 2:
        # A lone argument used to fail with IndexError on sys.argv[2].
        sys.exit('usage: %s <excel_path> <chipnum>' % sys.argv[0])
    else:
        server()
--- a/example/06051713_0526_T7_1.xlsx
+++ b/example/06051713_0526_T7_1.xlsx
--- a/example/06051727_0526_T7_1(1).xlsx
+++ b/example/06051727_0526_T7_1(1).xlsx
--- a/example/06061757_0522_T7_1new.xlsx
+++ b/example/06061757_0522_T7_1new.xlsx
--- a/example/06061800_0522_T7_1new.xlsx
+++ b/example/06061800_0522_T7_1new.xlsx
--- a/example/06061803_0522_T7_1new.xlsx
+++ b/example/06061803_0522_T7_1new.xlsx
--- a/example/06061805_0522_T7_1new.xlsx
+++ b/example/06061805_0522_T7_1new.xlsx
--- a/example/06070902_0607_T7_1_散样.xlsx
+++ b/example/06070902_0607_T7_1_散样.xlsx
--- a/example/06070904_0607_T7_1_散样.xlsx
+++ b/example/06070904_0607_T7_1_散样.xlsx
--- a/example/06071517_张超测试.xlsx
+++ b/example/06071517_张超测试.xlsx
--- a/example/06071529_张超测试.xlsx
+++ b/example/06071529_张超测试.xlsx
--- a/example/06071530_张超测试.xlsx
+++ b/example/06071530_张超测试.xlsx
--- a/example/06071700_张超测试.xlsx
+++ b/example/06071700_张超测试.xlsx
--- a/example/06071712_202306071702-pq.xlsx
+++ b/example/06071712_202306071702-pq.xlsx
--- a/example/06071714_202306071702-pq.xlsx
+++ b/example/06071714_202306071702-pq.xlsx
--- a/example/06071746_0608.xlsx
+++ b/example/06071746_0608.xlsx
--- a/example/06071808_0608(1)(1).xlsx
+++ b/example/06071808_0608(1)(1).xlsx
--- a/example/06081359_0608(1)(1).xlsx
+++ b/example/06081359_0608(1)(1).xlsx
--- a/example/06081800_T7-6-8.xlsx
+++ b/example/06081800_T7-6-8.xlsx
--- a/example/06081801_T7-6-8.xlsx
+++ b/example/06081801_T7-6-8.xlsx
--- a/example/06090958_06081801_T7-6-8.xlsx
+++ b/example/06090958_06081801_T7-6-8.xlsx
--- a/example/06211426_包lane广西(1).xlsx
+++ b/example/06211426_包lane广西(1).xlsx
--- a/example/06211429_包lane广西.xlsx
+++ b/example/06211429_包lane广西.xlsx
--- a/example/06211548_06211429_包lane广西.xlsx
+++ b/example/06211548_06211429_包lane广西.xlsx
--- a/result/assignments_06051713_06051713_0526_T7_1.xlsx
+++ b/result/assignments_06051713_06051713_0526_T7_1.xlsx
--- a/result/assignments_06051727_06051727_0526_T7_1(1).xlsx
+++ b/result/assignments_06051727_06051727_0526_T7_1(1).xlsx
--- a/result/assignments_06061805_06061805_0522_T7_1new.xlsx
+++ b/result/assignments_06061805_06061805_0522_T7_1new.xlsx
--- a/result/assignments_06070904_06070904_0607_T7_1_散样.xlsx
+++ b/result/assignments_06070904_06070904_0607_T7_1_散样.xlsx
--- a/result/assignments_06071517_06071517_张超测试.xlsx
+++ b/result/assignments_06071517_06071517_张超测试.xlsx
--- a/result/assignments_06071529_06071529_张超测试.xlsx
+++ b/result/assignments_06071529_06071529_张超测试.xlsx
--- a/result/assignments_06071530_06071530_张超测试.xlsx
+++ b/result/assignments_06071530_06071530_张超测试.xlsx
--- a/result/assignments_06071700_06071700_张超测试.xlsx
+++ b/result/assignments_06071700_06071700_张超测试.xlsx
--- a/result/assignments_06071808_06071808_0608(1)(1).xlsx
+++ b/result/assignments_06071808_06071808_0608(1)(1).xlsx
--- a/result/assignments_06081359_06081359_0608(1)(1).xlsx
+++ b/result/assignments_06081359_06081359_0608(1)(1).xlsx
--- a/result/assignments_06081801_06081801_T7-6-8.xlsx
+++ b/result/assignments_06081801_06081801_T7-6-8.xlsx
--- a/result/assignments_06090959_06090958_06081801_T7-6-8.xlsx
+++ b/result/assignments_06090959_06090958_06081801_T7-6-8.xlsx
--- a/result/assignments_06211429_06211429_包lane广西.xlsx
+++ b/result/assignments_06211429_06211429_包lane广西.xlsx
--- a/result/assignments_06211548_06211548_06211429_包lane广西.xlsx
+++ b/result/assignments_06211548_06211548_06211429_包lane广西.xlsx
--- a/result/assignments_06300937_06300937_t1.xlsx
+++ b/result/assignments_06300937_06300937_t1.xlsx
--- a/result/assignments_07041654_07041654_0704_nova_1.xlsx
+++ b/result/assignments_07041654_07041654_0704_nova_1.xlsx
--- a/result/assignments_07051629_07051629_0608(1)(1).xlsx
+++ b/result/assignments_07051629_07051629_0608(1)(1).xlsx
--- a/result/assignments_07051649_07051649_0704_nova_1.xlsx
+++ b/result/assignments_07051649_07051649_0704_nova_1.xlsx
--- a/rule/exclusive_classfication.xlsx
+++ b/rule/exclusive_classfication.xlsx
--- a/tools/t7.py
+++ b/tools/t7.py
@ -1,10 +1,11 @@
-import pandas as pd
+import os
 import time
 from collections import defaultdict
 from datetime import datetime
 import time
 import os
-from .common import basedir, log
+import pandas as pd
 from tools.common import basedir, log
 class AutoLayout:
@ -12,9 +13,10 @@ class AutoLayout:
    自动化派样
    """
-    def __init__(self, path, output=basedir, data_limit=1520):
+    def __init__(self, path, chipnum, output=basedir, data_limit=1520):
        self.path = path
        self.output = output
        self.chipnum = int(chipnum)
        self.data_limit = data_limit
        self.index_assignments = defaultdict(list)
@ -30,11 +32,14 @@ class AutoLayout:
        self.loc_chip_num = 1
        # 芯片客户
        self.chip_customer = defaultdict(set)
        # 文库
        self.chip_classification = defaultdict(set)
        self.rule = self.read_rule()
        # 甲基化文库不大于200,WGBS文库不大于200G
        self.chip_speciallib_size = dict()
        self.logger = log(os.path.basename(f'{path}.txt'))
        self.return_log = list()
        self.no_assign_data = list()
    def read_excel(self):
        """
@ -71,6 +76,7 @@ class AutoLayout:
                self.chip_speciallib_size[chipname] += library_data['size']
        self.chip_customer[chipname].add(library_data['customer'])
        self.chip_classification[chipname].add(library_data['classification'])
    def add_new_chip(self, library_data):
        """
@ -109,6 +115,7 @@ class AutoLayout:
    def dec_barcode_radio(self, chipname):
        data = self.index_assignments[chipname]
        df = pd.DataFrame(data)
        df['barcode'] = df['barcode'].str.slice(0, 16)
        barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values,
                                  columns=['T' + str(x) for x in range(16)]).join(df['data_needed'])
        total = barcode_df['data_needed'].sum()
@ -124,18 +131,26 @@ class AutoLayout:
                base_N_size = 0
            col_df['ratio'] = (col_df['data_needed']) / (total - base_N_size)
-            is_need_base = col_df.index[col_df['ratio'] < 0.088]
+            # is_need_base = col_df.index[col_df['ratio'] < 0.088]
            need_base_list = list(is_need_base)
            A, B, C, D = list(), list(), list(), list(),
            need_base_list = list()
            ratio = col_df['ratio'].to_dict()
            for decbase in ['A', 'T', 'C', 'G']:
                if decbase not in ratio:
                    ratio[decbase] = 0
-                    need_base_list.append(decbase)
+                if ratio[decbase] >= 0.6:
-            # 小于标准的base 是不是空的，空的说明都满足
+                    A.append(decbase)
-            if need_base_list:
+                if 0.2 <= ratio[decbase] < 0.6:
                    B.append(decbase)
                if 0.08 <= ratio[decbase] < 0.2:
                    C.append(decbase)
                if ratio[decbase] <= 0.8:
                    D.append(decbase)
            if not ((len(B) + len(C) == 4) or (len(D) == 1 and len(C) == 3)):
                is_not_balance_list.append(
-                    '%s 第%s位置, %s 有碱基不平衡，算出结果为 %s' % (chipname, i, need_base_list, ratio)
+                    '%s 第%s位置,有碱基不平衡，算出结果为 %s' % (chipname, i, ratio)
                )
        if len(is_not_balance_list) > 2:
@ -145,46 +160,49 @@ class AutoLayout:
    @staticmethod
    def level(row):
-        if row['customer'] == '贞固':
+
-            return 1
+        today_date = datetime.now()
-        if row['split_method'] == '极致周期':
+
        # 将时间字符串转换为 datetime 对象
        # mytime = datetime.strptime(row['time'], "%Y-%m-%d")
        # mytime = row['time'].strftime("%Y-%m-%d")
        mytime = row['time']
        if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
            return 2
-        # 医沐、清港泉、WES（自己建库）也是极致周期，
+        # 判断日期是之前的还是之后的
-        if row['customer'] == '医沐' or row['customer'] == '清港泉':
+        if mytime < today_date:
            return 3
-        # 赛福、桐树基因的文库尽量跟极致周期测人的样本排一起上机
+        if '加急' in row['priority']:
-        if row['customer'] == '赛福' or row['customer'] == '桐树基因':
+            return 4
            return 7
-        if row['classification'] == 'Nextera':
+        if '补测' in row['priority']:
            return 5
        if '华大' in row['classification']:
            return 6
        else:
            return 100
    @staticmethod
    def read_rule():
-        df = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive.xlsx'))
+        df = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive_classfication.xlsx'))
        newdf = pd.DataFrame()
-        newdf['customer1'] = df['customer2']
+        newdf['c1'] = df['c2']
-        newdf['customer1'] = df['customer1']
+        newdf['c2'] = df['c1']
-        return pd.concat([df, newdf])
+        res = pd.concat([df, newdf])
        return res.reset_index()
-    def use_rule(self, chipname, customer):
+    def use_rule(self, chipname, classfication):
-        may_customer = set(self.rule[self.rule['customer1'] == customer]['customer2'])
+        may_classfic= set(self.rule[self.rule['c1'] == classfication]['c2'])
-        if self.chip_customer[chipname].intersection(may_customer):
+        if self.chip_customer[chipname].intersection(may_classfic):
            return True
        return False
    def judge_data(self, chipname, library_data):
        size = library_data['size']
-        customer = library_data['customer']
+        # customer = library_data['customer']
        library = library_data['library']
        classification = library_data['classification']
        # 芯片大小不能超过设定限制
        sizelimit = True
@ -196,37 +214,71 @@ class AutoLayout:
        if self.chip_barcode_recode[chipname].intersection({item['barcode'] for item in library_data['data']}):
            notrepeatbarcode = False
            self.logger.error(f'{library} {chipname} 文库有barcode重复')
-        # 互斥的客户
+        # # 互斥的客户
-        exclusivecostom = True
+        # exclusivecostom = True
-        if self.use_rule(chipname, customer):
+        # if self.use_rule(chipname, customer):
-            exclusivecostom = False
+        #     exclusivecostom = False
        #     self.logger.error(f'{library} {chipname} 有互斥单位')
        # 互斥的文库
        exclusive_classific= True
        if self.use_rule(chipname, classification):
            exclusive_classific= False
            self.logger.error(f'{library} {chipname} 有互斥单位')
        # 不平衡文库大于200G 不能添加
        splibrary = True
-        if library_data['classification'] in ['扩增子', '不平衡文库', '单细胞文库', '甲基化'] \
+        if classification in ['扩增子', '不平衡文库', '单细胞文库', '甲基化'] \
                and self.chip_speciallib_size[chipname] + size > 200:
            splibrary = False
            self.logger.error(f'{library} {chipname} 不平衡文库相加大于设定限制')
-        if sizelimit and notrepeatbarcode and exclusivecostom and splibrary:
+        if sizelimit and notrepeatbarcode and exclusive_classific and splibrary:
            return True
        return False
    def assign_samples(self):
        ori_library_data = list()
        if '未测' not in self.ori_data.keys():
            raise UserWarning('提供excel没有 未测 sheet ,请核查！')
        ori_library_df = pd.DataFrame(self.ori_data['未测'])
        need_col = ['#library', 'sublibrary', 'i5', 'i7', 'data_needed', 'real_data', 'customer',
                    'classification', 'priority', 'time', '拆分方式', 'barcode'
                    ]
        get_col = set(ori_library_df.columns)
        unhave_col = set(need_col) - get_col
        if unhave_col:
            unhave_fom = '; '.join(unhave_col)
            raise UserWarning(f'未测表里没有{unhave_fom}  表头,请核查！')
        numeric_mask = pd.to_numeric(ori_library_df['data_needed'], errors='coerce').notna()
        time_mask = pd.to_datetime(ori_library_df['time'], errors='coerce').notna()
        ori_library_df['note'] = ''
        ori_library_df.loc[~numeric_mask, 'note'] = 'data_needed 列非数字'
        ori_library_df.loc[~time_mask, 'note'] = 'time 列非日期'
        need_col.append('note')
        self.no_assign_data.extend(ori_library_df[~(numeric_mask & time_mask)].to_dict('records'))
        # 使用布尔索引筛选出不是数字和非日期的行
        ori_library_df = ori_library_df[numeric_mask & time_mask]
        ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
        for library, library_df in ori_library_df.groupby('#library'):
            ori_library_data.append(dict(
                library=library,
                size=library_df['data_needed'].sum(),
-                split_method=library_df['split_method'].values[0],
+                split_method=library_df['拆分方式'].values[0],
                time=library_df['time'].values[0],
                level=library_df['level'].values[0],
                customer=library_df['customer'].values[0],
                classification=library_df['classification'].values[0],
-                data=library_df.to_dict('records')
+                data=library_df[need_col].to_dict('records')
            ))
-        ori_sort_data = sorted(ori_library_data, key=lambda x: (x['level'], -x['size'], x['time']))
+        ori_sort_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time']))
        i = 0
        while ori_sort_data:
@ -263,6 +315,7 @@ class AutoLayout:
        pass
    def run(self):
        self.assign_samples()
        try:
            self.assign_samples()
        except Exception as e:
@ -271,19 +324,21 @@ class AutoLayout:
        outputname = 'assignments_%s_%s' % (datetime.now().strftime("%m%d%H%M"), os.path.basename(self.path))
        outputpath = os.path.join(self.output, 'result', outputname)
        writer = pd.ExcelWriter(outputpath)
-        no_assign_data = list()
+
        chip_loc = 1
        for chip_idx, chip_assignments in self.index_assignments.items():
            self.dec_barcode_radio(chip_idx)
            df = pd.DataFrame(chip_assignments)
-            if df['data_needed'].sum() < 1400:
+            if df['data_needed'].sum() < 1400 or chip_loc > self.chipnum:
-                no_assign_data.extend(chip_assignments)
+                self.no_assign_data.extend(chip_assignments)
                continue
-            if '极致周期' in df['split_method'].values:
+            if '极致' in df['拆分方式'].values:
                addname = 'X'
            else:
                addname = ''
            self.dec_barcode_radio(chip_idx)
            df.to_excel(writer, sheet_name=addname + chip_idx, index=False)
-        pd.DataFrame(no_assign_data).to_excel(writer, sheet_name='未测', index=False)
+            chip_loc += 1
        pd.DataFrame(self.no_assign_data).to_excel(writer, sheet_name='未测', index=False)
        if self.return_log:
            pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False)
        writer.close()
@ -292,9 +347,10 @@ class AutoLayout:
 if __name__ == '__main__':
    start_time = time.time()
-    excel_file = 'example/07031754_20230703.xlsx'
+    filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'example', 'input排样表.xlsx')
    # excel_file = 'example/input排样表.xlsx'
    output_file = ''
-    layout = AutoLayout(excel_file, output_file)
+    layout = AutoLayout(filepath, output_file)
    layout.run()
    end_time = time.time()
    execution_time = end_time - start_time