main
chaopower 2023-12-07 17:47:53 +08:00
parent 3b36898ba2
commit 1bffac2029
45 changed files with 287 additions and 48 deletions

96
T7_client.py 100644
View File

@ -0,0 +1,96 @@
import json
import os
import socket
import struct
import sys
import PySimpleGUI as sg
def recvdata(conn, filepath):
    """Receive one framed file from ``conn`` and store it under ``filepath``.

    Wire format as read here: a 4-byte native ``int`` giving the header
    length, a UTF-8 JSON header with ``contentlen`` and ``contentname`` keys,
    then ``contentlen`` bytes of file content.

    :param conn: connected socket to read from.
    :param filepath: directory in which the received file is created.
    :raises ConnectionError: if the peer closes the connection before the
        announced number of bytes has arrived.
    """

    def read_exact(size):
        # recv() may hand back fewer bytes than requested, so keep reading
        # until the whole field has arrived or the peer hangs up.
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = conn.recv(remaining)
            if not chunk:
                raise ConnectionError('connection closed while reading header')
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    header_size = struct.unpack('i', read_exact(4))[0]
    header_dic = json.loads(read_exact(header_size).decode('utf-8'))
    content_len = header_dic['contentlen']
    # Keep only the last path component so a peer-supplied name cannot point
    # outside the target directory.
    content_name = os.path.basename(header_dic['contentname'])
    recv_len = 0
    pdf = os.path.join(filepath, content_name)
    with open(pdf, 'wb') as file:
        while recv_len < content_len:
            # Never request more than what is still owed for this file.
            chunk = conn.recv(min(1024 * 1000, content_len - recv_len))
            if not chunk:
                # An empty read means EOF; without this check the loop would
                # spin forever on a truncated transfer.
                raise ConnectionError('connection closed while reading content')
            file.write(chunk)
            recv_len += len(chunk)
def senddata(conn, path, chipum):
    """Send the file at ``path`` plus the chip count over ``conn``.

    Frames the transfer as a 4-byte native ``int`` header length, a UTF-8
    JSON header, then the raw file bytes.

    :param conn: connected socket to write to.
    :param path: path of the file to send.
    :param chipum: chip count forwarded to the peer in the header.
    """
    name = os.path.basename(os.path.realpath(path))
    try:
        with open(path, 'rb') as file:
            content = file.read()
        headerdic = dict(
            contentlen=len(content),
            contentname=name,
            # T7_server.recvdata looks up 'chipnum'; the historical key
            # 'chipum' is kept alongside it for any older receiver.
            chipnum=chipum,
            chipum=chipum,
        )
        headerbytes = json.dumps(headerdic).encode('utf-8')
        # sendall() retries until every byte is written; send() may stop short.
        conn.sendall(struct.pack('i', len(headerbytes)))
        conn.sendall(headerbytes)
        conn.sendall(content)
    except ConnectionResetError:
        # NOTE(review): the message says "this file does not exist" but the
        # handler fires on a connection reset; a missing file raises
        # FileNotFoundError, which still propagates to the caller.
        print('不存在这个文件!')
def connect(dest_ip='192.168.11.121', dest_port=8191):
    """Open a TCP connection to the layout server and return the socket.

    :param dest_ip: server address; defaults to the previously hard-coded host.
    :param dest_port: server port; defaults to the previously hard-coded port.
    :return: a connected ``socket.socket``; the caller is responsible for
        closing it.
    :raises OSError: if the connection cannot be established.
    """
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        client.connect((dest_ip, int(dest_port)))
    except BaseException:
        # Do not leak the descriptor when the connection attempt fails.
        client.close()
        raise
    return client
def transclient(sendfile, resfile, chipnum):
    """Run one request/response round trip with the server.

    Connects, uploads ``sendfile`` together with ``chipnum``, then stores the
    server's reply inside ``resfile``.

    :param sendfile: path of the file to upload.
    :param resfile: directory that receives the returned file.
    :param chipnum: chip count passed along in the request header.
    """
    # The socket is a context manager, so it is closed even if either
    # transfer step raises.
    with connect() as conn:
        senddata(conn, sendfile, chipnum)
        recvdata(conn, resfile)
def make_gui():
    """Show the client window and run a layout request when asked.

    The window collects the chip count, the input file and the output folder.
    Pressing the generate button sends the request through ``transclient``;
    any other event (cancel, window closed) leaves the loop.
    """
    sg.theme('DarkBlack1')
    layout = [
        [sg.Text('排样芯片数'),
         sg.Spin(list(range(20)), initial_value=5, size=(3, 1), key='_CHIPNUM_')],
        [sg.Text()],
        [sg.Text('导入排样excel')],
        [sg.Input(key='_FILE1_'), sg.FileBrowse('选择文件')],
        [sg.Text()],
        [sg.Text('生成排样位置')],
        [sg.Input(key='_FILE2_'), sg.FolderBrowse('选择文件夹')],
        [sg.Text()],
        [sg.OK('生成'), sg.Cancel('取消')],
    ]
    # iconpath = os.path.join(os.path.abspath(sys.path[0]), 'other', 'icon.ico')
    window = sg.Window('解码排样T7程序', layout, font='Helvetica 11')
    try:
        while True:
            event, values = window.Read()
            if event != '生成':
                break
            if not values['_FILE1_'] or not values['_FILE2_']:
                # Missing input: warn and let the user try again.
                sg.popup_non_blocking('请正确提供参数')
                continue
            transclient(values['_FILE1_'], values['_FILE2_'], values['_CHIPNUM_'])
            sg.Popup('排样成功!')
            # Leave the loop explicitly instead of reading from a window
            # that has already been closed.
            break
    finally:
        window.Close()
# Script entry point: launch the client window when run directly.
if "__main__" == __name__:
    make_gui()

87
T7_server.py 100644
View File

@ -0,0 +1,87 @@
import json
import os
import socket
import struct
import sys
from datetime import datetime
from tools.common import basedir
from tools.t7 import AutoLayout as T7
def recvdata(conn, path):
    """Receive one framed file from ``conn`` and store it under ``path``.

    Wire format as read here: a 4-byte native ``int`` giving the header
    length, a UTF-8 JSON header with ``contentlen``, ``contentname`` and
    ``chipnum`` keys, then ``contentlen`` bytes of file content. The file is
    saved as ``<MMDDHHMM>_<contentname>``.

    :param conn: connected socket to read from.
    :param path: directory in which the received file is created.
    :return: tuple ``(saved_file_path, chipnum)``.
    :raises ConnectionError: if the peer closes the connection before the
        announced number of bytes has arrived.
    :raises KeyError: if a required header key is missing.
    """

    def read_exact(size):
        # recv() may hand back fewer bytes than requested, so keep reading
        # until the whole field has arrived or the peer hangs up.
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = conn.recv(remaining)
            if not chunk:
                raise ConnectionError('connection closed while reading header')
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    header_size = struct.unpack('i', read_exact(4))[0]
    header_dic = json.loads(read_exact(header_size).decode('utf-8'))
    content_len = header_dic['contentlen']
    # Keep only the last path component so a client-supplied name cannot
    # point outside the target directory.
    content_name = os.path.basename(header_dic['contentname'])
    try:
        chipnum = header_dic['chipnum']
    except KeyError:
        # T7_client.senddata spells this key 'chipum'; accept that spelling
        # so such clients are still served.
        if 'chipum' not in header_dic:
            raise
        chipnum = header_dic['chipum']
    recv_len = 0
    fielpath = os.path.join(path, '%s_%s' % (datetime.now().strftime("%m%d%H%M"), content_name))
    # The with-block closes the file even when the transfer fails midway.
    with open(fielpath, 'wb') as file:
        while recv_len < content_len:
            # Never request more than what is still owed for this file.
            chunk = conn.recv(min(1024 * 1000, content_len - recv_len))
            if not chunk:
                # An empty read means EOF; without this check the loop would
                # spin forever on a truncated upload.
                raise ConnectionError('connection closed while reading content')
            file.write(chunk)
            recv_len += len(chunk)
    return fielpath, chipnum
def senddata(conn, path, message=None):
    """Send either a file or a short text message over ``conn``.

    Frames the transfer as a 4-byte native ``int`` header length, a UTF-8
    JSON header with ``contentlen`` and ``contentname``, then the payload.

    :param conn: connected socket to write to.
    :param path: path of the file to send; when ``message`` is truthy this
        argument is instead the text to send.
    :param message: truthy to send ``path`` itself as text under the content
        name ``'message'``.
    """
    if message:
        # Encode first so contentlen counts bytes: the receiver reads exactly
        # contentlen bytes, and non-ASCII text is longer in UTF-8 than its
        # character count.
        content = path.encode('utf-8')
        name = 'message'
    else:
        name = os.path.basename(os.path.realpath(path))
        with open(path, 'rb') as file:
            content = file.read()
    headerdic = dict(
        contentlen=len(content),
        contentname=name
    )
    headerbytes = json.dumps(headerdic).encode('utf-8')
    # sendall() retries until every byte is written; send() may stop short.
    conn.sendall(struct.pack('i', len(headerbytes)))
    conn.sendall(headerbytes)
    conn.sendall(content)
def server():
    """Serve layout requests on TCP port 8191 until the process is stopped.

    For each accepted connection: receive the uploaded file and chip count,
    run the T7 layout on it, and send the resulting file back. Errors for a
    single request are printed and the loop moves on to the next client.
    """
    # The with-block releases the listening socket when the loop is left,
    # for example on KeyboardInterrupt.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as myserver:
        adrss = ("", 8191)
        myserver.bind(adrss)
        myserver.listen(5)
        while True:
            try:
                myclient, adddr = myserver.accept()
            except Exception as e:
                print(e)
                continue
            # Close every accepted connection, whether or not the request
            # succeeded, so descriptors do not pile up.
            with myclient:
                try:
                    recv_content, chipnum = recvdata(myclient, os.path.join(basedir, 'example'))
                    layout = T7(recv_content, chipnum)
                    outputpath = layout.run()
                    senddata(myclient, outputpath)
                except Exception as e:
                    print(e)
# Script entry point: with two arguments run one layout directly on a local
# file, with none start the network server.
if __name__ == '__main__':
    if len(sys.argv) > 2:
        layout = T7(sys.argv[1], sys.argv[2])
        outputpath = layout.run()
    elif len(sys.argv) == 2:
        # Both the file path and the chip count are needed; a lone argument
        # used to fail with an IndexError on sys.argv[2].
        sys.exit('usage: %s [<excel_path> <chipnum>]' % sys.argv[0])
    else:
        server()

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,10 +1,11 @@
import pandas as pd import os
import time
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
import time
import os
from .common import basedir, log import pandas as pd
from tools.common import basedir, log
class AutoLayout: class AutoLayout:
@ -12,9 +13,10 @@ class AutoLayout:
自动化派样 自动化派样
""" """
def __init__(self, path, output=basedir, data_limit=1520): def __init__(self, path, chipnum, output=basedir, data_limit=1520):
self.path = path self.path = path
self.output = output self.output = output
self.chipnum = int(chipnum)
self.data_limit = data_limit self.data_limit = data_limit
self.index_assignments = defaultdict(list) self.index_assignments = defaultdict(list)
@ -30,11 +32,14 @@ class AutoLayout:
self.loc_chip_num = 1 self.loc_chip_num = 1
# 芯片客户 # 芯片客户
self.chip_customer = defaultdict(set) self.chip_customer = defaultdict(set)
# 文库
self.chip_classification = defaultdict(set)
self.rule = self.read_rule() self.rule = self.read_rule()
# 甲基化文库不大于200,WGBS文库不大于200G # 甲基化文库不大于200,WGBS文库不大于200G
self.chip_speciallib_size = dict() self.chip_speciallib_size = dict()
self.logger = log(os.path.basename(f'{path}.txt')) self.logger = log(os.path.basename(f'{path}.txt'))
self.return_log = list() self.return_log = list()
self.no_assign_data = list()
def read_excel(self): def read_excel(self):
""" """
@ -71,6 +76,7 @@ class AutoLayout:
self.chip_speciallib_size[chipname] += library_data['size'] self.chip_speciallib_size[chipname] += library_data['size']
self.chip_customer[chipname].add(library_data['customer']) self.chip_customer[chipname].add(library_data['customer'])
self.chip_classification[chipname].add(library_data['classification'])
def add_new_chip(self, library_data): def add_new_chip(self, library_data):
""" """
@ -109,6 +115,7 @@ class AutoLayout:
def dec_barcode_radio(self, chipname): def dec_barcode_radio(self, chipname):
data = self.index_assignments[chipname] data = self.index_assignments[chipname]
df = pd.DataFrame(data) df = pd.DataFrame(data)
df['barcode'] = df['barcode'].str.slice(0, 16)
barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values, barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values,
columns=['T' + str(x) for x in range(16)]).join(df['data_needed']) columns=['T' + str(x) for x in range(16)]).join(df['data_needed'])
total = barcode_df['data_needed'].sum() total = barcode_df['data_needed'].sum()
@ -124,18 +131,26 @@ class AutoLayout:
base_N_size = 0 base_N_size = 0
col_df['ratio'] = (col_df['data_needed']) / (total - base_N_size) col_df['ratio'] = (col_df['data_needed']) / (total - base_N_size)
is_need_base = col_df.index[col_df['ratio'] < 0.088] # is_need_base = col_df.index[col_df['ratio'] < 0.088]
need_base_list = list(is_need_base)
A, B, C, D = list(), list(), list(), list(),
need_base_list = list()
ratio = col_df['ratio'].to_dict() ratio = col_df['ratio'].to_dict()
for decbase in ['A', 'T', 'C', 'G']: for decbase in ['A', 'T', 'C', 'G']:
if decbase not in ratio: if decbase not in ratio:
ratio[decbase] = 0 ratio[decbase] = 0
need_base_list.append(decbase) if ratio[decbase] >= 0.6:
# 小于标准的base 是不是空的,空的说明都满足 A.append(decbase)
if need_base_list: if 0.2 <= ratio[decbase] < 0.6:
B.append(decbase)
if 0.08 <= ratio[decbase] < 0.2:
C.append(decbase)
if ratio[decbase] <= 0.8:
D.append(decbase)
if not ((len(B) + len(C) == 4) or (len(D) == 1 and len(C) == 3)):
is_not_balance_list.append( is_not_balance_list.append(
'%s%s位置, %s 有碱基不平衡,算出结果为 %s' % (chipname, i, need_base_list, ratio) '%s%s位置,有碱基不平衡,算出结果为 %s' % (chipname, i, ratio)
) )
if len(is_not_balance_list) > 2: if len(is_not_balance_list) > 2:
@ -145,46 +160,49 @@ class AutoLayout:
@staticmethod @staticmethod
def level(row): def level(row):
if row['customer'] == '贞固':
return 1 today_date = datetime.now()
if row['split_method'] == '极致周期':
# 将时间字符串转换为 datetime 对象
# mytime = datetime.strptime(row['time'], "%Y-%m-%d")
# mytime = row['time'].strftime("%Y-%m-%d")
mytime = row['time']
if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
return 2 return 2
# 医沐、清港泉、WES自己建库也是极致周期 # 判断日期是之前的还是之后的
if row['customer'] == '医沐' or row['customer'] == '清港泉': if mytime < today_date:
return 3 return 3
# 赛福、桐树基因的文库尽量跟极致周期测人的样本排一起上机 if '加急' in row['priority']:
if row['customer'] == '赛福' or row['customer'] == '桐树基因': return 4
return 7
if row['classification'] == 'Nextera': if '补测' in row['priority']:
return 5 return 5
if '华大' in row['classification']:
return 6
else: else:
return 100 return 100
@staticmethod @staticmethod
def read_rule(): def read_rule():
df = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive.xlsx')) df = pd.read_excel(os.path.join(basedir, 'rule', 'exclusive_classfication.xlsx'))
newdf = pd.DataFrame() newdf = pd.DataFrame()
newdf['customer1'] = df['customer2'] newdf['c1'] = df['c2']
newdf['customer1'] = df['customer1'] newdf['c2'] = df['c1']
return pd.concat([df, newdf]) res = pd.concat([df, newdf])
return res.reset_index()
def use_rule(self, chipname, customer): def use_rule(self, chipname, classfication):
may_customer = set(self.rule[self.rule['customer1'] == customer]['customer2']) may_classfic= set(self.rule[self.rule['c1'] == classfication]['c2'])
if self.chip_customer[chipname].intersection(may_customer): if self.chip_customer[chipname].intersection(may_classfic):
return True return True
return False return False
def judge_data(self, chipname, library_data): def judge_data(self, chipname, library_data):
size = library_data['size'] size = library_data['size']
customer = library_data['customer'] # customer = library_data['customer']
library = library_data['library'] library = library_data['library']
classification = library_data['classification']
# 芯片大小不能超过设定限制 # 芯片大小不能超过设定限制
sizelimit = True sizelimit = True
@ -196,37 +214,71 @@ class AutoLayout:
if self.chip_barcode_recode[chipname].intersection({item['barcode'] for item in library_data['data']}): if self.chip_barcode_recode[chipname].intersection({item['barcode'] for item in library_data['data']}):
notrepeatbarcode = False notrepeatbarcode = False
self.logger.error(f'{library} {chipname} 文库有barcode重复') self.logger.error(f'{library} {chipname} 文库有barcode重复')
# 互斥的客户 # # 互斥的客户
exclusivecostom = True # exclusivecostom = True
if self.use_rule(chipname, customer): # if self.use_rule(chipname, customer):
exclusivecostom = False # exclusivecostom = False
# self.logger.error(f'{library} {chipname} 有互斥单位')
# 互斥的文库
exclusive_classific= True
if self.use_rule(chipname, classification):
exclusive_classific= False
self.logger.error(f'{library} {chipname} 有互斥单位') self.logger.error(f'{library} {chipname} 有互斥单位')
# 不平衡文库大于200G 不能添加 # 不平衡文库大于200G 不能添加
splibrary = True splibrary = True
if library_data['classification'] in ['扩增子', '不平衡文库', '单细胞文库', '甲基化'] \ if classification in ['扩增子', '不平衡文库', '单细胞文库', '甲基化'] \
and self.chip_speciallib_size[chipname] + size > 200: and self.chip_speciallib_size[chipname] + size > 200:
splibrary = False splibrary = False
self.logger.error(f'{library} {chipname} 不平衡文库相加大于设定限制') self.logger.error(f'{library} {chipname} 不平衡文库相加大于设定限制')
if sizelimit and notrepeatbarcode and exclusivecostom and splibrary: if sizelimit and notrepeatbarcode and exclusive_classific and splibrary:
return True return True
return False return False
def assign_samples(self): def assign_samples(self):
ori_library_data = list() ori_library_data = list()
if '未测' not in self.ori_data.keys():
raise UserWarning('提供excel没有 未测 sheet ,请核查!')
ori_library_df = pd.DataFrame(self.ori_data['未测']) ori_library_df = pd.DataFrame(self.ori_data['未测'])
need_col = ['#library', 'sublibrary', 'i5', 'i7', 'data_needed', 'real_data', 'customer',
'classification', 'priority', 'time', '拆分方式', 'barcode'
]
get_col = set(ori_library_df.columns)
unhave_col = set(need_col) - get_col
if unhave_col:
unhave_fom = '; '.join(unhave_col)
raise UserWarning(f'未测表里没有{unhave_fom} 表头,请核查!')
numeric_mask = pd.to_numeric(ori_library_df['data_needed'], errors='coerce').notna()
time_mask = pd.to_datetime(ori_library_df['time'], errors='coerce').notna()
ori_library_df['note'] = ''
ori_library_df.loc[~numeric_mask, 'note'] = 'data_needed 列非数字'
ori_library_df.loc[~time_mask, 'note'] = 'time 列非日期'
need_col.append('note')
self.no_assign_data.extend(ori_library_df[~(numeric_mask & time_mask)].to_dict('records'))
# 使用布尔索引筛选出不是数字和非日期的行
ori_library_df = ori_library_df[numeric_mask & time_mask]
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1) ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
for library, library_df in ori_library_df.groupby('#library'): for library, library_df in ori_library_df.groupby('#library'):
ori_library_data.append(dict( ori_library_data.append(dict(
library=library, library=library,
size=library_df['data_needed'].sum(), size=library_df['data_needed'].sum(),
split_method=library_df['split_method'].values[0], split_method=library_df['拆分方式'].values[0],
time=library_df['time'].values[0], time=library_df['time'].values[0],
level=library_df['level'].values[0], level=library_df['level'].values[0],
customer=library_df['customer'].values[0], customer=library_df['customer'].values[0],
classification=library_df['classification'].values[0], classification=library_df['classification'].values[0],
data=library_df.to_dict('records') data=library_df[need_col].to_dict('records')
)) ))
ori_sort_data = sorted(ori_library_data, key=lambda x: (x['level'], -x['size'], x['time'])) ori_sort_data = sorted(ori_library_data, key=lambda x: (x['level'], x['time']))
i = 0 i = 0
while ori_sort_data: while ori_sort_data:
@ -263,6 +315,7 @@ class AutoLayout:
pass pass
def run(self): def run(self):
self.assign_samples()
try: try:
self.assign_samples() self.assign_samples()
except Exception as e: except Exception as e:
@ -271,19 +324,21 @@ class AutoLayout:
outputname = 'assignments_%s_%s' % (datetime.now().strftime("%m%d%H%M"), os.path.basename(self.path)) outputname = 'assignments_%s_%s' % (datetime.now().strftime("%m%d%H%M"), os.path.basename(self.path))
outputpath = os.path.join(self.output, 'result', outputname) outputpath = os.path.join(self.output, 'result', outputname)
writer = pd.ExcelWriter(outputpath) writer = pd.ExcelWriter(outputpath)
no_assign_data = list()
chip_loc = 1
for chip_idx, chip_assignments in self.index_assignments.items(): for chip_idx, chip_assignments in self.index_assignments.items():
self.dec_barcode_radio(chip_idx)
df = pd.DataFrame(chip_assignments) df = pd.DataFrame(chip_assignments)
if df['data_needed'].sum() < 1400: if df['data_needed'].sum() < 1400 or chip_loc > self.chipnum:
no_assign_data.extend(chip_assignments) self.no_assign_data.extend(chip_assignments)
continue continue
if '极致周期' in df['split_method'].values: if '极致' in df['拆分方式'].values:
addname = 'X' addname = 'X'
else: else:
addname = '' addname = ''
self.dec_barcode_radio(chip_idx)
df.to_excel(writer, sheet_name=addname + chip_idx, index=False) df.to_excel(writer, sheet_name=addname + chip_idx, index=False)
pd.DataFrame(no_assign_data).to_excel(writer, sheet_name='未测', index=False) chip_loc += 1
pd.DataFrame(self.no_assign_data).to_excel(writer, sheet_name='未测', index=False)
if self.return_log: if self.return_log:
pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False) pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False)
writer.close() writer.close()
@ -292,9 +347,10 @@ class AutoLayout:
if __name__ == '__main__': if __name__ == '__main__':
start_time = time.time() start_time = time.time()
excel_file = 'example/07031754_20230703.xlsx' filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'example', 'input排样表.xlsx')
# excel_file = 'example/input排样表.xlsx'
output_file = '' output_file = ''
layout = AutoLayout(excel_file, output_file) layout = AutoLayout(filepath, output_file)
layout.run() layout.run()
end_time = time.time() end_time = time.time()
execution_time = end_time - start_time execution_time = end_time - start_time