Compare commits

..

4 Commits

Author SHA1 Message Date
chaopower 82c31b8d56 更新 2024-04-19 17:01:37 +08:00
chaopower 62ca5126be 更新 2024-03-30 21:42:23 +08:00
chaopower 70355323d1 微调 2024-03-22 15:43:06 +08:00
chaopower 1a936abc29 删除一些文件 2024-03-22 14:21:23 +08:00
5 changed files with 346 additions and 173 deletions

116
T7_client_sanwei.py 100644
View File

@ -0,0 +1,116 @@
import json
import os
import socket
import struct
import PySimpleGUI as sg
def recvdata(conn, filepath):
    """Receive one length-prefixed file from *conn* and save it into *filepath*.

    Wire format: a 4-byte native int giving the JSON header size, then the
    JSON header with 'contentlen' (payload byte count) and 'contentname'
    (file name), then the raw payload bytes.

    :param conn: connected socket-like object exposing ``recv(bufsize)``
    :param filepath: directory the received file is written into
    :raises ConnectionError: if the peer closes before the header arrives
    """
    def _recv_exact(n):
        # socket.recv may return fewer bytes than requested; loop until we
        # have exactly n bytes (an empty chunk means the peer closed).
        chunks = []
        remaining = n
        while remaining > 0:
            chunk = conn.recv(min(remaining, 1024 * 1000))
            if not chunk:
                raise ConnectionError('connection closed before %d bytes received' % n)
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    header_size = struct.unpack('i', _recv_exact(4))[0]
    header_dic = json.loads(_recv_exact(header_size).decode('utf-8'))
    content_len = header_dic['contentlen']
    content_name = header_dic['contentname']
    target = os.path.join(filepath, content_name)
    with open(target, 'wb') as file:
        recv_len = 0
        while recv_len < content_len:
            # Cap each read at the bytes still owed so we never consume
            # data past this payload; stop (instead of spinning) on EOF.
            chunk = conn.recv(min(1024 * 1000, content_len - recv_len))
            if not chunk:
                break
            file.write(chunk)
            recv_len += len(chunk)
def senddata(conn, path, librarynum, is_use_balance, is_use_max, datalimit, datalower):
    """Send the excel file at *path* plus the layout parameters over *conn*.

    Emits: 4-byte header size, JSON header (contentlen, contentname and the
    five layout parameters), then the raw file bytes.

    :param conn: connected socket-like object (``send``/``sendall``)
    :param path: path of the excel file to transmit
    :param librarynum: number of tubes to lay out
    :param is_use_balance: 1/0 flag — use the balanced library
    :param is_use_max: 1/0 flag — use expanded balancing
    :param datalimit: per-chip data upper bound
    :param datalower: per-chip data lower bound
    """
    name = os.path.basename(os.path.realpath(path))
    try:
        with open(path, 'rb') as file:
            content = file.read()
        headerdic = dict(
            contentlen=len(content),
            contentname=name,
            librarynum=librarynum,
            is_use_balance=is_use_balance,
            is_use_max=is_use_max,
            datalimit=datalimit,
            datalower=datalower
        )
        headerbytes = json.dumps(headerdic).encode('utf-8')
        conn.send(struct.pack('i', len(headerbytes)))
        conn.send(headerbytes)
        conn.sendall(content)
    # BUG FIX: a missing file raises FileNotFoundError, not
    # ConnectionResetError — the original message could never fire for its
    # intended case. ConnectionResetError is still swallowed as before.
    except (FileNotFoundError, ConnectionResetError):
        print('不存在这个文件!')
def connect(dest_ip='192.168.38.90', dest_port=8291):
    """Open a TCP connection to the layout server.

    The defaults reproduce the original hard-coded host and port, so
    existing zero-argument callers are unaffected.

    :param dest_ip: server IP address
    :param dest_port: server TCP port
    :return: a connected ``socket.socket``
    """
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    client.connect((dest_ip, int(dest_port)))
    return client
def transclient(sendfile, resfile, librarynum, is_use_balance, is_use_max, datalimit, datalower):
    """Run one client round-trip: send *sendfile* and the layout parameters
    to the server, then receive the result file into directory *resfile*.

    :param sendfile: path of the excel to submit
    :param resfile: directory where the returned layout is saved
    """
    conn = connect()
    try:
        senddata(conn, sendfile, librarynum, is_use_balance, is_use_max, datalimit, datalower)
        recvdata(conn, resfile)
    finally:
        # The original leaked the socket on every call; always close it.
        conn.close()
def make_gui():
    """Build and run the desktop client window.

    Collects the layout parameters (tube count, per-chip data bounds,
    balanced-library flags), lets the user pick the input excel and an
    output folder, then ships the job to the layout server via
    ``transclient`` when the user presses the generate button.
    """
    sg.theme('DarkGreen1')
    layout = [
        [sg.Text('排样管数'), sg.Spin([i for i in range(150)], initial_value=130, size=(3, 1), key='_LIBRARYNUM_'),
         sg.Text('单芯片量上限'), sg.Spin([i for i in range(2000)], initial_value=1750, size=(4, 1), key='_DATALIMIT_'),
         sg.Text('单芯片量下限'), sg.Spin([i for i in range(2000)], initial_value=1700, size=(4, 1), key='_DATALOWER_')
         ],
        [sg.Text()],
        # Paired radio groups: exactly one of use / not-use is selected.
        [sg.Text('使用平衡文库'), sg.Radio("", "is_use_balance", key='is_use_balance_key', default=True),
         sg.Radio("", "is_use_balance", key='is_not_use_balance_key')],
        [sg.Text('使用扩容平衡性'), sg.Radio("", "is_use_max", key='is_use_max_key'),
         sg.Radio("", "is_use_max", key='is_not_use_max_key', default=True)],
        [sg.Text()],
        [
            sg.Text('导入排样excel')],
        [
            sg.Input(key='_FILE1_'), sg.FileBrowse('选择文件')],
        [sg.Text()],
        [
            sg.Text('生成排样位置')],
        [
            sg.Input(key='_FILE2_'), sg.FolderBrowse('选择文件夹')],
        [sg.Text()],
        [
            sg.OK('生成'), sg.Cancel('取消')]]
    # iconpath = os.path.join(os.path.abspath(sys.path[0]), 'other', 'icon.ico')
    # NOTE(review): the icon path below is an absolute developer-machine
    # path — presumably broken on other hosts; confirm before shipping.
    window = sg.Window('解码排样T7程序_sanwei', layout, font='Helvetica 11', icon=r'D:\project\autulayout\other\icon.ico')
    while True:
        event, values = window.read()
        if event == sg.WINDOW_CLOSED:
            # The user closed the window — leave the event loop.
            break
        # Translate the radio selections into the 1/0 flags the server expects.
        is_use_balance = 1 if values['is_use_balance_key'] else 0
        is_use_max = 1 if values['is_use_max_key'] else 0
        print(is_use_balance, is_use_max)
        if event == '生成':
            if not values['_FILE1_'] or not values['_FILE2_']:
                sg.popup_non_blocking('请正确提供参数')
            else:
                # Send the job and block until the result file is written
                # into the chosen output folder, then close the window.
                transclient(values['_FILE1_'], os.path.join(values['_FILE2_']), values['_LIBRARYNUM_'],
                            is_use_balance, is_use_max, values['_DATALIMIT_'], values['_DATALOWER_'])
                sg.Popup('排样成功!')
                window.Close()
        else:
            # Any other event (e.g. the cancel button) closes and exits.
            window.Close()
            break
if __name__ == '__main__':
    # Script entry point: launch the client GUI.
    make_gui()

View File

@ -0,0 +1,96 @@
import json
import os
import socket
import struct
import sys
from datetime import datetime
from tools.common import basedir
from tools.t7 import AutoLayout as T7
def recvdata(conn, path):
    """Receive one job file plus its layout parameters from a client.

    Wire format: 4-byte native int header size, JSON header (contentlen,
    contentname, librarynum, is_use_balance, is_use_max, datalimit,
    datalower), then the raw file bytes. The file is saved under *path*
    with an MMDDHHMM timestamp prefix to keep uploads distinct.

    :param conn: connected socket-like object exposing ``recv(bufsize)``
    :param path: directory the uploaded file is written into
    :return: tuple ``(saved_path, librarynum, is_use_balance, is_use_max,
        datalimit, datalower)``
    :raises ConnectionError: if the peer closes before the header arrives
    """
    def _recv_exact(n):
        # socket.recv may return fewer bytes than requested; loop until we
        # have exactly n bytes (an empty chunk means the peer closed).
        chunks = []
        remaining = n
        while remaining > 0:
            chunk = conn.recv(min(remaining, 1024 * 1000))
            if not chunk:
                raise ConnectionError('connection closed before %d bytes received' % n)
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    header_size = struct.unpack('i', _recv_exact(4))[0]
    header_dic = json.loads(_recv_exact(header_size).decode('utf-8'))
    content_len = header_dic['contentlen']
    content_name = header_dic['contentname']
    librarynum = header_dic['librarynum']
    is_use_balance = header_dic['is_use_balance']
    is_use_max = header_dic['is_use_max']
    datalimit = header_dic['datalimit']
    datalower = header_dic['datalower']
    filepath = os.path.join(path, '%s_%s' % (datetime.now().strftime("%m%d%H%M"), content_name))
    # `with` fixes the original's leaked file handle on mid-transfer errors.
    with open(filepath, 'wb') as file:
        recv_len = 0
        while recv_len < content_len:
            # Cap each read at the bytes still owed; stop on EOF instead
            # of looping forever on empty reads.
            chunk = conn.recv(min(1024 * 1000, content_len - recv_len))
            if not chunk:
                break
            file.write(chunk)
            recv_len += len(chunk)
    return filepath, librarynum, is_use_balance, is_use_max, datalimit, datalower
def senddata(conn, path, message=None):
    """Send a result file — or a plain-text message — back to the client.

    When *message* is falsy, *path* is a file path whose bytes are sent
    under its base name. When *message* is truthy, *path* itself IS the
    message text, sent UTF-8 encoded under the reserved name ``'message'``.

    :param conn: connected socket-like object (``send``/``sendall``)
    :param path: file path, or the message text (see above)
    :param message: flag selecting message mode
    """
    if not message:
        name = os.path.basename(os.path.realpath(path))
        with open(path, 'rb') as file:
            content = file.read()
    else:
        name = 'message'
        # BUG FIX: contentlen must count encoded BYTES, not characters —
        # the original used len(path), which desyncs the receiver for any
        # non-ASCII text (each CJK char is 3 UTF-8 bytes).
        content = path.encode('utf-8')
    headerdic = dict(
        contentlen=len(content),
        contentname=name
    )
    headerbytes = json.dumps(headerdic).encode('utf-8')
    conn.send(struct.pack('i', len(headerbytes)))
    conn.send(headerbytes)
    conn.sendall(content)
def server():
    """Listen on TCP port 8291 and run the T7 layout for each incoming job.

    For every connection: receive the excel and parameters, run
    ``AutoLayout``, and send the result workbook back. Errors are logged
    and the loop keeps serving (best-effort server, as in the original).
    """
    myserver = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    myserver.bind(("", 8291))
    myserver.listen(5)
    while True:
        try:
            myclient, adddr = myserver.accept()
        except Exception as e:
            print(e)
            continue
        try:
            recv_content, librarynum, is_use_balance, is_use_max, datalimit, datalower = recvdata(
                myclient, os.path.join(basedir, 'example'))
            print(recv_content, librarynum, is_use_balance, is_use_max, datalimit, datalower)
            layout = T7(recv_content, librarynum, is_use_balance, is_use_max,
                        data_limit=datalimit, data_lower=datalower)
            outputpath = layout.run()
            senddata(myclient, outputpath)
        except Exception as e:
            # Keep the daemon alive on any per-request failure, as before.
            print(e)
        finally:
            # BUG FIX: the original never closed the accepted socket,
            # leaking one file descriptor per request.
            myclient.close()
if __name__ == '__main__':
    # With CLI arguments, run one layout directly:
    #   argv[1]=excel path, argv[2]=librarynum, argv[3]=is_use_balance,
    #   argv[4]=is_use_max  (presumably — TODO confirm against AutoLayout).
    # Without arguments, start the TCP server loop.
    if len(sys.argv) > 1:
        layout = T7(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
        outputpath = layout.run()
    else:
        server()

Binary file not shown.

Binary file not shown.

View File

@ -26,6 +26,8 @@ class AutoLayout:
self.librarynum = int(librarynum) self.librarynum = int(librarynum)
self.data_limit = data_limit self.data_limit = data_limit
self.data_lower = data_lower self.data_lower = data_lower
self.get_col = list()
self.items = list()
# 芯片原始数据读取 # 芯片原始数据读取
self.ori_data = self.read_excel() self.ori_data = self.read_excel()
@ -54,7 +56,7 @@ class AutoLayout:
self.rule = self.read_rule() self.rule = self.read_rule()
self.rule_exclusive_customer = self.read_rule_exclusive_customer() self.rule_exclusive_customer = self.read_rule_exclusive_customer()
# 子文库名称 # subsamplename
self.chip_sublib = defaultdict(set) self.chip_sublib = defaultdict(set)
# 不平衡文库 # 不平衡文库
@ -93,11 +95,12 @@ class AutoLayout:
原始数据处理 原始数据处理
:return: :return:
""" """
merge = pd.read_excel(self.path, None) # 获取表头备注
ori_data = dict() nrow = pd.read_excel(self.path, nrows=1)
for name, sheet in merge.items(): self.items = nrow.to_dict('records')
sheet.fillna('', inplace=True) merge = pd.read_excel(self.path, skiprows=[1])
ori_data[name] = sheet.to_dict('records') merge.fillna('', inplace=True)
ori_data = merge.to_dict('records')
return ori_data return ori_data
@staticmethod @staticmethod
@ -125,26 +128,26 @@ class AutoLayout:
if df.empty: if df.empty:
return ratio_sites, is_not_balance_list return ratio_sites, is_not_balance_list
s, e = 0, 16 s, e = 0, 16
if maxt == 'i7': if maxt == 'indexi7':
s, e = 8, 16 s, e = 8, 16
if maxt == 'i5': if maxt == 'indexi5':
s, e = 0, 8 s, e = 0, 8
num = e - s num = e - s
df['barcode'] = df['barcode'].str.slice(s, e) df['indexi5i7'] = df['indexi5i7'].str.slice(s, e)
barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values, barcode_df = pd.DataFrame(df['indexi5i7'].str.split('', expand=True).iloc[:, 1:-1].values,
columns=['T' + str(x) for x in range(num)]).join(df['data_needed']) columns=['T' + str(x) for x in range(num)]).join(df['orderdatavolume'])
total = barcode_df['data_needed'].sum() total = barcode_df['orderdatavolume'].sum()
for i in range(num): for i in range(num):
column = 'T' + str(i) column = 'T' + str(i)
col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'}) col_df = barcode_df.groupby(column).agg({'orderdatavolume': 'sum'})
# 去掉N计数 # 去掉N计数
if 'N' in col_df.index: if 'N' in col_df.index:
base_n_size = col_df.loc['N', 'data_needed'] base_n_size = col_df.loc['N', 'orderdatavolume']
col_df = col_df.drop('N') col_df = col_df.drop('N')
else: else:
base_n_size = 0 base_n_size = 0
col_df['ratio'] = (col_df['data_needed']) / (total - base_n_size) col_df['ratio'] = (col_df['orderdatavolume']) / (total - base_n_size)
ratio = col_df['ratio'].to_dict() ratio = col_df['ratio'].to_dict()
ratio_sites[i] = ratio ratio_sites[i] = ratio
A, B, C, D, E, F, G = list(), list(), list(), list(), list(), list(), list() A, B, C, D, E, F, G = list(), list(), list(), list(), list(), list(), list()
@ -191,25 +194,19 @@ class AutoLayout:
today_date = datetime.now() today_date = datetime.now()
if 'nextera' in row['classification'].lower(): if 'nextera' in row['librarystructure'].lower():
return 1000 return 1000
if '华大' in row['classification']: if '华大' in row['librarystructure']:
return 1100 return 1100
if '超加急' in row['priority']: if row['cycletype'] == '极致周期' or '极致' in row['cycletype']:
return 1500
if row['拆分方式'] == '极致周期' or '极致' in row['拆分方式']:
return 2000 return 2000
if '加急' in row['priority']: if row['retestflag'] == '':
return 3000 return 3000
if '补测' in row['priority']: mytime = row['receivedtime']
return 4000
mytime = row['time']
# 判断日期是之前的还是之后的 # 判断日期是之前的还是之后的
if mytime < today_date: if mytime < today_date:
return 5000 return 5000
@ -221,35 +218,20 @@ class AutoLayout:
""" """
barcode 有重复的极致样本 进行排列组合汇集成新的可能性 barcode 有重复的极致样本 进行排列组合汇集成新的可能性
""" """
# 筛选有重复的行
# same_barcode_data = [data for data in self.ori_lib_data if data['level'] == 1900]
# same_barcode_sorted = sorted(same_barcode_data, key=lambda x: (-x['size']))
#
# same_barcode_dict = dict()
# for index, data in enumerate(same_barcode_sorted):
# same_barcode_dict[data['library']] = data['level'] + index + 1
# correct_data = list()
# for data in self.ori_lib_data:
# if data in same_barcode_sorted:
# data['level'] = same_barcode_dict[data['library']]
# correct_data.append(data)
# self.ori_lib_data = correct_data
same_barcode_df = pd.DataFrame( same_barcode_df = pd.DataFrame(
[spdata for data in self.ori_lib_data if data['level'] == 1900 for spdata in data['data']]) [spdata for data in self.ori_lib_data if data['level'] == 1900 for spdata in data['data']])
# 按照 'barcode' 列进行分组 # 按照 'indexi5i7' 列进行分组
if same_barcode_df.empty: if same_barcode_df.empty:
return return
grouped = same_barcode_df.groupby('barcode') grouped = same_barcode_df.groupby('indexi5i7')
# 获取具有重复的 'barcode' 分组 # 获取具有重复的 'indexi5i7' 分组
duplicate_groups = grouped.filter(lambda x: len(x) > 1) duplicate_groups = grouped.filter(lambda x: len(x) > 1)
# 提取这些分组,计算文库重复次数 # 提取这些分组,计算文库重复次数
grouped_names = duplicate_groups.groupby('barcode')['#library'].apply(list).reset_index() grouped_names = duplicate_groups.groupby('indexi5i7')['samplename'].apply(list).reset_index()
random_list = list(set(tuple(sublst) for sublst in list(grouped_names['#library']))) random_list = list(set(tuple(sublst) for sublst in list(grouped_names['samplename'])))
new_lst = [spdata for data in random_list for spdata in data] new_lst = [spdata for data in random_list for spdata in data]
counts = Counter(new_lst) counts = Counter(new_lst)
@ -270,17 +252,17 @@ class AutoLayout:
""" """
self.index_assignments[chipname].extend(library_data['data']) self.index_assignments[chipname].extend(library_data['data'])
self.chip_barcode_recode[chipname].update({item['barcode'] for item in library_data['data']}) self.chip_barcode_recode[chipname].update({item['indexi5i7'] for item in library_data['data']})
self.chip_barcodei7_recode[chipname].update({item['i7'] for item in library_data['data']}) self.chip_barcodei7_recode[chipname].update({item['indexi7'] for item in library_data['data']})
self.chip_barcodei5_recode[chipname].update({item['i5'] for item in library_data['data']}) self.chip_barcodei5_recode[chipname].update({item['indexi5'] for item in library_data['data']})
# 华大的 文库 i7 不能重复添加N+i7 # 华大的 文库 i7 不能重复添加N+i7
if '华大' in library_data['classification']: if '华大' in library_data['classification']:
self.chip_barcode_recode[chipname].update({'N' * 8 + item['i7'] for item in library_data['data']}) self.chip_barcode_recode[chipname].update({'N' * 8 + item['indexi7'] for item in library_data['data']})
# self.chip_barcode_recode[chipname].update({item['i5'] + 'N' * 8 for item in library_data['data']}) # self.chip_barcode_recode[chipname].update({item['indexi5'] + 'N' * 8 for item in library_data['data']})
# 子文库 # 子文库
self.chip_sublib[chipname].update({item['sublibrary'] for item in library_data['data']}) self.chip_sublib[chipname].update({item['subsamplename'] for item in library_data['data']})
self.chip_customer[chipname].add(library_data['customer']) self.chip_customer[chipname].add(library_data['customer'])
self.chip_classification[chipname].add(library_data['classification']) self.chip_classification[chipname].add(library_data['classification'])
@ -288,13 +270,11 @@ class AutoLayout:
if newer: if newer:
self.chip_size[chipname] = library_data['size'] self.chip_size[chipname] = library_data['size']
self.chip_size_N[chipname] = 0 self.chip_size_N[chipname] = 0
if 'N' in library_data['data'][0]['barcode']: if 'N' in library_data['data'][0]['indexi5i7']:
# print(library_data['data'][0]['barcode'])
self.chip_size_N[chipname] = library_data['size'] self.chip_size_N[chipname] = library_data['size']
# if library_data['classification'] in ['扩增子', '不平衡文库', '单细胞文库以及甲基化']:
if library_data['is_balance_lib'] == '': if library_data['is_balance_lib'] == '':
self.chip_speciallib_size[chipname] = library_data['size'] self.chip_speciallib_size[chipname] = library_data['size']
elif library_data['is_balance_lib'] == '甲基化': elif '甲基化' in library_data['classification']:
self.chip_methylib_size[chipname] = library_data['size'] self.chip_methylib_size[chipname] = library_data['size']
else: else:
self.chip_speciallib_size[chipname] = 0 self.chip_speciallib_size[chipname] = 0
@ -312,24 +292,26 @@ class AutoLayout:
self.chip_size[chipname] += library_data['size'] self.chip_size[chipname] += library_data['size']
if library_data['is_balance_lib'] == '': if library_data['is_balance_lib'] == '':
self.chip_speciallib_size[chipname] += library_data['size'] self.chip_speciallib_size[chipname] += library_data['size']
if library_data['is_balance_lib'] == '甲基化': if '甲基化' in library_data['classification']:
self.chip_methylib_size[chipname] += library_data['size'] self.chip_methylib_size[chipname] += library_data['size']
if 'nextera' in library_data['classification'].lower(): if 'nextera' in library_data['classification'].lower():
self.chip_speciallib_nextera_size[chipname] += library_data['size'] self.chip_speciallib_nextera_size[chipname] += library_data['size']
if '华大' in library_data['classification']: if '华大' in library_data['classification']:
self.chip_speciallib_huada_size[chipname] += library_data['size'] self.chip_speciallib_huada_size[chipname] += library_data['size']
if 'N' in library_data['data'][0]['indexi5i7']:
if 'N' in library_data['data'][0]['barcode']:
# print(library_data['data'][0]['barcode'])
self.chip_size_N[chipname] += library_data['size'] self.chip_size_N[chipname] += library_data['size']
def use_rule(self, chipname, classfication): def use_rule_exclusive_classfication(self, chipname, classfication):
"""
文库不能排在一起
"""
may_classfic = set(self.rule[self.rule['c1'] == classfication]['c2']) may_classfic = set(self.rule[self.rule['c1'] == classfication]['c2'])
if self.chip_customer[chipname].intersection(may_classfic): if self.chip_customer[chipname].intersection(may_classfic):
return True return True
return False return False
def use_rule_exclusive_customer(self, chipname, customer): def use_rule_exclusive_customer(self, chipname, customer):
"""文库不能排在一起"""
may_classfic = set( may_classfic = set(
self.rule_exclusive_customer[self.rule_exclusive_customer['customer1'] == customer]['customer2']) self.rule_exclusive_customer[self.rule_exclusive_customer['customer1'] == customer]['customer2'])
if self.chip_customer[chipname].intersection(may_classfic): if self.chip_customer[chipname].intersection(may_classfic):
@ -342,12 +324,12 @@ class AutoLayout:
""" """
size = library_data['size'] size = library_data['size']
size_N = 0 size_N = 0
if 'N' in library_data['data'][0]['barcode']: if 'N' in library_data['data'][0]['indexi5i7']:
size_N = library_data['size'] size_N = library_data['size']
classification = library_data['classification'] classification = library_data['classification']
customer = library_data['customer'] customer = library_data['customer']
is_balance_lib = library_data['is_balance_lib'] is_balance_lib = library_data['is_balance_lib']
library = library_data['library'] # library = library_data['library']
# 芯片大小不能超过设定限制 # 芯片大小不能超过设定限制
sizelimit = True sizelimit = True
@ -357,17 +339,17 @@ class AutoLayout:
# barcode有重复 # barcode有重复
notrepeatbarcode = True notrepeatbarcode = True
if self.chip_barcode_recode[chipname].intersection({item['barcode'] for item in library_data['data']}) or \ if self.chip_barcode_recode[chipname].intersection({item['indexi5i7'] for item in library_data['data']}) or \
self.chip_barcode_recode[chipname].intersection( self.chip_barcode_recode[chipname].intersection(
{'N' * 8 + item['i7'] for item in library_data['data']}) or \ {'N' * 8 + item['indexi7'] for item in library_data['data']}) or \
self.chip_barcode_recode[chipname].intersection( self.chip_barcode_recode[chipname].intersection(
{item['i5'] + 'N' * 8 for item in library_data['data']}): {item['indexi5'] + 'N' * 8 for item in library_data['data']}):
notrepeatbarcode = False notrepeatbarcode = False
# print(chipname, library, 'barcode有重复') # print(chipname, library, 'barcode有重复')
# 互斥的文库 # 互斥的文库
exclusive_classific = True exclusive_classific = True
if self.use_rule(chipname, classification): if self.use_rule_exclusive_classfication(chipname, classification):
exclusive_classific = False exclusive_classific = False
# print(chipname, library, '互斥的文库') # print(chipname, library, '互斥的文库')
@ -422,15 +404,15 @@ class AutoLayout:
base_balance = True base_balance = True
notrepeatbarcode = True notrepeatbarcode = True
if self.chip_barcodei7_recode[chipname].intersection( if self.chip_barcodei7_recode[chipname].intersection(
{item['i7'] for item in library_data['data']}) and max_barcode == 'i7': {item['indexi7'] for item in library_data['data']}) and max_barcode == 'indexi7':
notrepeatbarcode = False notrepeatbarcode = False
if self.chip_barcodei5_recode[chipname].intersection( if self.chip_barcodei5_recode[chipname].intersection(
{item['i5'] for item in library_data['data']}) and max_barcode == 'i5': {item['indexi5'] for item in library_data['data']}) and max_barcode == 'indexi5':
notrepeatbarcode = False notrepeatbarcode = False
# 是个N的取消 # 是个N的取消
if ('N' * 8 in {item['i5'] for item in library_data['data']}) and max_barcode == 'i5': if ('N' * 8 in {item['indexi5'] for item in library_data['data']}) and max_barcode == 'indexi5':
notrepeatbarcode = False notrepeatbarcode = False
if ('N' * 8 in {item['i7'] for item in library_data['data']}) and max_barcode == 'i7': if ('N' * 8 in {item['indexi7'] for item in library_data['data']}) and max_barcode == 'indexi7':
notrepeatbarcode = False notrepeatbarcode = False
if self.chip_size[chipname] > 900: if self.chip_size[chipname] > 900:
current_data = copy.deepcopy(self.index_assignments[chipname]) current_data = copy.deepcopy(self.index_assignments[chipname])
@ -442,7 +424,7 @@ class AutoLayout:
# 子文库名不能重复 # 子文库名不能重复
notrepeatsublib = True notrepeatsublib = True
if self.chip_sublib[chipname].intersection({item['sublibrary'] for item in library_data['data']}): if self.chip_sublib[chipname].intersection({item['subsamplename'] for item in library_data['data']}):
notrepeatsublib = False notrepeatsublib = False
if sizelimit and notrepeatbarcode and \ if sizelimit and notrepeatbarcode and \
@ -475,7 +457,7 @@ class AutoLayout:
no_nextary_data.append(libdata) no_nextary_data.append(libdata)
else: else:
self.no_assign_data.append(libdata) self.no_assign_data.append(libdata)
nextary_barcode.update(libdata['barcode']) nextary_barcode.update(libdata['indexi5i7'])
self.index_assignments[chipname] = no_nextary_data self.index_assignments[chipname] = no_nextary_data
self.chip_barcode_recode[chipname] -= nextary_barcode self.chip_barcode_recode[chipname] -= nextary_barcode
self.chip_speciallib_nextera_size[chipname] = 0 self.chip_speciallib_nextera_size[chipname] = 0
@ -490,7 +472,7 @@ class AutoLayout:
no_huada_data.append(libdata) no_huada_data.append(libdata)
else: else:
self.no_assign_data.append(libdata) self.no_assign_data.append(libdata)
huada_barcode.update(libdata['barcode']) huada_barcode.update(libdata['indexi5i7'])
self.index_assignments[chipname] = no_huada_data self.index_assignments[chipname] = no_huada_data
self.chip_barcode_recode[chipname] -= huada_barcode self.chip_barcode_recode[chipname] -= huada_barcode
self.chip_speciallib_huada_size[chipname] = 0 self.chip_speciallib_huada_size[chipname] = 0
@ -500,63 +482,45 @@ class AutoLayout:
self.loc_chip_num += 1 self.loc_chip_num += 1
def assign_samples(self): def assign_samples(self):
# ori_library_data = list()
if '未测' not in self.ori_data.keys(): # if '未测' not in self.ori_data.keys():
raise UserWarning('提供excel没有 未测 sheet ,请核查!') # raise UserWarning('提供excel没有 未测 sheet ,请核查!')
ori_library_df = pd.DataFrame(self.ori_data['未测']) ori_library_df = pd.DataFrame(self.ori_data)
# 检查提供excel 是否有必须表头
get_col = set(ori_library_df.columns)
unhave_col = set(self.need_cols) - get_col
if unhave_col:
unhave_from = '; '.join(unhave_col)
raise UserWarning(f'未测表里没有{unhave_from} 表头,请核查!')
# 数据标准格式 # 数据标准格式
numeric_mask = pd.to_numeric(ori_library_df['data_needed'], errors='coerce').notna() numeric_mask = pd.to_numeric(ori_library_df['orderdatavolume'], errors='coerce').notna()
time_mask = pd.to_datetime(ori_library_df['time'], errors='coerce').notna() time_mask = pd.to_datetime(ori_library_df['receivedtime'], errors='coerce').notna()
# 添加处理status列的逻辑
status_mask = ori_library_df['status'] == '暂不排样'
# 非正常barcode # 非正常barcode
barcode_mask = ori_library_df['barcode'].str.len() != 16 barcode_mask = ori_library_df['indexi5i7'].str.len() != 16
ori_library_df['note'] = '' ori_library_df['note'] = ''
ori_library_df.loc[~numeric_mask, 'note'] = 'data_needed 列非数字' ori_library_df.loc[~numeric_mask, 'note'] = 'data_needed 列非数字'
ori_library_df.loc[~time_mask, 'note'] = 'time 列非日期' ori_library_df.loc[~time_mask, 'note'] = 'time 列非日期'
ori_library_df.loc[status_mask, 'note'] = '暂不排样'
ori_library_df.loc[barcode_mask, 'note'] = '非16位barcode' ori_library_df.loc[barcode_mask, 'note'] = '非16位barcode'
no_ori_data = ori_library_df[~(numeric_mask & time_mask) | barcode_mask]
no_ori_data = ori_library_df[~(numeric_mask & time_mask) | status_mask | barcode_mask]
self.no_assign_data.extend(no_ori_data.to_dict('records')) self.no_assign_data.extend(no_ori_data.to_dict('records'))
# 使用布尔索引筛选出不是数字和非日期的行,并且不是暂不排样的行 # 使用布尔索引筛选出不是数字和非日期的行,并且不是暂不排样的行, 以及非16位置barcode
ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~status_mask & ~barcode_mask] ori_library_df = ori_library_df[(numeric_mask & time_mask) & ~barcode_mask]
# 某个客户的检测的数据超过1个T就单独处理
# summary = ori_library_df.groupby('customer').agg({'data_needed': 'sum'})
# print(summary)
# 时间格式化 # 时间格式化
ori_library_df['time'] = pd.to_datetime(ori_library_df['time'], errors='coerce') ori_library_df['receivedtime'] = pd.to_datetime(ori_library_df['receivedtime'], errors='coerce')
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1) ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
# 极致客户有重复的把等级调到19,防止放到了最后,到了未测里 # 极致客户有重复的把等级调到1900,防止放到了最后,到了未测里
must_lib_df = ori_library_df[ori_library_df['level'] == 2000] must_lib_df = ori_library_df[ori_library_df['level'] == 2000]
must_lib = set(must_lib_df[must_lib_df.duplicated(subset='barcode', keep=False)]['#library'].to_list()) must_lib = set(must_lib_df[must_lib_df.duplicated(subset='indexi5i7', keep=False)]['samplename'].to_list())
ori_library_df.loc[ori_library_df['#library'].isin(must_lib), 'level'] = 1900 ori_library_df.loc[ori_library_df['samplename'].isin(must_lib), 'level'] = 1900
for library, library_df in ori_library_df.groupby('#library'): for library, library_df in ori_library_df.groupby('samplename'):
size = library_df['data_needed'].sum() size = library_df['orderdatavolume'].sum()
is_balance_lib = library_df['is_balance_lib'].values[0] is_balance_lib = library_df['librarybalancedflag'].values[0]
# 文库内部有重复 # 文库内部有重复
if len(library_df['barcode'].values) > len(set(library_df['barcode'].values)): if len(library_df['indexi5i7'].values) > len(set(library_df['indexi5i7'].values)):
library_df['note'] = '文库内部有重复' library_df['note'] = '文库内部有重复'
self.no_assign_data.extend(library_df.to_dict('records')) self.no_assign_data.extend(library_df.to_dict('records'))
continue continue
@ -564,53 +528,53 @@ class AutoLayout:
# 不平衡文库 大于250G 的数据 先进行拆分 # 不平衡文库 大于250G 的数据 先进行拆分
if is_balance_lib == '' and size > 250: if is_balance_lib == '' and size > 250:
self.return_log.append(f'文库{library} 是不平衡文库, 数据为{size}, 大于250G, 已做拆分处理, 请注意!!! ') self.return_log.append(f'文库{library} 是不平衡文库, 数据为{size}, 大于250G, 已做拆分处理, 请注意!!! ')
data_needed = library_df['data_needed'].copy() data_needed = library_df['orderdatavolume'].copy()
for num in range(int(size), 0, -200): for num in range(int(size), 0, -200):
addnum = 200 addnum = 200
if num <= 200: if num <= 200:
addnum = num addnum = num
library_df['data_needed'] = (addnum / size) * data_needed library_df['orderdatavolume'] = (addnum / size) * data_needed
self.ori_lib_data.append(dict( self.ori_lib_data.append(dict(
library=library, library=library,
is_balance_lib=library_df['is_balance_lib'].values[0], is_balance_lib=library_df['librarybalancedflag'].values[0],
size=library_df['data_needed'].sum(), size=library_df['orderdatavolume'].sum(),
split_method=library_df['拆分方式'].values[0], split_method=library_df['cycletype'].values[0],
time=library_df['time'].values[0], time=library_df['receivedtime'].values[0],
level=1950, level=1950,
customer=library_df['customer'].values[0], customer=library_df['companynamea'].values[0],
classification=library_df['classification'].values[0], classification=library_df['librarystructure'].values[0],
data=library_df[self.need_cols].to_dict('records') data=library_df.to_dict('records')
)) ))
self.split_lib.add(library) self.split_lib.add(library)
continue continue
# 拆分处理 分为了2个大文库 # 拆分处理 分为了2个大文库
if size > (self.data_limit) / 2: if size > self.data_limit / 2:
library_df['data_needed'] = library_df['data_needed'] / 2 library_df['orderdatavolume'] = library_df['orderdatavolume'] / 2
self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ') self.return_log.append(f'文库{library} 已做拆分处理, 请注意!!! ')
self.ori_lib_data.append(dict( self.ori_lib_data.append(dict(
library=library, library=library,
is_balance_lib=library_df['is_balance_lib'].values[0], is_balance_lib=library_df['librarybalancedflag'].values[0],
size=library_df['data_needed'].sum(), size=library_df['orderdatavolume'].sum(),
split_method=library_df['拆分方式'].values[0], split_method=library_df['cycletype'].values[0],
time=library_df['time'].values[0], time=library_df['receivedtime'].values[0],
level=library_df['level'].values[0], level=library_df['level'].values[0],
customer=library_df['customer'].values[0], customer=library_df['companynamea'].values[0],
classification=library_df['classification'].values[0], classification=library_df['librarystructure'].values[0],
data=library_df[self.need_cols].to_dict('records') data=library_df.to_dict('records')
)) ))
self.ori_lib_data.append(dict( self.ori_lib_data.append(dict(
library=library, library=library,
is_balance_lib=library_df['is_balance_lib'].values[0], is_balance_lib=library_df['librarybalancedflag'].values[0],
size=library_df['data_needed'].sum(), size=library_df['orderdatavolume'].sum(),
split_method=library_df['拆分方式'].values[0], split_method=library_df['cycletype'].values[0],
time=library_df['time'].values[0], time=library_df['receivedtime'].values[0],
level=library_df['level'].values[0], level=library_df['level'].values[0],
customer=library_df['customer'].values[0], customer=library_df['companynamea'].values[0],
classification=library_df['classification'].values[0], classification=library_df['librarystructure'].values[0],
data=library_df[self.need_cols].to_dict('records') data=library_df.to_dict('records')
)) ))
self.combinations_same_barcode() self.combinations_same_barcode()
@ -655,37 +619,38 @@ class AutoLayout:
if not chip_assignments: if not chip_assignments:
continue continue
df = pd.DataFrame(chip_assignments) df = pd.DataFrame(chip_assignments)
if df['data_needed'].sum() < self.data_lower: if df['orderdatavolume'].sum() < self.data_lower:
left_data.extend(chip_assignments) left_data.extend(chip_assignments)
no_need_chipname.append(chip_idx) no_need_chipname.append(chip_idx)
for chip_idx in no_need_chipname: for chip_idx in no_need_chipname:
del self.index_assignments[chip_idx] del self.index_assignments[chip_idx]
if not left_data:
return
ori_library_df = pd.DataFrame(left_data) ori_library_df = pd.DataFrame(left_data)
ori_library_df['level'] = ori_library_df.apply(self.level, axis=1) ori_library_df['level'] = ori_library_df.apply(self.level, axis=1)
ori_lib_data = list() ori_lib_data = list()
for library, library_df in ori_library_df.groupby('#library'): for library, library_df in ori_library_df.groupby('samplename'):
level = library_df['level'].values[0] level = library_df['level'].values[0]
if library in self.split_lib: if library in self.split_lib:
level = 1950 level = 1950
ori_lib_data.append(dict( ori_lib_data.append(dict(
library=library, library=library,
is_balance_lib=library_df['is_balance_lib'].values[0], is_balance_lib=library_df['librarybalancedflag'].values[0],
size=library_df['data_needed'].sum(), size=library_df['orderdatavolume'].sum(),
split_method=library_df['拆分方式'].values[0], split_method=library_df['cycletype'].values[0],
time=library_df['time'].values[0], time=library_df['receivedtime'].values[0],
level=level, level=level,
customer=library_df['customer'].values[0], customer=library_df['companynamea'].values[0],
classification=library_df['classification'].values[0], classification=library_df['librarystructure'].values[0],
data=library_df[self.need_cols].to_dict('records') data=library_df.to_dict('records')
)) ))
ori_lib_data = sorted(ori_lib_data, key=lambda x: (x['level'], x['time'], -x['size'])) ori_lib_data = sorted(ori_lib_data, key=lambda x: (x['level'], x['time'], -x['size']))
self.loc_chip_num = 100 self.loc_chip_num = 100
while ori_lib_data: while ori_lib_data:
library_data = ori_lib_data[0] library_data = ori_lib_data[0]
chipname = f'chipB{self.loc_chip_num}_{max_barcode}' if max_barcode != 'all' else f'chipB{self.loc_chip_num}' chipname = f'chip{self.loc_chip_num}_{max_barcode}' if max_barcode != 'all' else f'chip{self.loc_chip_num}'
# 空白芯片直接添加 # 空白芯片直接添加
if chipname not in self.index_assignments: if chipname not in self.index_assignments:
@ -714,13 +679,12 @@ class AutoLayout:
def run(self): def run(self):
# print('# 测试代码') # print('# 测试代码')
# self.assign_samples() # self.assign_samples()
# self.assign_again() # self.assign_again_size()
try: try:
self.assign_samples() self.assign_samples()
self.assign_again_size() self.assign_again_size()
# self.assign_again_size(max_barcode='i7') # self.assign_again_size(max_barcode='indexi7')
# self.assign_again_size(max_barcode='i5') # self.assign_again_size(max_barcode='indexi5')
# self.assign_again_size()
except Exception as e: except Exception as e:
self.return_log.append(f'T7排样出错 请联系!{e}') self.return_log.append(f'T7排样出错 请联系!{e}')
self.index_assignments = {} self.index_assignments = {}
@ -728,23 +692,22 @@ class AutoLayout:
outputpath = os.path.join(self.output, 'result', outputname) outputpath = os.path.join(self.output, 'result', outputname)
writer = pd.ExcelWriter(outputpath) writer = pd.ExcelWriter(outputpath)
res = list()
chip_loc = 1 chip_loc = 1
librarynum = 0 librarynum = 0
for chip_idx, chip_assignments in self.index_assignments.items(): for chip_idx, chip_assignments in self.index_assignments.items():
if not chip_assignments: if not chip_assignments:
continue continue
df = pd.DataFrame(chip_assignments) df = pd.DataFrame(chip_assignments)
df['time'] = df['time'].dt.strftime('%Y-%m-%d') df['receivedtime'] = df['receivedtime'].dt.strftime('%Y-%m-%d')
if [method for method in df['拆分方式'].values if '极致' in method]: if [method for method in df['cycletype'].values if '极致' in method]:
addname = 'X' addname = 'X'
else: else:
addname = '' addname = ''
other_name = '' other_name = ''
# if 'chipB' in chip_idx and df['barcode'].duplicated().any():
# other_name = '_i7'
if df['data_needed'].sum() < (self.data_lower - 50) and not addname: if df['orderdatavolume'].sum() < (self.data_lower - 50) and not addname:
df['note'] = f'排样数据量不足{self.data_lower - 50}G' df['note'] = f'排样数据量不足{self.data_lower - 50}G'
self.no_assign_data.extend(df.to_dict('records')) self.no_assign_data.extend(df.to_dict('records'))
continue continue
@ -752,31 +715,29 @@ class AutoLayout:
df['note'] = '排样管数超标' df['note'] = '排样管数超标'
self.no_assign_data.extend(df.to_dict('records')) self.no_assign_data.extend(df.to_dict('records'))
continue continue
librarynum += len(set(df['#library'].values)) librarynum += len(set(df['samplename'].values))
self.dec_barcode_radio(chip_idx) self.dec_barcode_radio(chip_idx)
chipname = addname + chip_idx + other_name chipname = addname + chip_idx + other_name
df['lanepackcode'] = chipname
sum_list = list() # df = pd.concat([pd.DataFrame(self.items), df]).reset_index(drop=True)
for library, library_df in df.groupby('#library'): #
sum_list.append(dict( # df.to_excel(writer, sheet_name=chipname, index=False)
二次拆分=library, res.extend(df.to_dict('records'))
客户=library_df['customer'].values[0],
类型=library_df['classification'].values[0],
打折前=library_df['data_needed'].sum()
))
df_sum = pd.DataFrame(sum_list)
res_df = pd.concat([df, df_sum], axis=1)
res_df.to_excel(writer, sheet_name=chipname, index=False)
chip_loc += 1 chip_loc += 1
res_df = pd.DataFrame(res)
res_df = pd.concat([pd.DataFrame(self.items), res_df]).reset_index(drop=True)
res_df.to_excel(writer, sheet_name='assignment', index=False)
no_assign_df = pd.DataFrame(self.no_assign_data) no_assign_df = pd.DataFrame(self.no_assign_data)
if not no_assign_df.empty:
no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x) no_assign_df = no_assign_df.applymap(lambda x: format_date(x) if isinstance(x, pd.Timestamp) else x)
no_assign_df_not_balance = ','.join(set([lib for lib in no_assign_df['#library'] if lib in self.split_lib])) no_assign_df_not_balance = ','.join(set([lib for lib in no_assign_df['samplename'] if lib in self.split_lib]))
if no_assign_df_not_balance: if no_assign_df_not_balance:
self.return_log.append(f'文库{no_assign_df_not_balance}有做不平衡文库拆分处理,并且没有排完,请核查!') self.return_log.append(f'文库{no_assign_df_not_balance}有做不平衡文库拆分处理,并且没有排完,请核查!')
if not no_assign_df.empty: # if not no_assign_df.empty:
no_assign_df = no_assign_df[self.need_cols] # no_assign_df = no_assign_df[self.need_cols]
no_assign_df = pd.concat([pd.DataFrame(self.items), no_assign_df]).reset_index(drop=True)
no_assign_df.to_excel(writer, sheet_name='未测', index=False) no_assign_df.to_excel(writer, sheet_name='未测', index=False)
if self.return_log: if self.return_log:
pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False) pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False)