增加碱基不平衡
parent
40204a8837
commit
0f0e2f9768
112
tools/t7.py
112
tools/t7.py
|
|
@ -1,3 +1,4 @@
|
||||||
|
import copy
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
@ -13,7 +14,7 @@ class AutoLayout:
|
||||||
自动化派样
|
自动化派样
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, path, chipnum, output=basedir, data_limit=1800):
|
def __init__(self, path, chipnum, output=basedir, data_limit=1750):
|
||||||
self.path = path
|
self.path = path
|
||||||
self.output = output
|
self.output = output
|
||||||
self.chipnum = int(chipnum)
|
self.chipnum = int(chipnum)
|
||||||
|
|
@ -78,48 +79,18 @@ class AutoLayout:
|
||||||
self.chip_customer[chipname].add(library_data['customer'])
|
self.chip_customer[chipname].add(library_data['customer'])
|
||||||
self.chip_classification[chipname].add(library_data['classification'])
|
self.chip_classification[chipname].add(library_data['classification'])
|
||||||
|
|
||||||
# def add_new_chip(self, library_data):
|
def count_barcode_radio(self, data):
|
||||||
# """
|
|
||||||
# 要新增到芯片上的数据
|
|
||||||
# :param library_data:
|
|
||||||
# :return:
|
|
||||||
# """
|
|
||||||
# chip_num_tmp = self.loc_chip_num
|
|
||||||
# while True:
|
|
||||||
# chip_num_tmp += 1
|
|
||||||
# chipname_tmp = f'chip{chip_num_tmp}'
|
|
||||||
# library = library_data['library']
|
|
||||||
# if chipname_tmp not in self.index_assignments:
|
|
||||||
# self.logger.error(f'{library} {chipname_tmp} 常规添加')
|
|
||||||
# self.add_new_data(chipname_tmp, library_data)
|
|
||||||
# break
|
|
||||||
# else:
|
|
||||||
# is_same_barcode = self.chip_barcode_recode[chipname_tmp].intersection(
|
|
||||||
# {item['barcode'] for item in library_data['data']})
|
|
||||||
# # 没有从重复的index,并且也不互斥的
|
|
||||||
# if ((self.chip_size[chipname_tmp] + library_data['size']) > self.data_limit):
|
|
||||||
# self.logger.error(f'{library} {chipname_tmp} 文库相加大于设定限制')
|
|
||||||
# if ((self.chip_speciallib_size[chipname_tmp] + library_data['size']) >= 200):
|
|
||||||
# self.logger.error(f'{library} {chipname_tmp} 不平衡文库相加大于设定限制')
|
|
||||||
# if is_same_barcode:
|
|
||||||
# self.logger.error(f'{library} {chipname_tmp} 文库有barcode重复')
|
|
||||||
# if self.use_rule(chipname_tmp, library_data['customer']):
|
|
||||||
# self.logger.error(f'{library} {chipname_tmp} 有互斥单位')
|
|
||||||
# if ((self.chip_size[chipname_tmp] + library_data['size']) <= self.data_limit) \
|
|
||||||
# and ((self.chip_speciallib_size[chipname_tmp] + library_data['size']) < 200) \
|
|
||||||
# and (not is_same_barcode) \
|
|
||||||
# and (not self.use_rule(chipname_tmp, library_data['customer'])):
|
|
||||||
# self.add_new_data(chipname_tmp, library_data, newer=False)
|
|
||||||
# break
|
|
||||||
|
|
||||||
def dec_barcode_radio(self, chipname):
|
|
||||||
data = self.index_assignments[chipname]
|
|
||||||
df = pd.DataFrame(data)
|
df = pd.DataFrame(data)
|
||||||
|
ratio_sites = dict()
|
||||||
|
is_not_balance_list = []
|
||||||
|
if df.empty:
|
||||||
|
return ratio_sites, is_not_balance_list
|
||||||
|
|
||||||
df['barcode'] = df['barcode'].str.slice(0, 16)
|
df['barcode'] = df['barcode'].str.slice(0, 16)
|
||||||
barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values,
|
barcode_df = pd.DataFrame(df['barcode'].str.split('', expand=True).iloc[:, 1:-1].values,
|
||||||
columns=['T' + str(x) for x in range(16)]).join(df['data_needed'])
|
columns=['T' + str(x) for x in range(16)]).join(df['data_needed'])
|
||||||
total = barcode_df['data_needed'].sum()
|
total = barcode_df['data_needed'].sum()
|
||||||
is_not_balance_list = []
|
|
||||||
for i in range(16):
|
for i in range(16):
|
||||||
column = 'T' + str(i)
|
column = 'T' + str(i)
|
||||||
col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'})
|
col_df = barcode_df.groupby(column).agg({'data_needed': 'sum'})
|
||||||
|
|
@ -129,12 +100,10 @@ class AutoLayout:
|
||||||
col_df = col_df.drop('N')
|
col_df = col_df.drop('N')
|
||||||
else:
|
else:
|
||||||
base_N_size = 0
|
base_N_size = 0
|
||||||
|
|
||||||
col_df['ratio'] = (col_df['data_needed']) / (total - base_N_size)
|
col_df['ratio'] = (col_df['data_needed']) / (total - base_N_size)
|
||||||
# is_need_base = col_df.index[col_df['ratio'] < 0.088]
|
|
||||||
|
|
||||||
A, B, C, D = list(), list(), list(), list(),
|
|
||||||
ratio = col_df['ratio'].to_dict()
|
ratio = col_df['ratio'].to_dict()
|
||||||
|
ratio_sites[i] = ratio
|
||||||
|
A, B, C, D, E, F = list(), list(), list(), list(), list(), list()
|
||||||
for decbase in ['A', 'T', 'C', 'G']:
|
for decbase in ['A', 'T', 'C', 'G']:
|
||||||
if decbase not in ratio:
|
if decbase not in ratio:
|
||||||
ratio[decbase] = 0
|
ratio[decbase] = 0
|
||||||
|
|
@ -142,19 +111,31 @@ class AutoLayout:
|
||||||
A.append(decbase)
|
A.append(decbase)
|
||||||
if 0.2 <= ratio[decbase] < 0.6:
|
if 0.2 <= ratio[decbase] < 0.6:
|
||||||
B.append(decbase)
|
B.append(decbase)
|
||||||
if 0.08 <= ratio[decbase] < 0.2:
|
if 0.15 <= ratio[decbase] < 0.2:
|
||||||
C.append(decbase)
|
C.append(decbase)
|
||||||
if ratio[decbase] <= 0.8:
|
if 0.1 <= ratio[decbase] < 0.15:
|
||||||
D.append(decbase)
|
D.append(decbase)
|
||||||
if not ((len(B) + len(C) == 4) or (len(D) == 1 and len(C) == 3)):
|
if 0.08 <= ratio[decbase] < 0.1:
|
||||||
is_not_balance_list.append(
|
E.append(decbase)
|
||||||
'%s 第%s位置,有碱基不平衡,算出结果为 %s' % (chipname, i, ratio)
|
if ratio[decbase] < 0.08:
|
||||||
)
|
F.append(decbase)
|
||||||
|
|
||||||
if len(is_not_balance_list):
|
A_num, B_num, C_num, D_num, E_num, F_num = len(A), len(B), len(C), len(D), len(E), len(F)
|
||||||
self.return_log.append('有碱基不平衡性!')
|
if not ((B_num + C_num + D_num == 4) or (F_num == 1 and (A_num + B_num) == 3) or (
|
||||||
self.return_log.extend(is_not_balance_list)
|
E_num == 1 and D_num == 1 and (A_num + B_num + C_num) == 2) or (
|
||||||
print('有碱基不平衡性!\n', '\n'.join(is_not_balance_list))
|
E_num == 1 and (A_num + B_num + C_num) == 3)):
|
||||||
|
is_not_balance_list.append(
|
||||||
|
'第%s位置,算出结果为 %s' % (i, ratio)
|
||||||
|
)
|
||||||
|
return ratio_sites, is_not_balance_list
|
||||||
|
|
||||||
|
def dec_barcode_radio(self, chipname):
|
||||||
|
data = self.index_assignments[chipname]
|
||||||
|
ratio_sites, is_not_balance_list = self.count_barcode_radio(data)
|
||||||
|
if is_not_balance_list:
|
||||||
|
desc = '\n'.join(is_not_balance_list)
|
||||||
|
self.return_log.append(f'芯片{chipname}有碱基不平衡:\n{desc}')
|
||||||
|
print(f'芯片{chipname}有碱基不平衡:\n{desc}')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def level(row):
|
def level(row):
|
||||||
|
|
@ -206,31 +187,34 @@ class AutoLayout:
|
||||||
sizelimit = True
|
sizelimit = True
|
||||||
if self.chip_size[chipname] + size > self.data_limit:
|
if self.chip_size[chipname] + size > self.data_limit:
|
||||||
sizelimit = False
|
sizelimit = False
|
||||||
self.logger.error(f'{library} {chipname} 文库相加大于设定限制')
|
|
||||||
# barcode有重复
|
# barcode有重复
|
||||||
notrepeatbarcode = True
|
notrepeatbarcode = True
|
||||||
if self.chip_barcode_recode[chipname].intersection({item['barcode'] for item in library_data['data']}):
|
if self.chip_barcode_recode[chipname].intersection({item['barcode'] for item in library_data['data']}):
|
||||||
notrepeatbarcode = False
|
notrepeatbarcode = False
|
||||||
self.logger.error(f'{library} {chipname} 文库有barcode重复')
|
|
||||||
# # 互斥的客户
|
|
||||||
# exclusivecostom = True
|
|
||||||
# if self.use_rule(chipname, customer):
|
|
||||||
# exclusivecostom = False
|
|
||||||
# self.logger.error(f'{library} {chipname} 有互斥单位')
|
|
||||||
|
|
||||||
# 互斥的文库
|
# 互斥的文库
|
||||||
exclusive_classific = True
|
exclusive_classific = True
|
||||||
if self.use_rule(chipname, classification):
|
if self.use_rule(chipname, classification):
|
||||||
exclusive_classific = False
|
exclusive_classific = False
|
||||||
self.logger.error(f'{library} {chipname} 有互斥单位')
|
|
||||||
|
|
||||||
# 不平衡文库大于200G 不能添加
|
# 不平衡文库大于200G 不能添加
|
||||||
splibrary = True
|
splibrary = True
|
||||||
if classification in ['扩增子', '不平衡文库', '单细胞文库', '甲基化'] \
|
if classification in ['扩增子', '不平衡文库', '单细胞文库', '甲基化'] \
|
||||||
and self.chip_speciallib_size[chipname] + size > 250:
|
and self.chip_speciallib_size[chipname] + size > 250:
|
||||||
splibrary = False
|
splibrary = False
|
||||||
self.logger.error(f'{library} {chipname} 不平衡文库相加大于设定限制')
|
|
||||||
if sizelimit and notrepeatbarcode and exclusive_classific and splibrary:
|
# 碱基不平衡不过不添加,保证前面的数据, 在数据达到1200G的时候开始
|
||||||
|
base_balance = True
|
||||||
|
if self.chip_size[chipname] > 800:
|
||||||
|
current_data = copy.deepcopy(self.index_assignments[chipname])
|
||||||
|
new_data = library_data['data']
|
||||||
|
current_data.extend(new_data)
|
||||||
|
ratio_sites, is_not_balance_list = self.count_barcode_radio(current_data)
|
||||||
|
if is_not_balance_list:
|
||||||
|
base_balance = False
|
||||||
|
|
||||||
|
if sizelimit and notrepeatbarcode and exclusive_classific and splibrary and base_balance:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -314,6 +298,7 @@ class AutoLayout:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
# self.assign_samples()
|
||||||
try:
|
try:
|
||||||
self.assign_samples()
|
self.assign_samples()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -328,7 +313,7 @@ class AutoLayout:
|
||||||
df = pd.DataFrame(chip_assignments)
|
df = pd.DataFrame(chip_assignments)
|
||||||
df['time'] = df['time'].dt.strftime('%Y-%m-%d')
|
df['time'] = df['time'].dt.strftime('%Y-%m-%d')
|
||||||
if df['data_needed'].sum() < 1500 or chip_loc > self.chipnum:
|
if df['data_needed'].sum() < 1500 or chip_loc > self.chipnum:
|
||||||
self.no_assign_data.extend(chip_assignments)
|
self.no_assign_data.extend(df.to_dict('records'))
|
||||||
continue
|
continue
|
||||||
if [method for method in df['拆分方式'].values if '极致' in method]:
|
if [method for method in df['拆分方式'].values if '极致' in method]:
|
||||||
addname = 'X'
|
addname = 'X'
|
||||||
|
|
@ -338,7 +323,6 @@ class AutoLayout:
|
||||||
df.to_excel(writer, sheet_name=addname + chip_idx, index=False)
|
df.to_excel(writer, sheet_name=addname + chip_idx, index=False)
|
||||||
chip_loc += 1
|
chip_loc += 1
|
||||||
no_assign_df = pd.DataFrame(self.no_assign_data)
|
no_assign_df = pd.DataFrame(self.no_assign_data)
|
||||||
# no_assign_df['time'] = no_assign_df['time'].dt.strftime('%Y-%m-%d')
|
|
||||||
no_assign_df.to_excel(writer, sheet_name='未测', index=False)
|
no_assign_df.to_excel(writer, sheet_name='未测', index=False)
|
||||||
if self.return_log:
|
if self.return_log:
|
||||||
pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False)
|
pd.DataFrame(self.return_log).to_excel(writer, sheet_name='log', index=False)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue