化疗修改,不在vcf中出现的从bam文件中获取深度
parent
39f51ccf2e
commit
bf1d9de10e
|
|
@ -10,7 +10,7 @@ import pysam
|
||||||
|
|
||||||
class ChemoRun:
|
class ChemoRun:
|
||||||
|
|
||||||
def __init__(self, database, probe, cancer, project, output_dir, name, vcf):
|
def __init__(self, database, probe, cancer, project, output_dir, name, vcf, bam, ref):
|
||||||
self.database = database
|
self.database = database
|
||||||
self.probe = probe
|
self.probe = probe
|
||||||
self.output_dir = output_dir
|
self.output_dir = output_dir
|
||||||
|
|
@ -18,6 +18,8 @@ class ChemoRun:
|
||||||
self.vcf = vcf
|
self.vcf = vcf
|
||||||
self.cancer = cancer
|
self.cancer = cancer
|
||||||
self.project = project
|
self.project = project
|
||||||
|
self.bam_file = pysam.AlignmentFile(bam, "rb")
|
||||||
|
self.ref_file = pysam.FastaFile(ref)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def read_info(project):
|
def read_info(project):
|
||||||
|
|
@ -62,6 +64,22 @@ class ChemoRun:
|
||||||
records[(chrom, pos)] = record
|
records[(chrom, pos)] = record
|
||||||
return records
|
return records
|
||||||
|
|
||||||
|
def get_position_depth(self, chr, pos):
|
||||||
|
"""
|
||||||
|
获取位点测序深度 >=30X
|
||||||
|
"""
|
||||||
|
|
||||||
|
depths = self.bam_file.count_coverage(chr, start=pos - 1, stop=pos)
|
||||||
|
depth_sum = sum([sum(arr) for arr in depths])
|
||||||
|
return depth_sum
|
||||||
|
|
||||||
|
def get_ref_base(self, chr, pos):
|
||||||
|
"""
|
||||||
|
获取参考基因组
|
||||||
|
"""
|
||||||
|
base = self.ref_file.fetch(chr, pos - 1, pos)
|
||||||
|
return base
|
||||||
|
|
||||||
def parsedata(self):
|
def parsedata(self):
|
||||||
"""
|
"""
|
||||||
处理chemo_drug_rsid_snppos.xlsx, 并生成对应bed文件
|
处理chemo_drug_rsid_snppos.xlsx, 并生成对应bed文件
|
||||||
|
|
@ -97,20 +115,33 @@ class ChemoRun:
|
||||||
for _, row in beddata.iterrows():
|
for _, row in beddata.iterrows():
|
||||||
chrom = row['chr']
|
chrom = row['chr']
|
||||||
end = row['end']
|
end = row['end']
|
||||||
fliter = pd.DataFrame()
|
# bed 文件在 vcf 中
|
||||||
if (chrom, end) in records:
|
if (chrom, end) in records:
|
||||||
record = records[(chrom, end)]
|
record = records[(chrom, end)]
|
||||||
ref = record.ref
|
ref = record.ref
|
||||||
alt = record.alts[0]
|
alt = record.alts[0]
|
||||||
# gt = '/'.join(list(map(str, sorted(record.samples.get(record.samples.keys()[0]).get('GT')))))
|
|
||||||
freq = record.samples.get(record.samples.keys()[-1]).get('AF')[0]
|
freq = record.samples.get(record.samples.keys()[-1]).get('AF')[0]
|
||||||
depth = record.samples.get(record.samples.keys()[-1]).get('DP')
|
depth = record.samples.get(record.samples.keys()[-1]).get('DP')
|
||||||
|
|
||||||
if freq > 0.9:
|
if freq > 0.9:
|
||||||
gt = '1/1'
|
gt = '1/1'
|
||||||
elif 0.9 >= freq > 0.1:
|
elif 0.9 >= freq > 0.1:
|
||||||
gt = '0/1'
|
gt = '0/1'
|
||||||
else:
|
else:
|
||||||
gt = '0/0'
|
gt = '0/0'
|
||||||
|
# 不在vcf中 去bam文件中获取该位点的测序深度 去ref 获取碱基 没有的就为NA了
|
||||||
|
else:
|
||||||
|
depth = self.get_position_depth(chrom, end)
|
||||||
|
ref = self.get_ref_base(chrom, end)
|
||||||
|
alt = ref
|
||||||
|
freq = 1
|
||||||
|
gt = '0/0'
|
||||||
|
# 测序深度小于30 排除
|
||||||
|
if depth < 30:
|
||||||
|
alt = 'NA'
|
||||||
|
freq = 'NA'
|
||||||
|
gt = 'NA'
|
||||||
|
|
||||||
match_drug_rsid_data = drug_rsid_data[
|
match_drug_rsid_data = drug_rsid_data[
|
||||||
(drug_rsid_data['chr'] == chrom) &
|
(drug_rsid_data['chr'] == chrom) &
|
||||||
(drug_rsid_data['end'] == end) &
|
(drug_rsid_data['end'] == end) &
|
||||||
|
|
@ -119,33 +150,22 @@ class ChemoRun:
|
||||||
(drug_rsid_data['genotype'] == gt)
|
(drug_rsid_data['genotype'] == gt)
|
||||||
]
|
]
|
||||||
match_drug_rsid_data = match_drug_rsid_data.reset_index()
|
match_drug_rsid_data = match_drug_rsid_data.reset_index()
|
||||||
|
if match_drug_rsid_data.empty:
|
||||||
|
match_drug_rsid_data = pd.DataFrame([
|
||||||
|
dict(
|
||||||
|
ref=ref,
|
||||||
|
alt=alt,
|
||||||
|
genotype=gt
|
||||||
|
)
|
||||||
|
])
|
||||||
match_drug_rsid_data['chr'] = chrom
|
match_drug_rsid_data['chr'] = chrom
|
||||||
match_drug_rsid_data['pos'] = end
|
match_drug_rsid_data['pos'] = end
|
||||||
match_drug_rsid_data['freq'] = freq
|
match_drug_rsid_data['freq'] = freq
|
||||||
match_drug_rsid_data['depth'] = depth
|
match_drug_rsid_data['depth'] = depth
|
||||||
fliter = pd.concat([fliter, match_drug_rsid_data])
|
fliterdata = pd.concat([fliterdata, match_drug_rsid_data])
|
||||||
|
|
||||||
if fliter.empty:
|
|
||||||
match_drug_rsid_data = drug_rsid_data[
|
|
||||||
(drug_rsid_data['chr'] == chrom) &
|
|
||||||
(drug_rsid_data['end'] == end) &
|
|
||||||
(drug_rsid_data['genotype'] == '0/0')
|
|
||||||
]
|
|
||||||
match_drug_rsid_data = match_drug_rsid_data.reset_index()
|
|
||||||
match_drug_rsid_data['chr'] = chrom
|
|
||||||
match_drug_rsid_data['pos'] = end
|
|
||||||
match_drug_rsid_data['freq'] = '.'
|
|
||||||
match_drug_rsid_data['depth'] = '.'
|
|
||||||
fliter = pd.concat([fliter, match_drug_rsid_data])
|
|
||||||
|
|
||||||
if fliter.empty:
|
|
||||||
raise UserWarning(
|
|
||||||
'chr: %s , end: %s 数据库未能匹配, 野生型0/0也未能匹配' % (chrom, end))
|
|
||||||
fliterdata = pd.concat([fliterdata, fliter])
|
|
||||||
# 生成过滤之后文件
|
# 生成过滤之后文件
|
||||||
respath = os.path.join(self.output_dir, f'{self.name}.chemo.res.csv')
|
respath = os.path.join(self.output_dir, f'{self.name}.chemo.res.txt')
|
||||||
if not fliterdata.empty:
|
|
||||||
fliterdata['probe'] = self.probe
|
|
||||||
fliterdata.to_csv(respath, sep='\t', index=False)
|
fliterdata.to_csv(respath, sep='\t', index=False)
|
||||||
|
|
||||||
# 分类汇总 同位点,药物合并 drug.infos.txt
|
# 分类汇总 同位点,药物合并 drug.infos.txt
|
||||||
|
|
@ -256,6 +276,9 @@ if __name__ == "__main__":
|
||||||
parser.add_argument('-c', '--cancer', help="cancer", required=True)
|
parser.add_argument('-c', '--cancer', help="cancer", required=True)
|
||||||
parser.add_argument('-p', '--project', help="Project", required=True)
|
parser.add_argument('-p', '--project', help="Project", required=True)
|
||||||
parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='')
|
parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='')
|
||||||
|
parser.add_argument('-b', '--bam', help="Bamfile for sample", required=True)
|
||||||
|
parser.add_argument('-r', '--ref', help="ref fasta", required=True)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
chemorun = ChemoRun(args.database, args.probe, args.cancer, args.project, args.output_dir, args.name, args.vcf)
|
chemorun = ChemoRun(args.database, args.probe, args.cancer, args.project, args.output_dir, args.name, args.vcf,
|
||||||
|
args.bam, args.ref)
|
||||||
chemorun.run()
|
chemorun.run()
|
||||||
|
|
|
||||||
|
|
@ -166,7 +166,11 @@ workflow pipeline {
|
||||||
probe=probe,
|
probe=probe,
|
||||||
vcf=call_mutation.germline_vcf,
|
vcf=call_mutation.germline_vcf,
|
||||||
cancer=cancer,
|
cancer=cancer,
|
||||||
project=project
|
project=project,
|
||||||
|
normal=normal,
|
||||||
|
tumor_rmdup_bam=alignment.tumor_rmdup_bam,
|
||||||
|
normal_rmdup_bam=alignment.normal_rmdup_bam,
|
||||||
|
ref=ref
|
||||||
}
|
}
|
||||||
|
|
||||||
call neoantigen.call_neoantigen as call_neoantigen {
|
call neoantigen.call_neoantigen as call_neoantigen {
|
||||||
|
|
|
||||||
|
|
@ -7,12 +7,14 @@ task run_chemo {
|
||||||
String vcf
|
String vcf
|
||||||
String cancer
|
String cancer
|
||||||
String project
|
String project
|
||||||
|
String bam
|
||||||
|
String ref
|
||||||
command {
|
command {
|
||||||
if [ ! -d ${output_dir}/chemo ];then
|
if [ ! -d ${output_dir}/chemo ];then
|
||||||
mkdir ${output_dir}/chemo
|
mkdir ${output_dir}/chemo
|
||||||
fi
|
fi
|
||||||
|
|
||||||
chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project}
|
chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project} -b ${bam} -r ${ref}
|
||||||
|
|
||||||
}
|
}
|
||||||
output {
|
output {
|
||||||
|
|
@ -25,23 +27,45 @@ workflow call_chemo {
|
||||||
Boolean run=true
|
Boolean run=true
|
||||||
|
|
||||||
String name
|
String name
|
||||||
|
String? normal
|
||||||
String output_dir
|
String output_dir
|
||||||
String probe
|
String probe
|
||||||
String vcf
|
String vcf
|
||||||
String cancer
|
String cancer
|
||||||
String project
|
String project
|
||||||
|
String tumor_rmdup_bam
|
||||||
|
String? normal_rmdup_bam
|
||||||
|
String ref
|
||||||
|
|
||||||
if (run) {
|
if (run) {
|
||||||
call run_chemo {
|
if (defined(normal)) {
|
||||||
|
call run_chemo as run_chemo_normal {
|
||||||
input:
|
input:
|
||||||
name=name,
|
name=name,
|
||||||
output_dir=output_dir,
|
output_dir=output_dir,
|
||||||
probe=probe,
|
probe=probe,
|
||||||
vcf=vcf,
|
vcf=vcf,
|
||||||
cancer=cancer,
|
cancer=cancer,
|
||||||
project=project
|
project=project,
|
||||||
|
bam=normal_rmdup_bam,
|
||||||
|
ref=ref
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!defined(normal)) {
|
||||||
|
call run_chemo as run_chemo_tumor {
|
||||||
|
input:
|
||||||
|
name=name,
|
||||||
|
output_dir=output_dir,
|
||||||
|
probe=probe,
|
||||||
|
vcf=vcf,
|
||||||
|
cancer=cancer,
|
||||||
|
project=project,
|
||||||
|
bam=tumor_rmdup_bam,
|
||||||
|
ref=ref
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String chemo_res = "${output_dir}/chemo/${name}.drug.res.txt"
|
String chemo_res = "${output_dir}/chemo/${name}.drug.res.txt"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue