化疗修改,不在vcf中出现的从bam文件中获取深度

master
chaopower 2024-02-20 15:13:06 +08:00
parent 39f51ccf2e
commit bf1d9de10e
3 changed files with 97 additions and 46 deletions

View File

@ -10,7 +10,7 @@ import pysam
class ChemoRun:
def __init__(self, database, probe, cancer, project, output_dir, name, vcf):
def __init__(self, database, probe, cancer, project, output_dir, name, vcf, bam, ref):
self.database = database
self.probe = probe
self.output_dir = output_dir
@ -18,6 +18,8 @@ class ChemoRun:
self.vcf = vcf
self.cancer = cancer
self.project = project
self.bam_file = pysam.AlignmentFile(bam, "rb")
self.ref_file = pysam.FastaFile(ref)
@staticmethod
def read_info(project):
@ -62,6 +64,22 @@ class ChemoRun:
records[(chrom, pos)] = record
return records
def get_position_depth(self, chr, pos):
"""
获取位点测序深度 >=30X
"""
depths = self.bam_file.count_coverage(chr, start=pos - 1, stop=pos)
depth_sum = sum([sum(arr) for arr in depths])
return depth_sum
def get_ref_base(self, chr, pos):
"""
获取参考基因组
"""
base = self.ref_file.fetch(chr, pos - 1, pos)
return base
def parsedata(self):
"""
处理chemo_drug_rsid_snppos.xlsx, 并生成对应bed文件
@ -97,20 +115,33 @@ class ChemoRun:
for _, row in beddata.iterrows():
chrom = row['chr']
end = row['end']
fliter = pd.DataFrame()
# bed 文件在 vcf 中
if (chrom, end) in records:
record = records[(chrom, end)]
ref = record.ref
alt = record.alts[0]
# gt = '/'.join(list(map(str, sorted(record.samples.get(record.samples.keys()[0]).get('GT')))))
freq = record.samples.get(record.samples.keys()[-1]).get('AF')[0]
depth = record.samples.get(record.samples.keys()[-1]).get('DP')
if freq > 0.9:
gt = '1/1'
elif 0.9 >= freq > 0.1:
gt = '0/1'
else:
gt = '0/0'
# 不在vcf中 去bam文件中获取该位点的测序深度 去ref 获取碱基 没有的就为NA了
else:
depth = self.get_position_depth(chrom, end)
ref = self.get_ref_base(chrom, end)
alt = ref
freq = 1
gt = '0/0'
# 测序深度小于30 排除
if depth < 30:
alt = 'NA'
freq = 'NA'
gt = 'NA'
match_drug_rsid_data = drug_rsid_data[
(drug_rsid_data['chr'] == chrom) &
(drug_rsid_data['end'] == end) &
@ -119,33 +150,22 @@ class ChemoRun:
(drug_rsid_data['genotype'] == gt)
]
match_drug_rsid_data = match_drug_rsid_data.reset_index()
if match_drug_rsid_data.empty:
match_drug_rsid_data = pd.DataFrame([
dict(
ref=ref,
alt=alt,
genotype=gt
)
])
match_drug_rsid_data['chr'] = chrom
match_drug_rsid_data['pos'] = end
match_drug_rsid_data['freq'] = freq
match_drug_rsid_data['depth'] = depth
fliter = pd.concat([fliter, match_drug_rsid_data])
fliterdata = pd.concat([fliterdata, match_drug_rsid_data])
if fliter.empty:
match_drug_rsid_data = drug_rsid_data[
(drug_rsid_data['chr'] == chrom) &
(drug_rsid_data['end'] == end) &
(drug_rsid_data['genotype'] == '0/0')
]
match_drug_rsid_data = match_drug_rsid_data.reset_index()
match_drug_rsid_data['chr'] = chrom
match_drug_rsid_data['pos'] = end
match_drug_rsid_data['freq'] = '.'
match_drug_rsid_data['depth'] = '.'
fliter = pd.concat([fliter, match_drug_rsid_data])
if fliter.empty:
raise UserWarning(
'chr: %s , end: %s 数据库未能匹配, 野生型0/0也未能匹配' % (chrom, end))
fliterdata = pd.concat([fliterdata, fliter])
# 生成过滤之后文件
respath = os.path.join(self.output_dir, f'{self.name}.chemo.res.csv')
if not fliterdata.empty:
fliterdata['probe'] = self.probe
respath = os.path.join(self.output_dir, f'{self.name}.chemo.res.txt')
fliterdata.to_csv(respath, sep='\t', index=False)
# 分类汇总 同位点,药物合并 drug.infos.txt
@ -256,6 +276,9 @@ if __name__ == "__main__":
parser.add_argument('-c', '--cancer', help="cancer", required=True)
parser.add_argument('-p', '--project', help="Project", required=True)
parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='')
parser.add_argument('-b', '--bam', help="Bamfile for sample", required=True)
parser.add_argument('-r', '--ref', help="ref fasta", required=True)
args = parser.parse_args()
chemorun = ChemoRun(args.database, args.probe, args.cancer, args.project, args.output_dir, args.name, args.vcf)
chemorun = ChemoRun(args.database, args.probe, args.cancer, args.project, args.output_dir, args.name, args.vcf,
args.bam, args.ref)
chemorun.run()

View File

@ -166,7 +166,11 @@ workflow pipeline {
probe=probe,
vcf=call_mutation.germline_vcf,
cancer=cancer,
project=project
project=project,
normal=normal,
tumor_rmdup_bam=alignment.tumor_rmdup_bam,
normal_rmdup_bam=alignment.normal_rmdup_bam,
ref=ref
}
call neoantigen.call_neoantigen as call_neoantigen {

View File

@ -7,12 +7,14 @@ task run_chemo {
String vcf
String cancer
String project
String bam
String ref
command {
if [ ! -d ${output_dir}/chemo ];then
mkdir ${output_dir}/chemo
fi
chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project}
chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project} -b ${bam} -r ${ref}
}
output {
@ -25,23 +27,45 @@ workflow call_chemo {
Boolean run=true
String name
String? normal
String output_dir
String probe
String vcf
String cancer
String project
String tumor_rmdup_bam
String? normal_rmdup_bam
String ref
if (run) {
call run_chemo {
if (defined(normal)) {
call run_chemo as run_chemo_normal {
input:
name=name,
output_dir=output_dir,
probe=probe,
vcf=vcf,
cancer=cancer,
project=project
project=project,
bam=normal_rmdup_bam,
ref=ref
}
}
if (!defined(normal)) {
call run_chemo as run_chemo_tumor {
input:
name=name,
output_dir=output_dir,
probe=probe,
vcf=vcf,
cancer=cancer,
project=project,
bam=tumor_rmdup_bam,
ref=ref
}
}
}
output {
String chemo_res = "${output_dir}/chemo/${name}.drug.res.txt"