流程更新
parent
c9f525b2bf
commit
bd66bc6612
|
|
@ -1,4 +1,5 @@
|
||||||
#! /usr/bin/env python3
|
#! /usr/bin/env python3
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
|
||||||
|
|
@ -1,114 +0,0 @@
|
||||||
#!/usr/bin/python3
|
|
||||||
# -*- coding: UTF-8 -*-
|
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
if len(sys.argv) != 3:
|
|
||||||
print(" ".join(['usage:python3', sys.argv[0], 'output_dir', 'name']))
|
|
||||||
sys.exit()
|
|
||||||
|
|
||||||
output_dir = sys.argv[1]
|
|
||||||
name = sys.argv[2]
|
|
||||||
snv_file = os.path.join(output_dir, 'mutation', f'{name}.somatic.hg19_multianno.filter.sum.pos.txt')
|
|
||||||
snv_file_new = os.path.join(output_dir, 'mutation', f'{name}.somatic.hg19_multianno.filter.sum.pos.dedup.txt')
|
|
||||||
|
|
||||||
fusion_file = os.path.join(output_dir, 'fusion', f'{name}.fusion.hg19_multianno.filter.fusion.pos.txt')
|
|
||||||
fusion_file_new = os.path.join(output_dir, 'fusion', f'{name}.fusion.hg19_multianno.filter.fusion.pos.dedup.txt')
|
|
||||||
|
|
||||||
cnv_file = os.path.join(output_dir, 'cnv', f'{name}.rmdup.cns.filter.pos.txt')
|
|
||||||
cnv_file_new = os.path.join(output_dir, 'cnvkit', f'{name}.rmdup.cns.filter.pos.dedup.txt')
|
|
||||||
|
|
||||||
# gm_snv_file = os.path.join(output_dir, '/mutation/', name, '.snvindel.Germline.pos.txt')
|
|
||||||
# gm_snv_file_new = os.path.join(output_dir, '/mutation/', name, '.snvindel.Germline.pos.dedup.txt')
|
|
||||||
|
|
||||||
open(snv_file_new, "w")
|
|
||||||
open(fusion_file_new, "w")
|
|
||||||
open(cnv_file_new, "w")
|
|
||||||
# open(gm_snv_file_new, "w")
|
|
||||||
##Evidence_Source_C及标签排序
|
|
||||||
df_mapping_1 = pd.DataFrame({
|
|
||||||
'Evidence_Source_C': ['FDA', 'NMPA', 'NCCN', '临床III期', '临床II期', '临床I期', '临床试验', '回顾性研究', '个案', '临床前研究'],
|
|
||||||
})
|
|
||||||
sort_mapping_1 = df_mapping_1.reset_index().set_index('Evidence_Source_C')
|
|
||||||
df_mapping_2 = pd.DataFrame({'标签': ['适应症', '非适应症', '.']})
|
|
||||||
sort_mapping_2 = df_mapping_2.reset_index().set_index('标签')
|
|
||||||
|
|
||||||
##snvindel处理
|
|
||||||
snv_size = os.path.getsize(snv_file)
|
|
||||||
if snv_size > 0:
|
|
||||||
data = pd.read_table(snv_file, sep="\t")
|
|
||||||
data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index'])
|
|
||||||
data['level2'] = data['标签'].map(sort_mapping_2['index'])
|
|
||||||
data.sort_values(by=['AAChange.refGene', 'level2', 'level1'], ascending=True, inplace=True)
|
|
||||||
data.drop(['level1', 'level2'], axis=1, inplace=True)
|
|
||||||
info = {}
|
|
||||||
for index, row in data.iterrows():
|
|
||||||
if re.search(r'敏感', row['Response_Type_C']):
|
|
||||||
if row['标签'] == '适应症':
|
|
||||||
info[row['AAChange.refGene'] + row['Drug']] = '1'
|
|
||||||
else:
|
|
||||||
if (row['AAChange.refGene'] + row['Drug']) in info.keys():
|
|
||||||
data.drop([index], inplace=True)
|
|
||||||
data.insert(0, '可信', 1)
|
|
||||||
data.to_csv(snv_file_new, index=False, sep='\t')
|
|
||||||
|
|
||||||
# ##germline snv/indel处理
|
|
||||||
# gm_snv_size = os.path.getsize(gm_snv_file)
|
|
||||||
# if gm_snv_size > 0:
|
|
||||||
# data = pd.read_table(gm_snv_file, sep="\t")
|
|
||||||
# data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index'])
|
|
||||||
# data['level2'] = data['标签'].map(sort_mapping_2['index'])
|
|
||||||
# data.sort_values(by=['AAChange.refGene', 'level2', 'level1'], ascending=True, inplace=True)
|
|
||||||
# data.drop(['level1', 'level2'], axis=1, inplace=True)
|
|
||||||
# info = {}
|
|
||||||
# for index, row in data.iterrows():
|
|
||||||
# if re.search(r'敏感', row['Response_Type_C']):
|
|
||||||
# if row['标签'] == '适应症':
|
|
||||||
# info[row['AAChange.refGene'] + row['Drug']] = '1'
|
|
||||||
# else:
|
|
||||||
# if (row['AAChange.refGene'] + row['Drug']) in info.keys():
|
|
||||||
# data.drop([index], inplace=True)
|
|
||||||
# data.insert(0, '可信', 1)
|
|
||||||
# data.to_csv(gm_snv_file_new, index=False, sep='\t')
|
|
||||||
|
|
||||||
##fusion处理
|
|
||||||
fusion_size = os.path.getsize(fusion_file)
|
|
||||||
if fusion_size > 0:
|
|
||||||
data = pd.read_table(fusion_file, sep="\t")
|
|
||||||
data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index'])
|
|
||||||
data['level2'] = data['标签'].map(sort_mapping_2['index'])
|
|
||||||
data.sort_values(by=['FUSION', 'level2', 'level1'], ascending=True, inplace=True)
|
|
||||||
data.drop(['level1', 'level2'], axis=1, inplace=True)
|
|
||||||
info = {}
|
|
||||||
for index, row in data.iterrows():
|
|
||||||
if re.search(r'敏感', row['Response_Type_C']):
|
|
||||||
if row['标签'] == '适应症':
|
|
||||||
info[row['FUSION'] + row['Drug']] = '1'
|
|
||||||
else:
|
|
||||||
if (row['FUSION'] + row['Drug']) in info.keys():
|
|
||||||
data.drop([index], inplace=True)
|
|
||||||
data.insert(0, '可信', 1)
|
|
||||||
data.to_csv(fusion_file_new, index=False, sep='\t')
|
|
||||||
|
|
||||||
##cnv处理
|
|
||||||
cnv_size = os.path.getsize(cnv_file)
|
|
||||||
if cnv_size > 0:
|
|
||||||
data = pd.read_table(cnv_file, sep="\t")
|
|
||||||
data['level1'] = data['Evidence_Source_C'].map(sort_mapping_1['index'])
|
|
||||||
data['level2'] = data['标签'].map(sort_mapping_2['index'])
|
|
||||||
data.sort_values(by=['Gene_Symbol', 'level2', 'level1'], ascending=True, inplace=True)
|
|
||||||
data.drop(['level1', 'level2'], axis=1, inplace=True)
|
|
||||||
info = {}
|
|
||||||
for index, row in data.iterrows():
|
|
||||||
if re.search(r'敏感', row['Response_Type_C']):
|
|
||||||
if row['标签'] == '适应症':
|
|
||||||
info[row['Gene_Symbol'] + row['Drug']] = '1'
|
|
||||||
else:
|
|
||||||
if (row['Gene_Symbol'] + row['Drug']) in info.keys():
|
|
||||||
data.drop([index], inplace=True)
|
|
||||||
data.insert(0, '可信', 1)
|
|
||||||
data.to_csv(cnv_file_new, index=False, sep='\t')
|
|
||||||
|
|
@ -30,14 +30,15 @@ while (<IN>) {
|
||||||
}
|
}
|
||||||
my @line = split(/\t/);
|
my @line = split(/\t/);
|
||||||
$line[7] =~ /Gene.refGene=(.*?);/;
|
$line[7] =~ /Gene.refGene=(.*?);/;
|
||||||
if ((grep {$1 =~ /$_/} @longindels) && ($_ =~ /SVTYPE=DEL/ || $_ =~ /SVTYPE=DUP/ || $_ =~ /SVTYPE=INS/)) {
|
my $gene = $1;
|
||||||
if ($1 eq "BCL2L11") {
|
if ((grep {$gene =~ /$_/} @longindels) && ($_ =~ /SVTYPE=DEL/ || $_ =~ /SVTYPE=DUP/ || $_ =~ /SVTYPE=INS/)) {
|
||||||
|
if ($gene eq "BCL2L11") {
|
||||||
if ($line[1] == '111883194') {
|
if ($line[1] == '111883194') {
|
||||||
print LONGINDEL join("\n", @pos) . "\n";
|
print LONGINDEL $_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
print LONGINDEL join("\n", @pos) . "\n";
|
print LONGINDEL $_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env perl
|
#!/usr/bin/env perl
|
||||||
|
|
||||||
use strict;
|
use strict;
|
||||||
#use warnings;
|
#use warnings;
|
||||||
use List::Util qw(sum);
|
use List::Util qw(sum);
|
||||||
|
|
@ -136,7 +137,7 @@ while (<IN>) {
|
||||||
$line[9] = join(":", ($gene, $hgvs));
|
$line[9] = join(":", ($gene, $hgvs));
|
||||||
}
|
}
|
||||||
elsif ($gene eq "MET") {
|
elsif ($gene eq "MET") {
|
||||||
$line[9] = join(":", ($gene, "exon14", "c.xxx"));
|
$line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx"));
|
||||||
$line[8] = 'skipping'
|
$line[8] = 'skipping'
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,78 @@
|
||||||
|
#!/usr/bin/env perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use List::Util qw(min max);
|
||||||
|
#max_length:最大的epitope长度
|
||||||
|
die "usage:perl $0 outputDir tumor_prefix" if @ARGV != 2;
|
||||||
|
|
||||||
|
my ($outputDir, $tumor_prefix) = @ARGV;
|
||||||
|
|
||||||
|
open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.fasta";
|
||||||
|
my %fa;
|
||||||
|
while (<IN>) {
|
||||||
|
if (/^>MT/) {
|
||||||
|
open OUT, ">$outputDir/neoantigen/MHC_Class_I/tmp.fa";
|
||||||
|
print OUT;
|
||||||
|
$_ =~ /MT\.(\d+)\./;
|
||||||
|
my $id = $1;
|
||||||
|
my $seq = <IN>;
|
||||||
|
print OUT $seq;
|
||||||
|
chomp $seq;
|
||||||
|
$fa{$id} = $seq;
|
||||||
|
system "predict.py -m netchop -n $outputDir/neoantigen/MHC_Class_I/tmp.fa >>$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt";
|
||||||
|
close OUT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unlink "$outputDir/neoantigen/MHC_Class_I/tmp.fa";
|
||||||
|
|
||||||
|
my %score;
|
||||||
|
open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.cleavage.txt";
|
||||||
|
while (<IN>) {
|
||||||
|
next unless /^\d+/;
|
||||||
|
chomp;
|
||||||
|
my @line = split;
|
||||||
|
$line[3] =~ /MT\.(\d+)\./;
|
||||||
|
$score{$1}{$line[0]} = $line[2];
|
||||||
|
}
|
||||||
|
open IN, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.tsv";
|
||||||
|
open OUT, ">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.txt";
|
||||||
|
open OUT2, ">$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt";
|
||||||
|
#my %neopt;
|
||||||
|
my $head = <IN>;
|
||||||
|
chomp $head;
|
||||||
|
print OUT "$head\tcleavage_score\n";
|
||||||
|
while (<IN>) {
|
||||||
|
chomp;
|
||||||
|
my @line = split(/\t/);
|
||||||
|
$line[44] =~ /^(\d+)\./;
|
||||||
|
my $id = $1;
|
||||||
|
my $pep = $line[18];
|
||||||
|
if (exists $fa{$id}) {
|
||||||
|
my $index = index($fa{$id}, $pep) + length($pep);
|
||||||
|
my $cleavage_score = $score{$id}{$index};
|
||||||
|
print OUT "$_\t$cleavage_score\n";
|
||||||
|
if ($line[21] <= 5000 and ($line[23] eq "NA" or $line[23] >= 1)) {
|
||||||
|
print OUT2 "$_\t$cleavage_score\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
print OUT "$_\tNA\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
system "sort -k 22 -n $outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt >$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt";
|
||||||
|
unlink "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.txt";
|
||||||
|
open SORT, "$outputDir/neoantigen/MHC_Class_I/${tumor_prefix}.all_epitopes.netchop.filter.sort.txt";
|
||||||
|
open OUT3, ">$outputDir/neoantigen/MHC_Class_I/neoantigen.txt";
|
||||||
|
print OUT3 "序号\tHLA分型\t基因\t多肽\t亲和力\t剪切效率\n";
|
||||||
|
my %pep;
|
||||||
|
my $bool = 0;
|
||||||
|
while (<SORT>) {
|
||||||
|
chomp;
|
||||||
|
my @line = split(/\t/);
|
||||||
|
if (not exists $pep{$line[18]}) {
|
||||||
|
$pep{$line[18]}++;
|
||||||
|
$bool += 1;
|
||||||
|
print OUT3 "$bool\t$line[14]\t$line[11]\t$line[18]\t$line[21]\t$line[53]\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -110,7 +110,7 @@ def process_judge_vcf(input_vcf, output_vcf):
|
||||||
if not line.startswith("#"):
|
if not line.startswith("#"):
|
||||||
fields = line.strip().split('\t')
|
fields = line.strip().split('\t')
|
||||||
info = fields[9].split(":")
|
info = fields[9].split(":")
|
||||||
percentage = float(info[4])
|
percentage = float(info[6])
|
||||||
|
|
||||||
if 0.1 <= percentage <= 0.9:
|
if 0.1 <= percentage <= 0.9:
|
||||||
b = 0.5
|
b = 0.5
|
||||||
|
|
@ -135,6 +135,9 @@ def merge_and_sort_files(matched_file, unmatched_file, output_file):
|
||||||
return output_file
|
return output_file
|
||||||
|
|
||||||
# 如果 unmatched_file 不为空,继续合并和排序操作
|
# 如果 unmatched_file 不为空,继续合并和排序操作
|
||||||
|
if os.stat(matched_file).st_size == 0:
|
||||||
|
matched_df = pd.DataFrame()
|
||||||
|
else:
|
||||||
matched_df = pd.read_csv(matched_file, sep='\t', header=None)
|
matched_df = pd.read_csv(matched_file, sep='\t', header=None)
|
||||||
unmatched_df = pd.read_csv(unmatched_file, sep='\t', header=None)
|
unmatched_df = pd.read_csv(unmatched_file, sep='\t', header=None)
|
||||||
|
|
||||||
|
|
@ -195,7 +198,7 @@ def select_cnvkit_vcf(vcf, bed, output_file):
|
||||||
line.split()[1] == str(position_list[i]) and line.split()[0] == str(chr_list[i]) and len(
|
line.split()[1] == str(position_list[i]) and line.split()[0] == str(chr_list[i]) and len(
|
||||||
line.split()[3]) < 2 and len(line.split()[4]) < 2]
|
line.split()[3]) < 2 and len(line.split()[4]) < 2]
|
||||||
for line in filtered_lines:
|
for line in filtered_lines:
|
||||||
p_value_str = line.split()[9].split(":")[4]
|
p_value_str = line.split()[9].split(":")[6]
|
||||||
p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str)
|
p_value = float(p_value_str[:-1]) / 100 if p_value_str[-1] == "%" else float(p_value_str)
|
||||||
if 0.1 <= p_value <= 0.9:
|
if 0.1 <= p_value <= 0.9:
|
||||||
result_data.append(line)
|
result_data.append(line)
|
||||||
|
|
@ -218,7 +221,7 @@ def paired_monitoring(name, somatic_vcf, germline_vcf, ref_bed, cnvkit_ref_bed,
|
||||||
# 处理胚系,根据bed筛选
|
# 处理胚系,根据bed筛选
|
||||||
select_position_output_file3 = os.path.join(output_dir, f'{name}_germline_matched.vcf')
|
select_position_output_file3 = os.path.join(output_dir, f'{name}_germline_matched.vcf')
|
||||||
select_position_output_file4 = os.path.join(output_dir, f'{name}_germline_unmatched.vcf')
|
select_position_output_file4 = os.path.join(output_dir, f'{name}_germline_unmatched.vcf')
|
||||||
Germline_matched_file, Germline_unmatched_file = select_position(germline_vcf, ref_bed,
|
germline_matched_file, germline_unmatched_file = select_position(germline_vcf, ref_bed,
|
||||||
select_position_output_file3,
|
select_position_output_file3,
|
||||||
select_position_output_file4)
|
select_position_output_file4)
|
||||||
# 处理体系,数值转换
|
# 处理体系,数值转换
|
||||||
|
|
@ -226,14 +229,14 @@ def paired_monitoring(name, somatic_vcf, germline_vcf, ref_bed, cnvkit_ref_bed,
|
||||||
somatic_matched_add_judge_file = process_judge_vcf(somatic_matched_file, process_judge_vcf_file1)
|
somatic_matched_add_judge_file = process_judge_vcf(somatic_matched_file, process_judge_vcf_file1)
|
||||||
# 处理胚系,数值转换
|
# 处理胚系,数值转换
|
||||||
process_judge_vcf_file2 = os.path.join(output_dir, f'{name}_germline_matched_add_judge.vcf')
|
process_judge_vcf_file2 = os.path.join(output_dir, f'{name}_germline_matched_add_judge.vcf')
|
||||||
germline_matched_add_judge_file = process_judge_vcf(Germline_matched_file, process_judge_vcf_file2)
|
germline_matched_add_judge_file = process_judge_vcf(germline_matched_file, process_judge_vcf_file2)
|
||||||
# 合并体系,将匹配到的和未匹配到bed的的合并
|
# 合并体系,将匹配到的和未匹配到bed的的合并
|
||||||
merge_and_sort_files_file1 = os.path.join(output_dir, f'{name}_somatic_merged.vcf')
|
merge_and_sort_files_file1 = os.path.join(output_dir, f'{name}_somatic_merged.vcf')
|
||||||
somatic_merged_file = merge_and_sort_files(somatic_matched_add_judge_file, somatic_unmatched_file,
|
somatic_merged_file = merge_and_sort_files(somatic_matched_add_judge_file, somatic_unmatched_file,
|
||||||
merge_and_sort_files_file1)
|
merge_and_sort_files_file1)
|
||||||
# 合并胚系,将匹配到的和未匹配到bed的的合并
|
# 合并胚系,将匹配到的和未匹配到bed的的合并
|
||||||
merge_and_sort_files_file2 = os.path.join(output_dir, f'{name}_germline__merged.vcf')
|
merge_and_sort_files_file2 = os.path.join(output_dir, f'{name}_germline__merged.vcf')
|
||||||
Germline_merged_file = merge_and_sort_files(germline_matched_add_judge_file, Germline_unmatched_file,
|
Germline_merged_file = merge_and_sort_files(germline_matched_add_judge_file, germline_unmatched_file,
|
||||||
merge_and_sort_files_file2)
|
merge_and_sort_files_file2)
|
||||||
# 合并胚系,体系,将体系,胚系两个合并文件再合并
|
# 合并胚系,体系,将体系,胚系两个合并文件再合并
|
||||||
result_pro_file = os.path.join(output_dir, f'{name}_result_pro.txt')
|
result_pro_file = os.path.join(output_dir, f'{name}_result_pro.txt')
|
||||||
|
|
|
||||||
|
|
@ -75,7 +75,7 @@ class PostProcess:
|
||||||
def txt_2_excel(path):
|
def txt_2_excel(path):
|
||||||
try:
|
try:
|
||||||
df = pd.read_csv(path, sep='\t')
|
df = pd.read_csv(path, sep='\t')
|
||||||
except pd.errors.EmptyDataError:
|
except (pd.errors.EmptyDataError, FileNotFoundError):
|
||||||
return []
|
return []
|
||||||
return df.to_dict('records')
|
return df.to_dict('records')
|
||||||
|
|
||||||
|
|
@ -179,6 +179,8 @@ class PostProcess:
|
||||||
filter_neg = os.path.join(self.path, 'mutation',
|
filter_neg = os.path.join(self.path, 'mutation',
|
||||||
f'{self.sample_name}.snp_indel.somatic.hg19_multianno.filter.sum.neg.txt')
|
f'{self.sample_name}.snp_indel.somatic.hg19_multianno.filter.sum.neg.txt')
|
||||||
|
|
||||||
|
tmb_file = os.path.join(self.path, 'tmb', f'{self.sample_name}.tmb.txt')
|
||||||
|
|
||||||
filter_sum_pos_res = list()
|
filter_sum_pos_res = list()
|
||||||
# 从pos_files中获取药物信息
|
# 从pos_files中获取药物信息
|
||||||
pos_check = check_file_exist_and_empty(filter_pos)
|
pos_check = check_file_exist_and_empty(filter_pos)
|
||||||
|
|
@ -219,7 +221,7 @@ class PostProcess:
|
||||||
neg['AMP_mut_level'] = 'IIII'
|
neg['AMP_mut_level'] = 'IIII'
|
||||||
neg_dict = neg.set_index(['Chr', 'Start', 'End'])['AMP_mut_level'].to_dict()
|
neg_dict = neg.set_index(['Chr', 'Start', 'End'])['AMP_mut_level'].to_dict()
|
||||||
|
|
||||||
filter_sum_res = list()
|
filter_sum_df = pd.DataFrame()
|
||||||
filter_sum_check = check_file_exist_and_empty(filter_sum)
|
filter_sum_check = check_file_exist_and_empty(filter_sum)
|
||||||
if not filter_sum_check:
|
if not filter_sum_check:
|
||||||
filter_sum_df = pd.read_csv(filter_sum, sep='\t')
|
filter_sum_df = pd.read_csv(filter_sum, sep='\t')
|
||||||
|
|
@ -228,6 +230,31 @@ class PostProcess:
|
||||||
level_dict.update(vus_dict)
|
level_dict.update(vus_dict)
|
||||||
level_dict.update(neg_dict)
|
level_dict.update(neg_dict)
|
||||||
filter_sum_df['AMP_mut_level'] = filter_sum_df.set_index(['Chr', 'Start', 'End']).index.map(level_dict)
|
filter_sum_df['AMP_mut_level'] = filter_sum_df.set_index(['Chr', 'Start', 'End']).index.map(level_dict)
|
||||||
|
|
||||||
|
cols = list(filter_sum_df.columns)
|
||||||
|
|
||||||
|
tmb_file_check = check_file_exist_and_empty(tmb_file)
|
||||||
|
if not tmb_file_check:
|
||||||
|
tmb_df = pd.read_csv(tmb_file, sep='\t')
|
||||||
|
key_cols = ['Chr', 'Start', 'End']
|
||||||
|
filter_sum_df = filter_sum_df.set_index(key_cols)
|
||||||
|
tmb_df = tmb_df.set_index(key_cols)
|
||||||
|
|
||||||
|
# 在filter_sum_df中的process列中追加字符串";tmb",对应tmb_df中的行 并且 是非 1,2类突变
|
||||||
|
filter_sum_df['process'] = filter_sum_df.index.map(
|
||||||
|
lambda x: filter_sum_df.at[x, 'process'] + ';tmb' if x in tmb_df.index and filter_sum_df.at[
|
||||||
|
x, 'AMP_mut_level'] not in ['I', 'II'] else filter_sum_df.at[x, 'process'])
|
||||||
|
|
||||||
|
# 找到tmb_df中不在filter_sum_df中的行,并将这些新的行添加到filter_sum_df中
|
||||||
|
new_rows = tmb_df[~tmb_df.index.isin(filter_sum_df.index)]
|
||||||
|
filter_sum_df = pd.concat([filter_sum_df, new_rows])
|
||||||
|
|
||||||
|
# 重置索引
|
||||||
|
filter_sum_df = filter_sum_df.reset_index()
|
||||||
|
|
||||||
|
# 按之前列排
|
||||||
|
filter_sum_df = filter_sum_df[cols]
|
||||||
|
|
||||||
filter_sum_df = filter_sum_df.fillna('.')
|
filter_sum_df = filter_sum_df.fillna('.')
|
||||||
filter_sum_res = filter_sum_df.to_dict('records')
|
filter_sum_res = filter_sum_df.to_dict('records')
|
||||||
|
|
||||||
|
|
@ -429,11 +456,62 @@ class PostProcess:
|
||||||
print(file_check)
|
print(file_check)
|
||||||
|
|
||||||
def longindel(self):
|
def longindel(self):
|
||||||
longindel_files = glob.glob(
|
|
||||||
os.path.join(self.path, 'fusion', '*.longindel.pos.txt'))
|
filter_sum_pos = os.path.join(self.path, 'fusion',
|
||||||
if longindel_files:
|
f'{self.sample_name}.longindel.hg19_multianno.filter.pos.txt')
|
||||||
# return self.txt_2_excel(longindel_files[0])
|
filter_sum_pos_check = check_file_exist_and_empty(filter_sum_pos)
|
||||||
self.sheet['longindel'] = self.txt_2_excel(longindel_files[0])
|
|
||||||
|
filter_sum_pos_res = list()
|
||||||
|
pos_dict = dict()
|
||||||
|
|
||||||
|
if not filter_sum_pos_check:
|
||||||
|
filter_sum_pos_df = pd.read_csv(filter_sum_pos, sep='\t')
|
||||||
|
# 添加常规列
|
||||||
|
filter_sum_pos_df = self._add_columns(filter_sum_pos_df)
|
||||||
|
# 添加基因功能
|
||||||
|
filter_sum_pos_df = self._add_gene_function(filter_sum_pos_df, colname='ref_gene')
|
||||||
|
# 药物处理
|
||||||
|
self.drug_parse(filter_sum_pos_df['DrugCn'].to_list())
|
||||||
|
filter_sum_pos_df['Validated'] = 1
|
||||||
|
filter_sum_pos_df = filter_sum_pos_df.fillna('.')
|
||||||
|
|
||||||
|
grouped_df = filter_sum_pos_df.groupby(['#CHROM', 'POS', 'REF', 'ALT'])
|
||||||
|
# 对每个分组进行操作
|
||||||
|
for group_name, group_data in grouped_df:
|
||||||
|
chr, pos, ref, alt = group_name
|
||||||
|
if any(group_data['AMP_mut_level'] == 'I'):
|
||||||
|
filter_condition = (filter_sum_pos_df['#CHROM'] == chr) & \
|
||||||
|
(filter_sum_pos_df['POS'] == pos) & \
|
||||||
|
(filter_sum_pos_df['REF'] == ref) & \
|
||||||
|
(filter_sum_pos_df['ALT'] == alt)
|
||||||
|
filter_sum_pos_df.loc[filter_condition, 'AMP_mut_level'] = 'I'
|
||||||
|
|
||||||
|
pos_dict = filter_sum_pos_df.set_index(['#CHROM', 'POS', 'REF', 'ALT'])['AMP_mut_level'].to_dict()
|
||||||
|
filter_sum_pos_res = filter_sum_pos_df.to_dict('records')
|
||||||
|
|
||||||
|
filter_sum = os.path.join(self.path, 'fusion', f'{self.sample_name}.longindel.hg19_multianno.filter.txt')
|
||||||
|
filter_sum_check = check_file_exist_and_empty(filter_sum)
|
||||||
|
filter_sum_res = list()
|
||||||
|
if not filter_sum_check:
|
||||||
|
filter_sum_df = pd.read_csv(filter_sum, sep='\t')
|
||||||
|
|
||||||
|
filter_sum_df['Validated'] = 1
|
||||||
|
level_dict = dict()
|
||||||
|
level_dict.update(pos_dict)
|
||||||
|
filter_sum_df['AMP_mut_level'] = filter_sum_df.set_index(['#CHROM', 'POS', 'REF', 'ALT']).index.map(
|
||||||
|
level_dict)
|
||||||
|
filter_sum_df = filter_sum_df.fillna('.')
|
||||||
|
filter_sum_res = filter_sum_df.to_dict('records')
|
||||||
|
|
||||||
|
self.sheet['longindel_mut'] = filter_sum_res
|
||||||
|
self.sheet['longindel_drug'] = filter_sum_pos_res
|
||||||
|
|
||||||
|
def neoantigen(self):
|
||||||
|
neoantigen = os.path.join(self.path, 'neoantigen', f'MHC_Class_I', 'neoantigen.txt')
|
||||||
|
hla = os.path.join(self.path, 'neoantigen', f'hla', f'{self.normal_name}_result.tsv')
|
||||||
|
|
||||||
|
self.sheet['neoantigen'] = self.txt_2_excel(neoantigen)
|
||||||
|
self.sheet['hla'] = self.txt_2_excel(hla)
|
||||||
|
|
||||||
def qc(self):
|
def qc(self):
|
||||||
qc_files = glob.glob(os.path.join(self.path, 'qc', '*_qc.txt'))
|
qc_files = glob.glob(os.path.join(self.path, 'qc', '*_qc.txt'))
|
||||||
|
|
@ -452,17 +530,18 @@ class PostProcess:
|
||||||
def collect(self):
|
def collect(self):
|
||||||
writer = pd.ExcelWriter(self.outpath)
|
writer = pd.ExcelWriter(self.outpath)
|
||||||
self.cms()
|
self.cms()
|
||||||
|
self.qc()
|
||||||
self.snv()
|
self.snv()
|
||||||
self.fusion()
|
self.fusion()
|
||||||
|
self.longindel()
|
||||||
self.cnv()
|
self.cnv()
|
||||||
self.msi()
|
self.msi()
|
||||||
self.germline()
|
self.germline()
|
||||||
self.heredity()
|
self.heredity()
|
||||||
self.heredity_res()
|
self.heredity_res()
|
||||||
self.longindel()
|
|
||||||
self.chemo()
|
self.chemo()
|
||||||
self.indication()
|
self.indication()
|
||||||
self.qc()
|
self.neoantigen()
|
||||||
self.drugs()
|
self.drugs()
|
||||||
|
|
||||||
# 遍历CSV文件列表
|
# 遍历CSV文件列表
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
run_wdl_path = os.path.join(os.path.dirname(__file__), 'run_wdl.py')
|
run_wdl_path = os.path.join(os.path.dirname(__file__), 'run_wdl.py')
|
||||||
|
|
||||||
|
|
@ -42,13 +43,15 @@ if __name__ == '__main__':
|
||||||
if not os.path.exists(res_path):
|
if not os.path.exists(res_path):
|
||||||
os.makedirs(res_path)
|
os.makedirs(res_path)
|
||||||
|
|
||||||
|
logname = datetime.now().strftime("%m%d%H%M")
|
||||||
|
|
||||||
cmd = f'nohup python ' \
|
cmd = f'nohup python ' \
|
||||||
f'{run_wdl_path} -n {args.barcode} -s {args.normal} ' \
|
f'{run_wdl_path} -n {args.barcode} -s {args.normal} ' \
|
||||||
f'{"-u " if args.umi else ""} -i {args.input_dir} ' \
|
f'{"-u " if args.umi else ""} -i {args.input_dir} ' \
|
||||||
f'-node {args.start_node} ' \
|
f'-node {args.start_node} ' \
|
||||||
f'-o {res_path} -b {args.probe} -p {args.project} -c {args.cancer} -w {args.wdl} ' \
|
f'-o {res_path} -b {args.probe} -p {args.project} -c {args.cancer} -w {args.wdl} ' \
|
||||||
f'> {res_path}/{args.barcode}_run.log ' \
|
f'> {res_path}/{args.barcode}_{logname}_run.log ' \
|
||||||
f'2>> {res_path}/{args.barcode}_run.log &'
|
f'2>> {res_path}/{args.barcode}_{logname}_run.log &'
|
||||||
# with open(os.path.join(res_path, 'exec'), 'w') as execfile:
|
# with open(os.path.join(res_path, 'exec'), 'w') as execfile:
|
||||||
# execfile.write(cmd + '\n')
|
# execfile.write(cmd + '\n')
|
||||||
os.system(cmd)
|
os.system(cmd)
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ import json
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
@ -80,7 +81,8 @@ def run(barcode, normal, umi, input_dir, output_dir, project, cancer, probe, wdl
|
||||||
arg = {key: value for key, value in arg.items() if value not in (None, '', False)}
|
arg = {key: value for key, value in arg.items() if value not in (None, '', False)}
|
||||||
|
|
||||||
# generate json
|
# generate json
|
||||||
jsfile_path = os.path.join(output_dir, f'{barcode}.json')
|
logname = datetime.now().strftime("%m%d%H%M")
|
||||||
|
jsfile_path = os.path.join(output_dir, f'{barcode}_{logname}.json')
|
||||||
with open(jsfile_path, 'w') as jsfile:
|
with open(jsfile_path, 'w') as jsfile:
|
||||||
jsfile.write(json.dumps(arg, indent=4, ensure_ascii=False))
|
jsfile.write(json.dumps(arg, indent=4, ensure_ascii=False))
|
||||||
|
|
||||||
|
|
@ -127,7 +129,7 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('-p', '--project', help="project", required=True)
|
parser.add_argument('-p', '--project', help="project", required=True)
|
||||||
parser.add_argument('-c', '--cancer', help="cancer", required=True)
|
parser.add_argument('-c', '--cancer', help="cancer", required=True)
|
||||||
parser.add_argument('-b', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True)
|
parser.add_argument('-b', '--probe', help="probe, 682, 624, 160, 17 for now ", required=True)
|
||||||
parser.add_argument('-w', '--wdl', help="wdl", default='/home/zhangchao/project/pipeline/workflow/pipeline.wdl')
|
parser.add_argument('-w', '--wdl', help="wdl", default='$WORKFLOW/pipeline.wdl')
|
||||||
parser.add_argument('-node', '--start_node',
|
parser.add_argument('-node', '--start_node',
|
||||||
help="node begain to run; 'addQc', 'addAlignment', "
|
help="node begain to run; 'addQc', 'addAlignment', "
|
||||||
"'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo',"
|
"'addTarget', 'addFusion', 'addCnv', 'addMsi', 'addChemo',"
|
||||||
|
|
|
||||||
|
|
@ -6,10 +6,10 @@ die "useage:perl $0 input output cancer_type" unless @ARGV == 3;
|
||||||
my ($input, $output, $cancer_type) = @ARGV;
|
my ($input, $output, $cancer_type) = @ARGV;
|
||||||
|
|
||||||
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public";
|
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public";
|
||||||
print "Fusion药物注释使用public路径:$public_path\n";
|
print "Longindel药物注释使用public路径:$public_path\n";
|
||||||
|
|
||||||
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
|
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
|
||||||
print "Fusion药物注释使用路径:$database_path\n";
|
print "Longindel药物注释使用路径:$database_path\n";
|
||||||
|
|
||||||
open MUT, "$database_path/fusion.csv";
|
open MUT, "$database_path/fusion.csv";
|
||||||
<MUT>;
|
<MUT>;
|
||||||
|
|
@ -26,7 +26,8 @@ my %therapy;
|
||||||
while (<THERAPY>) {
|
while (<THERAPY>) {
|
||||||
chomp;
|
chomp;
|
||||||
my @line = split("\t");
|
my @line = split("\t");
|
||||||
push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
|
# push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
|
||||||
|
push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
|
||||||
}
|
}
|
||||||
|
|
||||||
##药物翻译信息
|
##药物翻译信息
|
||||||
|
|
@ -106,23 +107,38 @@ while (<IN>) {
|
||||||
my @splitline = split(/\t/);
|
my @splitline = split(/\t/);
|
||||||
|
|
||||||
my $freq = (split(/:/, $splitline[9]))[9] / (split(/:/, $splitline[9]))[7];
|
my $freq = (split(/:/, $splitline[9]))[9] / (split(/:/, $splitline[9]))[7];
|
||||||
my $gene;
|
|
||||||
if ($_ =~ /Gene\.refGene=([^;]+)/) {
|
|
||||||
$gene = $1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (exists $therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}) {
|
if (exists $therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}) {
|
||||||
|
print "$freq\n";
|
||||||
foreach my $entry (@{$therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}}) {
|
foreach my $entry (@{$therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}}) {
|
||||||
|
|
||||||
my @line = split("\t", $entry);
|
my @line = split("\t", $entry);
|
||||||
if (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] =~ /$cancer_type|solid tumor/i) {
|
if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) {
|
||||||
|
# push @pos, "$_\t.\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
||||||
push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
||||||
}
|
}
|
||||||
elsif (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] !~ /$cancer_type|solid tumor/i) {
|
elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) {
|
||||||
|
# push @pos, "$_\t.\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
||||||
push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
||||||
}
|
}
|
||||||
elsif ($line[2] =~ /$cancer_type|solid tumor/i) {
|
elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) {
|
||||||
|
# push @pos, "$_\t.\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
||||||
push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# my @line = split("\t", $entry);
|
||||||
|
# if (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] =~ /$cancer_type|solid tumor/i) {
|
||||||
|
# push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
||||||
|
# }
|
||||||
|
# elsif (($line[5] eq "FDA" or $line[5] eq "NCCN" or $line[5] eq "NMPA") and $line[2] !~ /$cancer_type|solid tumor/i) {
|
||||||
|
# push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
||||||
|
# }
|
||||||
|
# elsif ($line[2] =~ /$cancer_type|solid tumor/i) {
|
||||||
|
# push @pos, "$_\tc\.394+1479_394+4381del\tBCL2L11:NM_001204106:intron2:c\.394+1479_394+4381del\t" . $freq . "\t" . join("\t", @line[0 .. 9]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
|
||||||
|
# }
|
||||||
|
# else {
|
||||||
|
# print "未匹配到"
|
||||||
|
# }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Binary file not shown.
File diff suppressed because one or more lines are too long
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
import "./wdl/qc.wdl"
|
import "./wdl/qc.wdl"
|
||||||
import "./wdl/alignment.wdl"
|
import "./wdl/alignment.wdl"
|
||||||
import "./wdl/call_mutation.wdl"
|
import "./wdl/call_mutation.wdl"
|
||||||
|
|
@ -199,6 +200,7 @@ workflow pipeline {
|
||||||
msi=call_msi.msi_txt,
|
msi=call_msi.msi_txt,
|
||||||
hereditary=call_hereditary.hereditary_txt,
|
hereditary=call_hereditary.hereditary_txt,
|
||||||
chemo=call_chemo.chemo_res,
|
chemo=call_chemo.chemo_res,
|
||||||
|
neoantigen=call_neoantigen.neoantigen_txt,
|
||||||
pollution=call_pollution.pollution_res,
|
pollution=call_pollution.pollution_res,
|
||||||
name=tumor,
|
name=tumor,
|
||||||
normal=normal,
|
normal=normal,
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task mutation_calling_umi {
|
task mutation_calling_umi {
|
||||||
String name
|
String name
|
||||||
String output_dir
|
String output_dir
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task run_chemo {
|
task run_chemo {
|
||||||
String name
|
String name
|
||||||
String output_dir
|
String output_dir
|
||||||
|
|
@ -10,11 +11,12 @@ task run_chemo {
|
||||||
if [ ! -d ${output_dir}/chemo ];then
|
if [ ! -d ${output_dir}/chemo ];then
|
||||||
mkdir ${output_dir}/chemo
|
mkdir ${output_dir}/chemo
|
||||||
fi
|
fi
|
||||||
|
|
||||||
chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project}
|
chemo.py -d $DATABASE/chemo_database.xlsx -probe ${probe} -n ${name} -v ${vcf} -o ${output_dir}/chemo -c ${cancer} -p ${project}
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String chemo_res = "${output_dir}/chemo/${name}.drug.res.txt"
|
String run_chemo_res = "${output_dir}/chemo/${name}.drug.res.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task cnv_single {
|
task cnv_single {
|
||||||
String name
|
String name
|
||||||
String output_dir
|
String output_dir
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task run_hereditary {
|
task run_hereditary {
|
||||||
String name
|
String name
|
||||||
String output_dir
|
String output_dir
|
||||||
|
|
@ -8,6 +9,7 @@ task run_hereditary {
|
||||||
if [ ! -d ${output_dir}/hereditary ];then
|
if [ ! -d ${output_dir}/hereditary ];then
|
||||||
mkdir ${output_dir}/hereditary
|
mkdir ${output_dir}/hereditary
|
||||||
fi
|
fi
|
||||||
|
|
||||||
hereditary.py -d $DATABASE/hereditary_database.xlsx -p ${project} -n ${name} -f ${filter_txt} -o ${output_dir}/hereditary
|
hereditary.py -d $DATABASE/hereditary_database.xlsx -p ${project} -n ${name} -f ${filter_txt} -o ${output_dir}/hereditary
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task msi_single {
|
task msi_single {
|
||||||
String name
|
String name
|
||||||
String bed
|
String bed
|
||||||
|
|
@ -18,7 +19,7 @@ task msi_single {
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String msi_txt = "${output_dir}/msi/${name}.msi.txt"
|
String run_msi_txt = "${output_dir}/msi/${name}.msi.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -44,7 +45,7 @@ task msi_paired {
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String msi_txt = "${output_dir}/msi/${name}.msi.txt"
|
String run_msi_txt = "${output_dir}/msi/${name}.msi.txt"
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task run_neoantigen {
|
task run_neoantigen {
|
||||||
String tumor
|
String tumor
|
||||||
String? normal
|
String? normal
|
||||||
|
|
@ -12,7 +13,7 @@ task run_neoantigen {
|
||||||
command <<<
|
command <<<
|
||||||
|
|
||||||
if [ ! -d ${output_dir}/neoantigen/hla ];then
|
if [ ! -d ${output_dir}/neoantigen/hla ];then
|
||||||
mkdir ${output_dir}/neoantigen/hla
|
mkdir -p ${output_dir}/neoantigen/hla
|
||||||
fi
|
fi
|
||||||
|
|
||||||
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_1.bam \
|
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${output_dir}/neoantigen/hla/fished_1.bam \
|
||||||
|
|
@ -145,7 +146,7 @@ task run_neoantigen {
|
||||||
/data
|
/data
|
||||||
|
|
||||||
dos2unix ${output_dir}/neoantigen/MHC_Class_I/*.all_epitopes.tsv
|
dos2unix ${output_dir}/neoantigen/MHC_Class_I/*.all_epitopes.tsv
|
||||||
# perl ${output_dir}/netchop.pl $outputDir $name $tumor $max_peptide_length
|
netchop.pl ${output_dir} ${tumor}
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -178,4 +179,8 @@ workflow call_neoantigen {
|
||||||
sample_type=if umi then 'c' else 't'
|
sample_type=if umi then 'c' else 't'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
output {
|
||||||
|
String neoantigen_txt = "${output_dir}neoantigen/MHC_Class_I/neoantigen.txt"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task run_pollution {
|
task run_pollution {
|
||||||
String name
|
String name
|
||||||
String output_dir
|
String output_dir
|
||||||
|
|
@ -20,7 +21,7 @@ task run_pollution {
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String pollution_res = "${output_dir}/pollution/${name}_pollution.csv"
|
String run_pollution_res = "${output_dir}/pollution/${name}_pollution.csv"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task run_post {
|
task run_post {
|
||||||
String? mutation
|
String? mutation
|
||||||
String? fusion
|
String? fusion
|
||||||
|
|
@ -5,6 +6,7 @@ task run_post {
|
||||||
String? msi
|
String? msi
|
||||||
String? hereditary
|
String? hereditary
|
||||||
String? chemo
|
String? chemo
|
||||||
|
String? neoantigen
|
||||||
String? pollution
|
String? pollution
|
||||||
String name
|
String name
|
||||||
String? normal
|
String? normal
|
||||||
|
|
@ -22,7 +24,7 @@ task run_post {
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String merged = "${output_dir}/report/${name}.merged_file.xlsx"
|
String run_merged = "${output_dir}/report/${name}.merged_file.xlsx"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -37,6 +39,7 @@ workflow call_postprocess {
|
||||||
String? hereditary
|
String? hereditary
|
||||||
String? pollution
|
String? pollution
|
||||||
String? chemo
|
String? chemo
|
||||||
|
String? neoantigen
|
||||||
String name
|
String name
|
||||||
String? normal
|
String? normal
|
||||||
String output_dir
|
String output_dir
|
||||||
|
|
@ -52,6 +55,7 @@ workflow call_postprocess {
|
||||||
msi=msi,
|
msi=msi,
|
||||||
hereditary=hereditary,
|
hereditary=hereditary,
|
||||||
chemo=chemo,
|
chemo=chemo,
|
||||||
|
neoantigen=neoantigen,
|
||||||
pollution=pollution,
|
pollution=pollution,
|
||||||
name=name,
|
name=name,
|
||||||
normal=normal,
|
normal=normal,
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task runqc {
|
task runqc {
|
||||||
String name
|
String name
|
||||||
String input_dir
|
String input_dir
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task run_statistics {
|
task run_statistics {
|
||||||
String name
|
String name
|
||||||
String output_dir
|
String output_dir
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
task run_tmb {
|
task run_tmb {
|
||||||
String name
|
String name
|
||||||
String file
|
String file
|
||||||
|
|
@ -19,7 +20,7 @@ task run_tmb {
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String tmb_txt = "${output_dir}/tmb/${name}.tmb.txt"
|
String run_tmb_txt = "${output_dir}/tmb/${name}.tmb.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -50,7 +51,7 @@ workflow call_tmb {
|
||||||
name=name,
|
name=name,
|
||||||
file=file,
|
file=file,
|
||||||
project=project,
|
project=project,
|
||||||
sample_type='c',
|
sample_type='t',
|
||||||
output_dir=output_dir
|
output_dir=output_dir
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue