需求处理
parent
0a60e18e93
commit
6a22cb8463
|
|
@ -104,26 +104,39 @@ class ChemoRun:
|
|||
alt = record.alts[0]
|
||||
# gt = '/'.join(list(map(str, sorted(record.samples.get(record.samples.keys()[0]).get('GT')))))
|
||||
freq = record.samples.get(record.samples.keys()[-1]).get('AF')[0]
|
||||
depth = record.samples.get(record.samples.keys()[-1]).get('DP')
|
||||
if freq > 0.9:
|
||||
gt = '1/1'
|
||||
elif 0.9 >= freq > 0.1:
|
||||
gt = '0/1'
|
||||
else:
|
||||
gt = '0/0'
|
||||
fliter = pd.concat([fliter, drug_rsid_data[
|
||||
match_drug_rsid_data = drug_rsid_data[
|
||||
(drug_rsid_data['chr'] == chrom) &
|
||||
(drug_rsid_data['end'] == end) &
|
||||
(drug_rsid_data['ref'] == ref) &
|
||||
(drug_rsid_data['alt'] == alt) &
|
||||
(drug_rsid_data['genotype'] == gt)
|
||||
]])
|
||||
]
|
||||
match_drug_rsid_data = match_drug_rsid_data.reset_index()
|
||||
match_drug_rsid_data['chr'] = chrom
|
||||
match_drug_rsid_data['pos'] = end
|
||||
match_drug_rsid_data['freq'] = freq
|
||||
match_drug_rsid_data['depth'] = depth
|
||||
fliter = pd.concat([fliter, match_drug_rsid_data])
|
||||
|
||||
if fliter.empty:
|
||||
fliter = pd.concat([fliter, drug_rsid_data[
|
||||
match_drug_rsid_data = drug_rsid_data[
|
||||
(drug_rsid_data['chr'] == chrom) &
|
||||
(drug_rsid_data['end'] == end) &
|
||||
(drug_rsid_data['genotype'] == '0/0')
|
||||
]])
|
||||
]
|
||||
match_drug_rsid_data = match_drug_rsid_data.reset_index()
|
||||
match_drug_rsid_data['chr'] = chrom
|
||||
match_drug_rsid_data['pos'] = end
|
||||
match_drug_rsid_data['freq'] = '.'
|
||||
match_drug_rsid_data['depth'] = '.'
|
||||
fliter = pd.concat([fliter, match_drug_rsid_data])
|
||||
|
||||
if fliter.empty:
|
||||
raise UserWarning(
|
||||
|
|
@ -136,7 +149,8 @@ class ChemoRun:
|
|||
fliterdata.to_csv(respath, sep='\t', index=False)
|
||||
|
||||
# 分类汇总 同位点,药物合并 drug.infos.txt
|
||||
drugrsid = fliterdata[['drugname', 'genename', 'rsid', 'result', 'level', 'tips', 'drugsort']]
|
||||
drugrsid = fliterdata[
|
||||
['drugname', 'genename', 'rsid', 'result', 'level', 'tips', 'drugsort', 'chr', 'pos', 'freq', 'depth']]
|
||||
drugrsid = drugrsid.drop_duplicates()
|
||||
resdrugrsid = drugrsid.groupby(['drugname', 'genename', 'rsid', 'result', 'level', 'drugsort'])['tips'].agg(
|
||||
','.join).reset_index()
|
||||
|
|
|
|||
|
|
@ -30,7 +30,8 @@ chomp($head);
|
|||
|
||||
my @columns = split("\t", $head);
|
||||
my $new_head = join("\t", "Validated", "ClinicalSign", @columns[0 .. 6],
|
||||
"Freq", @columns[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102], "Oncogenic", "Mutation_Effect", "genetag", "process");
|
||||
"Freq", @columns[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102], "Oncogenic", "Mutation_Effect", "is_oncogene",
|
||||
"is_tumor_suppressor_gene", "genetag", "process");
|
||||
|
||||
if (!($pipeline eq 'somatic' || $pipeline eq 'tmb' || $pipeline eq 'hotspot' || $pipeline eq 'germline')) {
|
||||
die "useage: pipeline must be 'somatic' or 'germline' or 'hotspot or tmb'";
|
||||
|
|
@ -50,7 +51,9 @@ my @hhr2 = @$hhr2_ref;
|
|||
my @promoter = @$promoter_ref;
|
||||
|
||||
my %transcript = transcript();
|
||||
my %oncogenic = get_oncogenic();
|
||||
my ($oncogenic, $is_oncogene) = get_oncogenic();
|
||||
my %oncogenic = %$oncogenic;
|
||||
my %is_oncogene = %$is_oncogene;
|
||||
|
||||
while (<IN>) {
|
||||
chomp;
|
||||
|
|
@ -144,7 +147,7 @@ while (<IN>) {
|
|||
}
|
||||
elsif ($line[9] eq '.') {
|
||||
# splicing 位点
|
||||
if ($line[5] =~ /splicing/) {
|
||||
if (($line[5] =~ /splicing/) or ($pipeline eq 'hotspot')) {
|
||||
my @hgvs = split(/;/, $line[7]);
|
||||
my $hgvs = $hgvs[0];
|
||||
my $transcript_gene;
|
||||
|
|
@ -166,6 +169,11 @@ while (<IN>) {
|
|||
$hgvs =~ s/exon(\d+)/intron$intron;exon$exon/;
|
||||
$line[9] = join(":", ($gene, $hgvs));
|
||||
}
|
||||
# 不是前面2种情况,hotspot强制转换hgvs
|
||||
elsif ($pipeline eq 'hotspot') {
|
||||
print "$hgvs\n";
|
||||
$line[9] = join(":", ($gene, $hgvs));
|
||||
}
|
||||
else {
|
||||
push @reason, 'not_need_spl';
|
||||
}
|
||||
|
|
@ -177,6 +185,9 @@ while (<IN>) {
|
|||
|
||||
}
|
||||
else {
|
||||
if ($line[8] eq 'intron') {
|
||||
push @reason, 'not_need_spl_inron';
|
||||
}
|
||||
my @hgvs = split(/,/, $line[9]);
|
||||
my $hgvs = $hgvs[0];
|
||||
my $transcript_gene;
|
||||
|
|
@ -214,6 +225,13 @@ while (<IN>) {
|
|||
if ((grep {$_ eq $gene} @promoter) and ($pipeline eq 'somatic') and ($gene eq 'TERT')
|
||||
and ($line[1] eq '1295228' and $line[4] eq 'A') or ($line[1] eq '1295250' and $line[4] eq 'A')) {
|
||||
@reason = ();
|
||||
if ($line[1] eq '1295228') {
|
||||
$line[9] = 'TERT:NM_198253:/:c.-124C>T (C228T)';
|
||||
}
|
||||
else {
|
||||
$line[9] = 'TERT:NM_198253:/:c.-146C>T (C250T)';
|
||||
}
|
||||
$line[8] = 'promoter';
|
||||
}
|
||||
|
||||
if (@reason) {
|
||||
|
|
@ -221,17 +239,27 @@ while (<IN>) {
|
|||
next;
|
||||
}
|
||||
|
||||
my ($oncogenic_col, $mut_effect_col);
|
||||
my ($oncogenic_col, $mut_effect_col, $is_oncogene_gene, $is_tumor_suppressor_gene);
|
||||
my $get_key = "$gene\_$protein";
|
||||
if (exists $oncogenic{lc $get_key}) {
|
||||
my @get_values = split('&&', $oncogenic{lc $get_key});
|
||||
$oncogenic_col = $get_values[0];
|
||||
$mut_effect_col = $get_values[1];
|
||||
|
||||
}
|
||||
else {
|
||||
$oncogenic_col = '.';
|
||||
$mut_effect_col = '.';
|
||||
}
|
||||
if (exists $is_oncogene{lc $gene}) {
|
||||
my @get_values = split('&&', $is_oncogene{lc $gene});
|
||||
$is_oncogene_gene = $get_values[0];
|
||||
$is_tumor_suppressor_gene = $get_values[1];
|
||||
}
|
||||
else {
|
||||
$is_oncogene_gene = '.';
|
||||
$is_tumor_suppressor_gene = '.';
|
||||
}
|
||||
|
||||
my $clisig;
|
||||
if ($line[16] =~ /Likely_pathogenic|drug/i) {
|
||||
|
|
@ -266,7 +294,9 @@ while (<IN>) {
|
|||
|
||||
$line[6] = $gene;
|
||||
my $genetag = join(";", @genetags);
|
||||
my $new_line = join("\t", $validated, $clisig, @line[0 .. 6], $freq, @line[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102], $oncogenic_col, $mut_effect_col, $genetag, $pipeline);
|
||||
my $new_line = join("\t", $validated, $clisig,
|
||||
@line[0 .. 6], $freq, @line[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102],
|
||||
$oncogenic_col, $mut_effect_col, $is_oncogene_gene, $is_tumor_suppressor_gene, $genetag, $pipeline);
|
||||
print OUT "$new_line\n";
|
||||
print TAG_OUT "PASS\t", join("\t", @line), "\n";
|
||||
|
||||
|
|
@ -339,13 +369,16 @@ sub transcript {
|
|||
# oncokb snv_indel 临床意义定义
|
||||
sub get_oncogenic {
|
||||
my %sig;
|
||||
my %sig_gene;
|
||||
open SNV_INDEL, "$database_path/snv_indel_mutation.csv";
|
||||
<SNV_INDEL>;
|
||||
while (<SNV_INDEL>) {
|
||||
chomp;
|
||||
$_ =~ s/\r//g;
|
||||
my @line = split(",");
|
||||
my $key = join("_", @line[0, 1]);
|
||||
$sig{lc $key} = join("&&", @line[2, 3]);
|
||||
$sig{lc $key} = join("&&", @line[2, 3, 7, 8]);
|
||||
$sig_gene{lc $line[0]} = join("&&", @line[7, 8]);
|
||||
}
|
||||
return %sig;
|
||||
return (\%sig, \%sig_gene);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -263,6 +263,7 @@ class PostProcess:
|
|||
filter_sum_df = filter_sum_df[cols]
|
||||
|
||||
filter_sum_df = filter_sum_df.fillna('.')
|
||||
filter_sum_df = filter_sum_df.sort_values(by='AMP_mut_level')
|
||||
filter_sum_res = filter_sum_df.to_dict('records')
|
||||
|
||||
self.sheet['target_mut'] = filter_sum_res
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue