需求处理

master
chaopower 2024-01-29 13:03:17 +08:00
parent 0a60e18e93
commit 6a22cb8463
4 changed files with 66 additions and 18 deletions

View File

@ -104,26 +104,39 @@ class ChemoRun:
alt = record.alts[0] alt = record.alts[0]
# gt = '/'.join(list(map(str, sorted(record.samples.get(record.samples.keys()[0]).get('GT'))))) # gt = '/'.join(list(map(str, sorted(record.samples.get(record.samples.keys()[0]).get('GT')))))
freq = record.samples.get(record.samples.keys()[-1]).get('AF')[0] freq = record.samples.get(record.samples.keys()[-1]).get('AF')[0]
depth = record.samples.get(record.samples.keys()[-1]).get('DP')
if freq > 0.9: if freq > 0.9:
gt = '1/1' gt = '1/1'
elif 0.9 >= freq > 0.1: elif 0.9 >= freq > 0.1:
gt = '0/1' gt = '0/1'
else: else:
gt = '0/0' gt = '0/0'
fliter = pd.concat([fliter, drug_rsid_data[ match_drug_rsid_data = drug_rsid_data[
(drug_rsid_data['chr'] == chrom) & (drug_rsid_data['chr'] == chrom) &
(drug_rsid_data['end'] == end) & (drug_rsid_data['end'] == end) &
(drug_rsid_data['ref'] == ref) & (drug_rsid_data['ref'] == ref) &
(drug_rsid_data['alt'] == alt) & (drug_rsid_data['alt'] == alt) &
(drug_rsid_data['genotype'] == gt) (drug_rsid_data['genotype'] == gt)
]]) ]
match_drug_rsid_data = match_drug_rsid_data.reset_index()
match_drug_rsid_data['chr'] = chrom
match_drug_rsid_data['pos'] = end
match_drug_rsid_data['freq'] = freq
match_drug_rsid_data['depth'] = depth
fliter = pd.concat([fliter, match_drug_rsid_data])
if fliter.empty: if fliter.empty:
fliter = pd.concat([fliter, drug_rsid_data[ match_drug_rsid_data = drug_rsid_data[
(drug_rsid_data['chr'] == chrom) & (drug_rsid_data['chr'] == chrom) &
(drug_rsid_data['end'] == end) & (drug_rsid_data['end'] == end) &
(drug_rsid_data['genotype'] == '0/0') (drug_rsid_data['genotype'] == '0/0')
]]) ]
match_drug_rsid_data = match_drug_rsid_data.reset_index()
match_drug_rsid_data['chr'] = chrom
match_drug_rsid_data['pos'] = end
match_drug_rsid_data['freq'] = '.'
match_drug_rsid_data['depth'] = '.'
fliter = pd.concat([fliter, match_drug_rsid_data])
if fliter.empty: if fliter.empty:
raise UserWarning( raise UserWarning(
@ -136,7 +149,8 @@ class ChemoRun:
fliterdata.to_csv(respath, sep='\t', index=False) fliterdata.to_csv(respath, sep='\t', index=False)
# 分类汇总 同位点,药物合并 drug.infos.txt # 分类汇总 同位点,药物合并 drug.infos.txt
drugrsid = fliterdata[['drugname', 'genename', 'rsid', 'result', 'level', 'tips', 'drugsort']] drugrsid = fliterdata[
['drugname', 'genename', 'rsid', 'result', 'level', 'tips', 'drugsort', 'chr', 'pos', 'freq', 'depth']]
drugrsid = drugrsid.drop_duplicates() drugrsid = drugrsid.drop_duplicates()
resdrugrsid = drugrsid.groupby(['drugname', 'genename', 'rsid', 'result', 'level', 'drugsort'])['tips'].agg( resdrugrsid = drugrsid.groupby(['drugname', 'genename', 'rsid', 'result', 'level', 'drugsort'])['tips'].agg(
','.join).reset_index() ','.join).reset_index()

View File

@ -30,7 +30,8 @@ chomp($head);
my @columns = split("\t", $head); my @columns = split("\t", $head);
my $new_head = join("\t", "Validated", "ClinicalSign", @columns[0 .. 6], my $new_head = join("\t", "Validated", "ClinicalSign", @columns[0 .. 6],
"Freq", @columns[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102], "Oncogenic", "Mutation_Effect", "genetag", "process"); "Freq", @columns[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102], "Oncogenic", "Mutation_Effect", "is_oncogene",
"is_tumor_suppressor_gene", "genetag", "process");
if (!($pipeline eq 'somatic' || $pipeline eq 'tmb' || $pipeline eq 'hotspot' || $pipeline eq 'germline')) { if (!($pipeline eq 'somatic' || $pipeline eq 'tmb' || $pipeline eq 'hotspot' || $pipeline eq 'germline')) {
die "useage: pipeline must be 'somatic' or 'germline' or 'hotspot or tmb'"; die "useage: pipeline must be 'somatic' or 'germline' or 'hotspot or tmb'";
@ -50,7 +51,9 @@ my @hhr2 = @$hhr2_ref;
my @promoter = @$promoter_ref; my @promoter = @$promoter_ref;
my %transcript = transcript(); my %transcript = transcript();
my %oncogenic = get_oncogenic(); my ($oncogenic, $is_oncogene) = get_oncogenic();
my %oncogenic = %$oncogenic;
my %is_oncogene = %$is_oncogene;
while (<IN>) { while (<IN>) {
chomp; chomp;
@ -144,7 +147,7 @@ while (<IN>) {
} }
elsif ($line[9] eq '.') { elsif ($line[9] eq '.') {
# splicing 位点 # splicing 位点
if ($line[5] =~ /splicing/) { if (($line[5] =~ /splicing/) or ($pipeline eq 'hotspot')) {
my @hgvs = split(/;/, $line[7]); my @hgvs = split(/;/, $line[7]);
my $hgvs = $hgvs[0]; my $hgvs = $hgvs[0];
my $transcript_gene; my $transcript_gene;
@ -166,6 +169,11 @@ while (<IN>) {
$hgvs =~ s/exon(\d+)/intron$intron;exon$exon/; $hgvs =~ s/exon(\d+)/intron$intron;exon$exon/;
$line[9] = join(":", ($gene, $hgvs)); $line[9] = join(":", ($gene, $hgvs));
} }
# 不是前面2种情况hotspot强制转换hgvs
elsif ($pipeline eq 'hotspot') {
print "$hgvs\n";
$line[9] = join(":", ($gene, $hgvs));
}
else { else {
push @reason, 'not_need_spl'; push @reason, 'not_need_spl';
} }
@ -177,6 +185,9 @@ while (<IN>) {
} }
else { else {
if ($line[8] eq 'intron') {
push @reason, 'not_need_spl_inron';
}
my @hgvs = split(/,/, $line[9]); my @hgvs = split(/,/, $line[9]);
my $hgvs = $hgvs[0]; my $hgvs = $hgvs[0];
my $transcript_gene; my $transcript_gene;
@ -214,6 +225,13 @@ while (<IN>) {
if ((grep {$_ eq $gene} @promoter) and ($pipeline eq 'somatic') and ($gene eq 'TERT') if ((grep {$_ eq $gene} @promoter) and ($pipeline eq 'somatic') and ($gene eq 'TERT')
and ($line[1] eq '1295228' and $line[4] eq 'A') or ($line[1] eq '1295250' and $line[4] eq 'A')) { and ($line[1] eq '1295228' and $line[4] eq 'A') or ($line[1] eq '1295250' and $line[4] eq 'A')) {
@reason = (); @reason = ();
if ($line[1] eq '1295228') {
$line[9] = 'TERT:NM_198253:/:c.-124C>T (C228T)';
}
else {
$line[9] = 'TERT:NM_198253:/:c.-146C>T (C250T)';
}
$line[8] = 'promoter';
} }
if (@reason) { if (@reason) {
@ -221,17 +239,27 @@ while (<IN>) {
next; next;
} }
my ($oncogenic_col, $mut_effect_col); my ($oncogenic_col, $mut_effect_col, $is_oncogene_gene, $is_tumor_suppressor_gene);
my $get_key = "$gene\_$protein"; my $get_key = "$gene\_$protein";
if (exists $oncogenic{lc $get_key}) { if (exists $oncogenic{lc $get_key}) {
my @get_values = split('&&', $oncogenic{lc $get_key}); my @get_values = split('&&', $oncogenic{lc $get_key});
$oncogenic_col = $get_values[0]; $oncogenic_col = $get_values[0];
$mut_effect_col = $get_values[1]; $mut_effect_col = $get_values[1];
} }
else { else {
$oncogenic_col = '.'; $oncogenic_col = '.';
$mut_effect_col = '.'; $mut_effect_col = '.';
} }
if (exists $is_oncogene{lc $gene}) {
my @get_values = split('&&', $is_oncogene{lc $gene});
$is_oncogene_gene = $get_values[0];
$is_tumor_suppressor_gene = $get_values[1];
}
else {
$is_oncogene_gene = '.';
$is_tumor_suppressor_gene = '.';
}
my $clisig; my $clisig;
if ($line[16] =~ /Likely_pathogenic|drug/i) { if ($line[16] =~ /Likely_pathogenic|drug/i) {
@ -266,7 +294,9 @@ while (<IN>) {
$line[6] = $gene; $line[6] = $gene;
my $genetag = join(";", @genetags); my $genetag = join(";", @genetags);
my $new_line = join("\t", $validated, $clisig, @line[0 .. 6], $freq, @line[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102], $oncogenic_col, $mut_effect_col, $genetag, $pipeline); my $new_line = join("\t", $validated, $clisig,
@line[0 .. 6], $freq, @line[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102],
$oncogenic_col, $mut_effect_col, $is_oncogene_gene, $is_tumor_suppressor_gene, $genetag, $pipeline);
print OUT "$new_line\n"; print OUT "$new_line\n";
print TAG_OUT "PASS\t", join("\t", @line), "\n"; print TAG_OUT "PASS\t", join("\t", @line), "\n";
@ -339,13 +369,16 @@ sub transcript {
# oncokb snv_indel 临床意义定义 # oncokb snv_indel 临床意义定义
# Load the OncoKB SNV/indel annotation table from
# "$database_path/snv_indel_mutation.csv" and build two lookup tables.
#
# Returns a two-element list of hash references:
#   1. \%sig      - key: lc("<col0>_<col1>"); value: columns 2, 3, 7 and 8
#                   joined with "&&" (callers in this file read the first two
#                   fields as the Oncogenic / Mutation_Effect values).
#   2. \%sig_gene - key: lc(<col0>); value: columns 7 and 8 joined with "&&"
#                   (read by callers as is_oncogene / is_tumor_suppressor_gene).
#
# Dies if the CSV file cannot be opened, instead of silently returning empty
# tables as the unchecked open used to do.
#
# NOTE(review): rows are split on a bare comma, so quoted CSV fields that
# contain commas would be mis-parsed - confirm the file never has them.
sub get_oncogenic {
    my %sig;
    my %sig_gene;

    my $csv_path = "$database_path/snv_indel_mutation.csv";
    open my $snv_indel_fh, '<', $csv_path
        or die "Cannot open $csv_path: $!";

    # The first line is the column header; read and discard it.
    <$snv_indel_fh>;

    while (my $row = <$snv_indel_fh>) {
        chomp $row;
        # Strip carriage returns left over from Windows-style line endings.
        $row =~ s/\r//g;

        my @line = split /,/, $row;
        my $key  = join("_", @line[0, 1]);

        $sig{lc $key}          = join("&&", @line[2, 3, 7, 8]);
        # Later rows for the same gene overwrite earlier ones.
        $sig_gene{lc $line[0]} = join("&&", @line[7, 8]);
    }

    close $snv_indel_fh;

    return (\%sig, \%sig_gene);
}

View File

@ -263,6 +263,7 @@ class PostProcess:
filter_sum_df = filter_sum_df[cols] filter_sum_df = filter_sum_df[cols]
filter_sum_df = filter_sum_df.fillna('.') filter_sum_df = filter_sum_df.fillna('.')
filter_sum_df = filter_sum_df.sort_values(by='AMP_mut_level')
filter_sum_res = filter_sum_df.to_dict('records') filter_sum_res = filter_sum_df.to_dict('records')
self.sheet['target_mut'] = filter_sum_res self.sheet['target_mut'] = filter_sum_res

File diff suppressed because one or more lines are too long