需求处理

master
chaopower 2024-01-29 13:03:17 +08:00
parent 0a60e18e93
commit 6a22cb8463
4 changed files with 66 additions and 18 deletions

View File

@ -104,26 +104,39 @@ class ChemoRun:
alt = record.alts[0]
# gt = '/'.join(list(map(str, sorted(record.samples.get(record.samples.keys()[0]).get('GT')))))
freq = record.samples.get(record.samples.keys()[-1]).get('AF')[0]
depth = record.samples.get(record.samples.keys()[-1]).get('DP')
if freq > 0.9:
gt = '1/1'
elif 0.9 >= freq > 0.1:
gt = '0/1'
else:
gt = '0/0'
fliter = pd.concat([fliter, drug_rsid_data[
match_drug_rsid_data = drug_rsid_data[
(drug_rsid_data['chr'] == chrom) &
(drug_rsid_data['end'] == end) &
(drug_rsid_data['ref'] == ref) &
(drug_rsid_data['alt'] == alt) &
(drug_rsid_data['genotype'] == gt)
]])
]
match_drug_rsid_data = match_drug_rsid_data.reset_index()
match_drug_rsid_data['chr'] = chrom
match_drug_rsid_data['pos'] = end
match_drug_rsid_data['freq'] = freq
match_drug_rsid_data['depth'] = depth
fliter = pd.concat([fliter, match_drug_rsid_data])
if fliter.empty:
fliter = pd.concat([fliter, drug_rsid_data[
match_drug_rsid_data = drug_rsid_data[
(drug_rsid_data['chr'] == chrom) &
(drug_rsid_data['end'] == end) &
(drug_rsid_data['genotype'] == '0/0')
]])
]
match_drug_rsid_data = match_drug_rsid_data.reset_index()
match_drug_rsid_data['chr'] = chrom
match_drug_rsid_data['pos'] = end
match_drug_rsid_data['freq'] = '.'
match_drug_rsid_data['depth'] = '.'
fliter = pd.concat([fliter, match_drug_rsid_data])
if fliter.empty:
raise UserWarning(
@ -136,7 +149,8 @@ class ChemoRun:
fliterdata.to_csv(respath, sep='\t', index=False)
# 分类汇总 同位点,药物合并 drug.infos.txt
drugrsid = fliterdata[['drugname', 'genename', 'rsid', 'result', 'level', 'tips', 'drugsort']]
drugrsid = fliterdata[
['drugname', 'genename', 'rsid', 'result', 'level', 'tips', 'drugsort', 'chr', 'pos', 'freq', 'depth']]
drugrsid = drugrsid.drop_duplicates()
resdrugrsid = drugrsid.groupby(['drugname', 'genename', 'rsid', 'result', 'level', 'drugsort'])['tips'].agg(
','.join).reset_index()

View File

@ -30,7 +30,8 @@ chomp($head);
my @columns = split("\t", $head);
my $new_head = join("\t", "Validated", "ClinicalSign", @columns[0 .. 6],
"Freq", @columns[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102], "Oncogenic", "Mutation_Effect", "genetag", "process");
"Freq", @columns[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102], "Oncogenic", "Mutation_Effect", "is_oncogene",
"is_tumor_suppressor_gene", "genetag", "process");
if (!($pipeline eq 'somatic' || $pipeline eq 'tmb' || $pipeline eq 'hotspot' || $pipeline eq 'germline')) {
die "useage: pipeline must be 'somatic' or 'germline' or 'hotspot or tmb'";
@ -50,7 +51,9 @@ my @hhr2 = @$hhr2_ref;
my @promoter = @$promoter_ref;
my %transcript = transcript();
my %oncogenic = get_oncogenic();
my ($oncogenic, $is_oncogene) = get_oncogenic();
my %oncogenic = %$oncogenic;
my %is_oncogene = %$is_oncogene;
while (<IN>) {
chomp;
@ -144,7 +147,7 @@ while (<IN>) {
}
elsif ($line[9] eq '.') {
# splicing 位点
if ($line[5] =~ /splicing/) {
if (($line[5] =~ /splicing/) or ($pipeline eq 'hotspot')) {
my @hgvs = split(/;/, $line[7]);
my $hgvs = $hgvs[0];
my $transcript_gene;
@ -166,6 +169,11 @@ while (<IN>) {
$hgvs =~ s/exon(\d+)/intron$intron;exon$exon/;
$line[9] = join(":", ($gene, $hgvs));
}
# 不是前面2种情况hotspot强制转换hgvs
elsif ($pipeline eq 'hotspot') {
print "$hgvs\n";
$line[9] = join(":", ($gene, $hgvs));
}
else {
push @reason, 'not_need_spl';
}
@ -177,6 +185,9 @@ while (<IN>) {
}
else {
if ($line[8] eq 'intron') {
push @reason, 'not_need_spl_inron';
}
my @hgvs = split(/,/, $line[9]);
my $hgvs = $hgvs[0];
my $transcript_gene;
@ -214,6 +225,13 @@ while (<IN>) {
if ((grep {$_ eq $gene} @promoter) and ($pipeline eq 'somatic') and ($gene eq 'TERT')
and ($line[1] eq '1295228' and $line[4] eq 'A') or ($line[1] eq '1295250' and $line[4] eq 'A')) {
@reason = ();
if ($line[1] eq '1295228') {
$line[9] = 'TERT:NM_198253:/:c.-124C>T (C228T)';
}
else {
$line[9] = 'TERT:NM_198253:/:c.-146C>T (C250T)';
}
$line[8] = 'promoter';
}
if (@reason) {
@ -221,17 +239,27 @@ while (<IN>) {
next;
}
my ($oncogenic_col, $mut_effect_col);
my ($oncogenic_col, $mut_effect_col, $is_oncogene_gene, $is_tumor_suppressor_gene);
my $get_key = "$gene\_$protein";
if (exists $oncogenic{lc $get_key}) {
my @get_values = split('&&', $oncogenic{lc $get_key});
$oncogenic_col = $get_values[0];
$mut_effect_col = $get_values[1];
}
else {
$oncogenic_col = '.';
$mut_effect_col = '.';
}
if (exists $is_oncogene{lc $gene}) {
my @get_values = split('&&', $is_oncogene{lc $gene});
$is_oncogene_gene = $get_values[0];
$is_tumor_suppressor_gene = $get_values[1];
}
else {
$is_oncogene_gene = '.';
$is_tumor_suppressor_gene = '.';
}
my $clisig;
if ($line[16] =~ /Likely_pathogenic|drug/i) {
@ -266,7 +294,9 @@ while (<IN>) {
$line[6] = $gene;
my $genetag = join(";", @genetags);
my $new_line = join("\t", $validated, $clisig, @line[0 .. 6], $freq, @line[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102], $oncogenic_col, $mut_effect_col, $genetag, $pipeline);
my $new_line = join("\t", $validated, $clisig,
@line[0 .. 6], $freq, @line[7 .. 20, 23, 28, 32, 50, 56, 62, 101, 102],
$oncogenic_col, $mut_effect_col, $is_oncogene_gene, $is_tumor_suppressor_gene, $genetag, $pipeline);
print OUT "$new_line\n";
print TAG_OUT "PASS\t", join("\t", @line), "\n";
@ -339,13 +369,16 @@ sub transcript {
# Load the OncoKB SNV/indel clinical-significance table.
#
# Reads "$database_path/snv_indel_mutation.csv", discards its first (header)
# line and builds two lookup tables, both keyed in lower case:
#   %sig      : "<col0>_<col1>" => columns 2, 3, 7 and 8 joined with "&&"
#   %sig_gene : "<col0>"        => columns 7 and 8 joined with "&&"
# The main loop looks %sig up by "gene_protein" and %sig_gene by gene, and
# splits the values on "&&" again.
# NOTE(review): rows are split on every comma, so quoted CSV fields that
# contain commas would shift columns - confirm the table never has them.
#
# Returns a two-element list of hash references: (\%sig, \%sig_gene).
# Dies if the table cannot be opened, because silently returning empty
# lookups would annotate every variant with '.'.
sub get_oncogenic {
    my %sig;
    my %sig_gene;
    my $csv_path = "$database_path/snv_indel_mutation.csv";
    open(my $snv_indel_fh, '<', $csv_path)
        or die "Cannot open $csv_path: $!";
    # Discard the header line.
    scalar <$snv_indel_fh>;
    while (my $row = <$snv_indel_fh>) {
        chomp $row;
        # Strip carriage returns left by Windows line endings.
        $row =~ s/\r//g;
        # Limit -1 keeps trailing empty fields so columns 7/8 stay defined
        # (as empty strings) when the last cells of a row are blank.
        my @line = split(/,/, $row, -1);
        my $key = join("_", @line[0, 1]);
        $sig{lc $key} = join("&&", @line[2, 3, 7, 8]);
        $sig_gene{lc $line[0]} = join("&&", @line[7, 8]);
    }
    close($snv_indel_fh);
    return (\%sig, \%sig_gene);
}

View File

@ -263,6 +263,7 @@ class PostProcess:
filter_sum_df = filter_sum_df[cols]
filter_sum_df = filter_sum_df.fillna('.')
filter_sum_df = filter_sum_df.sort_values(by='AMP_mut_level')
filter_sum_res = filter_sum_df.to_dict('records')
self.sheet['target_mut'] = filter_sum_res

File diff suppressed because one or more lines are too long