Compare commits
No commits in common. "fb36b973296eee51ed19d60690c58a20abe294ce" and "3f6e7b2dc059dc07f352ac59d1560d0360e4be72" have entirely different histories.
fb36b97329
...
3f6e7b2dc0
|
|
@ -189,12 +189,12 @@ while (<IN>) {
|
||||||
push @reason, 'not_need_spl_inron';
|
push @reason, 'not_need_spl_inron';
|
||||||
}
|
}
|
||||||
my @hgvs = split(/,/, $line[9]);
|
my @hgvs = split(/,/, $line[9]);
|
||||||
# my $hgvs = $hgvs[0];
|
my $hgvs = $hgvs[0];
|
||||||
my $transcript_gene;
|
my $transcript_gene;
|
||||||
$transcript_gene = $transcript{$gene} if (exists $transcript{$gene});
|
$transcript_gene = $transcript{$gene} if (exists $transcript{$gene});
|
||||||
my $hgvs;
|
|
||||||
if (grep {/$transcript_gene/} @hgvs) {
|
if (grep {/$transcript_gene/} @hgvs) {
|
||||||
$hgvs = (grep {/$transcript_gene/} @hgvs)[0];
|
$hgvs = (grep {/$transcript_gene/} @hgvs)[0];
|
||||||
|
}
|
||||||
$line[9] = $hgvs;
|
$line[9] = $hgvs;
|
||||||
|
|
||||||
$hgvs =~ /:(NM_\d+):exon\d+:(c\.\S+):p\.(\S+)$/;
|
$hgvs =~ /:(NM_\d+):exon\d+:(c\.\S+):p\.(\S+)$/;
|
||||||
|
|
@ -202,10 +202,6 @@ while (<IN>) {
|
||||||
if ($protein =~ /\d+X$|\d+\*$/ or $line[8] eq 'stopgain' or $line[8] eq 'frameshift deletion' or $line[8] eq 'frameshift insertion') {
|
if ($protein =~ /\d+X$|\d+\*$/ or $line[8] eq 'stopgain' or $line[8] eq 'frameshift deletion' or $line[8] eq 'frameshift insertion') {
|
||||||
$protein = 'Truncating Mutations';
|
$protein = 'Truncating Mutations';
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else {
|
|
||||||
push @reason, 'not_correct_hgvs';
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,32 +6,6 @@ import re
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
def split_hgvs(hgvs):
|
|
||||||
hgvs_split = hgvs.split(':')
|
|
||||||
if len(hgvs_split) == 4:
|
|
||||||
gene, position, transcript_version, coordinate_type = hgvs_split
|
|
||||||
|
|
||||||
# pattern = r'c\.\d+([\+\-])[12]\D+>\D+'
|
|
||||||
# match = re.search(pattern, coordinate_type)
|
|
||||||
# # if match:
|
|
||||||
# # transcript_version =
|
|
||||||
# # if match.group(1) == '-':X
|
|
||||||
|
|
||||||
variant_version = None
|
|
||||||
elif len(hgvs_split) == 5:
|
|
||||||
gene, position, transcript_version, coordinate_type, variant_version = hgvs_split
|
|
||||||
else:
|
|
||||||
raise ValueError(f'Invalid HGVS format{hgvs}')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'gene': gene,
|
|
||||||
'transcript': position,
|
|
||||||
'exon': transcript_version,
|
|
||||||
'nacid': coordinate_type,
|
|
||||||
'aacid': variant_version
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class HereditaryRun:
|
class HereditaryRun:
|
||||||
|
|
||||||
def __init__(self, database, project, output_dir, name, file):
|
def __init__(self, database, project, output_dir, name, file):
|
||||||
|
|
@ -56,19 +30,21 @@ class HereditaryRun:
|
||||||
|
|
||||||
result_df = pd.DataFrame(columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation'])
|
result_df = pd.DataFrame(columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation'])
|
||||||
for _, rows in data.iterrows():
|
for _, rows in data.iterrows():
|
||||||
# matches = re.match(r"([A-Za-z0-9]+):.*:(p\..*)", rows['AAChange_refGene'])
|
matches = re.match(r"([A-Za-z0-9]+):.*:(p\..*)", rows['AAChange_refGene'])
|
||||||
matches = split_hgvs(rows['AAChange_refGene'])
|
row_df = pd.DataFrame(columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation', 'ClinicalSign'])
|
||||||
gene = matches['gene']
|
gene, mutation = '', ''
|
||||||
aacid = matches['aacid'] if matches['aacid'] else matches['nacid']
|
if matches:
|
||||||
row_df = pd.DataFrame(
|
gene = matches.group(1)
|
||||||
columns=['Gene', 'Syndrome_Cn', 'inheritance', 'genotype', 'mutation', 'ClinicalSign'])
|
mutation = matches.group(2)
|
||||||
|
else:
|
||||||
|
raise UserWarning('HGVS 解析错误!')
|
||||||
|
|
||||||
selected_rows = expanded_database[expanded_database['Gene'].str.split(';').apply(lambda x: gene in x)]
|
selected_rows = expanded_database[expanded_database['Gene'].str.split(';').apply(lambda x: gene in x)]
|
||||||
|
|
||||||
row_df['Syndrome_Cn'] = selected_rows['Syndrome_Cn']
|
row_df['Syndrome_Cn'] = selected_rows['Syndrome_Cn']
|
||||||
row_df['inheritance'] = selected_rows['inheritance']
|
row_df['inheritance'] = selected_rows['inheritance']
|
||||||
row_df['Gene'] = gene
|
row_df['Gene'] = gene
|
||||||
row_df['mutation'] = aacid
|
row_df['mutation'] = mutation
|
||||||
row_df['genotype'] = '纯合' if rows['Freq'] > 0.9 else '杂合'
|
row_df['genotype'] = '纯合' if rows['Freq'] > 0.9 else '杂合'
|
||||||
row_df['ClinicalSign'] = str(rows['ClinicalSign'])
|
row_df['ClinicalSign'] = str(rows['ClinicalSign'])
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -46,13 +46,9 @@ def single_monitor(name, vcf_file, bed_file, freq_range, output_dir):
|
||||||
bed_regions = load_bed_regions(bed_file)
|
bed_regions = load_bed_regions(bed_file)
|
||||||
|
|
||||||
vcf = pysam.VariantFile(vcf_file)
|
vcf = pysam.VariantFile(vcf_file)
|
||||||
|
|
||||||
res_pos = list()
|
res_pos = list()
|
||||||
count_normal = 0
|
count_normal = 0
|
||||||
count_exception = 0
|
count_exception = 0
|
||||||
vcf_out = open(os.path.join(output_dir, f'{name}_cnvkit_tumor.vcf'), 'w')
|
|
||||||
for line in str(vcf.header).strip().split('\n'):
|
|
||||||
vcf_out.write(line + '\n')
|
|
||||||
|
|
||||||
for record in vcf:
|
for record in vcf:
|
||||||
contig = record.contig
|
contig = record.contig
|
||||||
|
|
@ -83,9 +79,7 @@ def single_monitor(name, vcf_file, bed_file, freq_range, output_dir):
|
||||||
freq=freq,
|
freq=freq,
|
||||||
res=res
|
res=res
|
||||||
))
|
))
|
||||||
vcf_out.write(str(record) + '\n')
|
|
||||||
break
|
break
|
||||||
|
|
||||||
count_all = count_exception + count_normal
|
count_all = count_exception + count_normal
|
||||||
if count_all == 0:
|
if count_all == 0:
|
||||||
z_score = 0
|
z_score = 0
|
||||||
|
|
|
||||||
|
|
@ -61,19 +61,6 @@ task run_generate_png {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task run_single_generate_png {
|
|
||||||
String name
|
|
||||||
String probe
|
|
||||||
String cnvkit_tumor_vcf
|
|
||||||
String cnv_cnr
|
|
||||||
String cnv_cns
|
|
||||||
String output_dir
|
|
||||||
|
|
||||||
command {
|
|
||||||
cnvkit.py scatter ${cnv_cnr} -s ${cnv_cns} -v ${cnvkit_tumor_vcf} -o ${output_dir}/pollution/${name}_pollution_cnvkit_tumor.png
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
workflow call_pollution {
|
workflow call_pollution {
|
||||||
|
|
||||||
Boolean run=true
|
Boolean run=true
|
||||||
|
|
@ -127,16 +114,6 @@ workflow call_pollution {
|
||||||
probe=probe,
|
probe=probe,
|
||||||
vcf=initial_vcf
|
vcf=initial_vcf
|
||||||
}
|
}
|
||||||
call run_single_generate_png {
|
|
||||||
input:
|
|
||||||
name=tumor,
|
|
||||||
probe=probe,
|
|
||||||
cnvkit_tumor_vcf=run_pollution_paired.cnvkit_tumor_vcf,
|
|
||||||
cnv_cnr=cnv_cnr,
|
|
||||||
cnv_cns=cnv_cns,
|
|
||||||
output_dir=output_dir
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue