处理过滤

master
chaopower 2023-09-27 17:57:28 +08:00
parent 188e7dd8a4
commit c55811e306
11 changed files with 856 additions and 312 deletions

View File

@ -0,0 +1,206 @@
#!/usr/bin/env perl
use strict;
#use warnings;
use List::Util qw(sum);
# Command line: annotated input table, project name, sample type ('c' is the
# value tested later in this script), and the three output paths.
die "useage:perl $0 input project sample_type somtic_out germline_out tag_out " unless @ARGV == 6;
my ($input, $project, $sample_type, $somtic_out, $germline_out, $tag_out) = @ARGV;

# Three-argument open with an explicit check: a missing input or an
# unwritable output now stops the run instead of silently producing empty
# result files. The bareword handle names are kept because the rest of the
# script reads from IN and prints to OUT / OUT2 / OUT3.
open(IN, '<', $input) or die "cannot open input '$input': $!\n";
my $head = <IN>;

# Somatic result table: input header prefixed with one extra column.
open(OUT, '>', $somtic_out) or die "cannot write somatic output '$somtic_out': $!\n";
print OUT "可信\t$head";

# Germline result table.
open(OUT2, '>', $germline_out) or die "cannot write germline output '$germline_out': $!\n";
print OUT2 "临床意义\t$head";

# Tag table: receives input rows prefixed with the filter tag assigned to them.
open(OUT3, '>', $tag_out) or die "cannot write tag output '$tag_out': $!\n";
print OUT3 "TAG\t$head";
## black list
# Load the variant blacklist into %bk. Each key is the first five
# tab-separated columns of a blacklist row joined with "_", which is the same
# key the main loop builds from an input row.
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
open(my $blacklist_fh, '<', "$public_path/blacklist.txt")
    or die "cannot open blacklist '$public_path/blacklist.txt': $!\n";
my %bk;
# Discard the header row. The previous code read from the handle BKLS, which
# is never opened, so the header row was not actually skipped.
my $blacklist_header = <$blacklist_fh>;
while (my $entry = <$blacklist_fh>) {
    chomp $entry;
    my @fields = split(/\t/, $entry);
    my $key = join("_", @fields[0 .. 4]);
    $bk{$key} = 1;
}
close $blacklist_fh;
# Tell whether a variant key is present in the blacklist hash %bk.
# Returns the string "1" when the key is blacklisted and "" otherwise.
sub blacklist {
    my ($variant_key) = @_;
    return exists $bk{$variant_key} ? "1" : "";
}
# Load the target gene list for $project from info.txt into @muts.
# Column 0 is matched against the project name; column 2 holds the genes
# separated by "/" (or the literal "NA"). If several rows match, the last
# one with a non-"NA" gene column wins, as before.
open(my $info_fh, '<', "$public_path/info.txt")
    or die "cannot open project table '$public_path/info.txt': $!\n";
my @muts;
while (my $record = <$info_fh>) {
    chomp $record;
    my @line = split(/\t/, $record);
    next unless $line[0] eq $project;
    next if $line[2] eq "NA";
    @muts = split(/\//, $line[2]);
}
close $info_fh;
# Main pass over the annotated variant table read from IN.
# Rows are tagged and echoed to OUT3; rows that survive every filter are also
# written to OUT, and to OUT2 when their frequency is above 0.1.
# The only rows that produce no output at all are splicing rows whose
# $line[101] is not 'PASS'.
# NOTE(review): the column meanings below are inferred from how each column is
# tested, not from a header visible here -- confirm against the input table.
while (<IN>) {
chomp;
my @line = split(/\t/, $_);
# Fifth ":"-separated subfield of the last column, used below as the variant
# frequency (presumably the AF sample field -- TODO confirm).
my $freq = (split(":", $line[-1]))[4];
# next if $line[9] eq '.';
# Rows whose $line[8] is "synonymous SNV" or "unknown" fall through to the
# final else and are tagged "synonymous".
if ($line[8] ne "synonymous SNV" and $line[8] ne "unknown"){
# All seven columns must compare below 0.01, otherwise the row is tagged
# "common_snp" (presumably population frequency columns -- TODO confirm).
# A non-numeric value such as "." compares as 0 here and therefore passes.
if ( $line[17] < 0.01 and $line[18] < 0.01 and $line[19] < 0.01 and $line[20] < 0.01 and $line[23] < 0.01 and $line[28] < 0.01 and $line[32] < 0.01){
# $line[16] mentions "benign" and none of the listed other terms: tag and skip.
if ($line[16] =~ /benign/i and $line[16] !~ /pathogenic|Affects|association|Conflicting|sensitivity|drug|other|risk|protective|Uncertain|not_provided|\./i) {
print OUT3 "benign\t", join("\t", @line), "\n";
next;
}
# Extra low-frequency filter applied only when sample_type is 'c'.
if ($sample_type eq 'c') {
if ($line[11] =~ /OCCURENCE=(\S+)/) {
my $cosmic = $1;
# Remove the parenthesised parts, then sum the comma-separated numbers.
$cosmic =~ s/\(\S+?\)//g;
my @cosmic = split(",", $cosmic);
$cosmic = sum @cosmic;
if ($freq < 0.01 and $cosmic <= 1) {
print OUT3 "cfdna_lowfreq_cosmic\t", join("\t", @line), "\n";
next;
}
}
# Same tag when $line[11] is "." and the frequency is below 0.01.
if ($freq < 0.01 and $line[11] eq '.') {
print OUT3 "cfdna_lowfreq_cosmic\t", join("\t", @line), "\n";
next;
}
}
#blacklist
# Key layout matches the one used when %bk was loaded: columns 0-4 joined by "_".
my $key = join("_", @line[0 .. 4]);
if (&blacklist($key)) {
print OUT3 "blacklist\t", join("\t", @line), "\n";
next;
};
# Branch 1: $line[9] is populated (a comma-separated list of annotations of
# the form gene:transcript:exonN:c.X:p.Y).
if ($line[9] ne '.') {
my @hgvs = split(/,/, $line[9]);
my $hgvs = $hgvs[0];
# NOTE(review): the match result is not checked; if the first annotation does
# not fit this pattern, $1 is not set by this statement.
$hgvs =~ /(\S+):(\S+):exon(\d+):c\.(\S+):p\.(\S+)$/;
my $gene = $1;
# Keep only genes listed in @muts; an empty @muts rejects every gene.
if (!(@muts and grep {$gene eq $_} @muts)) {
print OUT3 "nontarget_gene\t", join("\t", @line), "\n";
next;
}
if ($line[101] ne 'PASS') {
# split in scalar context returns the number of ";"-separated entries.
my $filter = split(";", $line[101]);
# Non-PASS rows are dropped below 0.02, and between 0.02 and 0.05 when at
# least two entries are present.
if ($freq < 0.02 or ($freq >= 0.02 and $freq < 0.05 and $filter >= 2)) {
print OUT3 "byfilter\t", join("\t", @line), "\n";
next;
};
}
# Prefer the annotation that mentions the transcript returned by transcript()
# for this gene; otherwise keep the first annotation.
if (my $transcript = &transcript($gene)) {
if (grep {/$transcript/} @hgvs) {
$hgvs = (grep {/$transcript/} @hgvs)[0];
}
}
$line[9] = $hgvs;
# Accepted row: columns 5 and 6 are replaced by "exonic" and the gene name.
print OUT "1\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n";
print OUT3 "PASS\t", join("\t", @line), "\n";
# Rows above 0.1 are also written to OUT2 with a class taken from $line[16]:
# 2 = Likely_pathogenic or drug, 1 = pathogenic without Conflicting, 3 = other.
if ($freq > 0.1) {
if ($line[16] =~ /Likely_pathogenic|drug/i) {
print OUT2 "2\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n";
}
elsif ($line[16] =~ /pathogenic/i and $line[16] !~ /Conflicting/i) {
print OUT2 "1\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n";
}
else {
print OUT2 "3\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n";
}
}
}
# Branch 2: $line[9] is "." and $line[5] mentions splicing.
elsif ($line[5] =~ /splicing/) {
# Non-PASS splicing rows are skipped without writing a tag row.
next if $line[101] ne 'PASS';
my $gene = (split(";", $line[6]))[0];
if (!(@muts and grep {$gene eq $_} @muts)) {
print OUT3 "nontarget_gene\t", join("\t", @line), "\n";
next;
};
my @hgvs = split(/;/, $line[7]);
my $hgvs = $hgvs[0];
if (my $transcript = &transcript($gene)) {
if (grep {/$transcript/} @hgvs) {
$hgvs = (grep {/$transcript/} @hgvs)[0];
}
}
# NOTE(review): unchecked match, as in the exonic branch.
$hgvs =~ /(\S+):exon(\d+):c\.(\S+)$/;
my $spl = $3;
# Accept changes written as <digits>, then + or -, then 1 or 2, then at least
# one non-digit. NOTE(review): "|" inside the brackets is a literal character,
# so both classes also accept "|".
if ($spl =~ /\d+[\+|\-][1|2]\D+/) {
$line[7] = join(":", ($gene, $hgvs));
print OUT3 "PASS\t", join("\t", @line), "\n";
# Accepted row: columns 5-9 become "splicing", gene, ".", ".", $line[7].
print OUT "1\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n";
if ($freq > 0.1) {
if ($line[16] =~ /Likely_pathogenic|drug/i) {
print OUT2 "2\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n";
}
elsif ($line[16] =~ /pathogenic/i and $line[16] !~ /Conflicting/i) {
print OUT2 "1\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n";
}
else {
print OUT2 "3\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n";
}
}
}
# Splicing change that does not fit the accepted pattern.
else {
print OUT3 "unknow1\t", join("\t", @line), "\n";
}
}
# Neither an annotated $line[9] nor a splicing row.
else {
print OUT3 "unknow2\t", join("\t", @line), "\n";
}
}
else{
print OUT3 "common_snp\t", join("\t", @line), "\n";
}
}
else {
print OUT3 "synonymous\t", join("\t", @line), "\n";
}
}
# Look up the transcript id recorded for a gene in oncokbgene.txt.
#
# The table is read from "$ENV{DATABASE}/oncokbgene.txt" (or the built-in
# default directory when DATABASE is unset). Column 0 is the gene and column 2
# the transcript id; rows are split on whitespace.
#
# Argument: the gene name.
# Returns:  the transcript id with its first ".<digits>" part removed, or ""
#           (after printing a notice to STDOUT) when the gene is not listed.
#
# The table is parsed once on the first call and cached. Previously the file
# was re-opened and re-read for every variant and its handle was never closed.
sub transcript {
    my ($gene) = @_;

    use feature 'state';
    state $transcript_of;

    if (!defined $transcript_of) {
        $transcript_of = {};
        my $data_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase/";
        my $table = "$data_path/oncokbgene.txt";
        if (open(my $table_fh, '<', $table)) {
            while (my $row = <$table_fh>) {
                chomp $row;
                my @line = split ' ', $row;
                next unless @line;    # ignore blank rows
                $transcript_of->{$line[0]} = $line[2];
            }
            close $table_fh;
        }
        else {
            # Keep going with an empty table (every gene is then reported as
            # missing, as before), but make the cause visible.
            warn "cannot open '$table': $!\n";
        }
    }

    if (exists $transcript_of->{$gene}) {
        my $nm_id = $transcript_of->{$gene};
        # Strip the version suffix on a copy so the cached value is untouched.
        $nm_id =~ s/\.\d+// if defined $nm_id;
        return $nm_id;
    }
    else {
        print "$gene has no NM id in oncokbgene.txt";
        return "";
    }
}

View File

@ -0,0 +1,45 @@
#!/usr/bin/env perl
use strict;
use warnings;
# Merge the "1r" and "2r" VCF calls.
#
# A 1r record is kept when its depth (sample subfield 1) is at least 50 and
# either its frequency (sample subfield 4) is not below 0.01, or the same
# chrom/pos/ref/alt is also present in the 2r VCF. Header lines of the 1r VCF
# are copied through unchanged.
die "usage:perl $0 vcf1r vcf2r outvcf" unless @ARGV == 3;
my ($vcf1r, $vcf2r, $outvcf) = @ARGV;

open(my $r1_fh,  '<', $vcf1r)  or die "cannot open 1r VCF '$vcf1r': $!\n";
open(my $r2_fh,  '<', $vcf2r)  or die "cannot open 2r VCF '$vcf2r': $!\n";
open(my $out_fh, '>', $outvcf) or die "cannot write merged VCF '$outvcf': $!\n";

# Index the 2r calls by chrom_pos_ref_alt.
my %r2;
while (my $record = <$r2_fh>) {
    next if $record =~ /^#/;
    chomp $record;
    my @line = split ' ', $record;
    my $key = join("_", @line[0, 1, 3, 4]);
    $r2{$key} = $record;
}
close $r2_fh;

while (my $record = <$r1_fh>) {
    if ($record =~ /^#/) {
        print {$out_fh} $record;
        next;
    }
    chomp $record;
    my @line = split ' ', $record;
    # Subfields 1 and 4 of the last (sample) column are used as depth and frequency.
    my ($dp, $freq) = (split(":", $line[-1]))[1, 4];
    next if $dp < 50;
    # Low-frequency calls need confirmation from the 2r call set.
    if ($freq < 0.01) {
        my $key = join("_", @line[0, 1, 3, 4]);
        next unless exists $r2{$key};
    }
    print {$out_fh} "$record\n";
}
close $r1_fh;
# Buffered write errors only surface at close, so check it.
close $out_fh or die "error closing '$outvcf': $!\n";
}

View File

@ -0,0 +1,115 @@
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import sys
import pandas as pd
import pysam
# Command line: filtered variant table, BAM file, output table.
if len(sys.argv) != 4:
    print(" ".join(['usage:python', sys.argv[0], 'filter_file', 'bam_file', 'output']))
    # Non-zero status so a calling workflow can see the usage error.
    sys.exit(1)

infile = sys.argv[1]
bamfile = sys.argv[2]
outfile = sys.argv[3]

# Opened once at module level; correct() reads from it.
samfile = pysam.AlignmentFile(bamfile)
# The output is written with DataFrame.to_csv(outfile) at the end of the
# script, so no separate handle is opened here (the old `OUT` handle was
# never written to or closed).
def correct(chr, start, end, alt_base):
    """Recount the reads supporting ``alt_base`` over ``chr:start-end``.

    Reads the module-level ``samfile`` with a pileup restricted to mapping
    quality >= 20 (``truncate=True`` limits columns to the requested range).

    Args:
        chr: reference name passed to ``samfile.pileup``.
        start: start coordinate passed to ``samfile.pileup``.
        end: end coordinate passed to ``samfile.pileup``.
        alt_base: base compared against each read's base at the pileup column.

    Returns:
        A tuple ``(alt_reads_num, correct_num, total_depth)``:

        * ``alt_reads_num`` - distinct read names showing ``alt_base`` that
          survive the quality filter (base quality >= 20 and NM < 4). When a
          name occurs exactly twice in the pileup, the two bases must agree
          and at least one occurrence must pass the filter.
        * ``correct_num`` - number of pileup rows from the surviving reads
          with quality >= 20, NM < 4 and FR + RR > 1.
        * ``total_depth`` - ``alt_reads_num`` plus the distinct names of
          non-alt reads with NM < 4 and base quality >= 20.
    """
    total_reads = []
    alt_reads = []
    # First pass: split read names into alt-supporting and other usable reads.
    for pileupcolumn in samfile.pileup(chr, start, end, stepper="samtools", min_base_quality=0,
                                       min_mapping_quality=20, max_depth=100000, ignore_overlaps=False,
                                       truncate=True, ignore_orphans=False):
        for pileupread in pileupcolumn.pileups:
            # print(str(pileupread))
            if not pileupread.is_del and not pileupread.is_refskip:
                aln = pileupread.alignment
                if aln.query_sequence[pileupread.query_position] == alt_base:
                    alt_reads.append(aln.query_name)
                else:
                    if aln.get_tag('NM') < 4 and aln.query_qualities[pileupread.query_position] >= 20:
                        total_reads.append(aln.query_name)
    alt_reads = list(set(alt_reads))
    non_alt_depth = len(list(set(total_reads)))

    dic = {'chr': [],
           'pos': [],
           'read': [],
           'base': [],
           'quality': [],
           'NM': [],
           'FR_RR': []
           }
    # Second pass: collect one row per pileup occurrence of every alt read name.
    for read in alt_reads:
        for pileupcolumn in samfile.pileup(chr, start, end, stepper="samtools", min_base_quality=0,
                                           min_mapping_quality=20, max_depth=100000, ignore_overlaps=False,
                                           truncate=True, ignore_orphans=False):
            for pileupread in pileupcolumn.pileups:
                if not pileupread.is_del and not pileupread.is_refskip and pileupread.alignment.query_name == read:
                    aln = pileupread.alignment
                    dic['chr'].append(pileupcolumn.reference_name)
                    dic['pos'].append(pileupcolumn.reference_pos + 1)
                    dic['read'].append(aln.query_name)
                    dic['base'].append(aln.query_sequence[pileupread.query_position])
                    dic['quality'].append(aln.query_qualities[pileupread.query_position])
                    dic['NM'].append(aln.get_tag('NM'))
                    fr = 0
                    rr = 0
                    if aln.has_tag('FR'):
                        fr = aln.get_tag('FR')
                    if aln.has_tag('RR'):
                        # Fixed: the RR tag used to be stored in `fr`, which
                        # discarded the FR value whenever RR was present.
                        rr = aln.get_tag('RR')
                    dic['FR_RR'].append(fr + rr)
    dic = pd.DataFrame(dic)

    # Iterate over a copy because alt_reads is modified inside the loop.
    b = alt_reads[:]
    for R in b:
        rows = dic[dic['read'] == R]
        bases = list(rows['base'])
        quals = list(rows['quality'])
        nms = list(rows['NM'])
        if len(bases) == 2:
            # Two occurrences of the same read name (presumably overlapping
            # mates -- confirm): they must agree on the base.
            if bases[0] != bases[1]:
                print(rows['base'])
                print(rows['quality'])
                alt_reads.remove(R)
            else:
                if (quals[0] >= 20 and nms[0] < 4) or (quals[1] >= 20 and nms[1] < 4):
                    pass
                else:
                    alt_reads.remove(R)
        else:
            # Any other count: only the first occurrence is checked.
            if quals[0] < 20 or nms[0] >= 4:
                alt_reads.remove(R)

    alt_reads_num = len(alt_reads)
    total_depth = non_alt_depth + alt_reads_num
    correct_num = 0
    kept = set(alt_reads)
    for index, row in dic.iterrows():
        if row['read'] in kept:
            if row['quality'] >= 20 and row['NM'] < 4 and row['FR_RR'] > 1:
                correct_num += 1
    # if alt_reads_num > 3 and correct_num > 0:
    #     return(alt_reads_num)
    # else:
    #     return(0)
    return (alt_reads_num, correct_num, total_depth)
# Load the pre-filtered variant table and drop rows flagged 可信 == 0.
snv = pd.read_table(infile, sep="\t")
untrusted = list(snv.index[snv['可信'] == 0])
snv.drop(untrusted, inplace=True)

# Re-check single-base substitutions whose reported frequency (fifth ":"
# subfield of Otherinfo13) is below 0.05 against the BAM.
drop_index = []
for index, row in snv.iterrows():
    single_base = len(row['Ref']) == 1 and len(row['Alt']) == 1
    if not single_base:
        continue
    if not (float(row['Otherinfo13'].split(':')[4]) < 0.05):
        continue
    c = correct(row['Chr'], row['Start'] - 1, row['End'], row['Alt'])
    alt_n, corrected_n, depth = c[0], c[1], c[2]
    # NOTE(review): `depth` as returned by correct() already includes the alt
    # reads, so they are counted twice in this denominator -- confirm intended.
    too_few_alt = float(alt_n) < 3
    if too_few_alt or float(corrected_n) < 1 or float(alt_n / (alt_n + depth)) < 0.002:
        drop_index.append(index)

snv.drop(labels=drop_index, inplace=True)
# OUT.write(snv)
snv.to_csv(outfile, index=False, sep="\t")

View File

@ -0,0 +1,46 @@
#!/usr/bin/env perl
use strict;
use warnings;
# Remove from the tumor VCF the variants that are also seen in the normal VCF.
#
# A tumor record is written when
#   - the same chrom/pos/ref/alt is absent from the normal VCF, or
#   - the normal frequency is below 0.05 and the tumor frequency is more than
#     five times the normal frequency.
# The frequency is taken from subfield 4 of the last (sample) column.
die "usage:perl $0 tumor normal output" unless @ARGV == 3;
my ($tumor, $normal, $output) = @ARGV;

open(my $tumor_fh,  '<', $tumor)  or die "cannot open tumor VCF '$tumor': $!\n";
open(my $normal_fh, '<', $normal) or die "cannot open normal VCF '$normal': $!\n";
open(my $out_fh,    '>', $output) or die "cannot write output VCF '$output': $!\n";

# Frequency of every normal call, keyed by chrom_pos_ref_alt.
my %n;
while (my $record = <$normal_fh>) {
    next if $record =~ /^#/;
    chomp $record;
    my @line = split ' ', $record;
    my $freq = (split(":", $line[-1]))[4];
    my $key = join '_', @line[0, 1, 3, 4];
    $n{$key} = $freq;
}
close $normal_fh;

while (my $record = <$tumor_fh>) {
    if ($record =~ /^#/) {
        print {$out_fh} $record;
        next;
    }
    chomp $record;
    my @line = split ' ', $record;
    my $freq = (split(":", $line[-1]))[4];
    my $key = join '_', @line[0, 1, 3, 4];

    # Not seen in the normal sample: keep.
    if (not exists $n{$key}) {
        print {$out_fh} "$record\n";
        next;
    }

    # Drop variants whose frequency in the control sample is >= 0.05.
    next if $n{$key} >= 0.05;

    # Keep when the tumor frequency is more than five times the control
    # frequency. A control frequency of 0 used to abort the script with
    # "Illegal division by zero"; it is now treated as an unbounded ratio.
    if ($n{$key} == 0 or $freq / $n{$key} > 5) {
        print {$out_fh} "$record\n";
    }
}
close $tumor_fh;
# Buffered write errors only surface at close, so check it.
close $out_fh or die "error closing '$output': $!\n";

View File

@ -0,0 +1,29 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import sys
import pysam
# Keep only the reads whose FR + RR tag sum is >= 2 (a missing tag counts as 0).
if len(sys.argv) != 3:
    # The two arguments are the input BAM and the output BAM; the old usage
    # text still named the former outputDir/tumor arguments.
    print(" ".join(['usage:python', sys.argv[0], 'input_bam', 'output_bam']))
    sys.exit(1)

# Context managers make sure the output BAM is flushed and closed before the
# caller indexes it.
with pysam.AlignmentFile(sys.argv[1], "rb") as samfile, \
        pysam.AlignmentFile(sys.argv[2], "wb", template=samfile) as bamfile:
    for s in samfile:
        fr = s.get_tag('FR') if s.has_tag('FR') else 0
        rr = s.get_tag('RR') if s.has_tag('RR') else 0
        if fr + rr >= 2:
            bamfile.write(s)
# Indexing is left to the caller (e.g. `samtools index <output_bam>`).

View File

@ -0,0 +1,21 @@
Chr Start End Ref Alt Gene AAchange
chr6 29911227 29911228 GA TG HLA-A p.E176W
chr6 29912029 29912030 GG C HLA-A p.Q250Hfs*47
chr6 29912383 29912383 G A HLA-A p.R334R
chr6 31237764 31237774 TCATAGCGGTG CCACAACAGCC HLA-C p.T329_M332delinsAVVV
chr6 31237994 31237994 C - HLA-C p.S297Afs*25
chr6 31238865 31238865 T - HLA-C p.T202Rfs*12
chr6 31238884 31238884 G T HLA-C p.Y195X
chr6 31238909 31238910 GT AG HLA-C p.T187L
chr6 31239431 31239431 C - HLA-C p.A97Lfs*4
chr6 31239490 31239490 C - HLA-C p.E77Sfs*24
chr6 31324003 31324004 TC GT HLA-B p.E187T
chr6 31324525 31324536 CCTGGGCCTTGT TGTTGGTCTTGG HLA-B p.Y91_A95delinsSKTNT
chr6 31324734 31324861 CCTGGGGGTGAGGAGGGGCTGAGACCCGCCCGACCCTCCTCCCGGCGCGGCTCCTCAGGTCCTGCGCCCCCGCCTGCGGTCCCCTCGCTCCTCCCGGCAGAGGCCATTTCCCTCCCGACCCGCACTCA - HLA-B p.G25Afs*5
chr6 29910581 29910583 CGC AGT HLA-A p.R41S
chr6 31237862 31237862 T A HLA-C p.E299V
chr6 31324525 31324549 CCTGGGCCTTGTAGATCTGTGTGTT TCTGGCGCTTGTACTTCTGTGTCTC HLA-B p.N87_A95delinsETQKYKRQT
chr6 31238909 31238910 GT TC HLA-C p.T187E
chr3 10088407 10088410 AGTA - FANCD2 p.V427Ffs*20
chr12 57112003 57112003 A G NACA p.M1104T
chr12 57112022 57112022 A G NACA p.S1098P

View File

@ -0,0 +1,6 @@
project probe mutation splicing promoter cnv fusion long_indel chemotherapy_drug non_target_mutation
160gene /dataseq/jmdna/database/bed/160.bed AKT1/ALK/APC/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/CCND1/CCND2/CCND3/CDK12/CDK4/CDK6/CDKN2A/CHEK1/CHEK2/CSF1R/CTNNB1/DDR2/EGFR/ERBB2/ERBB3/ERBB4/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MET/MTOR/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RB1/RET/ROS1/SMAD4/SMO/STK11/TP53/TSC1/TSC2/VHL MET TERT CDK4/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/MDM2/MDM4/CDKN2A ALK/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 NA NA
650gene /dataseq/jmdna/database/bed/650.bed ABL1/AKT1/AKT2/AKT3/ALK/APC/ARAF/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/BTK/CCND1/CCND2/CCND3/CDK12/CDK4/CDK6/CDKN2A/CDKN2B/CHEK1/CHEK2/CSF1R/CTNNB1/DDR2/EGFR/ERBB2/ERBB3/ERBB4/ESR1/EZH2/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FGFR4/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MAP2K2/MET/MPL/MTOR/MYCN/MYD88/NF1/NF2/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PTCH1/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RAF1/RB1/RET/ROS1/SMAD4/SMARCB1/SMO/STK11/TP53/TSC1/TSC2/VHL MET TERT CDK4/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/MYCN/MDM2/MDM4/CDKN2A/CDKN2B ALK/BRAF/FGFR1/FGFR2/FGFR3/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 NA NA
lung85gene /dataseq/jmdna/database/bed/160.bed AKT1/ALK/ATM/BARD1/BCL2L11/BRAF/BRCA1/BRCA2/BRIP1/CCND1/CCND2/CDK12/CDK4/CDKN2A/CHEK1/CHEK2/CSF1R/DDR2/EGFR/ERBB2/ERBB4/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/HRAS/IDH1/KIT/KRAS/MET/MTOR/MYC/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PIK3CA/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RET/ROS1/TP53/TSC1/TSC2/VHL/STK11/APC/ARID1A/CTNNB1/EPCAM/ERBB3/MAP2K1/MLH1/MSH2/MSH6/PMS2/RB1/SMAD4/SMO MET NA ALK/CCND1/CCND2/CDK4/CDKN2A/CSF1R/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/MET/MYC/SMO ALK/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 ABCB1/CASP7/CDA/CYP2C8/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/MTHFR/NQO1/SOD2/TPMT/TYMS/UGT1A1/XRCC1/XPC/HAS3
crc88gene /dataseq/jmdna/database/bed/160.bed AKT1/ALK/APC/ARID1A/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/CCND2/CCND3/CDK12/CDK4/CDKN2A/CHEK1/CHEK2/CSF1R/CTNNB1/EGFR/EPCAM/ERBB2/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FLT3/IDH1/KDR/KIT/KRAS/MAP2K1/MET/MLH1/MSH2/MSH6/MTOR/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PIK3CA/PMS2/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RB1/RET/ROS1/SMAD4/SMO/TP53/TSC1/TSC2/VHL/POLD1/POLE/STK11/MUTYH/SDHA/SDHB/SDHC/SDHD MET NA ALK/CCND2/CCND3/CDK4/CDKN2A/CSF1R/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/PTEN/RB1 ALK/NTRK1/NTRK2/NTRK3/RET/ROS1 NA ABCB1/CASP7/CDA/CYP2C8/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/MTHFR/NQO1/SOD2/TPMT/TYMS/UGT1A1/XRCC1/XPC/HAS3 NA
17gene /dataseq/jmdna/database/bed/lung17gene.hg19.liftover.bed EGFR/BRAF/ERBB2/ALK/MET/AKT1/KRAS/MAP2K1/NRAS/PTEN/PIK3CA/NTRK1/NTRK2/NTRK3 MET NA ERBB2/MET/EGFR ALK/RET/ROS1/NTRK1/NTRK2/NTRK3 BCL2L11 NA lung

View File

@ -0,0 +1,33 @@
import argparse
import os
run_wdl_path = os.path.join(os.path.dirname(__file__), 'run_wdl.py')
if __name__ == '__main__':
    # Local import: only needed here to quote arguments for the shell.
    import shlex

    parser = argparse.ArgumentParser(description="JM to run pipeline")
    parser.add_argument('-n', '--barcode', help="sample's barcode", required=True)
    parser.add_argument('-s', '--normal', help="sample's normal", default='', required=False, nargs='?')
    parser.add_argument('-u', '--umi', action='store_true', help="is umi sample", default=False)
    parser.add_argument('-i', '--input_dir', help="sample's input_dir/workdir", required=True)
    parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='./')
    parser.add_argument('-p', '--project', help="project", required=True)
    parser.add_argument('-b', '--bed', help="bed", required=True)
    parser.add_argument('-w', '--wdl', help="wdl")
    args = parser.parse_args()

    # Per-sample result directory; create missing parents as well.
    res_path = os.path.realpath(os.path.join(args.output_dir, args.barcode))
    os.makedirs(res_path, exist_ok=True)
    log_path = os.path.join(res_path, f'{args.barcode}_run.log')

    # Build the run_wdl.py command as an argument list, then quote each item,
    # so values containing spaces or shell metacharacters are passed intact.
    # Optional values are only forwarded when set; previously an unset
    # -s / -w was forwarded as the literal string "None".
    argv = ['python', run_wdl_path, '-n', args.barcode]
    if args.normal:
        argv += ['-s', args.normal]
    if args.umi:
        argv.append('-u')
    argv += ['-i', args.input_dir, '-o', res_path, '-b', args.bed, '-p', args.project]
    if args.wdl:
        argv += ['-w', args.wdl]

    # Run in the background; stdout and stderr share one descriptor so their
    # output cannot overwrite each other in the log file.
    cmd = 'nohup ' + ' '.join(shlex.quote(item) for item in argv) \
          + f' > {shlex.quote(log_path)} 2>&1 &'

    # Record the exact command next to the results for later inspection.
    with open(os.path.join(res_path, 'exec'), 'w') as execfile:
        execfile.write(cmd + '\n')
    os.system(cmd)

70
script/run_wdl.py 100755
View File

@ -0,0 +1,70 @@
import argparse
import json
import os
import subprocess
import time
def run(barcode, normal, umi, input_dir, output_dir, project, bed, wdl):
    """Write the Cromwell inputs JSON for one sample and run the workflow.

    Args:
        barcode: tumor sample name (``pipeline.tumor``); also names the JSON file.
        normal: normal sample name (``pipeline.normal``); omitted when empty.
        umi: truthy for UMI samples (``pipeline.umi``); omitted when False.
        input_dir: input directory, resolved with ``os.path.realpath``.
        output_dir: output directory; receives ``<barcode>.json`` and ``pid``.
        project: project name (``pipeline.project``).
        bed: BED file (``pipeline.bed``).
        wdl: path of the WDL workflow handed to Cromwell.

    The command's stdout and stderr are captured and printed after it ends.
    """
    # Local import: only needed here to quote paths for the shell.
    import shlex

    input_dir = os.path.realpath(input_dir)
    output_dir = os.path.realpath(output_dir)
    wdl = os.path.realpath(wdl)
    arg = {
        "pipeline.tumor": barcode,
        "pipeline.normal": normal,
        "pipeline.umi": umi,
        "pipeline.input_dir": input_dir,
        "pipeline.output_dir": output_dir,
        "pipeline.project": project,
        "pipeline.bed": bed
    }
    # Drop unset inputs so they do not appear in the JSON at all.
    arg = {key: value for key, value in arg.items() if value not in (None, '', False)}

    # generate json
    jsfile_path = os.path.join(output_dir, f'{barcode}.json')
    with open(jsfile_path, 'w') as jsfile:
        jsfile.write(json.dumps(arg, indent=4, ensure_ascii=False))

    # run pipeline
    cmd1 = 'export PATH=/home/zhangchao/project/pipeline/workflow/script:$PATH'
    cmd2 = 'export PUBLIC=/home/zhangchao/project/pipeline/workflow/script/public/'
    cmd3 = f'cd {shlex.quote(output_dir)}'
    cmd4 = f'/home/zhangchao/soft/jdk-17.0.7+7/bin/java -jar /home/zhangchao/soft/cromwell-85.jar run ' \
           f'--inputs {shlex.quote(jsfile_path)} {shlex.quote(wdl)}'
    cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}'

    # Record the start time.
    start_time = time.time()
    print(cmd)
    ret = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8")
    # With shell=True this is the pid of the spawned shell.
    pidnum = ret.pid
    with open(os.path.join(output_dir, 'pid'), 'w') as pidfile:
        pidfile.write(str(pidnum))

    # communicate() drains both pipes while waiting. wait() followed by
    # read() can deadlock once the child fills a pipe buffer.
    stdout_text, stderr_text = ret.communicate()

    # Record the end time and compute the elapsed time.
    end_time = time.time()
    elapsed_time = end_time - start_time
    print("\n运行时间:{:.2f}".format(elapsed_time))
    print(stdout_text, stderr_text)
    print('#' * 50)
    print('读取日志')
if __name__ == '__main__':
    # Command-line options, declared as (flags, keyword arguments) pairs in
    # the order they appear in the help output.
    option_table = [
        (('-n', '--barcode'), dict(help="sample's barcode", required=True)),
        (('-s', '--normal'), dict(help="sample's normal", default='', required=False, nargs='?')),
        (('-u', '--umi'), dict(action='store_true', help="is umi sample", default=False)),
        (('-i', '--input_dir'), dict(help="sample's input_dir/workdir", required=True)),
        (('-o', '--output_dir'), dict(help="Output directory, default ./", default='./')),
        (('-p', '--project'), dict(help="project", required=True)),
        (('-b', '--bed'), dict(help="bed", required=True)),
        (('-w', '--wdl'), dict(help="wdl")),
    ]
    cli = argparse.ArgumentParser(description="JM to run pipeline")
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    opts = cli.parse_args()

    # Hand every parsed option to run() by name.
    run(barcode=opts.barcode,
        normal=opts.normal,
        umi=opts.umi,
        input_dir=opts.input_dir,
        output_dir=opts.output_dir,
        project=opts.project,
        bed=opts.bed,
        wdl=opts.wdl)

View File

@ -10,7 +10,6 @@ task mutation_calling_umi {
mkdir ${output_dir}/mutation mkdir ${output_dir}/mutation
fi fi
#1条call #1条call
# 这个情况是reads数目只有1但是如果去掉了这个reads数导致数据量减少很多 # 这个情况是reads数目只有1但是如果去掉了这个reads数导致数据量减少很多
# -r 3 是指有3条这样样的reads支撑 # -r 3 是指有3条这样样的reads支撑
@ -27,7 +26,7 @@ task mutation_calling_umi {
-N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf
#提取>=2条矫正的序列 #提取>=2条矫正的序列
python3 /home/zhangchao/project/pipeline/control/script/fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam func_fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam
samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam
# 保证 1r call mut umi family 里面有2条reads # 保证 1r call mut umi family 里面有2条reads
@ -38,25 +37,15 @@ task mutation_calling_umi {
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf
#merge突变以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域AF<0.01)进行矫正。 #merge突变以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域AF<0.01)进行矫正。
perl /home/zhangchao/project/pipeline/control/script/1r_plus_2r.pl \ filter_snpindel_umi_1r_plus_2r.pl \
${output_dir}/mutation/${name}.1r.snp.indel.vcf \ ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
${output_dir}/mutation/${name}.2r.snp.indel.vcf \ ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
${output_dir}/mutation/${name}.snp.indel.vcf ${output_dir}/mutation/${name}.snp.indel.vcf
table_annovar.pl \
${output_dir}/mutation/${name}.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
>>> >>>
output { output {
String vcf = "${output_dir}/mutation/${name}.filter.flag.snp.indel.vcf" String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
} }
} }
@ -86,16 +75,6 @@ task mutation_calling_tissue {
-c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ -c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf |/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf
table_annovar.pl \
${output_dir}/mutation/${name}.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
-operation g,f,f,f,f,f,f,f,f,f \
--intronhgvs 50 \
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
>>> >>>
output { output {
@ -124,15 +103,73 @@ task mutation_calling_tissue_control {
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \ -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf
table_annovar.pl \ >>>
${output_dir}/mutation/${name}.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \ output {
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \ String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \ }
-argument '-splicing_threshold 2 -hgvs',,,,,,,,, \ }
-operation g,f,f,f,f,f,f,f,f,f \
--intronhgvs 50 \ task mutation_calling_umi_control {
--outfile ${output_dir}/mutation/${name}.snp.indel.anno String name
String bed
String ref
String output_dir
String tumor_rmdup_bam
String normal_rmdup_bam
command <<<
if [ ! -d ${output_dir}/mutation ];then
mkdir ${output_dir}/mutation
fi
# 对照样本
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
-G ${ref} \
-f 0.01 \
-N ${name} \
-b ${normal_rmdup_bam} \
-UN \
-Q 20 \
-m 3 \
-r 3 \
-th 10 \
-c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}_normal.snp.indel.vcf
# 实验样本
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
-G ${ref} \
-f 0.001 \
-N ${name} \
-b ${tumor_rmdup_bam} \
-UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \
-N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf
#提取>=2条矫正的序列
func_fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam
samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam
# 保证 1r call mut umi family 里面有2条reads
#2条矫正的call
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
-f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
-UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf
#merge突变以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域AF<0.01)进行矫正。
filter_snpindel_umi_1r_plus_2r.pl \
${output_dir}/mutation/${name}.1r.snp.indel.vcf \
${output_dir}/mutation/${name}.2r.snp.indel.vcf \
${output_dir}/mutation/${name}.snp.indel.vcf
# 去除normal 中的突变位点
filter_snpindel_umi_subnormal.pl \
${output_dir}/mutation/${name}_tumor.snp.indel.vcf \
${output_dir}/mutation/${name}_normal.snp.indel.vcf \
${output_dir}/mutation/${name}.snp.indel.vcf
>>> >>>
@ -152,64 +189,59 @@ workflow call_mutation {
String ref String ref
String bed String bed
scatter(name in [tumor, normal]) { # 双样本
if (defined(name)) { if (defined(normal)) {
if (name==tumor) { if (umi) {
if (umi) { call mutation_calling_umi_control {
call mutation_calling_umi as tumor_mutation_calling_umi { input:
input: name=tumor,
name=name, output_dir=output_dir,
output_dir=output_dir, ref=ref,
ref=ref, bed=bed,
bed=bed, tumor_rmdup_bam=tumor_rmdup_bam,
rmdup_bam=tumor_rmdup_bam normal_rmdup_bam=normal_rmdup_bam
}
}
if (!umi) {
# 单样本模式normal没有定义
if (name==select_first([normal, tumor])) {
call mutation_calling_tissue as tumor_mutation_calling_tissue {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=normal_rmdup_bam
}
}
# 双样本模式normal有定义
if (name!=select_first([normal, tumor])) {
call mutation_calling_tissue_control as tumor_mutation_calling_tissue_control {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
tumor_rmdup_bam=tumor_rmdup_bam,
normal_rmdup_bam=normal_rmdup_bam
}
}
}
}
if (name==select_first([normal, 'None'])) {
if (umi) {
call mutation_calling_tissue as normal_mutation_calling_tissue {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=normal_rmdup_bam
}
}
} }
}
if (!umi) {
call mutation_calling_tissue_control {
input:
name=tumor,
output_dir=output_dir,
ref=ref,
bed=bed,
tumor_rmdup_bam=tumor_rmdup_bam,
normal_rmdup_bam=normal_rmdup_bam
}
}
}
# 单样本
if (!defined(normal)) {
if (umi) {
call mutation_calling_umi {
input:
name=tumor,
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=tumor_rmdup_bam
}
}
if (!umi) {
call mutation_calling_tissue {
input:
name=tumor,
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=normal_rmdup_bam
}
} }
} }
output { output {
String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf" String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf"
String somatic_nc_vcf = "${output_dir}/mutation/${normal}.snp.indel.vcf"
} }
} }

View File

@ -6,130 +6,92 @@ task create_dir {
if [ ! -d ${workdir} ];then if [ ! -d ${workdir} ];then
mkdir -p ${workdir}/log mkdir -p ${workdir}/log
fi fi
>>> >>>
} }
task mutation_calling {
String name
String tumor_rmdupBam
String normal_rmdupBam
String outputDir
String bed
command <<<
if [ ! -d ${outputDir}/mutation ];then
mkdir ${outputDir}/mutation
fi
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
-G /dataseq/jmdna/database/genome/hg19/hg19.fa \
-f 0.01 \
-N ${name} \
-b "${tumor_rmdupBam}|${normal_rmdupBam}" \
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | \
/dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R | \
/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \
> ${outputDir}/mutation/${name}_vardict.snp.indel.vcf
vep \
--input_file ${outputDir}/mutation/${name}_vardict.snp.indel.vcf \
--output_file ${outputDir}/mutation/${name}_vardict_vep.snp.indel.vcf \
--format vcf \
--vcf \
--symbol \
--terms SO \
--hgvs \--fasta /dataseq/jmdna/database/genome/hg19/hg19.fa \
--offline --cache --dir_cache /home/software/.vep \
--pick \
--force_overwrite
>>>
output {
String somatic_hc_vcf = "${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf"
String germline_vcf="${outputDir}/mutation/${name}.snp.indel.Germline.vcf"
String loh_hc_vcf="${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf"
}
}
task annovar { task annovar {
String name String prefix
String outputDir String output_dir
String ref String ref
String somatic_hc_vcf String vcf
String germline_vcf
String loh_hc_vcf
String rmdupBam
command <<< command <<<
if [ ! -d ${outputDir}/mutation ];then if [ ! -d ${output_dir}/mutation ];then
mkdir ${outputDir}/mutation mkdir ${output_dir}/mutation
fi fi
table_annovar.pl \ table_annovar.pl \
${somatic_hc_vcf} \ ${vcf} \
/dataseq/jmdna/software/annovar/humandb/ \ /dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \ -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \ -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \ --intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \ -operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${outputDir}/mutation/${name}.snp.indel.Somatic.anno --outfile ${output_dir}/mutation/${prefix}
table_annovar.pl \
${germline_vcf} \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${outputDir}/mutation/${name}.snp.indel.Germline.anno
table_annovar.pl \
${loh_hc_vcf} \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${outputDir}/mutation/${name}.snp.indel.LOH.anno
java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -T VariantAnnotator \
-R ${ref} \
-I ${rmdupBam} \
-V ${somatic_hc_vcf} \
-o ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \
--annotation TandemRepeatAnnotator
grep -v "^##" ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \
|cut -f8| paste ${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt - \
> ${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt
>>> >>>
output { output {
String somatic_anno = "${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt" String anno = "${output_dir}/mutation/${prefix}.hg19_multianno.txt"
String germline_anno = "${outputDir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno.txt" }
String somatic_all_anno = "${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt" }
task dealwithsnvindel {
String name
String anno
String project
String output_dir
String umi
String tumor_rmdup_bam
command <<<
if [ ! -d ${output_dir}/mutation ];then
mkdir ${output_dir}/mutation
fi
if ${umi} ;then
filter_snpindel_common.pl \
${anno} \
${project} \
c \
${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered_pre.txt \
${output_dir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt \
${output_dir}/mutation/${name}.snp.indel.anno.hg19_multianno_tag.txt
filter_snpindel_umi_correct_f1r1.py \
${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered_pre.txt \
${tumor_rmdup_bam} \
${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered.txt
else
filter_snpindel_common.pl \
${anno} \
${project} \
t \
${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered.txt \
${output_dir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt \
${output_dir}/mutation/${name}.snp.indel.anno.hg19_multianno_tag.txt
>>>
output {
String snvindel_filtered= "${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered.txt"
String germline_filtered = "${output_dir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt"
} }
} }
task tmb { task tmb {
String codesDir String codesDir
String name String name
String outputDir String output_dir
String somatic_anno String somatic_anno
command <<< command <<<
perl ${codesDir}/tmb.pl ${outputDir} ${name} perl ${codesDir}/tmb.pl ${output_dir} ${name}
>>> >>>
output { output {
String tmb="${outputDir}/mutation/${name}.tmb.txt" String tmb="${output_dir}/mutation/${name}.tmb.txt"
} }
} }
@ -137,7 +99,7 @@ task fusion {
String name String name
String ref String ref
String codesDir String codesDir
String outputDir String output_dir
String rmdupBam String rmdupBam
String cancer String cancer
String project String project
@ -145,48 +107,48 @@ task fusion {
command <<< command <<<
if [ ! -d ${outputDir}/fusion ];then if [ ! -d ${output_dir}/fusion ];then
mkdir ${outputDir}/fusion mkdir ${output_dir}/fusion
fi fi
# Extract the discordant paired-end alignments. # Extract the discordant paired-end alignments.
samtools view -b -F 1294 ${rmdupBam} > ${outputDir}/fusion/${name}.discordants.bam samtools view -b -F 1294 ${rmdupBam} > ${output_dir}/fusion/${name}.discordants.bam
# Extract the split-read alignments # Extract the split-read alignments
samtools view -h ${rmdupBam} \ samtools view -h ${rmdupBam} \
| /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \ | /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
| samtools view -Sb - \ | samtools view -Sb - \
> ${outputDir}/fusion/${name}.splitters.bam > ${output_dir}/fusion/${name}.splitters.bam
lumpyexpress \ lumpyexpress \
-B ${rmdupBam} \ -B ${rmdupBam} \
-S ${outputDir}/fusion/${name}.splitters.bam \ -S ${output_dir}/fusion/${name}.splitters.bam \
-D ${outputDir}/fusion/${name}.discordants.bam \ -D ${output_dir}/fusion/${name}.discordants.bam \
-o ${outputDir}/fusion/${name}.fusion.vcf -o ${output_dir}/fusion/${name}.fusion.vcf
perl ${codesDir}/fusion.filter.pl ${outputDir}/fusion/${name}.fusion.vcf ${outputDir}/fusion/${name}.fusion.filter.vcf perl ${codesDir}/fusion.filter.pl ${output_dir}/fusion/${name}.fusion.vcf ${output_dir}/fusion/${name}.fusion.filter.vcf
svtyper \ svtyper \
-B ${rmdupBam} \ -B ${rmdupBam} \
-i ${outputDir}/fusion/${name}.fusion.filter.vcf \ -i ${output_dir}/fusion/${name}.fusion.filter.vcf \
-T ${ref} \ -T ${ref} \
-o ${outputDir}/fusion/${name}.fusion.gt.vcf -o ${output_dir}/fusion/${name}.fusion.gt.vcf
table_annovar.pl \ table_annovar.pl \
${outputDir}/fusion/${name}.fusion.gt.vcf \ ${output_dir}/fusion/${name}.fusion.gt.vcf \
/dataseq/jmdna/software/annovar/humandb/ \ /dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene \ -protocol refGene \
-operation g \ -operation g \
--outfile ${outputDir}/fusion/${name}.fusion --outfile ${output_dir}/fusion/${name}.fusion
perl ${codesDir}/fusion.reanno.pl ${tumor_bamdst_depth} ${outputDir} ${name} perl ${codesDir}/fusion.reanno.pl ${tumor_bamdst_depth} ${output_dir} ${name}
perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codesDir} ${name} ${outputDir} ${project} ${cancer} perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codesDir} ${name} ${output_dir} ${project} ${cancer}
>>> >>>
output { output {
String fusion = "${outputDir}/fusion/${name}.fusion.pos.txt" String fusion = "${output_dir}/fusion/${name}.fusion.pos.txt"
} }
} }
@ -195,7 +157,7 @@ task tumor_content {
String tumor_pileup String tumor_pileup
String normal_pileup String normal_pileup
String ref String ref
String outputDir String output_dir
String codesDir String codesDir
String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz" String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz"
@ -206,16 +168,16 @@ task tumor_content {
-F ${ref} \ -F ${ref} \
-n ${normal_pileup} \ -n ${normal_pileup} \
-t ${tumor_pileup} \ -t ${tumor_pileup} \
| gzip > ${outputDir}/qc/target_${name}.200base.seqz.gz | gzip > ${output_dir}/qc/target_${name}.200base.seqz.gz
sequenza-utils seqz_binning -w 200 -s ${outputDir}/qc/target_${name}.200base.seqz.gz \ sequenza-utils seqz_binning -w 200 -s ${output_dir}/qc/target_${name}.200base.seqz.gz \
| gzip > ${outputDir}/qc/target_${name}.200base.small.seqz.gz | gzip > ${output_dir}/qc/target_${name}.200base.small.seqz.gz
Rscript ${codesDir}/sequenza.R ${name} ${outputDir}/qc/target_${name}.200base.small.seqz.gz ${outputDir}/qc/sequenza || echo "sequenza failed!" Rscript ${codesDir}/sequenza.R ${name} ${output_dir}/qc/target_${name}.200base.small.seqz.gz ${output_dir}/qc/sequenza || echo "sequenza failed!"
>>> >>>
output { output {
String purity = "${outputDir}/qc/sequenza/${name}_CP_contours.pdf" String purity = "${output_dir}/qc/sequenza/${name}_CP_contours.pdf"
} }
} }
@ -228,7 +190,7 @@ task cnvkit {
String normal_rmdupBam String normal_rmdupBam
String ref String ref
String bed String bed
String outputDir String output_dir
String cancer String cancer
String codesDir String codesDir
String project String project
@ -237,8 +199,8 @@ task cnvkit {
command <<< command <<<
if [ ! -d ${outputDir}/cnvkit ];then if [ ! -d ${output_dir}/cnvkit ];then
mkdir ${outputDir}/cnvkit mkdir ${output_dir}/cnvkit
fi fi
cnvkit.py batch \ cnvkit.py batch \
@ -247,149 +209,128 @@ task cnvkit {
--targets ${bed} \ --targets ${bed} \
--fasta ${ref} \ --fasta ${ref} \
--access ${accessBed} \ --access ${accessBed} \
--output-reference ${outputDir}/cnvkit/${normal}_reference.cnn \ --output-reference ${output_dir}/cnvkit/${normal}_reference.cnn \
--annotate ${annotateGene} \ --annotate ${annotateGene} \
--drop-low-coverage --scatter --output-dir ${outputDir}/cnvkit --drop-low-coverage --scatter --output-dir ${output_dir}/cnvkit
cnvkit.py scatter \ cnvkit.py scatter \
${outputDir}/cnvkit/${tumor}.rmdup.cnr -s ${outputDir}/cnvkit/${tumor}.rmdup.cns \ ${output_dir}/cnvkit/${tumor}.rmdup.cnr -s ${output_dir}/cnvkit/${tumor}.rmdup.cns \
--y-max 3 --y-min -3 \ --y-max 3 --y-min -3 \
--title ${tumor}.cns \ --title ${tumor}.cns \
-o ${outputDir}/cnvkit/${tumor}.cnv.png -o ${output_dir}/cnvkit/${tumor}.cnv.png
perl ${codesDir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn perl ${codesDir}/log2_cn.pl ${output_dir}/cnvkit/${tumor}.rmdup.cns ${output_dir}/cnvkit/${tumor}.rmdup.cns.cn
perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codesDir} ${tumor} ${outputDir} ${project} ${cancer} perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codesDir} ${tumor} ${output_dir} ${project} ${cancer}
>>> >>>
output { output {
String cns = "${outputDir}/cnvkit/${tumor}.rmdup.cns" String cns = "${output_dir}/cnvkit/${tumor}.rmdup.cns"
String png = "${outputDir}/cnvkit/${tumor}.cnv.png" String png = "${output_dir}/cnvkit/${tumor}.cnv.png"
} }
} }
# Task chemo: ensures <output dir>/chemo exists, then runs chemo_panel.py for the
# normal sample. Each line below carries the pre-change text followed by the
# post-change text (outputDir renamed to output_dir).
#
# Inputs:
#   codesDir    directory containing chemo/chemo_panel.py
#   outputDir /
#   output_dir  analysis root, passed to the script via -o
#   project     passed to the script via -p
#   normal      normal sample name, passed via --n
#   rmdupBam    not referenced in the command; presumably declared only to
#               order this task after the BAM is produced - TODO confirm
# No output section is declared for this task.
task chemo { task chemo {
String codesDir String codesDir
String outputDir String output_dir
String project String project
String normal String normal
String rmdupBam String rmdupBam
command <<< command <<<
# mkdir is used without -p, so the parent output directory must already exist.
if [ ! -d ${outputDir}/chemo ];then if [ ! -d ${output_dir}/chemo ];then
mkdir ${outputDir}/chemo mkdir ${output_dir}/chemo
fi fi
${codesDir}/chemo/chemo_panel.py -p ${project} -o ${outputDir} --n ${normal} ${codesDir}/chemo/chemo_panel.py -p ${project} -o ${output_dir} --n ${normal}
>>> >>>
} }
# Run msisensor2 on the tumor/normal BAM pair, restricted to the regions in bed.
#
# Inputs:
#   bed              region file passed to msisensor2 via -e
#   name             sample name used as the output file prefix
#   output_dir       analysis root; results are written to <output_dir>/msi
#   tumor_rmdupBam   tumor BAM (-t)
#   normal_rmdupBam  normal BAM (-n)
#
# Output:
#   target  path of the msisensor2 result. It uses the same lower-case "msi"
#           directory the command writes to; the previous "MSI" spelling named
#           a directory this task never creates.
task msi {
    String bed
    String name
    String output_dir
    String tumor_rmdupBam
    String normal_rmdupBam

    command <<<
        if [ ! -d ${output_dir}/msi ];then
            mkdir ${output_dir}/msi
        fi

        msisensor2 msi -d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list \
            -n ${normal_rmdupBam} \
            -t ${tumor_rmdupBam} \
            -e ${bed} -b 10 -o ${output_dir}/msi/${name}.msi
    >>>

    output {
        String target="${output_dir}/msi/${name}.msi"
    }
}
# HLA typing of the normal sample: fish HLA reads out of each FASTQ mate with
# razers3, convert the hits back to FASTQ, then run OptiType on the pair.
#
# Inputs:
#   inputDir    directory holding the raw FASTQ files (*_<normal>_*1.fq.gz / *2.fq.gz)
#   output_dir  analysis root; results are written to <output_dir>/neoantigen/HLA
#   normal      normal sample name, used in the FASTQ glob and as the OptiType prefix
#
# Output:
#   hla  path string of the OptiType result table
task hla {
    String inputDir
    String output_dir
    String normal

    command <<<
        # Create the HLA directory unconditionally. The earlier guard tested only
        # for <output_dir>/neoantigen, so an existing parent left HLA/ missing.
        mkdir -p ${output_dir}/neoantigen/HLA

        # Mate 1: fish HLA reads, convert to FASTQ, drop the intermediate BAM.
        razers3 -tc 10 -i 95 -m 1 -dr 0 \
            -o ${output_dir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \
            ${inputDir}/*_${normal}_*1.fq.gz
        samtools bam2fq ${output_dir}/neoantigen/HLA/fished_1.bam > ${output_dir}/neoantigen/HLA/${normal}_1_fished.fastq
        rm ${output_dir}/neoantigen/HLA/fished_1.bam

        # Mate 2: same procedure.
        razers3 -tc 10 -i 95 -m 1 -dr 0 \
            -o ${output_dir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \
            ${inputDir}/*_${normal}_*2.fq.gz
        samtools bam2fq ${output_dir}/neoantigen/HLA/fished_2.bam > ${output_dir}/neoantigen/HLA/${normal}_2_fished.fastq
        rm ${output_dir}/neoantigen/HLA/fished_2.bam

        /dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py \
            -i ${output_dir}/neoantigen/HLA/${normal}_1_fished.fastq ${output_dir}/neoantigen/HLA/${normal}_2_fished.fastq \
            --dna -v --prefix ${normal} -o ${output_dir}/neoantigen/HLA/
    >>>

    output {
        String hla = "${output_dir}/neoantigen/HLA/${normal}_result.tsv"
    }
}
# Task neoantigen: runs predict_neoantigen.sh with the output directory, the
# sample name (passed twice) and codesDir. Each line below carries the
# pre-change text followed by the post-change text (outputDir renamed to
# output_dir); the last two closing braces appear in the left column only.
#
# Inputs:
#   codesDir        passed as the fourth argument to the script
#   outputDir /
#   output_dir      analysis root, passed as the first argument
#   name            sample name, passed as both the second and third argument
#   normal, somatic_hc_vcf, hla
#                   not referenced in the command; presumably declared only to
#                   order this task after their producers - TODO confirm
# Output:
#   neoantigen      path string of the netchop epitope table under
#                   <output dir>/neoantigen/MHC_Class_I/
#
# NOTE(review): the script is invoked from a hard-coded path under a user's home
# test directory rather than from codesDir - confirm this is intended.
task neoantigen { task neoantigen {
String codesDir String codesDir
String outputDir String output_dir
String name String name
String normal String normal
String somatic_hc_vcf String somatic_hc_vcf
String hla String hla
command <<< command <<<
sh /home/jm001/test_duantao/database_update/test_project/20230814_test/predict_neoantigen.sh ${outputDir} ${name} ${name} ${codesDir} sh /home/jm001/test_duantao/database_update/test_project/20230814_test/predict_neoantigen.sh ${output_dir} ${name} ${name} ${codesDir}
>>> >>>
output { output {
String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${name}.all_epitopes.netchop.txt" String neoantigen = "${output_dir}/neoantigen/MHC_Class_I/${name}.all_epitopes.netchop.txt"
}
}
# Task dealwithsnvindel (this definition appears in the left-hand column only -
# presumably the pre-change version of the task; verify against the commit).
# Runs four Perl scripts in sequence: variant picking, splice/promoter picking,
# and the somatic and germline targeted-therapy annotators.
#
# Inputs:
#   codesDir          directory containing pick_variant.pl and
#                     pick_mut_splice_promoter.pl
#   name              sample name forwarded to every script
#   outputDir         analysis root forwarded to every script
#   project, cancer   forwarded to the targeted-therapy scripts (project also to
#                     pick_mut_splice_promoter.pl)
#   somatic_all_anno, germline_anno
#                     not referenced in the command; presumably declared only to
#                     order this task after annotation - TODO confirm
# Outputs:
#   snvindel_filtered  path string of the filtered somatic (annoall) table
#   germline_filtered  path string of the filtered germline table
#
# NOTE(review): the two targetTherapy scripts are invoked from a hard-coded path
# under a user's home test directory rather than from codesDir.
task dealwithsnvindel {
String codesDir
String name
String somatic_all_anno
String germline_anno
String project
String outputDir
String cancer
command <<<
perl ${codesDir}/pick_variant.pl ${outputDir} ${name}
perl ${codesDir}/pick_mut_splice_promoter.pl ${codesDir} ${name} ${outputDir} ${project}
perl /home/jm001/test_duantao/database_update/codes/682/targetTherapy.pl ${name} ${outputDir} ${project} ${cancer}
perl /home/jm001/test_duantao/database_update/codes/682/germline_targetTherapy.pl ${name} ${outputDir} ${project} ${cancer}
>>>
output {
String snvindel_filtered= "${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt"
String germline_filtered = "${outputDir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt"
} }
} }
# Task hereditary: runs hereditary/hereditary.py for one sample. Each line below
# carries the pre-change text followed by the post-change text (outputDir
# renamed to output_dir).
#
# Inputs:
#   codesDir           directory containing hereditary/hereditary.py
#   name               sample name, passed via --n
#   outputDir /
#   output_dir         analysis root, passed via -o
#   project            passed via -p
#   germline_filtered  not referenced in the command; presumably declared only
#                      to order this task after germline filtering - TODO confirm
# Output:
#   hereditary_pre     path string <output dir>/hereditary/<name>.hereditary.pre.txt
task hereditary { task hereditary {
String codesDir String codesDir
String name String name
String outputDir String output_dir
String project String project
String germline_filtered String germline_filtered
command <<< command <<<
${codesDir}/hereditary/hereditary.py -p ${project} -o ${outputDir} --n ${name} ${codesDir}/hereditary/hereditary.py -p ${project} -o ${output_dir} --n ${name}
>>> >>>
output { output {
String hereditary_pre = "${outputDir}/hereditary/${name}.hereditary.pre.txt" String hereditary_pre = "${output_dir}/hereditary/${name}.hereditary.pre.txt"
} }
} }
@ -398,101 +339,101 @@ task conpair {
String name String name
String tumor_rmdupBam String tumor_rmdupBam
String normal_rmdupBam String normal_rmdupBam
String outputDir String output_dir
String ref String ref
command <<< command <<<
if [ ! -d ${outputDir}/conpair ];then if [ ! -d ${output_dir}/conpair ];then
mkdir -p ${outputDir}/conpair mkdir -p ${output_dir}/conpair
fi fi
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \ python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \ -M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
-B ${tumor_rmdupBam} \ -B ${tumor_rmdupBam} \
-O ${outputDir}/conpair/${name}.tumor.gatk.mpileup \ -O ${output_dir}/conpair/${name}.tumor.gatk.mpileup \
-R ${ref} \ -R ${ref} \
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \ python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \ -M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
-B ${normal_rmdupBam} \ -B ${normal_rmdupBam} \
-O ${outputDir}/conpair/${name}.normal.gatk.mpileup \ -O ${output_dir}/conpair/${name}.normal.gatk.mpileup \
-R ${ref} \ -R ${ref} \
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
sed -i 's/^chr//g' ${outputDir}/conpair/${name}.tumor.gatk.mpileup sed -i 's/^chr//g' ${output_dir}/conpair/${name}.tumor.gatk.mpileup
sed -i 's/^chr//g' ${outputDir}/conpair/${name}.normal.gatk.mpileup sed -i 's/^chr//g' ${output_dir}/conpair/${name}.normal.gatk.mpileup
python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \ python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \
-H \ -H \
-T ${outputDir}/conpair/${name}.tumor.gatk.mpileup \ -T ${output_dir}/conpair/${name}.tumor.gatk.mpileup \
-N ${outputDir}/conpair/${name}.normal.gatk.mpileup \ -N ${output_dir}/conpair/${name}.normal.gatk.mpileup \
-O ${outputDir}/conpair/${name}_concordance.txt -O ${output_dir}/conpair/${name}_concordance.txt
python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \ python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \
-T ${outputDir}/conpair/${name}.tumor.gatk.mpileup \ -T ${output_dir}/conpair/${name}.tumor.gatk.mpileup \
-N ${outputDir}/conpair/${name}.normal.gatk.mpileup \ -N ${output_dir}/conpair/${name}.normal.gatk.mpileup \
-O ${outputDir}/conpair/${name}_contamination.txt -O ${output_dir}/conpair/${name}_contamination.txt
>>> >>>
output { output {
String concordance = "${outputDir}/conpair/${name}_concordance.txt" String concordance = "${output_dir}/conpair/${name}_concordance.txt"
String contamination = "${outputDir}/conpair/${name}_contamination.txt" String contamination = "${output_dir}/conpair/${name}_contamination.txt"
} }
} }
# Task mmr: ensures <output dir>/MMR exists, then runs mmr_controlsample.pl with
# the output directory and sample name. Each line below carries the pre-change
# text followed by the post-change text (outputDir renamed to output_dir).
#
# Inputs:
#   codesDir           directory containing mmr_controlsample.pl
#   name               sample name
#   outputDir /
#   output_dir         analysis root
#   germline_filtered  not referenced in the command; presumably declared only
#                      to order this task after germline filtering - TODO confirm
# Output:
#   mmr                path string <output dir>/MMR/<name>_mmr.txt
task mmr { task mmr {
String codesDir String codesDir
String name String name
String outputDir String output_dir
String germline_filtered String germline_filtered
command <<< command <<<
if [ ! -d ${outputDir}/MMR ];then if [ ! -d ${output_dir}/MMR ];then
mkdir -p ${outputDir}/MMR mkdir -p ${output_dir}/MMR
fi fi
perl ${codesDir}/mmr_controlsample.pl ${outputDir} ${name} perl ${codesDir}/mmr_controlsample.pl ${output_dir} ${name}
>>> >>>
output { output {
String mmr = "${outputDir}/MMR/${name}_mmr.txt" String mmr = "${output_dir}/MMR/${name}_mmr.txt"
} }
} }
# Task hrr: ensures <output dir>/HRR exists, then runs
# hrr_controlsample_tissue.pl with the output directory and sample name. Each
# line below carries the pre-change text followed by the post-change text
# (outputDir renamed to output_dir).
#
# Inputs:
#   codesDir           directory containing hrr_controlsample_tissue.pl
#   name               sample name
#   outputDir /
#   output_dir         analysis root
#   germline_filtered  not referenced in the command; presumably declared only
#                      to order this task after germline filtering - TODO confirm
# Output:
#   hrr                path string <output dir>/HRR/<name>_hrr.txt
task hrr { task hrr {
String codesDir String codesDir
String name String name
String outputDir String output_dir
String germline_filtered String germline_filtered
command <<< command <<<
if [ ! -d ${outputDir}/HRR ];then if [ ! -d ${output_dir}/HRR ];then
mkdir -p ${outputDir}/HRR mkdir -p ${output_dir}/HRR
fi fi
perl ${codesDir}/hrr_controlsample_tissue.pl ${outputDir} ${name} perl ${codesDir}/hrr_controlsample_tissue.pl ${output_dir} ${name}
>>> >>>
output { output {
String hrr = "${outputDir}/HRR/${name}_hrr.txt" String hrr = "${output_dir}/HRR/${name}_hrr.txt"
} }
} }
task hotspot { task hotspot {
String name String name
String outputDir String output_dir
String ref String ref
String rmdupBam String rmdupBam
String codesDir String codesDir
command <<< command <<<
if [ ! -d ${outputDir}/mutation/hotspot/ ];then if [ ! -d ${output_dir}/mutation/hotspot/ ];then
mkdir -p ${outputDir}/mutation/hotspot/ mkdir -p ${output_dir}/mutation/hotspot/
fi fi
samtools mpileup -Bq 20 -Q 20 \ samtools mpileup -Bq 20 -Q 20 \
-f ${ref} \ -f ${ref} \
-l ${codesDir}/hotspot.bed \ -l ${codesDir}/hotspot.bed \
-o ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \ -o ${output_dir}/mutation/hotspot/${name}.hotspot.pileup \
${rmdupBam} ${rmdupBam}
java -jar $VARSCAN mpileup2cns \ java -jar $VARSCAN mpileup2cns \
${outputDir}/mutation/hotspot/${name}.hotspot.pileup \ ${output_dir}/mutation/hotspot/${name}.hotspot.pileup \
--min-var-freq 0.005 \ --min-var-freq 0.005 \
--min-avg-qual 20 \ --min-avg-qual 20 \
--output-vcf 1 \ --output-vcf 1 \
@ -500,10 +441,10 @@ task hotspot {
--p-value 0.99 \ --p-value 0.99 \
--min-reads2 2 \ --min-reads2 2 \
--strand-filter 0 \ --strand-filter 0 \
> ${outputDir}/mutation/hotspot/${name}.hotspot.L.snp.indel.vcf > ${output_dir}/mutation/hotspot/${name}.hotspot.L.snp.indel.vcf
java -jar $VARSCAN mpileup2cns \ java -jar $VARSCAN mpileup2cns \
${outputDir}/mutation/hotspot/${name}.hotspot.pileup \ ${output_dir}/mutation/hotspot/${name}.hotspot.pileup \
--min-var-freq 0.01 \ --min-var-freq 0.01 \
--min-avg-qual 20 \ --min-avg-qual 20 \
--output-vcf 1 \ --output-vcf 1 \
@ -511,32 +452,32 @@ task hotspot {
--p-value 0.05 \ --p-value 0.05 \
--min-reads2 3 \ --min-reads2 3 \
--strand-filter 1 \ --strand-filter 1 \
> ${outputDir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf > ${output_dir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf
perl ${codesDir}/hotspot.hvl.pl ${outputDir} ${name} perl ${codesDir}/hotspot.hvl.pl ${output_dir} ${name}
if [ -e "${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf" ]; then if [ -e "${output_dir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf" ]; then
table_annovar.pl \ table_annovar.pl \
${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf \ ${output_dir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \ /dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene \ -protocol refGene \
-argument '-hgvs' \ -argument '-hgvs' \
-operation g \ -operation g \
--outfile ${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.anno --outfile ${output_dir}/mutation/hotspot/${name}.hotspot.snp.indel.anno
perl ${codesDir}/hotspot.filter.pl ${outputDir} ${name} perl ${codesDir}/hotspot.filter.pl ${output_dir} ${name}
fi fi
>>> >>>
output { output {
String hotspot = "${outputDir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf" String hotspot = "${output_dir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf"
} }
} }
task auto_report { task auto_report {
String cancer String cancer
String codesDir String codesDir
String outputDir String output_dir
String normal String normal
String tumor String tumor
@ -554,28 +495,28 @@ task auto_report {
command <<< command <<<
if [ ! -d ${outputDir}/report ];then if [ ! -d ${output_dir}/report ];then
mkdir -p ${outputDir}/report mkdir -p ${output_dir}/report
fi fi
perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer} perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${output_dir} ${cancer}
python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor} python3 ${codesDir}/drug_dedup.py ${output_dir} ${tumor}
perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor} perl ${codesDir}/file_format_change.pl ${output_dir} ${tumor}
python3 ${codesDir}/report_template/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer} python3 ${codesDir}/report_template/682gene_tissue_control_report.py ${output_dir} ${tumor} ${normal} ${cancer}
ln -s ${cnv_cns} ${outputDir}/report/ ln -s ${cnv_cns} ${output_dir}/report/
ln -s ${cnv_png} ${outputDir}/report/ ln -s ${cnv_png} ${output_dir}/report/
ln -s ${fusion_pos} ${outputDir}/report/ ln -s ${fusion_pos} ${output_dir}/report/
ln -s ${snvindel_filtered} ${outputDir}/report/ ln -s ${snvindel_filtered} ${output_dir}/report/
ln -s ${tmb} ${outputDir}/report/ ln -s ${tmb} ${output_dir}/report/
ln -s ${mmr} ${outputDir}/report/ ln -s ${mmr} ${output_dir}/report/
ln -s ${hrr} ${outputDir}/report/ ln -s ${hrr} ${output_dir}/report/
ln -s ${hereditary_pre} ${outputDir}/report/ ln -s ${hereditary_pre} ${output_dir}/report/
>>> >>>
} }