处理过滤
parent
188e7dd8a4
commit
c55811e306
|
|
@ -0,0 +1,206 @@
|
||||||
|
#!/usr/bin/env perl
|
||||||
|
use strict;
|
||||||
|
#use warnings;
|
||||||
|
use List::Util qw(sum);
|
||||||
|
|
||||||
|
die "useage:perl $0 input project sample_type somtic_out germline_out tag_out " unless @ARGV == 6;
|
||||||
|
my ($input, $project, $sample_type, $somtic_out, $germline_out, $tag_out) = @ARGV;
|
||||||
|
|
||||||
|
# die "useage:perl $0 output_dir tumor project sample_type" unless @ARGV == 4;
|
||||||
|
# my ($output_dir, $name, $project, $sample_type) = @ARGV;
|
||||||
|
|
||||||
|
# open IN, "$output_dir/mutation/${name}.snp.indel.anno.hg19_multianno.txt";
|
||||||
|
open IN, "$input";
|
||||||
|
my $head = <IN>;
|
||||||
|
# if ($sample_type eq 'c') {
|
||||||
|
# open OUT, ">${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered_pre.txt";
|
||||||
|
# }
|
||||||
|
# elsif ($sample_type eq 't') {
|
||||||
|
# open OUT, ">${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt";
|
||||||
|
# }
|
||||||
|
|
||||||
|
open OUT, "> $somtic_out";
|
||||||
|
print OUT "可信\t$head";
|
||||||
|
|
||||||
|
# open OUT2, ">${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt";
|
||||||
|
open OUT2, ">$germline_out";
|
||||||
|
print OUT2 "临床意义\t$head";
|
||||||
|
|
||||||
|
# open OUT3, ">${name}.snp.indel.anno.hg19_multianno_tag.txt";
|
||||||
|
open OUT3, ">$tag_out";
|
||||||
|
print OUT3 "TAG\t$head";
|
||||||
|
|
||||||
|
##black list
|
||||||
|
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
|
||||||
|
open BKLT, "$public_path/blacklist.txt";
|
||||||
|
my %bk;
|
||||||
|
<BKLS>;
|
||||||
|
while (<BKLT>) {
|
||||||
|
chomp;
|
||||||
|
my @line = split("\t");
|
||||||
|
my $key = join("_", @line[0 .. 4]);
|
||||||
|
$bk{$key} = 1;
|
||||||
|
}
|
||||||
|
sub blacklist {
|
||||||
|
my $pos = shift @_;
|
||||||
|
if (exists $bk{$pos}) {
|
||||||
|
return "1";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
open INFO, "$public_path/info.txt";
|
||||||
|
my @muts;
|
||||||
|
while (<INFO>) {
|
||||||
|
chomp;
|
||||||
|
my @line = split(/\t/, $_);
|
||||||
|
if ($line[0] eq $project) {
|
||||||
|
if ($line[2] ne "NA") {
|
||||||
|
@muts = split(/\//, $line[2]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (<IN>) {
|
||||||
|
chomp;
|
||||||
|
my @line = split(/\t/, $_);
|
||||||
|
my $freq = (split(":", $line[-1]))[4];
|
||||||
|
# next if $line[9] eq '.';
|
||||||
|
if ($line[8] ne "synonymous SNV" and $line[8] ne "unknown"){
|
||||||
|
if ( $line[17] < 0.01 and $line[18] < 0.01 and $line[19] < 0.01 and $line[20] < 0.01 and $line[23] < 0.01 and $line[28] < 0.01 and $line[32] < 0.01){
|
||||||
|
if ($line[16] =~ /benign/i and $line[16] !~ /pathogenic|Affects|association|Conflicting|sensitivity|drug|other|risk|protective|Uncertain|not_provided|\./i) {
|
||||||
|
print OUT3 "benign\t", join("\t", @line), "\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
if ($sample_type eq 'c') {
|
||||||
|
if ($line[11] =~ /OCCURENCE=(\S+)/) {
|
||||||
|
my $cosmic = $1;
|
||||||
|
$cosmic =~ s/\(\S+?\)//g;
|
||||||
|
my @cosmic = split(",", $cosmic);
|
||||||
|
$cosmic = sum @cosmic;
|
||||||
|
if ($freq < 0.01 and $cosmic <= 1) {
|
||||||
|
print OUT3 "cfdna_lowfreq_cosmic\t", join("\t", @line), "\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ($freq < 0.01 and $line[11] eq '.') {
|
||||||
|
print OUT3 "cfdna_lowfreq_cosmic\t", join("\t", @line), "\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#blacklist
|
||||||
|
my $key = join("_", @line[0 .. 4]);
|
||||||
|
if (&blacklist($key)) {
|
||||||
|
print OUT3 "blacklist\t", join("\t", @line), "\n";
|
||||||
|
next;
|
||||||
|
};
|
||||||
|
if ($line[9] ne '.') {
|
||||||
|
my @hgvs = split(/,/, $line[9]);
|
||||||
|
my $hgvs = $hgvs[0];
|
||||||
|
$hgvs =~ /(\S+):(\S+):exon(\d+):c\.(\S+):p\.(\S+)$/;
|
||||||
|
my $gene = $1;
|
||||||
|
if (!(@muts and grep {$gene eq $_} @muts)) {
|
||||||
|
print OUT3 "nontarget_gene\t", join("\t", @line), "\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($line[101] ne 'PASS') {
|
||||||
|
my $filter = split(";", $line[101]);
|
||||||
|
if ($freq < 0.02 or ($freq >= 0.02 and $freq < 0.05 and $filter >= 2)) {
|
||||||
|
print OUT3 "byfilter\t", join("\t", @line), "\n";
|
||||||
|
next;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (my $transcript = &transcript($gene)) {
|
||||||
|
if (grep {/$transcript/} @hgvs) {
|
||||||
|
$hgvs = (grep {/$transcript/} @hgvs)[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$line[9] = $hgvs;
|
||||||
|
print OUT "1\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n";
|
||||||
|
print OUT3 "PASS\t", join("\t", @line), "\n";
|
||||||
|
if ($freq > 0.1) {
|
||||||
|
if ($line[16] =~ /Likely_pathogenic|drug/i) {
|
||||||
|
print OUT2 "2\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n";
|
||||||
|
}
|
||||||
|
elsif ($line[16] =~ /pathogenic/i and $line[16] !~ /Conflicting/i) {
|
||||||
|
print OUT2 "1\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
print OUT2 "3\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
elsif ($line[5] =~ /splicing/) {
|
||||||
|
next if $line[101] ne 'PASS';
|
||||||
|
my $gene = (split(";", $line[6]))[0];
|
||||||
|
if (!(@muts and grep {$gene eq $_} @muts)) {
|
||||||
|
print OUT3 "nontarget_gene\t", join("\t", @line), "\n";
|
||||||
|
next;
|
||||||
|
};
|
||||||
|
my @hgvs = split(/;/, $line[7]);
|
||||||
|
my $hgvs = $hgvs[0];
|
||||||
|
if (my $transcript = &transcript($gene)) {
|
||||||
|
if (grep {/$transcript/} @hgvs) {
|
||||||
|
$hgvs = (grep {/$transcript/} @hgvs)[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$hgvs =~ /(\S+):exon(\d+):c\.(\S+)$/;
|
||||||
|
my $spl = $3;
|
||||||
|
|
||||||
|
if ($spl =~ /\d+[\+|\-][1|2]\D+/) {
|
||||||
|
$line[7] = join(":", ($gene, $hgvs));
|
||||||
|
print OUT3 "PASS\t", join("\t", @line), "\n";
|
||||||
|
print OUT "1\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n";
|
||||||
|
if ($freq > 0.1) {
|
||||||
|
if ($line[16] =~ /Likely_pathogenic|drug/i) {
|
||||||
|
print OUT2 "2\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n";
|
||||||
|
}
|
||||||
|
elsif ($line[16] =~ /pathogenic/i and $line[16] !~ /Conflicting/i) {
|
||||||
|
print OUT2 "1\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
print OUT2 "3\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
print OUT3 "unknow1\t", join("\t", @line), "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
print OUT3 "unknow2\t", join("\t", @line), "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
print OUT3 "common_snp\t", join("\t", @line), "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
print OUT3 "synonymous\t", join("\t", @line), "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
sub transcript {
|
||||||
|
my $gene = shift @_;
|
||||||
|
my $data_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase/";
|
||||||
|
open TR, "$data_path/oncokbgene.txt";
|
||||||
|
my %oncogene;
|
||||||
|
while (<TR>) {
|
||||||
|
chomp;
|
||||||
|
my @line = split;
|
||||||
|
$oncogene{$line[0]} = $line[2];
|
||||||
|
}
|
||||||
|
if (exists $oncogene{$gene}) {
|
||||||
|
$oncogene{$gene} =~ s/\.\d+//;
|
||||||
|
return $oncogene{$gene};
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
print "$gene has no NM id in oncokbgene.txt";
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,45 @@
|
||||||
|
#!/usr/bin/env perl
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
die "usage:perl $0 vcf1r vcf2r outvcf" unless @ARGV == 3;
|
||||||
|
my ($vcf1r, $vcf2r, $outvcf) = @ARGV;
|
||||||
|
|
||||||
|
# open R1, "${outputdir}/mutation/${tumor}.1r.snp.indel.vcf";
|
||||||
|
# open R2, "${outputdir}/mutation/${tumor}.2r.snp.indel.vcf";
|
||||||
|
# open OUT, ">${outputdir}/mutation/${tumor}.snp.indel.vcf";
|
||||||
|
|
||||||
|
open R1, "$vcf1r";
|
||||||
|
open R2, "$vcf2r";
|
||||||
|
open OUT, ">$outvcf";
|
||||||
|
|
||||||
|
my (%r1, %r2);
|
||||||
|
while (<R2>) {
|
||||||
|
next if /^#/;
|
||||||
|
chomp;
|
||||||
|
my @line = split;
|
||||||
|
my $key = join("_", @line[0, 1, 3, 4]);
|
||||||
|
$r2{$key} = $_;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (<R1>) {
|
||||||
|
if (/^#/) {
|
||||||
|
print OUT;
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
chomp;
|
||||||
|
my @line = split;
|
||||||
|
my $freq = (split(":", $line[-1]))[4];
|
||||||
|
my $dp = (split(":", $line[-1]))[1];
|
||||||
|
next if $dp < 50;
|
||||||
|
|
||||||
|
if ($freq < 0.01) {
|
||||||
|
my $key = join("_", @line[0, 1, 3, 4]);
|
||||||
|
if (exists $r2{$key}) {
|
||||||
|
print OUT "$_\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
print OUT "$_\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,115 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import pysam
|
||||||
|
|
||||||
|
if len(sys.argv) != 4:
|
||||||
|
print(" ".join(['usage:python', sys.argv[0], 'filter_file', 'bam_file', 'output']))
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
# output_dir = sys.argv[1]
|
||||||
|
# tumor = sys.argv[2]
|
||||||
|
|
||||||
|
# infile = "".join([output_dir, '/mutation/', tumor, '.snp.indel.Somatic.annoall.hg19_multianno_filtered_pre.txt'])
|
||||||
|
# bamfile = "".join([output_dir, '/alignment/', tumor, '.rmdup.bam'])
|
||||||
|
# outfile = "".join([output_dir, '/mutation/', tumor, '.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt'])
|
||||||
|
|
||||||
|
infile = sys.argv[1]
|
||||||
|
bamfile = sys.argv[2]
|
||||||
|
outfile = sys.argv[3]
|
||||||
|
|
||||||
|
samfile = pysam.AlignmentFile(bamfile)
|
||||||
|
|
||||||
|
OUT = open(outfile, 'w')
|
||||||
|
|
||||||
|
|
||||||
|
def correct(chr, start, end, alt_base):
|
||||||
|
total_reads = []
|
||||||
|
alt_reads = []
|
||||||
|
for pileupcolumn in samfile.pileup(chr, start, end, stepper="samtools", min_base_quality=0, min_mapping_quality=20,
|
||||||
|
max_depth=100000, ignore_overlaps=False, truncate=True, ignore_orphans=False):
|
||||||
|
for pileupread in pileupcolumn.pileups:
|
||||||
|
# print(str(pileupread))
|
||||||
|
if not pileupread.is_del and not pileupread.is_refskip:
|
||||||
|
if pileupread.alignment.query_sequence[pileupread.query_position] == alt_base:
|
||||||
|
alt_reads.append(pileupread.alignment.query_name)
|
||||||
|
else:
|
||||||
|
if pileupread.alignment.get_tag('NM') < 4 and pileupread.alignment.query_qualities[
|
||||||
|
pileupread.query_position] >= 20:
|
||||||
|
total_reads.append(pileupread.alignment.query_name)
|
||||||
|
alt_reads = list(set(alt_reads))
|
||||||
|
non_alt_depth = len(list(set(total_reads)))
|
||||||
|
dic = {'chr': [],
|
||||||
|
'pos': [],
|
||||||
|
'read': [],
|
||||||
|
'base': [],
|
||||||
|
'quality': [],
|
||||||
|
'NM': [],
|
||||||
|
'FR_RR': []
|
||||||
|
}
|
||||||
|
for read in alt_reads:
|
||||||
|
for pileupcolumn in samfile.pileup(chr, start, end, stepper="samtools", min_base_quality=0,
|
||||||
|
min_mapping_quality=20, max_depth=100000, ignore_overlaps=False,
|
||||||
|
truncate=True, ignore_orphans=False):
|
||||||
|
for pileupread in pileupcolumn.pileups:
|
||||||
|
if not pileupread.is_del and not pileupread.is_refskip and pileupread.alignment.query_name == read:
|
||||||
|
dic['chr'].append(pileupcolumn.reference_name)
|
||||||
|
dic['pos'].append(pileupcolumn.reference_pos + 1)
|
||||||
|
dic['read'].append(pileupread.alignment.query_name)
|
||||||
|
dic['base'].append(pileupread.alignment.query_sequence[pileupread.query_position])
|
||||||
|
dic['quality'].append(pileupread.alignment.query_qualities[pileupread.query_position])
|
||||||
|
dic['NM'].append(pileupread.alignment.get_tag('NM'))
|
||||||
|
fr = 0
|
||||||
|
rr = 0
|
||||||
|
if pileupread.alignment.has_tag('FR'):
|
||||||
|
fr = pileupread.alignment.get_tag('FR')
|
||||||
|
if pileupread.alignment.has_tag('RR'):
|
||||||
|
fr = pileupread.alignment.get_tag('RR')
|
||||||
|
dic['FR_RR'].append(fr + rr)
|
||||||
|
dic = pd.DataFrame(dic)
|
||||||
|
b = alt_reads[:]
|
||||||
|
for R in b:
|
||||||
|
if len(list(dic[dic['read'] == R]['base'])) == 2:
|
||||||
|
if list(dic[dic['read'] == R]['base'])[0] != list(dic[dic['read'] == R]['base'])[1]:
|
||||||
|
print(dic[dic['read'] == R]['base'])
|
||||||
|
print(dic[dic['read'] == R]['quality'])
|
||||||
|
alt_reads.remove(R)
|
||||||
|
else:
|
||||||
|
if (list(dic[dic['read'] == R]['quality'])[0] >= 20 and list(dic[dic['read'] == R]['NM'])[0] < 4) or (
|
||||||
|
list(dic[dic['read'] == R]['quality'])[1] >= 20 and list(dic[dic['read'] == R]['NM'])[1] < 4):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
alt_reads.remove(R)
|
||||||
|
else:
|
||||||
|
if list(dic[dic['read'] == R]['quality'])[0] < 20 or list(dic[dic['read'] == R]['NM'])[0] >= 4:
|
||||||
|
alt_reads.remove(R)
|
||||||
|
alt_reads_num = len(alt_reads)
|
||||||
|
total_depth = non_alt_depth + alt_reads_num
|
||||||
|
correct_num = 0
|
||||||
|
for index, row in dic.iterrows():
|
||||||
|
if row['read'] in alt_reads:
|
||||||
|
if row['quality'] >= 20 and row['NM'] < 4 and row['FR_RR'] > 1:
|
||||||
|
correct_num += 1
|
||||||
|
# if alt_reads_num > 3 and correct_num > 0:
|
||||||
|
# return(alt_reads_num)
|
||||||
|
# else:
|
||||||
|
# return(0)
|
||||||
|
return (alt_reads_num, correct_num, total_depth)
|
||||||
|
|
||||||
|
|
||||||
|
snv = pd.read_table(infile, sep="\t")
|
||||||
|
cols = [index for index, row in snv[snv['可信'] == 0].iterrows()]
|
||||||
|
snv.drop(cols, inplace=True)
|
||||||
|
drop_index = []
|
||||||
|
for index, row in snv.iterrows():
|
||||||
|
if len(row['Ref']) == 1 and len(row['Alt']) == 1:
|
||||||
|
if float(row['Otherinfo13'].split(':')[4]) < 0.05:
|
||||||
|
c = correct(row['Chr'], row['Start'] - 1, row['End'], row['Alt'])
|
||||||
|
if float(c[0]) < 3 or float(c[1]) < 1 or float(c[0] / (c[0] + c[2])) < 0.002:
|
||||||
|
drop_index.append(index)
|
||||||
|
|
||||||
|
snv.drop(labels=drop_index, inplace=True)
|
||||||
|
# OUT.write(snv)
|
||||||
|
snv.to_csv(outfile, index=False, sep="\t")
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
#!/usr/bin/env perl
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
die "usage:perl $0 tumor normal output" unless @ARGV == 3;
|
||||||
|
my ($tumor, $normal, $output) = @ARGV;
|
||||||
|
open T, "$tumor";
|
||||||
|
open N, "$normal";
|
||||||
|
open OUT, "> $output";
|
||||||
|
|
||||||
|
my %n;
|
||||||
|
while (<N>) {
|
||||||
|
next if /^#/;
|
||||||
|
chomp;
|
||||||
|
my @line = split;
|
||||||
|
my $freq = (split(":", $line[-1]))[4];
|
||||||
|
my $key = join '_', @line[0, 1, 3, 4];
|
||||||
|
$n{$key} = $freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (<T>) {
|
||||||
|
if (/^#/) {
|
||||||
|
print OUT;
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
chomp;
|
||||||
|
my @line = split;
|
||||||
|
my $freq = (split(":", $line[-1]))[4];
|
||||||
|
my $key = join '_', @line[0, 1, 3, 4];
|
||||||
|
if (not exists $n{$key}) {
|
||||||
|
print OUT "$_\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
# 去除对照样本中频率大于0.05的
|
||||||
|
if ($n{$key} >= 0.05) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
# 频率是对照样本的五倍?
|
||||||
|
if ($freq / $n{$key} > 5) {
|
||||||
|
print OUT "$_\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,29 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import pysam
|
||||||
|
|
||||||
|
# 提取fr+rr>=2的点
|
||||||
|
if len(sys.argv) != 3:
|
||||||
|
print(" ".join(['usage:python', sys.argv[0], 'outputDir', 'tumor']))
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
# output_dir = sys.argv[1]
|
||||||
|
# tumor = sys.argv[2]
|
||||||
|
|
||||||
|
# infile = "".join([output_dir, '/alignment/', tumor, '.rmdup.bam'])
|
||||||
|
# outfile = "".join([output_dir, '/alignment/', tumor, '.2r.rmdup.bam'])
|
||||||
|
samfile = pysam.AlignmentFile(sys.argv[1], "rb")
|
||||||
|
bamfile = pysam.AlignmentFile(sys.argv[2], "wb", template=samfile)
|
||||||
|
for s in samfile:
|
||||||
|
fr = 0
|
||||||
|
rr = 0
|
||||||
|
if s.has_tag('FR'):
|
||||||
|
fr = s.get_tag('FR')
|
||||||
|
if s.has_tag('RR'):
|
||||||
|
rr = s.get_tag('RR')
|
||||||
|
if fr + rr >= 2:
|
||||||
|
bamfile.write(s)
|
||||||
|
|
||||||
|
# os.system('samtools index ' + outfile)
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
Chr Start End Ref Alt Gene AAchange
|
||||||
|
chr6 29911227 29911228 GA TG HLA-A p.E176W
|
||||||
|
chr6 29912029 29912030 GG C HLA-A p.Q250Hfs*47
|
||||||
|
chr6 29912383 29912383 G A HLA-A p.R334R
|
||||||
|
chr6 31237764 31237774 TCATAGCGGTG CCACAACAGCC HLA-C p.T329_M332delinsAVVV
|
||||||
|
chr6 31237994 31237994 C - HLA-C p.S297Afs*25
|
||||||
|
chr6 31238865 31238865 T - HLA-C p.T202Rfs*12
|
||||||
|
chr6 31238884 31238884 G T HLA-C p.Y195X
|
||||||
|
chr6 31238909 31238910 GT AG HLA-C p.T187L
|
||||||
|
chr6 31239431 31239431 C - HLA-C p.A97Lfs*4
|
||||||
|
chr6 31239490 31239490 C - HLA-C p.E77Sfs*24
|
||||||
|
chr6 31324003 31324004 TC GT HLA-B p.E187T
|
||||||
|
chr6 31324525 31324536 CCTGGGCCTTGT TGTTGGTCTTGG HLA-B p.Y91_A95delinsSKTNT
|
||||||
|
chr6 31324734 31324861 CCTGGGGGTGAGGAGGGGCTGAGACCCGCCCGACCCTCCTCCCGGCGCGGCTCCTCAGGTCCTGCGCCCCCGCCTGCGGTCCCCTCGCTCCTCCCGGCAGAGGCCATTTCCCTCCCGACCCGCACTCA - HLA-B p.G25Afs*5
|
||||||
|
chr6 29910581 29910583 CGC AGT HLA-A p.R41S
|
||||||
|
chr6 31237862 31237862 T A HLA-C p.E299V
|
||||||
|
chr6 31324525 31324549 CCTGGGCCTTGTAGATCTGTGTGTT TCTGGCGCTTGTACTTCTGTGTCTC HLA-B p.N87_A95delinsETQKYKRQT
|
||||||
|
chr6 31238909 31238910 GT TC HLA-C p.T187E
|
||||||
|
chr3 10088407 10088410 AGTA - FANCD2 p.V427Ffs*20
|
||||||
|
chr12 57112003 57112003 A G NACA p.M1104T
|
||||||
|
chr12 57112022 57112022 A G NACA p.S1098P
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
project probe mutation splicing promoter cnv fusion long_indel chemotherapy_drug non_target_mutation
|
||||||
|
160gene /dataseq/jmdna/database/bed/160.bed AKT1/ALK/APC/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/CCND1/CCND2/CCND3/CDK12/CDK4/CDK6/CDKN2A/CHEK1/CHEK2/CSF1R/CTNNB1/DDR2/EGFR/ERBB2/ERBB3/ERBB4/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MET/MTOR/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RB1/RET/ROS1/SMAD4/SMO/STK11/TP53/TSC1/TSC2/VHL MET TERT CDK4/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/MDM2/MDM4/CDKN2A ALK/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 NA NA
|
||||||
|
650gene /dataseq/jmdna/database/bed/650.bed ABL1/AKT1/AKT2/AKT3/ALK/APC/ARAF/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/BTK/CCND1/CCND2/CCND3/CDK12/CDK4/CDK6/CDKN2A/CDKN2B/CHEK1/CHEK2/CSF1R/CTNNB1/DDR2/EGFR/ERBB2/ERBB3/ERBB4/ESR1/EZH2/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FGFR4/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MAP2K2/MET/MPL/MTOR/MYCN/MYD88/NF1/NF2/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PTCH1/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RAF1/RB1/RET/ROS1/SMAD4/SMARCB1/SMO/STK11/TP53/TSC1/TSC2/VHL MET TERT CDK4/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/MYCN/MDM2/MDM4/CDKN2A/CDKN2B ALK/BRAF/FGFR1/FGFR2/FGFR3/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 NA NA
|
||||||
|
lung85gene /dataseq/jmdna/database/bed/160.bed AKT1/ALK/ATM/BARD1/BCL2L11/BRAF/BRCA1/BRCA2/BRIP1/CCND1/CCND2/CDK12/CDK4/CDKN2A/CHEK1/CHEK2/CSF1R/DDR2/EGFR/ERBB2/ERBB4/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/HRAS/IDH1/KIT/KRAS/MET/MTOR/MYC/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PIK3CA/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RET/ROS1/TP53/TSC1/TSC2/VHL/STK11/APC/ARID1A/CTNNB1/EPCAM/ERBB3/MAP2K1/MLH1/MSH2/MSH6/PMS2/RB1/SMAD4/SMO MET NA ALK/CCND1/CCND2/CDK4/CDKN2A/CSF1R/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/MET/MYC/SMO ALK/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 ABCB1/CASP7/CDA/CYP2C8/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/MTHFR/NQO1/SOD2/TPMT/TYMS/UGT1A1/XRCC1/XPC/HAS3
|
||||||
|
crc88gene /dataseq/jmdna/database/bed/160.bed AKT1/ALK/APC/ARID1A/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/CCND2/CCND3/CDK12/CDK4/CDKN2A/CHEK1/CHEK2/CSF1R/CTNNB1/EGFR/EPCAM/ERBB2/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FLT3/IDH1/KDR/KIT/KRAS/MAP2K1/MET/MLH1/MSH2/MSH6/MTOR/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PIK3CA/PMS2/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RB1/RET/ROS1/SMAD4/SMO/TP53/TSC1/TSC2/VHL/POLD1/POLE/STK11/MUTYH/SDHA/SDHB/SDHC/SDHD MET NA ALK/CCND2/CCND3/CDK4/CDKN2A/CSF1R/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/PTEN/RB1 ALK/NTRK1/NTRK2/NTRK3/RET/ROS1 NA ABCB1/CASP7/CDA/CYP2C8/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/MTHFR/NQO1/SOD2/TPMT/TYMS/UGT1A1/XRCC1/XPC/HAS3 NA
|
||||||
|
17gene /dataseq/jmdna/database/bed/lung17gene.hg19.liftover.bed EGFR/BRAF/ERBB2/ALK/MET/AKT1/KRAS/MAP2K1/NRAS/PTEN/PIK3CA/NTRK1/NTRK2/NTRK3 MET NA ERBB2/MET/EGFR ALK/RET/ROS1/NTRK1/NTRK2/NTRK3 BCL2L11 NA lung
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
|
run_wdl_path = os.path.join(os.path.dirname(__file__), 'run_wdl.py')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
parser = argparse.ArgumentParser(description="JM to run pipeline")
|
||||||
|
|
||||||
|
parser.add_argument('-n', '--barcode', help="sample's barcode", required=True)
|
||||||
|
parser.add_argument('-s', '--normal', help="sample's normal", default='', required=False, nargs='?')
|
||||||
|
parser.add_argument('-u', '--umi', action='store_true', help="is umi sample", default=False)
|
||||||
|
parser.add_argument('-i', '--input_dir', help="sample's input_dir/workdir", required=True)
|
||||||
|
parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='./')
|
||||||
|
parser.add_argument('-p', '--project', help="project", required=True)
|
||||||
|
parser.add_argument('-b', '--bed', help="bed", required=True)
|
||||||
|
parser.add_argument('-w', '--wdl', help="wdl")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
res_path = os.path.realpath(os.path.join(args.output_dir, args.barcode))
|
||||||
|
|
||||||
|
if not os.path.exists(res_path):
|
||||||
|
os.mkdir(res_path)
|
||||||
|
|
||||||
|
cmd = f'nohup python ' \
|
||||||
|
f'{run_wdl_path} -n {args.barcode} -s {args.normal} ' \
|
||||||
|
f'{"-u " if args.umi else ""} -i {args.input_dir} ' \
|
||||||
|
f'-o {res_path} -b {args.bed} -p {args.project} -w {args.wdl} ' \
|
||||||
|
f'> {res_path}/{args.barcode}_run.log ' \
|
||||||
|
f'2>> {res_path}/{args.barcode}_run.log &'
|
||||||
|
with open(os.path.join(res_path, 'exec'), 'w') as execfile:
|
||||||
|
execfile.write(cmd + '\n')
|
||||||
|
os.system(cmd)
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
def run(barcode, normal, umi, input_dir, output_dir, project, bed, wdl):
|
||||||
|
input_dir = os.path.realpath(input_dir)
|
||||||
|
output_dir = os.path.realpath(output_dir)
|
||||||
|
wdl = os.path.realpath(wdl)
|
||||||
|
arg = {
|
||||||
|
"pipeline.tumor": barcode,
|
||||||
|
"pipeline.normal": normal,
|
||||||
|
"pipeline.umi": umi,
|
||||||
|
"pipeline.input_dir": input_dir,
|
||||||
|
"pipeline.output_dir": output_dir,
|
||||||
|
"pipeline.project": project,
|
||||||
|
"pipeline.bed": bed
|
||||||
|
}
|
||||||
|
|
||||||
|
arg = {key: value for key, value in arg.items() if value not in (None, '', False)}
|
||||||
|
|
||||||
|
# generate json
|
||||||
|
jsfile_path = os.path.join(output_dir, f'{barcode}.json')
|
||||||
|
with open(jsfile_path, 'w') as jsfile:
|
||||||
|
jsfile.write(json.dumps(arg, indent=4, ensure_ascii=False))
|
||||||
|
|
||||||
|
# run pipeline
|
||||||
|
cmd1 = 'export PATH=/home/zhangchao/project/pipeline/workflow/script:$PATH'
|
||||||
|
cmd2 = 'export PUBLIC=/home/zhangchao/project/pipeline/workflow/script/public/'
|
||||||
|
cmd3 = f'cd {output_dir}'
|
||||||
|
cmd4 = f'/home/zhangchao/soft/jdk-17.0.7+7/bin/java -jar /home/zhangchao/soft/cromwell-85.jar run --inputs {jsfile_path} {wdl}'
|
||||||
|
cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}'
|
||||||
|
|
||||||
|
# 记录开始时间
|
||||||
|
start_time = time.time()
|
||||||
|
print(cmd)
|
||||||
|
ret = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8")
|
||||||
|
pidnum = ret.pid
|
||||||
|
with open(os.path.join(output_dir, 'pid'), 'w') as pidfile:
|
||||||
|
pidfile.write(str(pidnum))
|
||||||
|
ret.wait()
|
||||||
|
# 记录结束时间
|
||||||
|
end_time = time.time()
|
||||||
|
# 计算运行时间
|
||||||
|
elapsed_time = end_time - start_time
|
||||||
|
|
||||||
|
print("\n运行时间:{:.2f} 秒".format(elapsed_time))
|
||||||
|
|
||||||
|
print(ret.stdout.read(), ret.stderr.read())
|
||||||
|
print('#' * 50)
|
||||||
|
print('读取日志')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
parser = argparse.ArgumentParser(description="JM to run pipeline")
|
||||||
|
|
||||||
|
parser.add_argument('-n', '--barcode', help="sample's barcode", required=True)
|
||||||
|
parser.add_argument('-s', '--normal', help="sample's normal", default='', required=False, nargs='?')
|
||||||
|
parser.add_argument('-u', '--umi', action='store_true', help="is umi sample", default=False)
|
||||||
|
parser.add_argument('-i', '--input_dir', help="sample's input_dir/workdir", required=True)
|
||||||
|
parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='./')
|
||||||
|
parser.add_argument('-p', '--project', help="project", required=True)
|
||||||
|
parser.add_argument('-b', '--bed', help="bed", required=True)
|
||||||
|
parser.add_argument('-w', '--wdl', help="wdl")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
run(args.barcode, args.normal, args.umi, args.input_dir, args.output_dir, args.project, args.bed, args.wdl)
|
||||||
|
|
@ -10,7 +10,6 @@ task mutation_calling_umi {
|
||||||
mkdir ${output_dir}/mutation
|
mkdir ${output_dir}/mutation
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
#1条call
|
#1条call
|
||||||
# 这个情况是reads数目只有1,但是如果去掉了这个reads数导致数据量减少很多
|
# 这个情况是reads数目只有1,但是如果去掉了这个reads数导致数据量减少很多
|
||||||
# -r 3 是指有3条这样样的reads支撑
|
# -r 3 是指有3条这样样的reads支撑
|
||||||
|
|
@ -27,7 +26,7 @@ task mutation_calling_umi {
|
||||||
-N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf
|
-N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf
|
||||||
|
|
||||||
#提取>=2条矫正的序列
|
#提取>=2条矫正的序列
|
||||||
python3 /home/zhangchao/project/pipeline/control/script/fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam
|
func_fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam
|
||||||
samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam
|
samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam
|
||||||
|
|
||||||
# 保证 1r call mut umi family 里面有2条reads
|
# 保证 1r call mut umi family 里面有2条reads
|
||||||
|
|
@ -38,25 +37,15 @@ task mutation_calling_umi {
|
||||||
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf
|
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf
|
||||||
|
|
||||||
#merge突变,以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域(AF<0.01)进行矫正。
|
#merge突变,以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域(AF<0.01)进行矫正。
|
||||||
perl /home/zhangchao/project/pipeline/control/script/1r_plus_2r.pl \
|
filter_snpindel_umi_1r_plus_2r.pl \
|
||||||
${output_dir}/mutation/${name}.1r.snp.indel.vcf \
|
${output_dir}/mutation/${name}.1r.snp.indel.vcf \
|
||||||
${output_dir}/mutation/${name}.2r.snp.indel.vcf \
|
${output_dir}/mutation/${name}.2r.snp.indel.vcf \
|
||||||
${output_dir}/mutation/${name}.snp.indel.vcf
|
${output_dir}/mutation/${name}.snp.indel.vcf
|
||||||
|
|
||||||
table_annovar.pl \
|
|
||||||
${output_dir}/mutation/${name}.snp.indel.vcf \
|
|
||||||
/dataseq/jmdna/software/annovar/humandb/ \
|
|
||||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
|
||||||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
|
|
||||||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
|
|
||||||
--intronhgvs 50 \
|
|
||||||
-operation g,f,f,f,f,f,f,f,f,f,r \
|
|
||||||
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
|
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String vcf = "${output_dir}/mutation/${name}.filter.flag.snp.indel.vcf"
|
String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -86,16 +75,6 @@ task mutation_calling_tissue {
|
||||||
-c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
-c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
||||||
|/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf
|
|/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf
|
||||||
|
|
||||||
table_annovar.pl \
|
|
||||||
${output_dir}/mutation/${name}.snp.indel.vcf \
|
|
||||||
/dataseq/jmdna/software/annovar/humandb/ \
|
|
||||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
|
||||||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
|
|
||||||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
|
|
||||||
-operation g,f,f,f,f,f,f,f,f,f \
|
|
||||||
--intronhgvs 50 \
|
|
||||||
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
|
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
|
|
@ -124,15 +103,73 @@ task mutation_calling_tissue_control {
|
||||||
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
|
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
|
||||||
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf
|
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf
|
||||||
|
|
||||||
table_annovar.pl \
|
>>>
|
||||||
${output_dir}/mutation/${name}.snp.indel.vcf \
|
|
||||||
/dataseq/jmdna/software/annovar/humandb/ \
|
output {
|
||||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
|
||||||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
|
}
|
||||||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
|
}
|
||||||
-operation g,f,f,f,f,f,f,f,f,f \
|
|
||||||
--intronhgvs 50 \
|
task mutation_calling_umi_control {
|
||||||
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
|
String name
|
||||||
|
String bed
|
||||||
|
String ref
|
||||||
|
String output_dir
|
||||||
|
String tumor_rmdup_bam
|
||||||
|
String normal_rmdup_bam
|
||||||
|
|
||||||
|
command <<<
|
||||||
|
if [ ! -d ${output_dir}/mutation ];then
|
||||||
|
mkdir ${output_dir}/mutation
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 对照样本
|
||||||
|
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
|
||||||
|
-G ${ref} \
|
||||||
|
-f 0.01 \
|
||||||
|
-N ${name} \
|
||||||
|
-b ${normal_rmdup_bam} \
|
||||||
|
-UN \
|
||||||
|
-Q 20 \
|
||||||
|
-m 3 \
|
||||||
|
-r 3 \
|
||||||
|
-th 10 \
|
||||||
|
-c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
||||||
|
|/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}_normal.snp.indel.vcf
|
||||||
|
|
||||||
|
# 实验样本
|
||||||
|
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
|
||||||
|
-G ${ref} \
|
||||||
|
-f 0.001 \
|
||||||
|
-N ${name} \
|
||||||
|
-b ${tumor_rmdup_bam} \
|
||||||
|
-UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
|
||||||
|
| /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
||||||
|
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \
|
||||||
|
-N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf
|
||||||
|
|
||||||
|
#提取>=2条矫正的序列
|
||||||
|
func_fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam
|
||||||
|
samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam
|
||||||
|
|
||||||
|
# 保证 1r call mut umi family 里面有2条reads
|
||||||
|
#2条矫正的call
|
||||||
|
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
|
||||||
|
-f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
|
||||||
|
-UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
||||||
|
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf
|
||||||
|
|
||||||
|
#merge突变,以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域(AF<0.01)进行矫正。
|
||||||
|
filter_snpindel_umi_1r_plus_2r.pl \
|
||||||
|
${output_dir}/mutation/${name}.1r.snp.indel.vcf \
|
||||||
|
${output_dir}/mutation/${name}.2r.snp.indel.vcf \
|
||||||
|
${output_dir}/mutation/${name}.snp.indel.vcf
|
||||||
|
|
||||||
|
# 去除normal 中的突变位点
|
||||||
|
filter_snpindel_umi_subnormal.pl \
|
||||||
|
${output_dir}/mutation/${name}_tumor.snp.indel.vcf \
|
||||||
|
${output_dir}/mutation/${name}_normal.snp.indel.vcf \
|
||||||
|
${output_dir}/mutation/${name}.snp.indel.vcf
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
|
|
@ -152,64 +189,59 @@ workflow call_mutation {
|
||||||
String ref
|
String ref
|
||||||
String bed
|
String bed
|
||||||
|
|
||||||
scatter(name in [tumor, normal]) {
|
# 双样本
|
||||||
if (defined(name)) {
|
if (defined(normal)) {
|
||||||
if (name==tumor) {
|
if (umi) {
|
||||||
if (umi) {
|
call mutation_calling_umi_control {
|
||||||
call mutation_calling_umi as tumor_mutation_calling_umi {
|
input:
|
||||||
input:
|
name=tumor,
|
||||||
name=name,
|
output_dir=output_dir,
|
||||||
output_dir=output_dir,
|
ref=ref,
|
||||||
ref=ref,
|
bed=bed,
|
||||||
bed=bed,
|
tumor_rmdup_bam=tumor_rmdup_bam,
|
||||||
rmdup_bam=tumor_rmdup_bam
|
normal_rmdup_bam=normal_rmdup_bam
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!umi) {
|
|
||||||
# 单样本模式,normal没有定义
|
|
||||||
if (name==select_first([normal, tumor])) {
|
|
||||||
call mutation_calling_tissue as tumor_mutation_calling_tissue {
|
|
||||||
input:
|
|
||||||
name=name,
|
|
||||||
output_dir=output_dir,
|
|
||||||
ref=ref,
|
|
||||||
bed=bed,
|
|
||||||
rmdup_bam=normal_rmdup_bam
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# 双样本模式,normal有定义
|
|
||||||
if (name!=select_first([normal, tumor])) {
|
|
||||||
call mutation_calling_tissue_control as tumor_mutation_calling_tissue_control {
|
|
||||||
input:
|
|
||||||
name=name,
|
|
||||||
output_dir=output_dir,
|
|
||||||
ref=ref,
|
|
||||||
bed=bed,
|
|
||||||
tumor_rmdup_bam=tumor_rmdup_bam,
|
|
||||||
normal_rmdup_bam=normal_rmdup_bam
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (name==select_first([normal, 'None'])) {
|
|
||||||
if (umi) {
|
|
||||||
call mutation_calling_tissue as normal_mutation_calling_tissue {
|
|
||||||
input:
|
|
||||||
name=name,
|
|
||||||
output_dir=output_dir,
|
|
||||||
ref=ref,
|
|
||||||
bed=bed,
|
|
||||||
rmdup_bam=normal_rmdup_bam
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if (!umi) {
|
||||||
|
call mutation_calling_tissue_control {
|
||||||
|
input:
|
||||||
|
name=tumor,
|
||||||
|
output_dir=output_dir,
|
||||||
|
ref=ref,
|
||||||
|
bed=bed,
|
||||||
|
tumor_rmdup_bam=tumor_rmdup_bam,
|
||||||
|
normal_rmdup_bam=normal_rmdup_bam
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# 单样本
|
||||||
|
if (!defined(normal)) {
|
||||||
|
if (umi) {
|
||||||
|
call mutation_calling_umi {
|
||||||
|
input:
|
||||||
|
name=tumor,
|
||||||
|
output_dir=output_dir,
|
||||||
|
ref=ref,
|
||||||
|
bed=bed,
|
||||||
|
rmdup_bam=tumor_rmdup_bam
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!umi) {
|
||||||
|
call mutation_calling_tissue {
|
||||||
|
input:
|
||||||
|
name=tumor,
|
||||||
|
output_dir=output_dir,
|
||||||
|
ref=ref,
|
||||||
|
bed=bed,
|
||||||
|
rmdup_bam=normal_rmdup_bam
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf"
|
String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf"
|
||||||
String somatic_nc_vcf = "${output_dir}/mutation/${normal}.snp.indel.vcf"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
397
wdl/task.wdl
397
wdl/task.wdl
|
|
@ -6,130 +6,92 @@ task create_dir {
|
||||||
if [ ! -d ${workdir} ];then
|
if [ ! -d ${workdir} ];then
|
||||||
mkdir -p ${workdir}/log
|
mkdir -p ${workdir}/log
|
||||||
fi
|
fi
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
}
|
}
|
||||||
|
|
||||||
task mutation_calling {
|
|
||||||
String name
|
|
||||||
String tumor_rmdupBam
|
|
||||||
String normal_rmdupBam
|
|
||||||
String outputDir
|
|
||||||
String bed
|
|
||||||
|
|
||||||
command <<<
|
|
||||||
|
|
||||||
if [ ! -d ${outputDir}/mutation ];then
|
|
||||||
mkdir ${outputDir}/mutation
|
|
||||||
fi
|
|
||||||
|
|
||||||
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
|
|
||||||
-G /dataseq/jmdna/database/genome/hg19/hg19.fa \
|
|
||||||
-f 0.01 \
|
|
||||||
-N ${name} \
|
|
||||||
-b "${tumor_rmdupBam}|${normal_rmdupBam}" \
|
|
||||||
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | \
|
|
||||||
/dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R | \
|
|
||||||
/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \
|
|
||||||
> ${outputDir}/mutation/${name}_vardict.snp.indel.vcf
|
|
||||||
|
|
||||||
vep \
|
|
||||||
--input_file ${outputDir}/mutation/${name}_vardict.snp.indel.vcf \
|
|
||||||
--output_file ${outputDir}/mutation/${name}_vardict_vep.snp.indel.vcf \
|
|
||||||
--format vcf \
|
|
||||||
--vcf \
|
|
||||||
--symbol \
|
|
||||||
--terms SO \
|
|
||||||
--hgvs \--fasta /dataseq/jmdna/database/genome/hg19/hg19.fa \
|
|
||||||
--offline --cache --dir_cache /home/software/.vep \
|
|
||||||
--pick \
|
|
||||||
--force_overwrite
|
|
||||||
|
|
||||||
>>>
|
|
||||||
|
|
||||||
output {
|
|
||||||
String somatic_hc_vcf = "${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf"
|
|
||||||
String germline_vcf="${outputDir}/mutation/${name}.snp.indel.Germline.vcf"
|
|
||||||
String loh_hc_vcf="${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf"
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
task annovar {
|
task annovar {
|
||||||
String name
|
String prefix
|
||||||
String outputDir
|
String output_dir
|
||||||
String ref
|
String ref
|
||||||
String somatic_hc_vcf
|
String vcf
|
||||||
String germline_vcf
|
|
||||||
String loh_hc_vcf
|
|
||||||
String rmdupBam
|
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
|
|
||||||
if [ ! -d ${outputDir}/mutation ];then
|
if [ ! -d ${output_dir}/mutation ];then
|
||||||
mkdir ${outputDir}/mutation
|
mkdir ${output_dir}/mutation
|
||||||
fi
|
fi
|
||||||
|
|
||||||
table_annovar.pl \
|
table_annovar.pl \
|
||||||
${somatic_hc_vcf} \
|
${vcf} \
|
||||||
/dataseq/jmdna/software/annovar/humandb/ \
|
/dataseq/jmdna/software/annovar/humandb/ \
|
||||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
||||||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
|
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
|
||||||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
|
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
|
||||||
--intronhgvs 50 \
|
--intronhgvs 50 \
|
||||||
-operation g,f,f,f,f,f,f,f,f,f,r \
|
-operation g,f,f,f,f,f,f,f,f,f,r \
|
||||||
--outfile ${outputDir}/mutation/${name}.snp.indel.Somatic.anno
|
--outfile ${output_dir}/mutation/${prefix}
|
||||||
|
|
||||||
table_annovar.pl \
|
|
||||||
${germline_vcf} \
|
|
||||||
/dataseq/jmdna/software/annovar/humandb/ \
|
|
||||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
|
||||||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
|
|
||||||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
|
|
||||||
--intronhgvs 50 \
|
|
||||||
-operation g,f,f,f,f,f,f,f,f,f,r \
|
|
||||||
--outfile ${outputDir}/mutation/${name}.snp.indel.Germline.anno
|
|
||||||
|
|
||||||
table_annovar.pl \
|
|
||||||
${loh_hc_vcf} \
|
|
||||||
/dataseq/jmdna/software/annovar/humandb/ \
|
|
||||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
|
||||||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
|
|
||||||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
|
|
||||||
--intronhgvs 50 \
|
|
||||||
-operation g,f,f,f,f,f,f,f,f,f,r \
|
|
||||||
--outfile ${outputDir}/mutation/${name}.snp.indel.LOH.anno
|
|
||||||
|
|
||||||
java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -T VariantAnnotator \
|
|
||||||
-R ${ref} \
|
|
||||||
-I ${rmdupBam} \
|
|
||||||
-V ${somatic_hc_vcf} \
|
|
||||||
-o ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \
|
|
||||||
--annotation TandemRepeatAnnotator
|
|
||||||
|
|
||||||
grep -v "^##" ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \
|
|
||||||
|cut -f8| paste ${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt - \
|
|
||||||
> ${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt
|
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String somatic_anno = "${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt"
|
String anno = "${output_dir}/mutation/${prefix}.hg19_multianno.txt"
|
||||||
String germline_anno = "${outputDir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno.txt"
|
}
|
||||||
String somatic_all_anno = "${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt"
|
}
|
||||||
|
|
||||||
|
task dealwithsnvindel {
|
||||||
|
String name
|
||||||
|
String anno
|
||||||
|
String project
|
||||||
|
String output_dir
|
||||||
|
String umi
|
||||||
|
String tumor_rmdup_bam
|
||||||
|
|
||||||
|
command <<<
|
||||||
|
|
||||||
|
if [ ! -d ${output_dir}/mutation ];then
|
||||||
|
mkdir ${output_dir}/mutation
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ${umi} ;then
|
||||||
|
filter_snpindel_common.pl \
|
||||||
|
${anno} \
|
||||||
|
${project} \
|
||||||
|
c \
|
||||||
|
${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered_pre.txt \
|
||||||
|
${output_dir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt \
|
||||||
|
${output_dir}/mutation/${name}.snp.indel.anno.hg19_multianno_tag.txt
|
||||||
|
|
||||||
|
filter_snpindel_umi_correct_f1r1.py \
|
||||||
|
${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered_pre.txt \
|
||||||
|
${tumor_rmdup_bam} \
|
||||||
|
${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered.txt
|
||||||
|
|
||||||
|
else
|
||||||
|
filter_snpindel_common.pl \
|
||||||
|
${anno} \
|
||||||
|
${project} \
|
||||||
|
t \
|
||||||
|
${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered.txt \
|
||||||
|
${output_dir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt \
|
||||||
|
${output_dir}/mutation/${name}.snp.indel.anno.hg19_multianno_tag.txt
|
||||||
|
|
||||||
|
>>>
|
||||||
|
output {
|
||||||
|
String snvindel_filtered= "${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered.txt"
|
||||||
|
String germline_filtered = "${output_dir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task tmb {
|
task tmb {
|
||||||
String codesDir
|
String codesDir
|
||||||
String name
|
String name
|
||||||
String outputDir
|
String output_dir
|
||||||
String somatic_anno
|
String somatic_anno
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
perl ${codesDir}/tmb.pl ${outputDir} ${name}
|
perl ${codesDir}/tmb.pl ${output_dir} ${name}
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String tmb="${outputDir}/mutation/${name}.tmb.txt"
|
String tmb="${output_dir}/mutation/${name}.tmb.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -137,7 +99,7 @@ task fusion {
|
||||||
String name
|
String name
|
||||||
String ref
|
String ref
|
||||||
String codesDir
|
String codesDir
|
||||||
String outputDir
|
String output_dir
|
||||||
String rmdupBam
|
String rmdupBam
|
||||||
String cancer
|
String cancer
|
||||||
String project
|
String project
|
||||||
|
|
@ -145,48 +107,48 @@ task fusion {
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
|
|
||||||
if [ ! -d ${outputDir}/fusion ];then
|
if [ ! -d ${output_dir}/fusion ];then
|
||||||
mkdir ${outputDir}/fusion
|
mkdir ${output_dir}/fusion
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Extract the discordant paired-end alignments.
|
# Extract the discordant paired-end alignments.
|
||||||
samtools view -b -F 1294 ${rmdupBam} > ${outputDir}/fusion/${name}.discordants.bam
|
samtools view -b -F 1294 ${rmdupBam} > ${output_dir}/fusion/${name}.discordants.bam
|
||||||
|
|
||||||
# Extract the split-read alignments
|
# Extract the split-read alignments
|
||||||
samtools view -h ${rmdupBam} \
|
samtools view -h ${rmdupBam} \
|
||||||
| /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
|
| /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
|
||||||
| samtools view -Sb - \
|
| samtools view -Sb - \
|
||||||
> ${outputDir}/fusion/${name}.splitters.bam
|
> ${output_dir}/fusion/${name}.splitters.bam
|
||||||
|
|
||||||
lumpyexpress \
|
lumpyexpress \
|
||||||
-B ${rmdupBam} \
|
-B ${rmdupBam} \
|
||||||
-S ${outputDir}/fusion/${name}.splitters.bam \
|
-S ${output_dir}/fusion/${name}.splitters.bam \
|
||||||
-D ${outputDir}/fusion/${name}.discordants.bam \
|
-D ${output_dir}/fusion/${name}.discordants.bam \
|
||||||
-o ${outputDir}/fusion/${name}.fusion.vcf
|
-o ${output_dir}/fusion/${name}.fusion.vcf
|
||||||
|
|
||||||
perl ${codesDir}/fusion.filter.pl ${outputDir}/fusion/${name}.fusion.vcf ${outputDir}/fusion/${name}.fusion.filter.vcf
|
perl ${codesDir}/fusion.filter.pl ${output_dir}/fusion/${name}.fusion.vcf ${output_dir}/fusion/${name}.fusion.filter.vcf
|
||||||
|
|
||||||
svtyper \
|
svtyper \
|
||||||
-B ${rmdupBam} \
|
-B ${rmdupBam} \
|
||||||
-i ${outputDir}/fusion/${name}.fusion.filter.vcf \
|
-i ${output_dir}/fusion/${name}.fusion.filter.vcf \
|
||||||
-T ${ref} \
|
-T ${ref} \
|
||||||
-o ${outputDir}/fusion/${name}.fusion.gt.vcf
|
-o ${output_dir}/fusion/${name}.fusion.gt.vcf
|
||||||
|
|
||||||
table_annovar.pl \
|
table_annovar.pl \
|
||||||
${outputDir}/fusion/${name}.fusion.gt.vcf \
|
${output_dir}/fusion/${name}.fusion.gt.vcf \
|
||||||
/dataseq/jmdna/software/annovar/humandb/ \
|
/dataseq/jmdna/software/annovar/humandb/ \
|
||||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
||||||
-protocol refGene \
|
-protocol refGene \
|
||||||
-operation g \
|
-operation g \
|
||||||
--outfile ${outputDir}/fusion/${name}.fusion
|
--outfile ${output_dir}/fusion/${name}.fusion
|
||||||
|
|
||||||
perl ${codesDir}/fusion.reanno.pl ${tumor_bamdst_depth} ${outputDir} ${name}
|
perl ${codesDir}/fusion.reanno.pl ${tumor_bamdst_depth} ${output_dir} ${name}
|
||||||
perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codesDir} ${name} ${outputDir} ${project} ${cancer}
|
perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codesDir} ${name} ${output_dir} ${project} ${cancer}
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String fusion = "${outputDir}/fusion/${name}.fusion.pos.txt"
|
String fusion = "${output_dir}/fusion/${name}.fusion.pos.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -195,7 +157,7 @@ task tumor_content {
|
||||||
String tumor_pileup
|
String tumor_pileup
|
||||||
String normal_pileup
|
String normal_pileup
|
||||||
String ref
|
String ref
|
||||||
String outputDir
|
String output_dir
|
||||||
String codesDir
|
String codesDir
|
||||||
String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz"
|
String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz"
|
||||||
|
|
||||||
|
|
@ -206,16 +168,16 @@ task tumor_content {
|
||||||
-F ${ref} \
|
-F ${ref} \
|
||||||
-n ${normal_pileup} \
|
-n ${normal_pileup} \
|
||||||
-t ${tumor_pileup} \
|
-t ${tumor_pileup} \
|
||||||
| gzip > ${outputDir}/qc/target_${name}.200base.seqz.gz
|
| gzip > ${output_dir}/qc/target_${name}.200base.seqz.gz
|
||||||
|
|
||||||
sequenza-utils seqz_binning -w 200 -s ${outputDir}/qc/target_${name}.200base.seqz.gz \
|
sequenza-utils seqz_binning -w 200 -s ${output_dir}/qc/target_${name}.200base.seqz.gz \
|
||||||
| gzip > ${outputDir}/qc/target_${name}.200base.small.seqz.gz
|
| gzip > ${output_dir}/qc/target_${name}.200base.small.seqz.gz
|
||||||
|
|
||||||
Rscript ${codesDir}/sequenza.R ${name} ${outputDir}/qc/target_${name}.200base.small.seqz.gz ${outputDir}/qc/sequenza || echo "sequenza failed!"
|
Rscript ${codesDir}/sequenza.R ${name} ${output_dir}/qc/target_${name}.200base.small.seqz.gz ${output_dir}/qc/sequenza || echo "sequenza failed!"
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String purity = "${outputDir}/qc/sequenza/${name}_CP_contours.pdf"
|
String purity = "${output_dir}/qc/sequenza/${name}_CP_contours.pdf"
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -228,7 +190,7 @@ task cnvkit {
|
||||||
String normal_rmdupBam
|
String normal_rmdupBam
|
||||||
String ref
|
String ref
|
||||||
String bed
|
String bed
|
||||||
String outputDir
|
String output_dir
|
||||||
String cancer
|
String cancer
|
||||||
String codesDir
|
String codesDir
|
||||||
String project
|
String project
|
||||||
|
|
@ -237,8 +199,8 @@ task cnvkit {
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
|
|
||||||
if [ ! -d ${outputDir}/cnvkit ];then
|
if [ ! -d ${output_dir}/cnvkit ];then
|
||||||
mkdir ${outputDir}/cnvkit
|
mkdir ${output_dir}/cnvkit
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cnvkit.py batch \
|
cnvkit.py batch \
|
||||||
|
|
@ -247,149 +209,128 @@ task cnvkit {
|
||||||
--targets ${bed} \
|
--targets ${bed} \
|
||||||
--fasta ${ref} \
|
--fasta ${ref} \
|
||||||
--access ${accessBed} \
|
--access ${accessBed} \
|
||||||
--output-reference ${outputDir}/cnvkit/${normal}_reference.cnn \
|
--output-reference ${output_dir}/cnvkit/${normal}_reference.cnn \
|
||||||
--annotate ${annotateGene} \
|
--annotate ${annotateGene} \
|
||||||
--drop-low-coverage --scatter --output-dir ${outputDir}/cnvkit
|
--drop-low-coverage --scatter --output-dir ${output_dir}/cnvkit
|
||||||
|
|
||||||
cnvkit.py scatter \
|
cnvkit.py scatter \
|
||||||
${outputDir}/cnvkit/${tumor}.rmdup.cnr -s ${outputDir}/cnvkit/${tumor}.rmdup.cns \
|
${output_dir}/cnvkit/${tumor}.rmdup.cnr -s ${output_dir}/cnvkit/${tumor}.rmdup.cns \
|
||||||
--y-max 3 --y-min -3 \
|
--y-max 3 --y-min -3 \
|
||||||
--title ${tumor}.cns \
|
--title ${tumor}.cns \
|
||||||
-o ${outputDir}/cnvkit/${tumor}.cnv.png
|
-o ${output_dir}/cnvkit/${tumor}.cnv.png
|
||||||
|
|
||||||
perl ${codesDir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn
|
perl ${codesDir}/log2_cn.pl ${output_dir}/cnvkit/${tumor}.rmdup.cns ${output_dir}/cnvkit/${tumor}.rmdup.cns.cn
|
||||||
perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codesDir} ${tumor} ${outputDir} ${project} ${cancer}
|
perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codesDir} ${tumor} ${output_dir} ${project} ${cancer}
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String cns = "${outputDir}/cnvkit/${tumor}.rmdup.cns"
|
String cns = "${output_dir}/cnvkit/${tumor}.rmdup.cns"
|
||||||
String png = "${outputDir}/cnvkit/${tumor}.cnv.png"
|
String png = "${output_dir}/cnvkit/${tumor}.cnv.png"
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
task chemo {
|
task chemo {
|
||||||
String codesDir
|
String codesDir
|
||||||
String outputDir
|
String output_dir
|
||||||
String project
|
String project
|
||||||
String normal
|
String normal
|
||||||
String rmdupBam
|
String rmdupBam
|
||||||
command <<<
|
command <<<
|
||||||
|
|
||||||
if [ ! -d ${outputDir}/chemo ];then
|
if [ ! -d ${output_dir}/chemo ];then
|
||||||
mkdir ${outputDir}/chemo
|
mkdir ${output_dir}/chemo
|
||||||
fi
|
fi
|
||||||
|
|
||||||
${codesDir}/chemo/chemo_panel.py -p ${project} -o ${outputDir} --n ${normal}
|
${codesDir}/chemo/chemo_panel.py -p ${project} -o ${output_dir} --n ${normal}
|
||||||
>>>
|
>>>
|
||||||
}
|
}
|
||||||
|
|
||||||
task msi {
|
task msi {
|
||||||
String bed
|
String bed
|
||||||
String name
|
String name
|
||||||
String outputDir
|
String output_dir
|
||||||
String tumor_rmdupBam
|
String tumor_rmdupBam
|
||||||
String normal_rmdupBam
|
String normal_rmdupBam
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
if [ ! -d ${outputDir}/msi ];then
|
if [ ! -d ${output_dir}/msi ];then
|
||||||
mkdir ${outputDir}/msi
|
mkdir ${output_dir}/msi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
msisensor2 msi -d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list \
|
msisensor2 msi -d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list \
|
||||||
-n ${normal_rmdupBam} \
|
-n ${normal_rmdupBam} \
|
||||||
-t ${tumor_rmdupBam} \
|
-t ${tumor_rmdupBam} \
|
||||||
-e ${bed} -b 10 -o ${outputDir}/msi/${name}.msi
|
-e ${bed} -b 10 -o ${output_dir}/msi/${name}.msi
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String target="${outputDir}/MSI/${name}.msi"
|
String target="${output_dir}/MSI/${name}.msi"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task hla {
|
task hla {
|
||||||
String inputDir
|
String inputDir
|
||||||
String outputDir
|
String output_dir
|
||||||
String normal
|
String normal
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
if [ ! -d ${outputDir}/neoantigen ];then
|
if [ ! -d ${output_dir}/neoantigen ];then
|
||||||
mkdir -p ${outputDir}/neoantigen/HLA
|
mkdir -p ${output_dir}/neoantigen/HLA
|
||||||
fi
|
fi
|
||||||
|
|
||||||
razers3 -tc 10 -i 95 -m 1 -dr 0 \
|
razers3 -tc 10 -i 95 -m 1 -dr 0 \
|
||||||
-o ${outputDir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \
|
-o ${output_dir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \
|
||||||
${inputDir}/*_${normal}_*1.fq.gz
|
${inputDir}/*_${normal}_*1.fq.gz
|
||||||
|
|
||||||
samtools bam2fq ${outputDir}/neoantigen/HLA/fished_1.bam > ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq
|
samtools bam2fq ${output_dir}/neoantigen/HLA/fished_1.bam > ${output_dir}/neoantigen/HLA/${normal}_1_fished.fastq
|
||||||
rm ${outputDir}/neoantigen/HLA/fished_1.bam
|
rm ${output_dir}/neoantigen/HLA/fished_1.bam
|
||||||
|
|
||||||
razers3 -tc 10 -i 95 -m 1 -dr 0 \
|
razers3 -tc 10 -i 95 -m 1 -dr 0 \
|
||||||
-o ${outputDir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \
|
-o ${output_dir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \
|
||||||
${inputDir}/*_${normal}_*2.fq.gz
|
${inputDir}/*_${normal}_*2.fq.gz
|
||||||
|
|
||||||
samtools bam2fq ${outputDir}/neoantigen/HLA/fished_2.bam > ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq
|
samtools bam2fq ${output_dir}/neoantigen/HLA/fished_2.bam > ${output_dir}/neoantigen/HLA/${normal}_2_fished.fastq
|
||||||
rm ${outputDir}/neoantigen/HLA/fished_2.bam
|
rm ${output_dir}/neoantigen/HLA/fished_2.bam
|
||||||
|
|
||||||
/dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py \
|
/dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py \
|
||||||
-i ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq \
|
-i ${output_dir}/neoantigen/HLA/${normal}_1_fished.fastq ${output_dir}/neoantigen/HLA/${normal}_2_fished.fastq \
|
||||||
--dna -v --prefix ${normal} -o ${outputDir}/neoantigen/HLA/
|
--dna -v --prefix ${normal} -o ${output_dir}/neoantigen/HLA/
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String hla = "${outputDir}/neoantigen/HLA/${normal}_result.tsv"
|
String hla = "${output_dir}/neoantigen/HLA/${normal}_result.tsv"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task neoantigen {
|
task neoantigen {
|
||||||
String codesDir
|
String codesDir
|
||||||
String outputDir
|
String output_dir
|
||||||
String name
|
String name
|
||||||
String normal
|
String normal
|
||||||
String somatic_hc_vcf
|
String somatic_hc_vcf
|
||||||
String hla
|
String hla
|
||||||
command <<<
|
command <<<
|
||||||
sh /home/jm001/test_duantao/database_update/test_project/20230814_test/predict_neoantigen.sh ${outputDir} ${name} ${name} ${codesDir}
|
sh /home/jm001/test_duantao/database_update/test_project/20230814_test/predict_neoantigen.sh ${output_dir} ${name} ${name} ${codesDir}
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${name}.all_epitopes.netchop.txt"
|
String neoantigen = "${output_dir}/neoantigen/MHC_Class_I/${name}.all_epitopes.netchop.txt"
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
task dealwithsnvindel {
|
|
||||||
String codesDir
|
|
||||||
String name
|
|
||||||
String somatic_all_anno
|
|
||||||
String germline_anno
|
|
||||||
String project
|
|
||||||
String outputDir
|
|
||||||
String cancer
|
|
||||||
|
|
||||||
command <<<
|
|
||||||
perl ${codesDir}/pick_variant.pl ${outputDir} ${name}
|
|
||||||
perl ${codesDir}/pick_mut_splice_promoter.pl ${codesDir} ${name} ${outputDir} ${project}
|
|
||||||
perl /home/jm001/test_duantao/database_update/codes/682/targetTherapy.pl ${name} ${outputDir} ${project} ${cancer}
|
|
||||||
perl /home/jm001/test_duantao/database_update/codes/682/germline_targetTherapy.pl ${name} ${outputDir} ${project} ${cancer}
|
|
||||||
>>>
|
|
||||||
output {
|
|
||||||
String snvindel_filtered= "${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt"
|
|
||||||
String germline_filtered = "${outputDir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task hereditary {
|
task hereditary {
|
||||||
String codesDir
|
String codesDir
|
||||||
String name
|
String name
|
||||||
String outputDir
|
String output_dir
|
||||||
String project
|
String project
|
||||||
String germline_filtered
|
String germline_filtered
|
||||||
command <<<
|
command <<<
|
||||||
|
|
||||||
${codesDir}/hereditary/hereditary.py -p ${project} -o ${outputDir} --n ${name}
|
${codesDir}/hereditary/hereditary.py -p ${project} -o ${output_dir} --n ${name}
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String hereditary_pre = "${outputDir}/hereditary/${name}.hereditary.pre.txt"
|
String hereditary_pre = "${output_dir}/hereditary/${name}.hereditary.pre.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -398,101 +339,101 @@ task conpair {
|
||||||
String name
|
String name
|
||||||
String tumor_rmdupBam
|
String tumor_rmdupBam
|
||||||
String normal_rmdupBam
|
String normal_rmdupBam
|
||||||
String outputDir
|
String output_dir
|
||||||
String ref
|
String ref
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
|
|
||||||
if [ ! -d ${outputDir}/conpair ];then
|
if [ ! -d ${output_dir}/conpair ];then
|
||||||
mkdir -p ${outputDir}/conpair
|
mkdir -p ${output_dir}/conpair
|
||||||
fi
|
fi
|
||||||
|
|
||||||
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
|
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
|
||||||
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
|
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
|
||||||
-B ${tumor_rmdupBam} \
|
-B ${tumor_rmdupBam} \
|
||||||
-O ${outputDir}/conpair/${name}.tumor.gatk.mpileup \
|
-O ${output_dir}/conpair/${name}.tumor.gatk.mpileup \
|
||||||
-R ${ref} \
|
-R ${ref} \
|
||||||
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
|
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
|
||||||
|
|
||||||
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
|
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
|
||||||
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
|
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
|
||||||
-B ${normal_rmdupBam} \
|
-B ${normal_rmdupBam} \
|
||||||
-O ${outputDir}/conpair/${name}.normal.gatk.mpileup \
|
-O ${output_dir}/conpair/${name}.normal.gatk.mpileup \
|
||||||
-R ${ref} \
|
-R ${ref} \
|
||||||
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
|
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
|
||||||
|
|
||||||
sed -i 's/^chr//g' ${outputDir}/conpair/${name}.tumor.gatk.mpileup
|
sed -i 's/^chr//g' ${output_dir}/conpair/${name}.tumor.gatk.mpileup
|
||||||
sed -i 's/^chr//g' ${outputDir}/conpair/${name}.normal.gatk.mpileup
|
sed -i 's/^chr//g' ${output_dir}/conpair/${name}.normal.gatk.mpileup
|
||||||
|
|
||||||
python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \
|
python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \
|
||||||
-H \
|
-H \
|
||||||
-T ${outputDir}/conpair/${name}.tumor.gatk.mpileup \
|
-T ${output_dir}/conpair/${name}.tumor.gatk.mpileup \
|
||||||
-N ${outputDir}/conpair/${name}.normal.gatk.mpileup \
|
-N ${output_dir}/conpair/${name}.normal.gatk.mpileup \
|
||||||
-O ${outputDir}/conpair/${name}_concordance.txt
|
-O ${output_dir}/conpair/${name}_concordance.txt
|
||||||
|
|
||||||
python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \
|
python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \
|
||||||
-T ${outputDir}/conpair/${name}.tumor.gatk.mpileup \
|
-T ${output_dir}/conpair/${name}.tumor.gatk.mpileup \
|
||||||
-N ${outputDir}/conpair/${name}.normal.gatk.mpileup \
|
-N ${output_dir}/conpair/${name}.normal.gatk.mpileup \
|
||||||
-O ${outputDir}/conpair/${name}_contamination.txt
|
-O ${output_dir}/conpair/${name}_contamination.txt
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String concordance = "${outputDir}/conpair/${name}_concordance.txt"
|
String concordance = "${output_dir}/conpair/${name}_concordance.txt"
|
||||||
String contamination = "${outputDir}/conpair/${name}_contamination.txt"
|
String contamination = "${output_dir}/conpair/${name}_contamination.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task mmr {
|
task mmr {
|
||||||
String codesDir
|
String codesDir
|
||||||
String name
|
String name
|
||||||
String outputDir
|
String output_dir
|
||||||
String germline_filtered
|
String germline_filtered
|
||||||
command <<<
|
command <<<
|
||||||
if [ ! -d ${outputDir}/MMR ];then
|
if [ ! -d ${output_dir}/MMR ];then
|
||||||
mkdir -p ${outputDir}/MMR
|
mkdir -p ${output_dir}/MMR
|
||||||
fi
|
fi
|
||||||
perl ${codesDir}/mmr_controlsample.pl ${outputDir} ${name}
|
perl ${codesDir}/mmr_controlsample.pl ${output_dir} ${name}
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String mmr = "${outputDir}/MMR/${name}_mmr.txt"
|
String mmr = "${output_dir}/MMR/${name}_mmr.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task hrr {
|
task hrr {
|
||||||
String codesDir
|
String codesDir
|
||||||
String name
|
String name
|
||||||
String outputDir
|
String output_dir
|
||||||
String germline_filtered
|
String germline_filtered
|
||||||
command <<<
|
command <<<
|
||||||
if [ ! -d ${outputDir}/HRR ];then
|
if [ ! -d ${output_dir}/HRR ];then
|
||||||
mkdir -p ${outputDir}/HRR
|
mkdir -p ${output_dir}/HRR
|
||||||
fi
|
fi
|
||||||
perl ${codesDir}/hrr_controlsample_tissue.pl ${outputDir} ${name}
|
perl ${codesDir}/hrr_controlsample_tissue.pl ${output_dir} ${name}
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String hrr = "${outputDir}/HRR/${name}_hrr.txt"
|
String hrr = "${output_dir}/HRR/${name}_hrr.txt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task hotspot {
|
task hotspot {
|
||||||
String name
|
String name
|
||||||
String outputDir
|
String output_dir
|
||||||
String ref
|
String ref
|
||||||
String rmdupBam
|
String rmdupBam
|
||||||
String codesDir
|
String codesDir
|
||||||
command <<<
|
command <<<
|
||||||
if [ ! -d ${outputDir}/mutation/hotspot/ ];then
|
if [ ! -d ${output_dir}/mutation/hotspot/ ];then
|
||||||
mkdir -p ${outputDir}/mutation/hotspot/
|
mkdir -p ${output_dir}/mutation/hotspot/
|
||||||
fi
|
fi
|
||||||
|
|
||||||
samtools mpileup -Bq 20 -Q 20 \
|
samtools mpileup -Bq 20 -Q 20 \
|
||||||
-f ${ref} \
|
-f ${ref} \
|
||||||
-l ${codesDir}/hotspot.bed \
|
-l ${codesDir}/hotspot.bed \
|
||||||
-o ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \
|
-o ${output_dir}/mutation/hotspot/${name}.hotspot.pileup \
|
||||||
${rmdupBam}
|
${rmdupBam}
|
||||||
|
|
||||||
java -jar $VARSCAN mpileup2cns \
|
java -jar $VARSCAN mpileup2cns \
|
||||||
${outputDir}/mutation/hotspot/${name}.hotspot.pileup \
|
${output_dir}/mutation/hotspot/${name}.hotspot.pileup \
|
||||||
--min-var-freq 0.005 \
|
--min-var-freq 0.005 \
|
||||||
--min-avg-qual 20 \
|
--min-avg-qual 20 \
|
||||||
--output-vcf 1 \
|
--output-vcf 1 \
|
||||||
|
|
@ -500,10 +441,10 @@ task hotspot {
|
||||||
--p-value 0.99 \
|
--p-value 0.99 \
|
||||||
--min-reads2 2 \
|
--min-reads2 2 \
|
||||||
--strand-filter 0 \
|
--strand-filter 0 \
|
||||||
> ${outputDir}/mutation/hotspot/${name}.hotspot.L.snp.indel.vcf
|
> ${output_dir}/mutation/hotspot/${name}.hotspot.L.snp.indel.vcf
|
||||||
|
|
||||||
java -jar $VARSCAN mpileup2cns \
|
java -jar $VARSCAN mpileup2cns \
|
||||||
${outputDir}/mutation/hotspot/${name}.hotspot.pileup \
|
${output_dir}/mutation/hotspot/${name}.hotspot.pileup \
|
||||||
--min-var-freq 0.01 \
|
--min-var-freq 0.01 \
|
||||||
--min-avg-qual 20 \
|
--min-avg-qual 20 \
|
||||||
--output-vcf 1 \
|
--output-vcf 1 \
|
||||||
|
|
@ -511,32 +452,32 @@ task hotspot {
|
||||||
--p-value 0.05 \
|
--p-value 0.05 \
|
||||||
--min-reads2 3 \
|
--min-reads2 3 \
|
||||||
--strand-filter 1 \
|
--strand-filter 1 \
|
||||||
> ${outputDir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf
|
> ${output_dir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf
|
||||||
|
|
||||||
perl ${codesDir}/hotspot.hvl.pl ${outputDir} ${name}
|
perl ${codesDir}/hotspot.hvl.pl ${output_dir} ${name}
|
||||||
|
|
||||||
if [ -e "${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf" ]; then
|
if [ -e "${output_dir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf" ]; then
|
||||||
table_annovar.pl \
|
table_annovar.pl \
|
||||||
${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf \
|
${output_dir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf \
|
||||||
/dataseq/jmdna/software/annovar/humandb/ \
|
/dataseq/jmdna/software/annovar/humandb/ \
|
||||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
||||||
-protocol refGene \
|
-protocol refGene \
|
||||||
-argument '-hgvs' \
|
-argument '-hgvs' \
|
||||||
-operation g \
|
-operation g \
|
||||||
--outfile ${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.anno
|
--outfile ${output_dir}/mutation/hotspot/${name}.hotspot.snp.indel.anno
|
||||||
perl ${codesDir}/hotspot.filter.pl ${outputDir} ${name}
|
perl ${codesDir}/hotspot.filter.pl ${output_dir} ${name}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String hotspot = "${outputDir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf"
|
String hotspot = "${output_dir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task auto_report {
|
task auto_report {
|
||||||
String cancer
|
String cancer
|
||||||
String codesDir
|
String codesDir
|
||||||
String outputDir
|
String output_dir
|
||||||
|
|
||||||
String normal
|
String normal
|
||||||
String tumor
|
String tumor
|
||||||
|
|
@ -554,28 +495,28 @@ task auto_report {
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
|
|
||||||
if [ ! -d ${outputDir}/report ];then
|
if [ ! -d ${output_dir}/report ];then
|
||||||
mkdir -p ${outputDir}/report
|
mkdir -p ${output_dir}/report
|
||||||
fi
|
fi
|
||||||
|
|
||||||
perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer}
|
perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${output_dir} ${cancer}
|
||||||
python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor}
|
python3 ${codesDir}/drug_dedup.py ${output_dir} ${tumor}
|
||||||
perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor}
|
perl ${codesDir}/file_format_change.pl ${output_dir} ${tumor}
|
||||||
python3 ${codesDir}/report_template/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}
|
python3 ${codesDir}/report_template/682gene_tissue_control_report.py ${output_dir} ${tumor} ${normal} ${cancer}
|
||||||
|
|
||||||
ln -s ${cnv_cns} ${outputDir}/report/
|
ln -s ${cnv_cns} ${output_dir}/report/
|
||||||
ln -s ${cnv_png} ${outputDir}/report/
|
ln -s ${cnv_png} ${output_dir}/report/
|
||||||
|
|
||||||
ln -s ${fusion_pos} ${outputDir}/report/
|
ln -s ${fusion_pos} ${output_dir}/report/
|
||||||
ln -s ${snvindel_filtered} ${outputDir}/report/
|
ln -s ${snvindel_filtered} ${output_dir}/report/
|
||||||
|
|
||||||
ln -s ${tmb} ${outputDir}/report/
|
ln -s ${tmb} ${output_dir}/report/
|
||||||
|
|
||||||
ln -s ${mmr} ${outputDir}/report/
|
ln -s ${mmr} ${output_dir}/report/
|
||||||
|
|
||||||
ln -s ${hrr} ${outputDir}/report/
|
ln -s ${hrr} ${output_dir}/report/
|
||||||
|
|
||||||
ln -s ${hereditary_pre} ${outputDir}/report/
|
ln -s ${hereditary_pre} ${output_dir}/report/
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue