2023-09-27 10:47:03 +08:00
|
|
|
|
# Single-sample variant calling for UMI libraries.
#
# Runs VarDict twice -- once on the deduplicated BAM ("1r") and once on a BAM
# restricted to sequences corrected from >= 2 reads ("2r") -- and then merges
# the two VCFs with filter_snpindel_umi_1r_plus_2r.pl.
#
# Inputs:
#   name        sample name; used for VarDict -N and for every output file name
#   output_dir  pipeline output root; results go to <output_dir>/mutation
#   rmdup_bam   deduplicated BAM used for the 1r call
#   ref         reference genome passed to VarDict -G
#   bed         target regions passed to VarDict
# Output:
#   vcf         path of the merged VCF (a String path, not a File)
task mutation_calling_umi {
    String name
    String output_dir
    String rmdup_bam
    String ref
    String bed

    command <<<
        # Create the result directory on first use.
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # --- 1r call ---------------------------------------------------------
        # UMI families supported by a single read are kept here: dropping them
        # would reduce the amount of data considerably.
        #   -r 3  requires 3 supporting reads for a variant
        #   -f    is the allele-frequency threshold; frequencies from the 2r
        #         call are more trustworthy than those from the 1r call
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.001 -N ${name} -b ${rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.1r.snp.indel.vcf

        # --- 2r BAM ----------------------------------------------------------
        # Extract the sequences that were corrected from >= 2 reads.
        # NOTE(review): this reads <output_dir>/alignment/<name>.rmdup.bam, not
        # the rmdup_bam input -- confirm both always refer to the same file.
        func_fetch_bam.py \
            ${output_dir}/alignment/${name}.rmdup.bam \
            ${output_dir}/alignment/${name}.2r.rmdup.bam
        samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam

        # --- 2r call ---------------------------------------------------------
        # Ensures that mutations from the 1r call are backed by UMI families
        # containing 2 reads.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.0001 -N ${name}_2r \
            -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.2r.snp.indel.vcf

        # --- merge -----------------------------------------------------------
        # Mutations with AF > 0.01 from the 1r call, plus the 2r call used to
        # correct the low-frequency range (AF < 0.01) of the 1r call.
        filter_snpindel_umi_1r_plus_2r.pl \
            ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf
    >>>

    output {
        String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
# Single-sample variant calling for non-UMI (tissue) libraries.
#
# Runs one VarDict pass on the deduplicated BAM, pipes it through
# teststrandbias.R and var2vcf_valid.pl, and writes the VCF to
# <output_dir>/mutation/<name>.snp.indel.vcf.
#
# Inputs:
#   name        sample name; used for VarDict -N and for the output file name
#   bed         target regions passed to VarDict
#   ref         reference genome passed to VarDict -G
#   output_dir  pipeline output root
#   rmdup_bam   deduplicated BAM to call variants on
# Output:
#   vcf         path of the resulting VCF (a String path, not a File)
task mutation_calling_tissue {
    String name
    String bed
    String ref
    String output_dir
    String rmdup_bam

    command <<<
        # Create the result directory on first use.
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # VarDict -> strand-bias test -> VCF conversion, with the same 0.01
        # frequency threshold at the calling and the conversion step.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.01 -N ${name} -b ${rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 \
            -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.vcf
    >>>

    output {
        String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
# Paired (tumor + normal) variant calling for non-UMI (tissue) libraries.
#
# Hands both BAMs to a single VarDict run as "tumor|normal", pipes the result
# through testsomatic.R and var2vcf_paired.pl, and writes the VCF to
# <output_dir>/mutation/<name>.snp.indel.vcf.
#
# Inputs:
#   name              sample name; used for VarDict -N and the output file name
#   bed               target regions passed to VarDict
#   ref               reference genome passed to VarDict -G
#   output_dir        pipeline output root
#   tumor_rmdup_bam   deduplicated tumor BAM
#   normal_rmdup_bam  deduplicated normal (control) BAM
# Output:
#   vcf               path of the resulting VCF (a String path, not a File)
task mutation_calling_tissue_control {
    String name
    String bed
    String ref
    String output_dir
    String tumor_rmdup_bam
    String normal_rmdup_bam

    command <<<
        # Create the result directory on first use.
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # The -b value is quoted so the shell passes the "|" between the two
        # BAM paths to VarDict literally instead of treating it as a pipe.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.01 -N ${name} \
            -b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \
            -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.vcf
    >>>

    output {
        String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
# Paired (tumor + normal) variant calling for UMI libraries.
#
# Steps:
#   1. Call the normal (control) BAM with VarDict   -> <name>_normal.snp.indel.vcf
#   2. Call the tumor BAM with VarDict ("1r")       -> <name>.1r.snp.indel.vcf
#   3. Build the >= 2-read-corrected BAM and call it ("2r")
#                                                   -> <name>.2r.snp.indel.vcf
#   4. Merge the 1r and 2r calls                    -> <name>_tumor.snp.indel.vcf
#   5. Remove sites that are also found in normal   -> <name>.snp.indel.vcf
#
# Inputs:
#   name              tumor sample name; used for VarDict -N and file names
#   bed               target regions passed to VarDict
#   ref               reference genome passed to VarDict -G
#   output_dir        pipeline output root; results go to <output_dir>/mutation
#   tumor_rmdup_bam   deduplicated tumor BAM
#   normal_rmdup_bam  deduplicated normal (control) BAM
# Output:
#   vcf               path of the final, normal-subtracted VCF (a String path)
task mutation_calling_umi_control {
    String name
    String bed
    String ref
    String output_dir
    String tumor_rmdup_bam
    String normal_rmdup_bam

    command <<<
        if [ ! -d ${output_dir}/mutation ];then
            mkdir ${output_dir}/mutation
        fi

        # Control (normal) sample
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.01 \
            -N ${name} \
            -b ${normal_rmdup_bam} \
            -UN \
            -Q 20 \
            -m 3 \
            -r 3 \
            -th 10 \
            -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 > ${output_dir}/mutation/${name}_normal.snp.indel.vcf

        # Test (tumor) sample, 1r call
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.001 \
            -N ${name} \
            -b ${tumor_rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \
            -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf

        # Extract the sequences that were corrected from >= 2 reads.
        # NOTE(review): this reads <output_dir>/alignment/<name>.rmdup.bam, not
        # the tumor_rmdup_bam input -- confirm both always refer to the same file.
        func_fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam
        samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam

        # Ensure that mutations from the 1r call are backed by UMI families
        # containing 2 reads: call again on the 2-read-corrected BAM.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
            -f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.2r.snp.indel.vcf

        # Merge: mutations with AF > 0.01 from the 1r call, plus the 2r call
        # used to correct the low-frequency range (AF < 0.01) of the 1r call.
        # The merged result is written to <name>_tumor.snp.indel.vcf because
        # that is the file the normal-subtraction step below reads; writing it
        # to <name>.snp.indel.vcf left that step without its input.
        filter_snpindel_umi_1r_plus_2r.pl \
            ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
            ${output_dir}/mutation/${name}_tumor.snp.indel.vcf

        # Remove mutation sites that are also present in the normal sample.
        filter_snpindel_umi_subnormal.pl \
            ${output_dir}/mutation/${name}_tumor.snp.indel.vcf \
            ${output_dir}/mutation/${name}_normal.snp.indel.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf
    >>>

    output {
        String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
# Dispatches SNV/indel calling to one of four tasks, chosen by whether a normal
# (control) sample is supplied and whether the library carries UMIs:
#
#   normal defined,   umi  -> mutation_calling_umi_control
#   normal defined,  !umi  -> mutation_calling_tissue_control
#   normal undefined, umi  -> mutation_calling_umi
#   normal undefined,!umi  -> mutation_calling_tissue
#
# Inputs:
#   tumor             tumor sample name (passed to every task as `name`)
#   tumor_rmdup_bam   deduplicated tumor BAM
#   normal            optional normal sample name; only tested with defined()
#   normal_rmdup_bam  optional deduplicated normal BAM
#   umi               true for UMI libraries
#   output_dir        pipeline output root
#   ref               reference genome
#   bed               target regions
# Output:
#   somatic_vcf       <output_dir>/mutation/<tumor>.snp.indel.vcf, the path all
#                     four tasks write their final VCF to
workflow call_mutation {

    String tumor
    String tumor_rmdup_bam
    String? normal
    String? normal_rmdup_bam
    Boolean umi
    String output_dir
    String ref
    String bed

    # Paired samples (tumor + normal).
    # NOTE(review): only `normal` is tested; normal_rmdup_bam is assumed to be
    # supplied whenever normal is -- confirm against the caller.
    if (defined(normal)) {
        if (umi) {
            call mutation_calling_umi_control {
                input:
                    name=tumor,
                    output_dir=output_dir,
                    ref=ref,
                    bed=bed,
                    tumor_rmdup_bam=tumor_rmdup_bam,
                    normal_rmdup_bam=normal_rmdup_bam
            }
        }
        if (!umi) {
            call mutation_calling_tissue_control {
                input:
                    name=tumor,
                    output_dir=output_dir,
                    ref=ref,
                    bed=bed,
                    tumor_rmdup_bam=tumor_rmdup_bam,
                    normal_rmdup_bam=normal_rmdup_bam
            }
        }
    }

    # Single sample (tumor only).
    if (!defined(normal)) {
        if (umi) {
            call mutation_calling_umi {
                input:
                    name=tumor,
                    output_dir=output_dir,
                    ref=ref,
                    bed=bed,
                    rmdup_bam=tumor_rmdup_bam
            }
        }
        if (!umi) {
            call mutation_calling_tissue {
                input:
                    name=tumor,
                    output_dir=output_dir,
                    ref=ref,
                    bed=bed,
                    # Only the tumor sample exists in this branch, so the tumor
                    # BAM is the one to call; normal_rmdup_bam is the optional
                    # BAM of the normal sample this branch runs without.
                    rmdup_bam=tumor_rmdup_bam
            }
        }
    }

    output {
        String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf"
    }
}
|