调整call_mutation
parent
82ce1d74bf
commit
188e7dd8a4
|
|
@ -2,8 +2,10 @@
|
||||||
|
|
||||||
## run script example
|
## run script example
|
||||||
```bash
|
```bash
|
||||||
/home/zhangchao/soft/jdk-17.0.7+7/bin/java -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf \
|
export PATH=/dataseq/product/workflow/software/bin:$PATH
|
||||||
-jar /home/zhangchao/soft/cromwell-85.jar run \
|
|
||||||
|
java17 -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf \
|
||||||
|
-jar /dataseq/product/workflow/software/cromwell-85.jar run \
|
||||||
--inputs /home/zhangchao/project/pipeline/workflow/test/20230814.json \
|
--inputs /home/zhangchao/project/pipeline/workflow/test/20230814.json \
|
||||||
/home/zhangchao/project/pipeline/workflow/pipeline.wdl
|
/home/zhangchao/project/pipeline/workflow/pipeline.wdl
|
||||||
```
|
```
|
||||||
|
|
|
||||||
201
pipeline.wdl
201
pipeline.wdl
|
|
@ -1,21 +1,23 @@
|
||||||
import "./wdl/task.wdl" as mytask
|
import "./wdl/task.wdl" as mytask
|
||||||
import "./wdl/qc.wdl"
|
import "./wdl/qc.wdl"
|
||||||
import "./wdl/alignment.wdl"
|
import "./wdl/alignment.wdl"
|
||||||
import "./wdl/statistics.wdl"
|
import "./wdl/call_mutation.wdl"
|
||||||
|
|
||||||
workflow pipeline {
|
workflow pipeline {
|
||||||
|
|
||||||
String tumor
|
String tumor
|
||||||
String? normal
|
String? normal
|
||||||
String inputDir
|
Boolean umi=false
|
||||||
String outputDir
|
|
||||||
String cancer
|
|
||||||
String project="650gene"
|
|
||||||
String codesDir="/dataseq/jmdna/codes/pancancer_controlsample"
|
|
||||||
String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
|
|
||||||
String bed = "/dataseq/jmdna/database/bed/650.bed"
|
|
||||||
|
|
||||||
String workdir="${outputDir}/${tumor}"
|
String input_dir
|
||||||
|
String output_dir
|
||||||
|
|
||||||
|
String bed
|
||||||
|
|
||||||
|
String codesDir="/home/zhangchao/project/pipeline/workflow/script"
|
||||||
|
String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
|
||||||
|
|
||||||
|
String workdir="${output_dir}"
|
||||||
|
|
||||||
call mytask.create_dir as create_dir {
|
call mytask.create_dir as create_dir {
|
||||||
input:
|
input:
|
||||||
|
|
@ -26,8 +28,9 @@ workflow pipeline {
|
||||||
input:
|
input:
|
||||||
tumor=tumor,
|
tumor=tumor,
|
||||||
normal=normal,
|
normal=normal,
|
||||||
inputDir=inputDir,
|
umi=umi,
|
||||||
outputDir=workdir
|
input_dir=input_dir,
|
||||||
|
output_dir=workdir
|
||||||
}
|
}
|
||||||
|
|
||||||
call alignment.alignment as alignment {
|
call alignment.alignment as alignment {
|
||||||
|
|
@ -41,183 +44,23 @@ workflow pipeline {
|
||||||
normal_r1=qc.normal_r1,
|
normal_r1=qc.normal_r1,
|
||||||
normal_r2=qc.normal_r2,
|
normal_r2=qc.normal_r2,
|
||||||
|
|
||||||
|
umi=umi,
|
||||||
|
|
||||||
ref=ref,
|
ref=ref,
|
||||||
bed=bed,
|
bed=bed,
|
||||||
outputDir=workdir
|
output_dir=workdir
|
||||||
}
|
}
|
||||||
|
|
||||||
call statistics.statistics as statistics {
|
call call_mutation.call_mutation as call_mutation {
|
||||||
input:
|
input:
|
||||||
tumor=tumor,
|
tumor=tumor,
|
||||||
tumor_rmdupBam=alignment.tumor_rmdupBam,
|
tumor_rmdup_bam=alignment.tumor_rmdup_bam,
|
||||||
|
|
||||||
normal=normal,
|
normal=normal,
|
||||||
normal_rmdupBam=alignment.normal_rmdupBam,
|
normal_rmdup_bam=alignment.normal_rmdup_bam,
|
||||||
|
|
||||||
|
umi=umi,
|
||||||
ref=ref,
|
ref=ref,
|
||||||
bed=bed,
|
bed=bed,
|
||||||
outputDir=workdir,
|
output_dir=workdir
|
||||||
codesDir=codesDir
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.conpair as conpair {
|
|
||||||
input:
|
|
||||||
codesDir=codesDir,
|
|
||||||
name=tumor,
|
|
||||||
tumor_rmdupBam=alignment.tumor_rmdupBam,
|
|
||||||
normal_rmdupBam=alignment.normal_rmdupBam,
|
|
||||||
outputDir=workdir,
|
|
||||||
ref=ref
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.mutation_calling as mutation_calling {
|
|
||||||
input:
|
|
||||||
name=tumor,
|
|
||||||
tumor_pileup=alignment.tumor_pileup,
|
|
||||||
normal_pileup=alignment.normal_pileup,
|
|
||||||
outputDir=workdir
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.annovar as annovar {
|
|
||||||
input:
|
|
||||||
name=tumor,
|
|
||||||
outputDir=workdir,
|
|
||||||
ref=ref,
|
|
||||||
somatic_hc_vcf=mutation_calling.somatic_hc_vcf,
|
|
||||||
germline_vcf=mutation_calling.germline_vcf,
|
|
||||||
loh_hc_vcf=mutation_calling.loh_hc_vcf,
|
|
||||||
rmdupBam=alignment.tumor_rmdupBam
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.dealwithsnvindel as dealwithsnvindel {
|
|
||||||
input:
|
|
||||||
codesDir=codesDir,
|
|
||||||
name=tumor,
|
|
||||||
somatic_all_anno=annovar.somatic_all_anno,
|
|
||||||
germline_anno=annovar.germline_anno,
|
|
||||||
project=project,
|
|
||||||
outputDir=workdir,
|
|
||||||
cancer=cancer
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.hereditary as hereditary {
|
|
||||||
input:
|
|
||||||
codesDir=codesDir,
|
|
||||||
name=tumor,
|
|
||||||
outputDir=workdir,
|
|
||||||
project=project,
|
|
||||||
germline_filtered = dealwithsnvindel.germline_filtered
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.tmb as tmb {
|
|
||||||
input:
|
|
||||||
codesDir=codesDir,
|
|
||||||
name=tumor,
|
|
||||||
outputDir=workdir,
|
|
||||||
somatic_anno=annovar.somatic_anno
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.fusion as fusion {
|
|
||||||
input:
|
|
||||||
name=tumor,
|
|
||||||
ref=ref,
|
|
||||||
codesDir=codesDir,
|
|
||||||
outputDir=workdir,
|
|
||||||
rmdupBam=alignment.tumor_rmdupBam,
|
|
||||||
cancer=cancer,
|
|
||||||
project=project,
|
|
||||||
tumor_bamdst_depth=statistics.tumor_bamdst_depth
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.cnvkit as cnvkit {
|
|
||||||
input:
|
|
||||||
tumor=tumor,
|
|
||||||
normal=normal,
|
|
||||||
|
|
||||||
tumor_rmdupBam=alignment.tumor_rmdupBam,
|
|
||||||
normal_rmdupBam=alignment.normal_rmdupBam,
|
|
||||||
ref=ref,
|
|
||||||
bed=bed,
|
|
||||||
outputDir=workdir,
|
|
||||||
cancer=cancer,
|
|
||||||
codesDir=codesDir,
|
|
||||||
project=project,
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.chemo as chemo {
|
|
||||||
input:
|
|
||||||
codesDir=codesDir,
|
|
||||||
outputDir=workdir,
|
|
||||||
normal=normal,
|
|
||||||
project=project,
|
|
||||||
rmdupBam=alignment.tumor_rmdupBam,
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.msi as msi {
|
|
||||||
input:
|
|
||||||
bed=bed,
|
|
||||||
name=tumor,
|
|
||||||
outputDir=workdir,
|
|
||||||
tumor_rmdupBam = alignment.tumor_rmdupBam,
|
|
||||||
normal_rmdupBam =alignment.normal_rmdupBam
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.hla as hla {
|
|
||||||
input:
|
|
||||||
inputDir=inputDir,
|
|
||||||
outputDir=workdir,
|
|
||||||
normal=normal,
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.neoantigen as neoantigen {
|
|
||||||
input:
|
|
||||||
codesDir=codesDir,
|
|
||||||
outputDir=workdir,
|
|
||||||
name=tumor,
|
|
||||||
somatic_hc_vcf=mutation_calling.somatic_hc_vcf,
|
|
||||||
normal=normal,
|
|
||||||
hla=hla.hla
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.mmr as mmr {
|
|
||||||
input:
|
|
||||||
codesDir=codesDir,
|
|
||||||
name=tumor,
|
|
||||||
outputDir=workdir,
|
|
||||||
germline_filtered = dealwithsnvindel.germline_filtered
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.hrr as hrr {
|
|
||||||
input:
|
|
||||||
codesDir=codesDir,
|
|
||||||
name=tumor,
|
|
||||||
outputDir=workdir,
|
|
||||||
germline_filtered = dealwithsnvindel.germline_filtered
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.hotspot as hotspot {
|
|
||||||
input:
|
|
||||||
name=tumor,
|
|
||||||
outputDir=workdir,
|
|
||||||
ref=ref,
|
|
||||||
rmdupBam=alignment.tumor_rmdupBam,
|
|
||||||
codesDir=codesDir,
|
|
||||||
}
|
|
||||||
|
|
||||||
call mytask.auto_report {
|
|
||||||
input:
|
|
||||||
tumor=tumor,
|
|
||||||
normal=normal,
|
|
||||||
outputDir=workdir,
|
|
||||||
codesDir=codesDir,
|
|
||||||
cancer=cancer,
|
|
||||||
cnv_cns=cnvkit.cns,
|
|
||||||
cnv_png=cnvkit.png,
|
|
||||||
fusion_pos=fusion.fusion,
|
|
||||||
snvindel_filtered=dealwithsnvindel.snvindel_filtered,
|
|
||||||
tmb=tmb.tmb,
|
|
||||||
mmr=mmr.mmr,
|
|
||||||
hrr=hrr.hrr,
|
|
||||||
hereditary_pre=hereditary.hereditary_pre
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,48 +4,75 @@ task bwa {
|
||||||
String name
|
String name
|
||||||
String read1
|
String read1
|
||||||
String read2
|
String read2
|
||||||
String outputDir
|
String output_dir
|
||||||
String ref
|
String ref
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
if [ ! -d ${outputDir}/alignment ];then
|
if [ ! -d ${output_dir}/alignment ];then
|
||||||
mkdir ${outputDir}/alignment
|
mkdir ${output_dir}/alignment
|
||||||
fi
|
fi
|
||||||
|
|
||||||
bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} ${read1} ${read2} | \
|
bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} ${read1} ${read2} | \
|
||||||
samtools view -@ 10 -bh -o - | samtools sort -@ 10 -o ${outputDir}/alignment/${name}.sorted.bam
|
samtools view -@ 10 -bh -o - | samtools sort -@ 10 -o ${output_dir}/alignment/${name}.sorted.bam
|
||||||
samtools index ${outputDir}/alignment/${name}.sorted.bam
|
samtools index ${output_dir}/alignment/${name}.sorted.bam
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String sortedBam = "${outputDir}/alignment/${name}.sorted.bam"
|
String sorted_bam = "${output_dir}/alignment/${name}.sorted.bam"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#remove PCR duplicates
|
#remove PCR duplicates
|
||||||
|
|
||||||
task markduplicates {
|
task markduplicates_genecore {
|
||||||
String name
|
String name
|
||||||
String ref
|
String ref
|
||||||
String sortedBam
|
String sorted_bam
|
||||||
String outputDir
|
String output_dir
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
if [ ! -d ${outputDir}/alignment ];then
|
if [ ! -d ${output_dir}/alignment ];then
|
||||||
mkdir ${outputDir}/alignment
|
mkdir ${output_dir}/alignment
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
gencore -i ${sorted_bam} \
|
||||||
|
-o ${output_dir}/alignment/${name}.rmdup.bam \
|
||||||
|
-r ${ref} \
|
||||||
|
-u UMI \
|
||||||
|
-j ${output_dir}/alignment/${name}_rmdup.json \
|
||||||
|
-h ${output_dir}/alignment/${name}_rmdup.html
|
||||||
|
|
||||||
|
samtools index ${output_dir}/alignment/${name}.rmdup.bam
|
||||||
|
>>>
|
||||||
|
|
||||||
|
output {
|
||||||
|
String rmdup_bam = "${output_dir}/alignment/${name}.rmdup.bam"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
task markduplicates_picard {
|
||||||
|
String name
|
||||||
|
String ref
|
||||||
|
String sorted_bam
|
||||||
|
String output_dir
|
||||||
|
|
||||||
|
command <<<
|
||||||
|
if [ ! -d ${output_dir}/alignment ];then
|
||||||
|
mkdir ${output_dir}/alignment
|
||||||
|
fi
|
||||||
|
|
||||||
java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \
|
java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \
|
||||||
I=${sortedBam} \
|
I=${sorted_bam} \
|
||||||
O=${outputDir}/alignment/${name}.rmdup.bam \
|
O=${output_dir}/alignment/${name}.rmdup.bam \
|
||||||
CREATE_INDEX=true \
|
CREATE_INDEX=true \
|
||||||
M=${outputDir}/alignment/${name}.rmdup.metrics.txt \
|
M=${output_dir}/alignment/${name}.rmdup.metrics.txt \
|
||||||
R=${ref}
|
R=${ref}
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String rmdupBam = "${outputDir}/alignment/${name}.rmdup.bam"
|
String rmdup_bam = "${output_dir}/alignment/${name}.rmdup.bam"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -56,16 +83,16 @@ task generater_mpileup {
|
||||||
String rmdupBam
|
String rmdupBam
|
||||||
String ref
|
String ref
|
||||||
String bed
|
String bed
|
||||||
String outputDir
|
String output_dir
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
|
|
||||||
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} ${rmdupBam} -o ${outputDir}/alignment/${name}.pileup
|
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} ${rmdupBam} -o ${output_dir}/alignment/${name}.pileup
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String pileup = "${outputDir}/alignment/${name}.pileup"
|
String pileup = "${output_dir}/alignment/${name}.pileup"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -79,9 +106,11 @@ workflow alignment {
|
||||||
String? normal_r1
|
String? normal_r1
|
||||||
String? normal_r2
|
String? normal_r2
|
||||||
|
|
||||||
|
Boolean umi
|
||||||
|
|
||||||
String ref
|
String ref
|
||||||
String bed
|
String bed
|
||||||
String outputDir
|
String output_dir
|
||||||
|
|
||||||
scatter(name in [tumor, normal]) {
|
scatter(name in [tumor, normal]) {
|
||||||
if (defined(name)) {
|
if (defined(name)) {
|
||||||
|
|
@ -89,36 +118,51 @@ workflow alignment {
|
||||||
input:
|
input:
|
||||||
name=name,
|
name=name,
|
||||||
ref=ref,
|
ref=ref,
|
||||||
outputDir=outputDir,
|
output_dir=output_dir,
|
||||||
read1=if name==tumor then tumor_r1 else normal_r1,
|
read1=if name==tumor then tumor_r1 else normal_r1,
|
||||||
read2=if name==tumor then tumor_r2 else normal_r2
|
read2=if name==tumor then tumor_r2 else normal_r2
|
||||||
|
|
||||||
}
|
}
|
||||||
call markduplicates {
|
if (name==tumor) {
|
||||||
input:
|
if (umi) {
|
||||||
name=name,
|
call markduplicates_genecore as tumor_markduplicates_genecore {
|
||||||
ref=ref,
|
input:
|
||||||
outputDir=outputDir,
|
name=name,
|
||||||
sortedBam=bwa.sortedBam
|
ref=ref,
|
||||||
|
output_dir=output_dir,
|
||||||
|
sorted_bam=bwa.sorted_bam,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!umi) {
|
||||||
|
call markduplicates_picard as tumor_markduplicates_picard {
|
||||||
|
input:
|
||||||
|
name=name,
|
||||||
|
ref=ref,
|
||||||
|
output_dir=output_dir,
|
||||||
|
sorted_bam=bwa.sorted_bam,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
call generater_mpileup {
|
|
||||||
input:
|
if (name==select_first([normal, 'None'])) {
|
||||||
name=name,
|
call markduplicates_picard as normal_markduplicates_picard {
|
||||||
ref=ref,
|
input:
|
||||||
outputDir=outputDir,
|
name=name,
|
||||||
bed=bed,
|
ref=ref,
|
||||||
rmdupBam=markduplicates.rmdupBam
|
output_dir=output_dir,
|
||||||
|
sorted_bam=bwa.sorted_bam,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String tumor_sortedBam = "${outputDir}/alignment/${tumor}.sorted.bam"
|
String tumor_sorted_bam = "${output_dir}/alignment/${tumor}.sorted.bam"
|
||||||
String tumor_rmdupBam = "${outputDir}/alignment/${tumor}.rmdup.bam"
|
String tumor_rmdup_bam = "${output_dir}/alignment/${tumor}.rmdup.bam"
|
||||||
String tumor_pileup = "${outputDir}/alignment/${tumor}.pileup"
|
String tumor_pileup = "${output_dir}/alignment/${tumor}.pileup"
|
||||||
String normal_sortedBam = "${outputDir}/alignment/${normal}.sorted.bam"
|
String normal_sorted_bam = "${output_dir}/alignment/${normal}.sorted.bam"
|
||||||
String normal_rmdupBam = "${outputDir}/alignment/${normal}.rmdup.bam"
|
String normal_rmdup_bam = "${output_dir}/alignment/${normal}.rmdup.bam"
|
||||||
String normal_pileup = "${outputDir}/alignment/${normal}.pileup"
|
String normal_pileup = "${output_dir}/alignment/${normal}.pileup"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,215 @@
|
||||||
|
# Variant calling for a UMI library.
#
# Runs VarDict twice -- once on the full deduplicated BAM ("1r") and once on a
# BAM restricted by fetch_bam.py ("2r") -- merges the two VCFs with
# 1r_plus_2r.pl and annotates the merged VCF with table_annovar.pl.
#
# Inputs:
#   name       sample name; used for VarDict -N and for every output file name
#   output_dir sample work directory; results go to output_dir/mutation, the
#              intermediate 2r BAM goes to output_dir/alignment
#   rmdup_bam  deduplicated BAM to call on
#   ref        reference FASTA passed to VarDict -G
#   bed        target regions passed to VarDict
#
# Output:
#   vcf        path of the merged 1r+2r VCF (a String; not checked for existence)
task mutation_calling_umi {
    String name
    String output_dir
    String rmdup_bam
    String ref
    String bed

    command <<<

        # -p keeps this idempotent: other scatter shards create the same directory.
        mkdir -p ${output_dir}/mutation

        # 1-read ("1r") call.
        # This covers UMI families whose read count is only 1; dropping those
        # reads would reduce the amount of data considerably.
        # -r 3 means 3 such supporting reads are required.
        # -f is the frequency; frequencies called in the 2-read mode can be
        # trusted more than those from the 1-read mode.

        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.001 \
            -N ${name} \
            -b ${rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \
            -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf

        # Extract the sequences corrected from 2 or more reads.
        # Reads the task's rmdup_bam input instead of a re-derived path, so the
        # task honours whatever BAM the caller passes in.
        python3 /home/zhangchao/project/pipeline/control/script/fetch_bam.py ${rmdup_bam} ${output_dir}/alignment/${name}.2r.rmdup.bam
        samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam

        # Ensures the UMI family behind a 1r-called mutation contains 2 reads.
        # 2-read ("2r") call on the corrected sequences.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
            -f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf

        # Merge: keep mutations above 0.01 from the 1r call, and use the 2r call
        # to correct the low-frequency region (AF below 0.01) of the 1r call.
        perl /home/zhangchao/project/pipeline/control/script/1r_plus_2r.pl \
            ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf

        table_annovar.pl \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            --outfile ${output_dir}/mutation/${name}.snp.indel.anno

    >>>

    output {
        # The merged VCF written by 1r_plus_2r.pl above. The previous value
        # (name.filter.flag.snp.indel.vcf) is not produced by any command here.
        String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
    }
}
|
||||||
|
|
||||||
|
# Single-sample variant calling for a non-UMI (tissue) library.
#
# Runs VarDict on one deduplicated BAM, converts the result to VCF through
# teststrandbias.R and var2vcf_valid.pl, then annotates the VCF with
# table_annovar.pl.
#
# Inputs:
#   name       sample name; used for VarDict -N and for the output file names
#   bed        target regions passed to VarDict
#   ref        reference FASTA passed to VarDict -G
#   output_dir sample work directory; results go to output_dir/mutation
#   rmdup_bam  deduplicated BAM to call on
#
# Output:
#   vcf        path of the VarDict VCF (a String; not checked for existence)
task mutation_calling_tissue {
    String name
    String bed
    String ref
    String output_dir
    String rmdup_bam

    command <<<
        # -p keeps this idempotent: the tumor and normal shards of the calling
        # scatter both create this directory and may run at the same time.
        mkdir -p ${output_dir}/mutation

        # vardict
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.01 \
            -N ${name} \
            -b ${rmdup_bam} \
            -UN \
            -Q 20 \
            -m 3 \
            -r 3 \
            -th 10 \
            -c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            |/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf

        table_annovar.pl \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
            -operation g,f,f,f,f,f,f,f,f,f \
            --intronhgvs 50 \
            --outfile ${output_dir}/mutation/${name}.snp.indel.anno

    >>>

    output {
        String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
    }
}
|
||||||
|
|
||||||
|
# Tumor/normal variant calling for a non-UMI (tissue) library.
#
# Passes both BAMs to VarDict as "tumor|normal", converts the result to VCF
# through testsomatic.R and var2vcf_paired.pl, then annotates the VCF with
# table_annovar.pl.
#
# Inputs:
#   name             sample name; used for VarDict -N and the output file names
#   bed              target regions passed to VarDict
#   ref              reference FASTA passed to VarDict -G
#   output_dir       sample work directory; results go to output_dir/mutation
#   tumor_rmdup_bam  deduplicated tumor BAM (left side of the -b pair)
#   normal_rmdup_bam deduplicated normal BAM (right side of the -b pair)
#
# Output:
#   vcf              path of the paired VCF (a String; not checked for existence)
task mutation_calling_tissue_control {
    String name
    String bed
    String ref
    String output_dir
    String tumor_rmdup_bam
    String normal_rmdup_bam

    command <<<
        # -p keeps this idempotent when another shard or an earlier attempt has
        # already created the directory.
        mkdir -p ${output_dir}/mutation

        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.01 \
            -N ${name} \
            -b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \
            -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf

        table_annovar.pl \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
            -operation g,f,f,f,f,f,f,f,f,f \
            --intronhgvs 50 \
            --outfile ${output_dir}/mutation/${name}.snp.indel.anno

    >>>

    output {
        String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
    }
}
|
||||||
|
|
||||||
|
# Dispatches variant calling for a tumor sample and an optional normal sample.
#
# Routing, per element of [tumor, normal] that is defined:
#   tumor,  umi            -> mutation_calling_umi on the tumor BAM
#   tumor, !umi, no normal -> mutation_calling_tissue on the tumor BAM
#   tumor, !umi, normal    -> mutation_calling_tissue_control on tumor|normal
#   normal, umi            -> mutation_calling_tissue on the normal BAM
#   normal, !umi           -> nothing (the paired call above already uses it)
#
# Inputs:
#   tumor / tumor_rmdup_bam    tumor sample name and its deduplicated BAM
#   normal / normal_rmdup_bam  optional normal sample name and BAM
#   umi                        true when the tumor library carries UMIs
#   output_dir, ref, bed       forwarded unchanged to the calling tasks
#
# Outputs are path strings built from the sample names; they are not taken
# from the task outputs and are not checked for existence.
workflow call_mutation {

    String tumor
    String tumor_rmdup_bam
    String? normal
    String? normal_rmdup_bam
    Boolean umi
    String output_dir
    String ref
    String bed

    scatter(name in [tumor, normal]) {
        if (defined(name)) {
            if (name==tumor) {
                if (umi) {
                    call mutation_calling_umi as tumor_mutation_calling_umi {
                        input:
                            name=name,
                            output_dir=output_dir,
                            ref=ref,
                            bed=bed,
                            rmdup_bam=tumor_rmdup_bam
                    }
                }
                if (!umi) {
                    # Single-sample mode: normal is not defined, so
                    # select_first falls back to tumor and the test is true.
                    if (name==select_first([normal, tumor])) {
                        call mutation_calling_tissue as tumor_mutation_calling_tissue {
                            input:
                                name=name,
                                output_dir=output_dir,
                                ref=ref,
                                bed=bed,
                                # Fixed: this branch calls the tumor sample and there
                                # is no normal here, so it must read the tumor BAM
                                # (it previously received normal_rmdup_bam).
                                rmdup_bam=tumor_rmdup_bam
                        }
                    }
                    # Paired mode: normal is defined.
                    if (name!=select_first([normal, tumor])) {
                        call mutation_calling_tissue_control as tumor_mutation_calling_tissue_control {
                            input:
                                name=name,
                                output_dir=output_dir,
                                ref=ref,
                                bed=bed,
                                tumor_rmdup_bam=tumor_rmdup_bam,
                                normal_rmdup_bam=normal_rmdup_bam
                        }
                    }
                }
            }
            # 'None' is only a placeholder so the comparison is false when
            # normal is not defined.
            if (name==select_first([normal, 'None'])) {
                if (umi) {
                    call mutation_calling_tissue as normal_mutation_calling_tissue {
                        input:
                            name=name,
                            output_dir=output_dir,
                            ref=ref,
                            bed=bed,
                            rmdup_bam=normal_rmdup_bam
                    }
                }

            }

        }
    }

    output {
        String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf"
        # NOTE(review): built from the optional normal name; confirm what
        # consumers expect when normal is not supplied.
        String somatic_nc_vcf = "${output_dir}/mutation/${normal}.snp.indel.vcf"
    }
}
|
||||||
118
wdl/qc.wdl
118
wdl/qc.wdl
|
|
@ -1,31 +1,71 @@
|
||||||
task runqc {
|
task runqc {
|
||||||
String name
|
String name
|
||||||
String inputDir
|
String input_dir
|
||||||
String outputDir
|
String output_dir
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
echo "###### ${name} fastp beginning at: $(date) ######"
|
# echo "###### ${name} fastp beginning at: $(date) ######"
|
||||||
|
|
||||||
if [ ! -d ${outputDir}/qc ];then
|
if [ ! -d ${output_dir}/qc ];then
|
||||||
mkdir ${outputDir}/qc
|
mkdir ${output_dir}/qc
|
||||||
fi
|
fi
|
||||||
|
|
||||||
fastp -i ${inputDir}/*_${name}_*1.fq.gz -o ${outputDir}/qc/${name}_clean_R1.fq.gz \
|
fastp \
|
||||||
-I ${inputDir}/*_${name}_*2.fq.gz -O ${outputDir}/qc/${name}_clean_R2.fq.gz \
|
-i ${input_dir}/*_${name}_*1.fq.gz \
|
||||||
|
-o ${output_dir}/qc/${name}_clean_R1.fq.gz \
|
||||||
|
-I ${input_dir}/*_${name}_*2.fq.gz \
|
||||||
|
-O ${output_dir}/qc/${name}_clean_R2.fq.gz \
|
||||||
-w 10 \
|
-w 10 \
|
||||||
|
-e 20 \
|
||||||
--correction \
|
--correction \
|
||||||
--overlap_len_require 10 \
|
--overlap_len_require 10 \
|
||||||
-j ${outputDir}/qc/${name}.json \
|
-j ${output_dir}/qc/${name}.json \
|
||||||
-h ${outputDir}/qc/${name}.html \
|
-h ${output_dir}/qc/${name}.html \
|
||||||
--report_title $name \
|
--report_title ${name}
|
||||||
-e 20
|
|
||||||
|
|
||||||
echo "###### ${name} fastp end at: $(date) ######"
|
|
||||||
>>>
|
>>>
|
||||||
output {
|
output {
|
||||||
String r1 = "${outputDir}/qc/${name}_clean_R1.fq.gz "
|
String r1 = "${output_dir}/qc/${name}_clean_R1.fq.gz "
|
||||||
String r2 = "${outputDir}/qc/${name}_clean_R2.fq.gz "
|
String r2 = "${output_dir}/qc/${name}_clean_R2.fq.gz "
|
||||||
String json = "${outputDir}/qc/${name}.json"
|
String json = "${output_dir}/qc/${name}.json"
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
task umiqc {
|
||||||
|
String name
|
||||||
|
String input_dir
|
||||||
|
String output_dir
|
||||||
|
|
||||||
|
command <<<
|
||||||
|
# echo "###### ${name} fastp beginning at: $(date) ######"
|
||||||
|
|
||||||
|
if [ ! -d ${output_dir}/qc ];then
|
||||||
|
mkdir ${output_dir}/qc
|
||||||
|
fi
|
||||||
|
|
||||||
|
fastp -i ${input_dir}/*_${name}_*1.fq.gz -o ${output_dir}/qc/${name}_clean_R1.fq.gz \
|
||||||
|
-I ${input_dir}/*_${name}_*2.fq.gz -O ${output_dir}/qc/${name}_clean_R2.fq.gz \
|
||||||
|
-w 10 \
|
||||||
|
--correction \
|
||||||
|
--overlap_len_require=10 \
|
||||||
|
--umi \
|
||||||
|
--umi_loc=per_read \
|
||||||
|
--umi_len=4 \
|
||||||
|
--umi_prefix=UMI \
|
||||||
|
--umi_skip=3 \
|
||||||
|
--umi_delim \ : \
|
||||||
|
--disable_trim_poly_g \
|
||||||
|
-j ${output_dir}/qc/${name}.json \
|
||||||
|
-h ${output_dir}/qc/${name}.html \
|
||||||
|
--report_title ${name}
|
||||||
|
|
||||||
|
>>>
|
||||||
|
|
||||||
|
output {
|
||||||
|
String r1 = "${output_dir}/qc/${name}_clean_R1.fq.gz "
|
||||||
|
String r2 = "${output_dir}/qc/${name}_clean_R2.fq.gz "
|
||||||
|
String json = "${output_dir}/qc/${name}.json"
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -34,27 +74,49 @@ workflow qc {
|
||||||
|
|
||||||
String tumor
|
String tumor
|
||||||
String? normal
|
String? normal
|
||||||
String inputDir
|
Boolean umi
|
||||||
String outputDir
|
String input_dir
|
||||||
|
String output_dir
|
||||||
|
|
||||||
scatter(name in [tumor, normal]) {
|
scatter(name in [tumor, normal]) {
|
||||||
if (defined(name)) {
|
if (defined(name)) {
|
||||||
call runqc {
|
if (name==tumor) {
|
||||||
input:
|
if (umi) {
|
||||||
name=name,
|
call umiqc as run_umi_qc {
|
||||||
inputDir=inputDir,
|
input:
|
||||||
outputDir=outputDir
|
name=name,
|
||||||
|
input_dir=input_dir,
|
||||||
|
output_dir=output_dir
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!umi) {
|
||||||
|
call runqc as run_tumor_qc {
|
||||||
|
input:
|
||||||
|
name=name,
|
||||||
|
input_dir=input_dir,
|
||||||
|
output_dir=output_dir
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if (name==select_first([normal, 'None'])) {
|
||||||
|
call runqc as run_normal_qc {
|
||||||
|
input:
|
||||||
|
name=name,
|
||||||
|
input_dir=input_dir,
|
||||||
|
output_dir=output_dir
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output {
|
output {
|
||||||
String tumor_r1 = "${outputDir}/qc/${tumor}_clean_R1.fq.gz "
|
String tumor_r1 = "${output_dir}/qc/${tumor}_clean_R1.fq.gz "
|
||||||
String tumor_r2 = "${outputDir}/qc/${tumor}_clean_R2.fq.gz "
|
String tumor_r2 = "${output_dir}/qc/${tumor}_clean_R2.fq.gz "
|
||||||
String tumor_json = "${outputDir}/qc/${tumor}.json"
|
String tumor_json = "${output_dir}/qc/${tumor}.json"
|
||||||
String normal_r1 = "${outputDir}/qc/${normal}_clean_R1.fq.gz "
|
String normal_r1 = "${output_dir}/qc/${normal}_clean_R1.fq.gz "
|
||||||
String normal_r2 = "${outputDir}/qc/${normal}_clean_R2.fq.gz "
|
String normal_r2 = "${output_dir}/qc/${normal}_clean_R2.fq.gz "
|
||||||
String normal_json = "${outputDir}/qc/${normal}.json"
|
String normal_json = "${output_dir}/qc/${normal}.json"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
69
wdl/task.wdl
69
wdl/task.wdl
|
|
@ -3,7 +3,7 @@
|
||||||
task create_dir {
|
task create_dir {
|
||||||
String workdir
|
String workdir
|
||||||
command <<<
|
command <<<
|
||||||
if [ ! -d ${workdir}];then
|
if [ ! -d ${workdir} ];then
|
||||||
mkdir -p ${workdir}/log
|
mkdir -p ${workdir}/log
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -12,9 +12,10 @@ task create_dir {
|
||||||
|
|
||||||
task mutation_calling {
|
task mutation_calling {
|
||||||
String name
|
String name
|
||||||
String tumor_pileup
|
String tumor_rmdupBam
|
||||||
String normal_pileup
|
String normal_rmdupBam
|
||||||
String outputDir
|
String outputDir
|
||||||
|
String bed
|
||||||
|
|
||||||
command <<<
|
command <<<
|
||||||
|
|
||||||
|
|
@ -22,47 +23,27 @@ task mutation_calling {
|
||||||
mkdir ${outputDir}/mutation
|
mkdir ${outputDir}/mutation
|
||||||
fi
|
fi
|
||||||
|
|
||||||
java -jar $VARSCAN somatic ${tumor_pileup} ${normal_pileup} \
|
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
|
||||||
--output-snp ${outputDir}/mutation/${name}.snp.vcf \
|
-G /dataseq/jmdna/database/genome/hg19/hg19.fa \
|
||||||
--output-indel ${outputDir}/mutation/${name}.indel.vcf \
|
-f 0.01 \
|
||||||
--min-var-freq 0.01 \
|
-N ${name} \
|
||||||
--min-freq-for-hom 0.9 \
|
-b "${tumor_rmdupBam}|${normal_rmdupBam}" \
|
||||||
--somatic-p-value 0.05 \
|
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | \
|
||||||
--output-vcf 1 \
|
/dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R | \
|
||||||
--min-avg-qual 20 \
|
/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \
|
||||||
--min-coverage-normal 10 \
|
> ${outputDir}/mutation/${name}_vardict.snp.indel.vcf
|
||||||
--min-coverage-tumor 30 \
|
|
||||||
--min-reads2 3
|
|
||||||
|
|
||||||
java -jar $VARSCAN processSomatic \
|
vep \
|
||||||
${outputDir}/mutation/${name}.snp.vcf \
|
--input_file ${outputDir}/mutation/${name}_vardict.snp.indel.vcf \
|
||||||
--min-tumor-freq 0.01 \
|
--output_file ${outputDir}/mutation/${name}_vardict_vep.snp.indel.vcf \
|
||||||
--max-normal-freq 0.01 \
|
--format vcf \
|
||||||
--p-value 0.05
|
--vcf \
|
||||||
|
--symbol \
|
||||||
java -jar $VARSCAN processSomatic \
|
--terms SO \
|
||||||
${outputDir}/mutation/${name}.indel.vcf \
|
--hgvs \--fasta /dataseq/jmdna/database/genome/hg19/hg19.fa \
|
||||||
--min-tumor-freq 0.01 \
|
--offline --cache --dir_cache /home/software/.vep \
|
||||||
--max-normal-freq 0.01 \
|
--pick \
|
||||||
--p-value 0.05
|
--force_overwrite
|
||||||
|
|
||||||
java -jar $GATK MergeVcfs \
|
|
||||||
-I ${outputDir}/mutation/${name}.snp.Somatic.hc.vcf \
|
|
||||||
-I ${outputDir}/mutation/${name}.indel.Somatic.hc.vcf \
|
|
||||||
-O ${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf \
|
|
||||||
-D /dataseq/jmdna/database/genome/hg19/hg19.dict
|
|
||||||
|
|
||||||
java -jar $GATK MergeVcfs \
|
|
||||||
-I ${outputDir}/mutation/${name}.snp.Germline.vcf \
|
|
||||||
-I ${outputDir}/mutation/${name}.indel.Germline.vcf \
|
|
||||||
-O ${outputDir}/mutation/${name}.snp.indel.Germline.vcf \
|
|
||||||
-D /dataseq/jmdna/database/genome/hg19/hg19.dict
|
|
||||||
|
|
||||||
java -jar $GATK MergeVcfs \
|
|
||||||
-I ${outputDir}/mutation/${name}.snp.LOH.hc.vcf \
|
|
||||||
-I ${outputDir}/mutation/${name}.indel.LOH.hc.vcf \
|
|
||||||
-O ${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf \
|
|
||||||
-D /dataseq/jmdna/database/genome/hg19/hg19.dict
|
|
||||||
|
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
|
|
@ -580,7 +561,7 @@ task auto_report {
|
||||||
perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer}
|
perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer}
|
||||||
python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor}
|
python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor}
|
||||||
perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor}
|
perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor}
|
||||||
python3 ${codesDir}/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}
|
python3 ${codesDir}/report_template/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}
|
||||||
|
|
||||||
ln -s ${cnv_cns} ${outputDir}/report/
|
ln -s ${cnv_cns} ${outputDir}/report/
|
||||||
ln -s ${cnv_png} ${outputDir}/report/
|
ln -s ${cnv_png} ${outputDir}/report/
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue