调整call_mutation
parent
82ce1d74bf
commit
188e7dd8a4
|
|
@ -2,8 +2,10 @@
|
|||
|
||||
## run script example
|
||||
```bash
|
||||
/home/zhangchao/soft/jdk-17.0.7+7/bin/java -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf \
|
||||
-jar /home/zhangchao/soft/cromwell-85.jar run \
|
||||
export PATH=/dataseq/product/workflow/software/bin:$PATH
|
||||
|
||||
java17 -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf \
|
||||
-jar /dataseq/product/workflow/software/cromwell-85.jar run \
|
||||
--inputs /home/zhangchao/project/pipeline/workflow/test/20230814.json \
|
||||
/home/zhangchao/project/pipeline/workflow/pipeline.wdl
|
||||
```
|
||||
|
|
|
|||
201
pipeline.wdl
201
pipeline.wdl
|
|
@ -1,21 +1,23 @@
|
|||
import "./wdl/task.wdl" as mytask
|
||||
import "./wdl/qc.wdl"
|
||||
import "./wdl/alignment.wdl"
|
||||
import "./wdl/statistics.wdl"
|
||||
import "./wdl/call_mutation.wdl"
|
||||
|
||||
workflow pipeline {
|
||||
|
||||
String tumor
|
||||
String? normal
|
||||
String inputDir
|
||||
String outputDir
|
||||
String cancer
|
||||
String project="650gene"
|
||||
String codesDir="/dataseq/jmdna/codes/pancancer_controlsample"
|
||||
String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
|
||||
String bed = "/dataseq/jmdna/database/bed/650.bed"
|
||||
Boolean umi=false
|
||||
|
||||
String workdir="${outputDir}/${tumor}"
|
||||
String input_dir
|
||||
String output_dir
|
||||
|
||||
String bed
|
||||
|
||||
String codesDir="/home/zhangchao/project/pipeline/workflow/script"
|
||||
String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
|
||||
|
||||
String workdir="${output_dir}"
|
||||
|
||||
call mytask.create_dir as create_dir {
|
||||
input:
|
||||
|
|
@ -26,8 +28,9 @@ workflow pipeline {
|
|||
input:
|
||||
tumor=tumor,
|
||||
normal=normal,
|
||||
inputDir=inputDir,
|
||||
outputDir=workdir
|
||||
umi=umi,
|
||||
input_dir=input_dir,
|
||||
output_dir=workdir
|
||||
}
|
||||
|
||||
call alignment.alignment as alignment {
|
||||
|
|
@ -41,183 +44,23 @@ workflow pipeline {
|
|||
normal_r1=qc.normal_r1,
|
||||
normal_r2=qc.normal_r2,
|
||||
|
||||
umi=umi,
|
||||
|
||||
ref=ref,
|
||||
bed=bed,
|
||||
outputDir=workdir
|
||||
output_dir=workdir
|
||||
}
|
||||
|
||||
call statistics.statistics as statistics {
|
||||
call call_mutation.call_mutation as call_mutation {
|
||||
input:
|
||||
tumor=tumor,
|
||||
tumor_rmdupBam=alignment.tumor_rmdupBam,
|
||||
|
||||
tumor_rmdup_bam=alignment.tumor_rmdup_bam,
|
||||
normal=normal,
|
||||
normal_rmdupBam=alignment.normal_rmdupBam,
|
||||
normal_rmdup_bam=alignment.normal_rmdup_bam,
|
||||
|
||||
umi=umi,
|
||||
ref=ref,
|
||||
bed=bed,
|
||||
outputDir=workdir,
|
||||
codesDir=codesDir
|
||||
}
|
||||
|
||||
call mytask.conpair as conpair {
|
||||
input:
|
||||
codesDir=codesDir,
|
||||
name=tumor,
|
||||
tumor_rmdupBam=alignment.tumor_rmdupBam,
|
||||
normal_rmdupBam=alignment.normal_rmdupBam,
|
||||
outputDir=workdir,
|
||||
ref=ref
|
||||
}
|
||||
|
||||
call mytask.mutation_calling as mutation_calling {
|
||||
input:
|
||||
name=tumor,
|
||||
tumor_pileup=alignment.tumor_pileup,
|
||||
normal_pileup=alignment.normal_pileup,
|
||||
outputDir=workdir
|
||||
}
|
||||
|
||||
call mytask.annovar as annovar {
|
||||
input:
|
||||
name=tumor,
|
||||
outputDir=workdir,
|
||||
ref=ref,
|
||||
somatic_hc_vcf=mutation_calling.somatic_hc_vcf,
|
||||
germline_vcf=mutation_calling.germline_vcf,
|
||||
loh_hc_vcf=mutation_calling.loh_hc_vcf,
|
||||
rmdupBam=alignment.tumor_rmdupBam
|
||||
}
|
||||
|
||||
call mytask.dealwithsnvindel as dealwithsnvindel {
|
||||
input:
|
||||
codesDir=codesDir,
|
||||
name=tumor,
|
||||
somatic_all_anno=annovar.somatic_all_anno,
|
||||
germline_anno=annovar.germline_anno,
|
||||
project=project,
|
||||
outputDir=workdir,
|
||||
cancer=cancer
|
||||
}
|
||||
|
||||
call mytask.hereditary as hereditary {
|
||||
input:
|
||||
codesDir=codesDir,
|
||||
name=tumor,
|
||||
outputDir=workdir,
|
||||
project=project,
|
||||
germline_filtered = dealwithsnvindel.germline_filtered
|
||||
}
|
||||
|
||||
call mytask.tmb as tmb {
|
||||
input:
|
||||
codesDir=codesDir,
|
||||
name=tumor,
|
||||
outputDir=workdir,
|
||||
somatic_anno=annovar.somatic_anno
|
||||
}
|
||||
|
||||
call mytask.fusion as fusion {
|
||||
input:
|
||||
name=tumor,
|
||||
ref=ref,
|
||||
codesDir=codesDir,
|
||||
outputDir=workdir,
|
||||
rmdupBam=alignment.tumor_rmdupBam,
|
||||
cancer=cancer,
|
||||
project=project,
|
||||
tumor_bamdst_depth=statistics.tumor_bamdst_depth
|
||||
}
|
||||
|
||||
call mytask.cnvkit as cnvkit {
|
||||
input:
|
||||
tumor=tumor,
|
||||
normal=normal,
|
||||
|
||||
tumor_rmdupBam=alignment.tumor_rmdupBam,
|
||||
normal_rmdupBam=alignment.normal_rmdupBam,
|
||||
ref=ref,
|
||||
bed=bed,
|
||||
outputDir=workdir,
|
||||
cancer=cancer,
|
||||
codesDir=codesDir,
|
||||
project=project,
|
||||
}
|
||||
|
||||
call mytask.chemo as chemo {
|
||||
input:
|
||||
codesDir=codesDir,
|
||||
outputDir=workdir,
|
||||
normal=normal,
|
||||
project=project,
|
||||
rmdupBam=alignment.tumor_rmdupBam,
|
||||
}
|
||||
|
||||
call mytask.msi as msi {
|
||||
input:
|
||||
bed=bed,
|
||||
name=tumor,
|
||||
outputDir=workdir,
|
||||
tumor_rmdupBam = alignment.tumor_rmdupBam,
|
||||
normal_rmdupBam =alignment.normal_rmdupBam
|
||||
}
|
||||
|
||||
call mytask.hla as hla {
|
||||
input:
|
||||
inputDir=inputDir,
|
||||
outputDir=workdir,
|
||||
normal=normal,
|
||||
}
|
||||
|
||||
call mytask.neoantigen as neoantigen {
|
||||
input:
|
||||
codesDir=codesDir,
|
||||
outputDir=workdir,
|
||||
name=tumor,
|
||||
somatic_hc_vcf=mutation_calling.somatic_hc_vcf,
|
||||
normal=normal,
|
||||
hla=hla.hla
|
||||
}
|
||||
|
||||
call mytask.mmr as mmr {
|
||||
input:
|
||||
codesDir=codesDir,
|
||||
name=tumor,
|
||||
outputDir=workdir,
|
||||
germline_filtered = dealwithsnvindel.germline_filtered
|
||||
}
|
||||
|
||||
call mytask.hrr as hrr {
|
||||
input:
|
||||
codesDir=codesDir,
|
||||
name=tumor,
|
||||
outputDir=workdir,
|
||||
germline_filtered = dealwithsnvindel.germline_filtered
|
||||
}
|
||||
|
||||
call mytask.hotspot as hotspot {
|
||||
input:
|
||||
name=tumor,
|
||||
outputDir=workdir,
|
||||
ref=ref,
|
||||
rmdupBam=alignment.tumor_rmdupBam,
|
||||
codesDir=codesDir,
|
||||
}
|
||||
|
||||
call mytask.auto_report {
|
||||
input:
|
||||
tumor=tumor,
|
||||
normal=normal,
|
||||
outputDir=workdir,
|
||||
codesDir=codesDir,
|
||||
cancer=cancer,
|
||||
cnv_cns=cnvkit.cns,
|
||||
cnv_png=cnvkit.png,
|
||||
fusion_pos=fusion.fusion,
|
||||
snvindel_filtered=dealwithsnvindel.snvindel_filtered,
|
||||
tmb=tmb.tmb,
|
||||
mmr=mmr.mmr,
|
||||
hrr=hrr.hrr,
|
||||
hereditary_pre=hereditary.hereditary_pre
|
||||
output_dir=workdir
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,48 +4,75 @@ task bwa {
|
|||
String name
|
||||
String read1
|
||||
String read2
|
||||
String outputDir
|
||||
String output_dir
|
||||
String ref
|
||||
|
||||
command <<<
|
||||
if [ ! -d ${outputDir}/alignment ];then
|
||||
mkdir ${outputDir}/alignment
|
||||
if [ ! -d ${output_dir}/alignment ];then
|
||||
mkdir ${output_dir}/alignment
|
||||
fi
|
||||
|
||||
bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} ${read1} ${read2} | \
|
||||
samtools view -@ 10 -bh -o - | samtools sort -@ 10 -o ${outputDir}/alignment/${name}.sorted.bam
|
||||
samtools index ${outputDir}/alignment/${name}.sorted.bam
|
||||
samtools view -@ 10 -bh -o - | samtools sort -@ 10 -o ${output_dir}/alignment/${name}.sorted.bam
|
||||
samtools index ${output_dir}/alignment/${name}.sorted.bam
|
||||
|
||||
>>>
|
||||
|
||||
output {
|
||||
String sortedBam = "${outputDir}/alignment/${name}.sorted.bam"
|
||||
String sorted_bam = "${output_dir}/alignment/${name}.sorted.bam"
|
||||
}
|
||||
}
|
||||
|
||||
#remove PCR duplicates
|
||||
|
||||
task markduplicates {
|
||||
task markduplicates_genecore {
|
||||
String name
|
||||
String ref
|
||||
String sortedBam
|
||||
String outputDir
|
||||
String sorted_bam
|
||||
String output_dir
|
||||
|
||||
command <<<
|
||||
if [ ! -d ${outputDir}/alignment ];then
|
||||
mkdir ${outputDir}/alignment
|
||||
if [ ! -d ${output_dir}/alignment ];then
|
||||
mkdir ${output_dir}/alignment
|
||||
fi
|
||||
|
||||
gencore -i ${sorted_bam} \
|
||||
-o ${output_dir}/alignment/${name}.rmdup.bam \
|
||||
-r ${ref} \
|
||||
-u UMI \
|
||||
-j ${output_dir}/alignment/${name}_rmdup.json \
|
||||
-h ${output_dir}/alignment/${name}_rmdup.html
|
||||
|
||||
samtools index ${output_dir}/alignment/${name}.rmdup.bam
|
||||
>>>
|
||||
|
||||
output {
|
||||
String rmdup_bam = "${output_dir}/alignment/${name}.rmdup.bam"
|
||||
}
|
||||
}
|
||||
|
||||
task markduplicates_picard {
|
||||
String name
|
||||
String ref
|
||||
String sorted_bam
|
||||
String output_dir
|
||||
|
||||
command <<<
|
||||
if [ ! -d ${output_dir}/alignment ];then
|
||||
mkdir ${output_dir}/alignment
|
||||
fi
|
||||
|
||||
java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \
|
||||
I=${sortedBam} \
|
||||
O=${outputDir}/alignment/${name}.rmdup.bam \
|
||||
I=${sorted_bam} \
|
||||
O=${output_dir}/alignment/${name}.rmdup.bam \
|
||||
CREATE_INDEX=true \
|
||||
M=${outputDir}/alignment/${name}.rmdup.metrics.txt \
|
||||
M=${output_dir}/alignment/${name}.rmdup.metrics.txt \
|
||||
R=${ref}
|
||||
|
||||
>>>
|
||||
|
||||
output {
|
||||
String rmdupBam = "${outputDir}/alignment/${name}.rmdup.bam"
|
||||
String rmdup_bam = "${output_dir}/alignment/${name}.rmdup.bam"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -56,16 +83,16 @@ task generater_mpileup {
|
|||
String rmdupBam
|
||||
String ref
|
||||
String bed
|
||||
String outputDir
|
||||
String output_dir
|
||||
|
||||
command <<<
|
||||
|
||||
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} ${rmdupBam} -o ${outputDir}/alignment/${name}.pileup
|
||||
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} ${rmdupBam} -o ${output_dir}/alignment/${name}.pileup
|
||||
|
||||
>>>
|
||||
|
||||
output {
|
||||
String pileup = "${outputDir}/alignment/${name}.pileup"
|
||||
String pileup = "${output_dir}/alignment/${name}.pileup"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -79,9 +106,11 @@ workflow alignment {
|
|||
String? normal_r1
|
||||
String? normal_r2
|
||||
|
||||
Boolean umi
|
||||
|
||||
String ref
|
||||
String bed
|
||||
String outputDir
|
||||
String output_dir
|
||||
|
||||
scatter(name in [tumor, normal]) {
|
||||
if (defined(name)) {
|
||||
|
|
@ -89,36 +118,51 @@ workflow alignment {
|
|||
input:
|
||||
name=name,
|
||||
ref=ref,
|
||||
outputDir=outputDir,
|
||||
output_dir=output_dir,
|
||||
read1=if name==tumor then tumor_r1 else normal_r1,
|
||||
read2=if name==tumor then tumor_r2 else normal_r2
|
||||
|
||||
}
|
||||
call markduplicates {
|
||||
if (name==tumor) {
|
||||
if (umi) {
|
||||
call markduplicates_genecore as tumor_markduplicates_genecore {
|
||||
input:
|
||||
name=name,
|
||||
ref=ref,
|
||||
outputDir=outputDir,
|
||||
sortedBam=bwa.sortedBam
|
||||
output_dir=output_dir,
|
||||
sorted_bam=bwa.sorted_bam,
|
||||
}
|
||||
call generater_mpileup {
|
||||
}
|
||||
if (!umi) {
|
||||
call markduplicates_picard as tumor_markduplicates_picard {
|
||||
input:
|
||||
name=name,
|
||||
ref=ref,
|
||||
outputDir=outputDir,
|
||||
bed=bed,
|
||||
rmdupBam=markduplicates.rmdupBam
|
||||
output_dir=output_dir,
|
||||
sorted_bam=bwa.sorted_bam,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (name==select_first([normal, 'None'])) {
|
||||
call markduplicates_picard as normal_markduplicates_picard {
|
||||
input:
|
||||
name=name,
|
||||
ref=ref,
|
||||
output_dir=output_dir,
|
||||
sorted_bam=bwa.sorted_bam,
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
output {
|
||||
String tumor_sortedBam = "${outputDir}/alignment/${tumor}.sorted.bam"
|
||||
String tumor_rmdupBam = "${outputDir}/alignment/${tumor}.rmdup.bam"
|
||||
String tumor_pileup = "${outputDir}/alignment/${tumor}.pileup"
|
||||
String normal_sortedBam = "${outputDir}/alignment/${normal}.sorted.bam"
|
||||
String normal_rmdupBam = "${outputDir}/alignment/${normal}.rmdup.bam"
|
||||
String normal_pileup = "${outputDir}/alignment/${normal}.pileup"
|
||||
String tumor_sorted_bam = "${output_dir}/alignment/${tumor}.sorted.bam"
|
||||
String tumor_rmdup_bam = "${output_dir}/alignment/${tumor}.rmdup.bam"
|
||||
String tumor_pileup = "${output_dir}/alignment/${tumor}.pileup"
|
||||
String normal_sorted_bam = "${output_dir}/alignment/${normal}.sorted.bam"
|
||||
String normal_rmdup_bam = "${output_dir}/alignment/${normal}.rmdup.bam"
|
||||
String normal_pileup = "${output_dir}/alignment/${normal}.pileup"
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,215 @@
|
|||
task mutation_calling_umi {
|
||||
String name
|
||||
String output_dir
|
||||
String rmdup_bam
|
||||
String ref
|
||||
String bed
|
||||
command <<<
|
||||
|
||||
if [ ! -d ${output_dir}/mutation ];then
|
||||
mkdir ${output_dir}/mutation
|
||||
fi
|
||||
|
||||
|
||||
#1条call
|
||||
# 这个情况是reads数目只有1,但是如果去掉了这个reads数导致数据量减少很多
|
||||
# -r 3 是指有3条这样样的reads支撑
|
||||
# -f 是指频率 以2条方式的call出来的变异频率可以比1条的方式更可信
|
||||
|
||||
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
|
||||
-G ${ref} \
|
||||
-f 0.001 \
|
||||
-N ${name} \
|
||||
-b ${rmdup_bam} \
|
||||
-UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
|
||||
| /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
||||
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \
|
||||
-N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf
|
||||
|
||||
#提取>=2条矫正的序列
|
||||
python3 /home/zhangchao/project/pipeline/control/script/fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam
|
||||
samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam
|
||||
|
||||
# 保证 1r call mut umi family 里面有2条reads
|
||||
#2条矫正的call
|
||||
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
|
||||
-f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
|
||||
-UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
||||
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf
|
||||
|
||||
#merge突变,以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域(AF<0.01)进行矫正。
|
||||
perl /home/zhangchao/project/pipeline/control/script/1r_plus_2r.pl \
|
||||
${output_dir}/mutation/${name}.1r.snp.indel.vcf \
|
||||
${output_dir}/mutation/${name}.2r.snp.indel.vcf \
|
||||
${output_dir}/mutation/${name}.snp.indel.vcf
|
||||
|
||||
table_annovar.pl \
|
||||
${output_dir}/mutation/${name}.snp.indel.vcf \
|
||||
/dataseq/jmdna/software/annovar/humandb/ \
|
||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
||||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
|
||||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
|
||||
--intronhgvs 50 \
|
||||
-operation g,f,f,f,f,f,f,f,f,f,r \
|
||||
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
|
||||
|
||||
>>>
|
||||
|
||||
output {
|
||||
String vcf = "${output_dir}/mutation/${name}.filter.flag.snp.indel.vcf"
|
||||
}
|
||||
}
|
||||
|
||||
task mutation_calling_tissue {
|
||||
String name
|
||||
String bed
|
||||
String ref
|
||||
String output_dir
|
||||
String rmdup_bam
|
||||
|
||||
command <<<
|
||||
if [ ! -d ${output_dir}/mutation ];then
|
||||
mkdir ${output_dir}/mutation
|
||||
fi
|
||||
|
||||
# vardict
|
||||
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
|
||||
-G ${ref} \
|
||||
-f 0.01 \
|
||||
-N ${name} \
|
||||
-b ${rmdup_bam} \
|
||||
-UN \
|
||||
-Q 20 \
|
||||
-m 3 \
|
||||
-r 3 \
|
||||
-th 10 \
|
||||
-c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
||||
|/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf
|
||||
|
||||
table_annovar.pl \
|
||||
${output_dir}/mutation/${name}.snp.indel.vcf \
|
||||
/dataseq/jmdna/software/annovar/humandb/ \
|
||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
||||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
|
||||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
|
||||
-operation g,f,f,f,f,f,f,f,f,f \
|
||||
--intronhgvs 50 \
|
||||
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
|
||||
|
||||
>>>
|
||||
|
||||
output {
|
||||
String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
|
||||
}
|
||||
}
|
||||
|
||||
task mutation_calling_tissue_control {
|
||||
String name
|
||||
String bed
|
||||
String ref
|
||||
String output_dir
|
||||
String tumor_rmdup_bam
|
||||
String normal_rmdup_bam
|
||||
|
||||
command <<<
|
||||
if [ ! -d ${output_dir}/mutation ];then
|
||||
mkdir ${output_dir}/mutation
|
||||
fi
|
||||
|
||||
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
|
||||
-G ${ref} \
|
||||
-f 0.01 \
|
||||
-N ${name} \
|
||||
-b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \
|
||||
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
|
||||
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf
|
||||
|
||||
table_annovar.pl \
|
||||
${output_dir}/mutation/${name}.snp.indel.vcf \
|
||||
/dataseq/jmdna/software/annovar/humandb/ \
|
||||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
||||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
|
||||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
|
||||
-operation g,f,f,f,f,f,f,f,f,f \
|
||||
--intronhgvs 50 \
|
||||
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
|
||||
|
||||
>>>
|
||||
|
||||
output {
|
||||
String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
|
||||
}
|
||||
}
|
||||
|
||||
workflow call_mutation {
|
||||
|
||||
String tumor
|
||||
String tumor_rmdup_bam
|
||||
String? normal
|
||||
String? normal_rmdup_bam
|
||||
Boolean umi
|
||||
String output_dir
|
||||
String ref
|
||||
String bed
|
||||
|
||||
scatter(name in [tumor, normal]) {
|
||||
if (defined(name)) {
|
||||
if (name==tumor) {
|
||||
if (umi) {
|
||||
call mutation_calling_umi as tumor_mutation_calling_umi {
|
||||
input:
|
||||
name=name,
|
||||
output_dir=output_dir,
|
||||
ref=ref,
|
||||
bed=bed,
|
||||
rmdup_bam=tumor_rmdup_bam
|
||||
}
|
||||
}
|
||||
if (!umi) {
|
||||
# 单样本模式,normal没有定义
|
||||
if (name==select_first([normal, tumor])) {
|
||||
call mutation_calling_tissue as tumor_mutation_calling_tissue {
|
||||
input:
|
||||
name=name,
|
||||
output_dir=output_dir,
|
||||
ref=ref,
|
||||
bed=bed,
|
||||
rmdup_bam=normal_rmdup_bam
|
||||
}
|
||||
}
|
||||
# 双样本模式,normal有定义
|
||||
if (name!=select_first([normal, tumor])) {
|
||||
call mutation_calling_tissue_control as tumor_mutation_calling_tissue_control {
|
||||
input:
|
||||
name=name,
|
||||
output_dir=output_dir,
|
||||
ref=ref,
|
||||
bed=bed,
|
||||
tumor_rmdup_bam=tumor_rmdup_bam,
|
||||
normal_rmdup_bam=normal_rmdup_bam
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (name==select_first([normal, 'None'])) {
|
||||
if (umi) {
|
||||
call mutation_calling_tissue as normal_mutation_calling_tissue {
|
||||
input:
|
||||
name=name,
|
||||
output_dir=output_dir,
|
||||
ref=ref,
|
||||
bed=bed,
|
||||
rmdup_bam=normal_rmdup_bam
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
output {
|
||||
String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf"
|
||||
String somatic_nc_vcf = "${output_dir}/mutation/${normal}.snp.indel.vcf"
|
||||
}
|
||||
}
|
||||
114
wdl/qc.wdl
114
wdl/qc.wdl
|
|
@ -1,31 +1,71 @@
|
|||
task runqc {
|
||||
String name
|
||||
String inputDir
|
||||
String outputDir
|
||||
String input_dir
|
||||
String output_dir
|
||||
|
||||
command <<<
|
||||
echo "###### ${name} fastp beginning at: $(date) ######"
|
||||
# echo "###### ${name} fastp beginning at: $(date) ######"
|
||||
|
||||
if [ ! -d ${outputDir}/qc ];then
|
||||
mkdir ${outputDir}/qc
|
||||
if [ ! -d ${output_dir}/qc ];then
|
||||
mkdir ${output_dir}/qc
|
||||
fi
|
||||
|
||||
fastp -i ${inputDir}/*_${name}_*1.fq.gz -o ${outputDir}/qc/${name}_clean_R1.fq.gz \
|
||||
-I ${inputDir}/*_${name}_*2.fq.gz -O ${outputDir}/qc/${name}_clean_R2.fq.gz \
|
||||
fastp \
|
||||
-i ${input_dir}/*_${name}_*1.fq.gz \
|
||||
-o ${output_dir}/qc/${name}_clean_R1.fq.gz \
|
||||
-I ${input_dir}/*_${name}_*2.fq.gz \
|
||||
-O ${output_dir}/qc/${name}_clean_R2.fq.gz \
|
||||
-w 10 \
|
||||
-e 20 \
|
||||
--correction \
|
||||
--overlap_len_require 10 \
|
||||
-j ${outputDir}/qc/${name}.json \
|
||||
-h ${outputDir}/qc/${name}.html \
|
||||
--report_title $name \
|
||||
-e 20
|
||||
-j ${output_dir}/qc/${name}.json \
|
||||
-h ${output_dir}/qc/${name}.html \
|
||||
--report_title ${name}
|
||||
|
||||
echo "###### ${name} fastp end at: $(date) ######"
|
||||
>>>
|
||||
output {
|
||||
String r1 = "${outputDir}/qc/${name}_clean_R1.fq.gz "
|
||||
String r2 = "${outputDir}/qc/${name}_clean_R2.fq.gz "
|
||||
String json = "${outputDir}/qc/${name}.json"
|
||||
String r1 = "${output_dir}/qc/${name}_clean_R1.fq.gz "
|
||||
String r2 = "${output_dir}/qc/${name}_clean_R2.fq.gz "
|
||||
String json = "${output_dir}/qc/${name}.json"
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
task umiqc {
|
||||
String name
|
||||
String input_dir
|
||||
String output_dir
|
||||
|
||||
command <<<
|
||||
# echo "###### ${name} fastp beginning at: $(date) ######"
|
||||
|
||||
if [ ! -d ${output_dir}/qc ];then
|
||||
mkdir ${output_dir}/qc
|
||||
fi
|
||||
|
||||
fastp -i ${input_dir}/*_${name}_*1.fq.gz -o ${output_dir}/qc/${name}_clean_R1.fq.gz \
|
||||
-I ${input_dir}/*_${name}_*2.fq.gz -O ${output_dir}/qc/${name}_clean_R2.fq.gz \
|
||||
-w 10 \
|
||||
--correction \
|
||||
--overlap_len_require=10 \
|
||||
--umi \
|
||||
--umi_loc=per_read \
|
||||
--umi_len=4 \
|
||||
--umi_prefix=UMI \
|
||||
--umi_skip=3 \
|
||||
--umi_delim \ : \
|
||||
--disable_trim_poly_g \
|
||||
-j ${output_dir}/qc/${name}.json \
|
||||
-h ${output_dir}/qc/${name}.html \
|
||||
--report_title ${name}
|
||||
|
||||
>>>
|
||||
|
||||
output {
|
||||
String r1 = "${output_dir}/qc/${name}_clean_R1.fq.gz "
|
||||
String r2 = "${output_dir}/qc/${name}_clean_R2.fq.gz "
|
||||
String json = "${output_dir}/qc/${name}.json"
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -34,27 +74,49 @@ workflow qc {
|
|||
|
||||
String tumor
|
||||
String? normal
|
||||
String inputDir
|
||||
String outputDir
|
||||
Boolean umi
|
||||
String input_dir
|
||||
String output_dir
|
||||
|
||||
scatter(name in [tumor, normal]) {
|
||||
if (defined(name)) {
|
||||
call runqc {
|
||||
if (name==tumor) {
|
||||
if (umi) {
|
||||
call umiqc as run_umi_qc {
|
||||
input:
|
||||
name=name,
|
||||
inputDir=inputDir,
|
||||
outputDir=outputDir
|
||||
input_dir=input_dir,
|
||||
output_dir=output_dir
|
||||
}
|
||||
}
|
||||
if (!umi) {
|
||||
call runqc as run_tumor_qc {
|
||||
input:
|
||||
name=name,
|
||||
input_dir=input_dir,
|
||||
output_dir=output_dir
|
||||
}
|
||||
}
|
||||
}
|
||||
if (name==select_first([normal, 'None'])) {
|
||||
call runqc as run_normal_qc {
|
||||
input:
|
||||
name=name,
|
||||
input_dir=input_dir,
|
||||
output_dir=output_dir
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
output {
|
||||
String tumor_r1 = "${outputDir}/qc/${tumor}_clean_R1.fq.gz "
|
||||
String tumor_r2 = "${outputDir}/qc/${tumor}_clean_R2.fq.gz "
|
||||
String tumor_json = "${outputDir}/qc/${tumor}.json"
|
||||
String normal_r1 = "${outputDir}/qc/${normal}_clean_R1.fq.gz "
|
||||
String normal_r2 = "${outputDir}/qc/${normal}_clean_R2.fq.gz "
|
||||
String normal_json = "${outputDir}/qc/${normal}.json"
|
||||
String tumor_r1 = "${output_dir}/qc/${tumor}_clean_R1.fq.gz "
|
||||
String tumor_r2 = "${output_dir}/qc/${tumor}_clean_R2.fq.gz "
|
||||
String tumor_json = "${output_dir}/qc/${tumor}.json"
|
||||
String normal_r1 = "${output_dir}/qc/${normal}_clean_R1.fq.gz "
|
||||
String normal_r2 = "${output_dir}/qc/${normal}_clean_R2.fq.gz "
|
||||
String normal_json = "${output_dir}/qc/${normal}.json"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
67
wdl/task.wdl
67
wdl/task.wdl
|
|
@ -12,9 +12,10 @@ task create_dir {
|
|||
|
||||
task mutation_calling {
|
||||
String name
|
||||
String tumor_pileup
|
||||
String normal_pileup
|
||||
String tumor_rmdupBam
|
||||
String normal_rmdupBam
|
||||
String outputDir
|
||||
String bed
|
||||
|
||||
command <<<
|
||||
|
||||
|
|
@ -22,47 +23,27 @@ task mutation_calling {
|
|||
mkdir ${outputDir}/mutation
|
||||
fi
|
||||
|
||||
java -jar $VARSCAN somatic ${tumor_pileup} ${normal_pileup} \
|
||||
--output-snp ${outputDir}/mutation/${name}.snp.vcf \
|
||||
--output-indel ${outputDir}/mutation/${name}.indel.vcf \
|
||||
--min-var-freq 0.01 \
|
||||
--min-freq-for-hom 0.9 \
|
||||
--somatic-p-value 0.05 \
|
||||
--output-vcf 1 \
|
||||
--min-avg-qual 20 \
|
||||
--min-coverage-normal 10 \
|
||||
--min-coverage-tumor 30 \
|
||||
--min-reads2 3
|
||||
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
|
||||
-G /dataseq/jmdna/database/genome/hg19/hg19.fa \
|
||||
-f 0.01 \
|
||||
-N ${name} \
|
||||
-b "${tumor_rmdupBam}|${normal_rmdupBam}" \
|
||||
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | \
|
||||
/dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R | \
|
||||
/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \
|
||||
> ${outputDir}/mutation/${name}_vardict.snp.indel.vcf
|
||||
|
||||
java -jar $VARSCAN processSomatic \
|
||||
${outputDir}/mutation/${name}.snp.vcf \
|
||||
--min-tumor-freq 0.01 \
|
||||
--max-normal-freq 0.01 \
|
||||
--p-value 0.05
|
||||
|
||||
java -jar $VARSCAN processSomatic \
|
||||
${outputDir}/mutation/${name}.indel.vcf \
|
||||
--min-tumor-freq 0.01 \
|
||||
--max-normal-freq 0.01 \
|
||||
--p-value 0.05
|
||||
|
||||
java -jar $GATK MergeVcfs \
|
||||
-I ${outputDir}/mutation/${name}.snp.Somatic.hc.vcf \
|
||||
-I ${outputDir}/mutation/${name}.indel.Somatic.hc.vcf \
|
||||
-O ${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf \
|
||||
-D /dataseq/jmdna/database/genome/hg19/hg19.dict
|
||||
|
||||
java -jar $GATK MergeVcfs \
|
||||
-I ${outputDir}/mutation/${name}.snp.Germline.vcf \
|
||||
-I ${outputDir}/mutation/${name}.indel.Germline.vcf \
|
||||
-O ${outputDir}/mutation/${name}.snp.indel.Germline.vcf \
|
||||
-D /dataseq/jmdna/database/genome/hg19/hg19.dict
|
||||
|
||||
java -jar $GATK MergeVcfs \
|
||||
-I ${outputDir}/mutation/${name}.snp.LOH.hc.vcf \
|
||||
-I ${outputDir}/mutation/${name}.indel.LOH.hc.vcf \
|
||||
-O ${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf \
|
||||
-D /dataseq/jmdna/database/genome/hg19/hg19.dict
|
||||
vep \
|
||||
--input_file ${outputDir}/mutation/${name}_vardict.snp.indel.vcf \
|
||||
--output_file ${outputDir}/mutation/${name}_vardict_vep.snp.indel.vcf \
|
||||
--format vcf \
|
||||
--vcf \
|
||||
--symbol \
|
||||
--terms SO \
|
||||
--hgvs \--fasta /dataseq/jmdna/database/genome/hg19/hg19.fa \
|
||||
--offline --cache --dir_cache /home/software/.vep \
|
||||
--pick \
|
||||
--force_overwrite
|
||||
|
||||
>>>
|
||||
|
||||
|
|
@ -580,7 +561,7 @@ task auto_report {
|
|||
perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer}
|
||||
python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor}
|
||||
perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor}
|
||||
python3 ${codesDir}/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}
|
||||
python3 ${codesDir}/report_template/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}
|
||||
|
||||
ln -s ${cnv_cns} ${outputDir}/report/
|
||||
ln -s ${cnv_png} ${outputDir}/report/
|
||||
|
|
|
|||
Loading…
Reference in New Issue