调整call_mutation

master
chaopower 2023-09-27 10:47:03 +08:00
parent 82ce1d74bf
commit 188e7dd8a4
6 changed files with 439 additions and 292 deletions

View File

@ -2,8 +2,10 @@
## run script example ## run script example
```bash ```bash
/home/zhangchao/soft/jdk-17.0.7+7/bin/java -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf \ export PATH=/dataseq/product/workflow/software/bin:$PATH
-jar /home/zhangchao/soft/cromwell-85.jar run \
java17 -Dconfig.file=/home/zhangchao/project/pipeline/wdl/cromwell.examples.conf \
-jar /dataseq/product/workflow/software/cromwell-85.jar run \
--inputs /home/zhangchao/project/pipeline/workflow/test/20230814.json \ --inputs /home/zhangchao/project/pipeline/workflow/test/20230814.json \
/home/zhangchao/project/pipeline/workflow/pipeline.wdl /home/zhangchao/project/pipeline/workflow/pipeline.wdl
``` ```

View File

@ -1,21 +1,23 @@
import "./wdl/task.wdl" as mytask import "./wdl/task.wdl" as mytask
import "./wdl/qc.wdl" import "./wdl/qc.wdl"
import "./wdl/alignment.wdl" import "./wdl/alignment.wdl"
import "./wdl/statistics.wdl" import "./wdl/call_mutation.wdl"
workflow pipeline { workflow pipeline {
String tumor String tumor
String? normal String? normal
String inputDir Boolean umi=false
String outputDir
String cancer
String project="650gene"
String codesDir="/dataseq/jmdna/codes/pancancer_controlsample"
String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
String bed = "/dataseq/jmdna/database/bed/650.bed"
String workdir="${outputDir}/${tumor}" String input_dir
String output_dir
String bed
String codesDir="/home/zhangchao/project/pipeline/workflow/script"
String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
String workdir="${output_dir}"
call mytask.create_dir as create_dir { call mytask.create_dir as create_dir {
input: input:
@ -26,8 +28,9 @@ workflow pipeline {
input: input:
tumor=tumor, tumor=tumor,
normal=normal, normal=normal,
inputDir=inputDir, umi=umi,
outputDir=workdir input_dir=input_dir,
output_dir=workdir
} }
call alignment.alignment as alignment { call alignment.alignment as alignment {
@ -41,183 +44,23 @@ workflow pipeline {
normal_r1=qc.normal_r1, normal_r1=qc.normal_r1,
normal_r2=qc.normal_r2, normal_r2=qc.normal_r2,
umi=umi,
ref=ref, ref=ref,
bed=bed, bed=bed,
outputDir=workdir output_dir=workdir
} }
call statistics.statistics as statistics { call call_mutation.call_mutation as call_mutation {
input: input:
tumor=tumor, tumor=tumor,
tumor_rmdupBam=alignment.tumor_rmdupBam, tumor_rmdup_bam=alignment.tumor_rmdup_bam,
normal=normal, normal=normal,
normal_rmdupBam=alignment.normal_rmdupBam, normal_rmdup_bam=alignment.normal_rmdup_bam,
umi=umi,
ref=ref, ref=ref,
bed=bed, bed=bed,
outputDir=workdir, output_dir=workdir
codesDir=codesDir
}
call mytask.conpair as conpair {
input:
codesDir=codesDir,
name=tumor,
tumor_rmdupBam=alignment.tumor_rmdupBam,
normal_rmdupBam=alignment.normal_rmdupBam,
outputDir=workdir,
ref=ref
}
call mytask.mutation_calling as mutation_calling {
input:
name=tumor,
tumor_pileup=alignment.tumor_pileup,
normal_pileup=alignment.normal_pileup,
outputDir=workdir
}
call mytask.annovar as annovar {
input:
name=tumor,
outputDir=workdir,
ref=ref,
somatic_hc_vcf=mutation_calling.somatic_hc_vcf,
germline_vcf=mutation_calling.germline_vcf,
loh_hc_vcf=mutation_calling.loh_hc_vcf,
rmdupBam=alignment.tumor_rmdupBam
}
call mytask.dealwithsnvindel as dealwithsnvindel {
input:
codesDir=codesDir,
name=tumor,
somatic_all_anno=annovar.somatic_all_anno,
germline_anno=annovar.germline_anno,
project=project,
outputDir=workdir,
cancer=cancer
}
call mytask.hereditary as hereditary {
input:
codesDir=codesDir,
name=tumor,
outputDir=workdir,
project=project,
germline_filtered = dealwithsnvindel.germline_filtered
}
call mytask.tmb as tmb {
input:
codesDir=codesDir,
name=tumor,
outputDir=workdir,
somatic_anno=annovar.somatic_anno
}
call mytask.fusion as fusion {
input:
name=tumor,
ref=ref,
codesDir=codesDir,
outputDir=workdir,
rmdupBam=alignment.tumor_rmdupBam,
cancer=cancer,
project=project,
tumor_bamdst_depth=statistics.tumor_bamdst_depth
}
call mytask.cnvkit as cnvkit {
input:
tumor=tumor,
normal=normal,
tumor_rmdupBam=alignment.tumor_rmdupBam,
normal_rmdupBam=alignment.normal_rmdupBam,
ref=ref,
bed=bed,
outputDir=workdir,
cancer=cancer,
codesDir=codesDir,
project=project,
}
call mytask.chemo as chemo {
input:
codesDir=codesDir,
outputDir=workdir,
normal=normal,
project=project,
rmdupBam=alignment.tumor_rmdupBam,
}
call mytask.msi as msi {
input:
bed=bed,
name=tumor,
outputDir=workdir,
tumor_rmdupBam = alignment.tumor_rmdupBam,
normal_rmdupBam =alignment.normal_rmdupBam
}
call mytask.hla as hla {
input:
inputDir=inputDir,
outputDir=workdir,
normal=normal,
}
call mytask.neoantigen as neoantigen {
input:
codesDir=codesDir,
outputDir=workdir,
name=tumor,
somatic_hc_vcf=mutation_calling.somatic_hc_vcf,
normal=normal,
hla=hla.hla
}
call mytask.mmr as mmr {
input:
codesDir=codesDir,
name=tumor,
outputDir=workdir,
germline_filtered = dealwithsnvindel.germline_filtered
}
call mytask.hrr as hrr {
input:
codesDir=codesDir,
name=tumor,
outputDir=workdir,
germline_filtered = dealwithsnvindel.germline_filtered
}
call mytask.hotspot as hotspot {
input:
name=tumor,
outputDir=workdir,
ref=ref,
rmdupBam=alignment.tumor_rmdupBam,
codesDir=codesDir,
}
call mytask.auto_report {
input:
tumor=tumor,
normal=normal,
outputDir=workdir,
codesDir=codesDir,
cancer=cancer,
cnv_cns=cnvkit.cns,
cnv_png=cnvkit.png,
fusion_pos=fusion.fusion,
snvindel_filtered=dealwithsnvindel.snvindel_filtered,
tmb=tmb.tmb,
mmr=mmr.mmr,
hrr=hrr.hrr,
hereditary_pre=hereditary.hereditary_pre
} }
} }

View File

@ -4,48 +4,75 @@ task bwa {
String name String name
String read1 String read1
String read2 String read2
String outputDir String output_dir
String ref String ref
command <<< command <<<
if [ ! -d ${outputDir}/alignment ];then if [ ! -d ${output_dir}/alignment ];then
mkdir ${outputDir}/alignment mkdir ${output_dir}/alignment
fi fi
bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} ${read1} ${read2} | \ bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} ${read1} ${read2} | \
samtools view -@ 10 -bh -o - | samtools sort -@ 10 -o ${outputDir}/alignment/${name}.sorted.bam samtools view -@ 10 -bh -o - | samtools sort -@ 10 -o ${output_dir}/alignment/${name}.sorted.bam
samtools index ${outputDir}/alignment/${name}.sorted.bam samtools index ${output_dir}/alignment/${name}.sorted.bam
>>> >>>
output { output {
String sortedBam = "${outputDir}/alignment/${name}.sorted.bam" String sorted_bam = "${output_dir}/alignment/${name}.sorted.bam"
} }
} }
#remove PCR duplicates #remove PCR duplicates
task markduplicates { task markduplicates_genecore {
String name String name
String ref String ref
String sortedBam String sorted_bam
String outputDir String output_dir
command <<< command <<<
if [ ! -d ${outputDir}/alignment ];then if [ ! -d ${output_dir}/alignment ];then
mkdir ${outputDir}/alignment mkdir ${output_dir}/alignment
fi fi
gencore -i ${sorted_bam} \
-o ${output_dir}/alignment/${name}.rmdup.bam \
-r ${ref} \
-u UMI \
-j ${output_dir}/alignment/${name}_rmdup.json \
-h ${output_dir}/alignment/${name}_rmdup.html
samtools index ${output_dir}/alignment/${name}.rmdup.bam
>>>
output {
String rmdup_bam = "${output_dir}/alignment/${name}.rmdup.bam"
}
}
task markduplicates_picard {
String name
String ref
String sorted_bam
String output_dir
command <<<
if [ ! -d ${output_dir}/alignment ];then
mkdir ${output_dir}/alignment
fi
java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \ java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \
I=${sortedBam} \ I=${sorted_bam} \
O=${outputDir}/alignment/${name}.rmdup.bam \ O=${output_dir}/alignment/${name}.rmdup.bam \
CREATE_INDEX=true \ CREATE_INDEX=true \
M=${outputDir}/alignment/${name}.rmdup.metrics.txt \ M=${output_dir}/alignment/${name}.rmdup.metrics.txt \
R=${ref} R=${ref}
>>> >>>
output { output {
String rmdupBam = "${outputDir}/alignment/${name}.rmdup.bam" String rmdup_bam = "${output_dir}/alignment/${name}.rmdup.bam"
} }
} }
@ -56,16 +83,16 @@ task generater_mpileup {
String rmdupBam String rmdupBam
String ref String ref
String bed String bed
String outputDir String output_dir
command <<< command <<<
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} ${rmdupBam} -o ${outputDir}/alignment/${name}.pileup samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} ${rmdupBam} -o ${output_dir}/alignment/${name}.pileup
>>> >>>
output { output {
String pileup = "${outputDir}/alignment/${name}.pileup" String pileup = "${output_dir}/alignment/${name}.pileup"
} }
} }
@ -79,9 +106,11 @@ workflow alignment {
String? normal_r1 String? normal_r1
String? normal_r2 String? normal_r2
Boolean umi
String ref String ref
String bed String bed
String outputDir String output_dir
scatter(name in [tumor, normal]) { scatter(name in [tumor, normal]) {
if (defined(name)) { if (defined(name)) {
@ -89,36 +118,51 @@ workflow alignment {
input: input:
name=name, name=name,
ref=ref, ref=ref,
outputDir=outputDir, output_dir=output_dir,
read1=if name==tumor then tumor_r1 else normal_r1, read1=if name==tumor then tumor_r1 else normal_r1,
read2=if name==tumor then tumor_r2 else normal_r2 read2=if name==tumor then tumor_r2 else normal_r2
} }
call markduplicates { if (name==tumor) {
if (umi) {
call markduplicates_genecore as tumor_markduplicates_genecore {
input: input:
name=name, name=name,
ref=ref, ref=ref,
outputDir=outputDir, output_dir=output_dir,
sortedBam=bwa.sortedBam sorted_bam=bwa.sorted_bam,
} }
call generater_mpileup { }
if (!umi) {
call markduplicates_picard as tumor_markduplicates_picard {
input: input:
name=name, name=name,
ref=ref, ref=ref,
outputDir=outputDir, output_dir=output_dir,
bed=bed, sorted_bam=bwa.sorted_bam,
rmdupBam=markduplicates.rmdupBam }
}
}
if (name==select_first([normal, 'None'])) {
call markduplicates_picard as normal_markduplicates_picard {
input:
name=name,
ref=ref,
output_dir=output_dir,
sorted_bam=bwa.sorted_bam,
}
} }
} }
} }
output { output {
String tumor_sortedBam = "${outputDir}/alignment/${tumor}.sorted.bam" String tumor_sorted_bam = "${output_dir}/alignment/${tumor}.sorted.bam"
String tumor_rmdupBam = "${outputDir}/alignment/${tumor}.rmdup.bam" String tumor_rmdup_bam = "${output_dir}/alignment/${tumor}.rmdup.bam"
String tumor_pileup = "${outputDir}/alignment/${tumor}.pileup" String tumor_pileup = "${output_dir}/alignment/${tumor}.pileup"
String normal_sortedBam = "${outputDir}/alignment/${normal}.sorted.bam" String normal_sorted_bam = "${output_dir}/alignment/${normal}.sorted.bam"
String normal_rmdupBam = "${outputDir}/alignment/${normal}.rmdup.bam" String normal_rmdup_bam = "${output_dir}/alignment/${normal}.rmdup.bam"
String normal_pileup = "${outputDir}/alignment/${normal}.pileup" String normal_pileup = "${output_dir}/alignment/${normal}.pileup"
} }
} }

View File

@ -0,0 +1,215 @@
# Variant calling for a UMI-deduplicated sample.
#
# Inputs:
#   name       - sample name; used for VarDict -N and for all output file names
#   output_dir - sample working directory; results go to <output_dir>/mutation
#   rmdup_bam  - deduplicated BAM to call variants on
#   ref        - reference genome FASTA
#   bed        - target regions BED
#
# Steps: VarDict on the full BAM ("1r"), VarDict on the BAM produced by
# fetch_bam.py ("2r"), merge of both call sets with 1r_plus_2r.pl, then
# ANNOVAR annotation of the merged VCF.
#
# Output:
#   vcf - path of the merged VCF written by 1r_plus_2r.pl
task mutation_calling_umi {
    String name
    String output_dir
    String rmdup_bam
    String ref
    String bed

    command <<<
        # mkdir -p is safe when several scattered calls share one output_dir.
        # The alignment directory is needed for the 2r BAM written below.
        mkdir -p ${output_dir}/mutation ${output_dir}/alignment

        # Call 1 ("1r"): call on all reads.
        # Families supported by a single read are kept here, because dropping
        # them would reduce the amount of data considerably.
        # -r 3: require 3 supporting reads.
        # -f:   allele-frequency threshold; frequencies called in the 2-read
        #       mode are more trustworthy than those from the 1-read mode.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.001 \
            -N ${name} \
            -b ${rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \
            -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf

        # Extract the sequences corrected from >= 2 reads.
        # Uses the rmdup_bam input rather than a path rebuilt from output_dir.
        python3 /home/zhangchao/project/pipeline/control/script/fetch_bam.py \
            ${rmdup_bam} \
            ${output_dir}/alignment/${name}.2r.rmdup.bam
        samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam

        # Call 2 ("2r"): call on the >= 2-read corrected BAM, so that mutations
        # seen in 1r mode can be backed by UMI families with 2 reads.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
            -f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.2r.snp.indel.vcf

        # Merge: 1r-mode mutations with AF > 0.01, plus the 2r-mode calls used
        # to correct the low-frequency region (AF < 0.01) of the 1r-mode calls.
        perl /home/zhangchao/project/pipeline/control/script/1r_plus_2r.pl \
            ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf

        # Annotate the merged VCF (11 protocols, 11 arguments, 11 operations).
        table_annovar.pl \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            --intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            --outfile ${output_dir}/mutation/${name}.snp.indel.anno
    >>>

    output {
        # Points at the merged VCF the command actually writes; the previous
        # ".filter.flag.snp.indel.vcf" path was never produced by this task.
        String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
    }
}
# Single-sample variant calling: VarDict on one deduplicated BAM, followed by
# ANNOVAR annotation of the resulting VCF.
#
# Inputs:
#   name       - sample name; used for VarDict -N and for output file names
#   bed        - target regions BED
#   ref        - reference genome FASTA
#   output_dir - sample working directory; results go to <output_dir>/mutation
#   rmdup_bam  - deduplicated BAM to call variants on
#
# Output:
#   vcf - path of the VarDict VCF
task mutation_calling_tissue {
    String name
    String bed
    String ref
    String output_dir
    String rmdup_bam

    command <<<
        # Create the result directory only when it is missing.
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # VarDict -> strand-bias test -> VCF conversion (AF threshold 0.01).
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.01 -N ${name} -b ${rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.vcf

        # ANNOVAR annotation (10 protocols, 10 arguments, 10 operations).
        table_annovar.pl \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
            -operation g,f,f,f,f,f,f,f,f,f \
            --intronhgvs 50 \
            --outfile ${output_dir}/mutation/${name}.snp.indel.anno
    >>>

    output {
        String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
    }
}
# Paired tumor/normal variant calling: VarDict in paired mode
# ("tumor|normal" BAMs), followed by ANNOVAR annotation of the resulting VCF.
#
# Inputs:
#   name             - sample name; used for VarDict -N and output file names
#   bed              - target regions BED
#   ref              - reference genome FASTA
#   output_dir       - sample working directory; results go to <output_dir>/mutation
#   tumor_rmdup_bam  - deduplicated tumor BAM
#   normal_rmdup_bam - deduplicated normal (control) BAM
#
# Output:
#   vcf - path of the paired VarDict VCF
task mutation_calling_tissue_control {
    String name
    String bed
    String ref
    String output_dir
    String tumor_rmdup_bam
    String normal_rmdup_bam

    command <<<
        # Create the result directory only when it is missing.
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # VarDict paired mode -> somatic test -> paired VCF conversion.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.01 -N ${name} \
            -b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \
            -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.vcf

        # ANNOVAR annotation (10 protocols, 10 arguments, 10 operations).
        table_annovar.pl \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
            -operation g,f,f,f,f,f,f,f,f,f \
            --intronhgvs 50 \
            --outfile ${output_dir}/mutation/${name}.snp.indel.anno
    >>>

    output {
        String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
    }
}
# Variant-calling sub-workflow.
#
# Inputs:
#   tumor / tumor_rmdup_bam   - tumor sample name and its deduplicated BAM
#   normal / normal_rmdup_bam - optional control sample name and its BAM
#   umi                       - true when the tumor library carries UMIs
#   output_dir, ref, bed      - working directory, reference FASTA, target BED
#
# Dispatch:
#   umi                 -> tumor: mutation_calling_umi
#                          normal (if defined): mutation_calling_tissue
#   !umi, normal absent -> tumor: mutation_calling_tissue (single-sample)
#   !umi, normal given  -> tumor: mutation_calling_tissue_control (paired)
#
# Outputs are path strings built from output_dir and the sample names.
workflow call_mutation {
    String tumor
    String tumor_rmdup_bam
    String? normal
    String? normal_rmdup_bam
    Boolean umi
    String output_dir
    String ref
    String bed

    scatter(name in [tumor, normal]) {
        if (defined(name)) {

            if (name==tumor) {
                if (umi) {
                    call mutation_calling_umi as tumor_mutation_calling_umi {
                        input:
                            name=name,
                            output_dir=output_dir,
                            ref=ref,
                            bed=bed,
                            rmdup_bam=tumor_rmdup_bam
                    }
                }
                if (!umi) {
                    # Single-sample mode: normal is not defined, so
                    # select_first falls back to tumor and the names match.
                    if (name==select_first([normal, tumor])) {
                        call mutation_calling_tissue as tumor_mutation_calling_tissue {
                            input:
                                name=name,
                                output_dir=output_dir,
                                ref=ref,
                                bed=bed,
                                # Fixed: the tumor sample must be called on the
                                # tumor BAM; normal_rmdup_bam has no real
                                # control sample behind it in this branch.
                                rmdup_bam=tumor_rmdup_bam
                        }
                    }
                    # Paired mode: normal is defined (and differs from tumor).
                    if (name!=select_first([normal, tumor])) {
                        call mutation_calling_tissue_control as tumor_mutation_calling_tissue_control {
                            input:
                                name=name,
                                output_dir=output_dir,
                                ref=ref,
                                bed=bed,
                                tumor_rmdup_bam=tumor_rmdup_bam,
                                normal_rmdup_bam=normal_rmdup_bam
                        }
                    }
                }
            }

            # Control sample: called on its own only in UMI mode; in non-UMI
            # mode it is consumed by the paired call above.
            if (name==select_first([normal, 'None'])) {
                if (umi) {
                    call mutation_calling_tissue as normal_mutation_calling_tissue {
                        input:
                            name=name,
                            output_dir=output_dir,
                            ref=ref,
                            bed=bed,
                            rmdup_bam=normal_rmdup_bam
                    }
                }
            }
        }
    }

    output {
        String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf"
        String somatic_nc_vcf = "${output_dir}/mutation/${normal}.snp.indel.vcf"
    }
}

View File

@ -1,31 +1,71 @@
task runqc { task runqc {
String name String name
String inputDir String input_dir
String outputDir String output_dir
command <<< command <<<
echo "###### ${name} fastp beginning at: $(date) ######" # echo "###### ${name} fastp beginning at: $(date) ######"
if [ ! -d ${outputDir}/qc ];then if [ ! -d ${output_dir}/qc ];then
mkdir ${outputDir}/qc mkdir ${output_dir}/qc
fi fi
fastp -i ${inputDir}/*_${name}_*1.fq.gz -o ${outputDir}/qc/${name}_clean_R1.fq.gz \ fastp \
-I ${inputDir}/*_${name}_*2.fq.gz -O ${outputDir}/qc/${name}_clean_R2.fq.gz \ -i ${input_dir}/*_${name}_*1.fq.gz \
-o ${output_dir}/qc/${name}_clean_R1.fq.gz \
-I ${input_dir}/*_${name}_*2.fq.gz \
-O ${output_dir}/qc/${name}_clean_R2.fq.gz \
-w 10 \ -w 10 \
-e 20 \
--correction \ --correction \
--overlap_len_require 10 \ --overlap_len_require 10 \
-j ${outputDir}/qc/${name}.json \ -j ${output_dir}/qc/${name}.json \
-h ${outputDir}/qc/${name}.html \ -h ${output_dir}/qc/${name}.html \
--report_title $name \ --report_title ${name}
-e 20
echo "###### ${name} fastp end at: $(date) ######"
>>> >>>
output { output {
String r1 = "${outputDir}/qc/${name}_clean_R1.fq.gz " String r1 = "${output_dir}/qc/${name}_clean_R1.fq.gz "
String r2 = "${outputDir}/qc/${name}_clean_R2.fq.gz " String r2 = "${output_dir}/qc/${name}_clean_R2.fq.gz "
String json = "${outputDir}/qc/${name}.json" String json = "${output_dir}/qc/${name}.json"
}
}
task umiqc {
String name
String input_dir
String output_dir
command <<<
# echo "###### ${name} fastp beginning at: $(date) ######"
if [ ! -d ${output_dir}/qc ];then
mkdir ${output_dir}/qc
fi
fastp -i ${input_dir}/*_${name}_*1.fq.gz -o ${output_dir}/qc/${name}_clean_R1.fq.gz \
-I ${input_dir}/*_${name}_*2.fq.gz -O ${output_dir}/qc/${name}_clean_R2.fq.gz \
-w 10 \
--correction \
--overlap_len_require=10 \
--umi \
--umi_loc=per_read \
--umi_len=4 \
--umi_prefix=UMI \
--umi_skip=3 \
--umi_delim \ : \
--disable_trim_poly_g \
-j ${output_dir}/qc/${name}.json \
-h ${output_dir}/qc/${name}.html \
--report_title ${name}
>>>
output {
String r1 = "${output_dir}/qc/${name}_clean_R1.fq.gz "
String r2 = "${output_dir}/qc/${name}_clean_R2.fq.gz "
String json = "${output_dir}/qc/${name}.json"
} }
} }
@ -34,27 +74,49 @@ workflow qc {
String tumor String tumor
String? normal String? normal
String inputDir Boolean umi
String outputDir String input_dir
String output_dir
scatter(name in [tumor, normal]) { scatter(name in [tumor, normal]) {
if (defined(name)) { if (defined(name)) {
call runqc { if (name==tumor) {
if (umi) {
call umiqc as run_umi_qc {
input: input:
name=name, name=name,
inputDir=inputDir, input_dir=input_dir,
outputDir=outputDir output_dir=output_dir
} }
} }
if (!umi) {
call runqc as run_tumor_qc {
input:
name=name,
input_dir=input_dir,
output_dir=output_dir
}
}
}
if (name==select_first([normal, 'None'])) {
call runqc as run_normal_qc {
input:
name=name,
input_dir=input_dir,
output_dir=output_dir
}
}
}
} }
output { output {
String tumor_r1 = "${outputDir}/qc/${tumor}_clean_R1.fq.gz " String tumor_r1 = "${output_dir}/qc/${tumor}_clean_R1.fq.gz "
String tumor_r2 = "${outputDir}/qc/${tumor}_clean_R2.fq.gz " String tumor_r2 = "${output_dir}/qc/${tumor}_clean_R2.fq.gz "
String tumor_json = "${outputDir}/qc/${tumor}.json" String tumor_json = "${output_dir}/qc/${tumor}.json"
String normal_r1 = "${outputDir}/qc/${normal}_clean_R1.fq.gz " String normal_r1 = "${output_dir}/qc/${normal}_clean_R1.fq.gz "
String normal_r2 = "${outputDir}/qc/${normal}_clean_R2.fq.gz " String normal_r2 = "${output_dir}/qc/${normal}_clean_R2.fq.gz "
String normal_json = "${outputDir}/qc/${normal}.json" String normal_json = "${output_dir}/qc/${normal}.json"
} }
} }

View File

@ -12,9 +12,10 @@ task create_dir {
task mutation_calling { task mutation_calling {
String name String name
String tumor_pileup String tumor_rmdupBam
String normal_pileup String normal_rmdupBam
String outputDir String outputDir
String bed
command <<< command <<<
@ -22,47 +23,27 @@ task mutation_calling {
mkdir ${outputDir}/mutation mkdir ${outputDir}/mutation
fi fi
java -jar $VARSCAN somatic ${tumor_pileup} ${normal_pileup} \ java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
--output-snp ${outputDir}/mutation/${name}.snp.vcf \ -G /dataseq/jmdna/database/genome/hg19/hg19.fa \
--output-indel ${outputDir}/mutation/${name}.indel.vcf \ -f 0.01 \
--min-var-freq 0.01 \ -N ${name} \
--min-freq-for-hom 0.9 \ -b "${tumor_rmdupBam}|${normal_rmdupBam}" \
--somatic-p-value 0.05 \ -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | \
--output-vcf 1 \ /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R | \
--min-avg-qual 20 \ /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \
--min-coverage-normal 10 \ > ${outputDir}/mutation/${name}_vardict.snp.indel.vcf
--min-coverage-tumor 30 \
--min-reads2 3
java -jar $VARSCAN processSomatic \ vep \
${outputDir}/mutation/${name}.snp.vcf \ --input_file ${outputDir}/mutation/${name}_vardict.snp.indel.vcf \
--min-tumor-freq 0.01 \ --output_file ${outputDir}/mutation/${name}_vardict_vep.snp.indel.vcf \
--max-normal-freq 0.01 \ --format vcf \
--p-value 0.05 --vcf \
--symbol \
java -jar $VARSCAN processSomatic \ --terms SO \
${outputDir}/mutation/${name}.indel.vcf \ --hgvs \--fasta /dataseq/jmdna/database/genome/hg19/hg19.fa \
--min-tumor-freq 0.01 \ --offline --cache --dir_cache /home/software/.vep \
--max-normal-freq 0.01 \ --pick \
--p-value 0.05 --force_overwrite
java -jar $GATK MergeVcfs \
-I ${outputDir}/mutation/${name}.snp.Somatic.hc.vcf \
-I ${outputDir}/mutation/${name}.indel.Somatic.hc.vcf \
-O ${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf \
-D /dataseq/jmdna/database/genome/hg19/hg19.dict
java -jar $GATK MergeVcfs \
-I ${outputDir}/mutation/${name}.snp.Germline.vcf \
-I ${outputDir}/mutation/${name}.indel.Germline.vcf \
-O ${outputDir}/mutation/${name}.snp.indel.Germline.vcf \
-D /dataseq/jmdna/database/genome/hg19/hg19.dict
java -jar $GATK MergeVcfs \
-I ${outputDir}/mutation/${name}.snp.LOH.hc.vcf \
-I ${outputDir}/mutation/${name}.indel.LOH.hc.vcf \
-O ${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf \
-D /dataseq/jmdna/database/genome/hg19/hg19.dict
>>> >>>
@ -580,7 +561,7 @@ task auto_report {
perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer} perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer}
python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor} python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor}
perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor} perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor}
python3 ${codesDir}/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer} python3 ${codesDir}/report_template/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}
ln -s ${cnv_cns} ${outputDir}/report/ ln -s ${cnv_cns} ${outputDir}/report/
ln -s ${cnv_png} ${outputDir}/report/ ln -s ${cnv_png} ${outputDir}/report/