987 lines
30 KiB
Plaintext
987 lines
30 KiB
Plaintext
workflow pancancer_singlesample_umi{
|
||
String normal
|
||
String tumor
|
||
String cancer
|
||
String project = "624gene"
|
||
String raw_bed = "/dataseq/jmdna/database/bed/624.merge.bed"
|
||
String bed = "/dataseq/jmdna/database/bed/624plus20bp_merge.bed"
|
||
String outputDir
|
||
String inputDir
|
||
String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
|
||
String codes_dir = "/dataseq/jmdna/codes/624"
|
||
String accessBed = "/dataseq/jmdna/software/cnvkit-0.9.7/data/access-5k-mappable.hg19.bed"
|
||
String annotateGene = "/dataseq/jmdna/software/cnvkit-0.9.7/data/refFlat.txt"
|
||
String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz"
|
||
#创建目录
|
||
call create_dir{
|
||
input:
|
||
outputDir=outputDir
|
||
}
|
||
scatter (read in ["1","2"]){
|
||
call changehead{
|
||
input:
|
||
newdir=create_dir.newdir,
|
||
tumor=tumor,
|
||
inputDir=inputDir,
|
||
outputDir=outputDir,
|
||
read=read
|
||
}
|
||
}
|
||
|
||
call qc{
|
||
input:
|
||
data=changehead.outputFile,
|
||
tumor=tumor,
|
||
inputDir=inputDir,
|
||
outputDir=outputDir,
|
||
codes_dir=codes_dir
|
||
}
|
||
call qc_normal{
|
||
input:
|
||
data=changehead.outputFile,
|
||
normal=normal,
|
||
inputDir=inputDir,
|
||
outputDir=outputDir
|
||
}
|
||
scatter (name in [normal,tumor]){
|
||
call alignment_bwa{
|
||
input:
|
||
name=name,
|
||
outputDir=outputDir,
|
||
ref=ref,
|
||
qc_T=qc.outputFile,
|
||
qc_N=qc_normal.outputFile
|
||
}
|
||
call rmdup{
|
||
input:
|
||
name=name,
|
||
outputDir=outputDir,
|
||
ref=ref,
|
||
bam=alignment_bwa.sorted
|
||
}
|
||
call generater_mpileup{
|
||
input:
|
||
rmdup=rmdup.rmdup,
|
||
name=name,
|
||
ref=ref,
|
||
bed=bed,
|
||
outputDir=outputDir
|
||
}
|
||
}
|
||
call mutation_calling{
|
||
input:
|
||
tumor=tumor,
|
||
outputDir=outputDir,
|
||
ref=ref,
|
||
bed=bed,
|
||
normal=normal,
|
||
codes_dir=codes_dir,
|
||
rmdup=rmdup.rmdup[1]
|
||
}
|
||
call mutation_calling_normal{
|
||
input:
|
||
normal=normal,
|
||
outputDir=outputDir,
|
||
ref=ref,
|
||
bed=bed,
|
||
rmdup=rmdup.rmdup[0]
|
||
}
|
||
call hotspot{
|
||
input:
|
||
rmdupBam=rmdup.rmdup[1],
|
||
tumor=tumor,
|
||
outputDir=outputDir,
|
||
ref=ref,
|
||
codes_dir=codes_dir
|
||
}
|
||
call annovar{
|
||
input:
|
||
codes_dir=codes_dir,
|
||
tumor=tumor,
|
||
normal=normal,
|
||
outputDir=outputDir,
|
||
tvcf=mutation_calling.vcf,
|
||
nvcf=mutation_calling_normal.vcf
|
||
}
|
||
scatter (name in [normal,tumor]){
|
||
call qc_2{
|
||
input:
|
||
ref=ref,
|
||
bed=raw_bed,
|
||
name=name,
|
||
tumor=tumor,
|
||
outputDir=outputDir,
|
||
codes_dir=codes_dir,
|
||
anno=annovar.anno
|
||
}
|
||
}
|
||
call fusion{
|
||
input:
|
||
ref=ref,
|
||
codes_dir=codes_dir,
|
||
tumor=tumor,
|
||
cancer=cancer,
|
||
outputDir=outputDir,
|
||
Bam=alignment_bwa.sorted[1],
|
||
project=project
|
||
}
|
||
call TMB{
|
||
input:
|
||
codes_dir=codes_dir,
|
||
tumor=tumor,
|
||
outputDir=outputDir,
|
||
anno=annovar.anno
|
||
}
|
||
call conpair{
|
||
input:
|
||
tumor=tumor,
|
||
normal=normal,
|
||
ref=ref,
|
||
outputDir=outputDir,
|
||
rmdupBam=rmdup.rmdup
|
||
}
|
||
call chemoTherapy{
|
||
input:
|
||
codes_dir=codes_dir,
|
||
normal=normal,
|
||
outputDir=outputDir,
|
||
ref=ref,
|
||
project=project,
|
||
rmdupBam=rmdup.rmdup[0]
|
||
}
|
||
call tumor_content{
|
||
input:
|
||
ref=ref,
|
||
tumor=tumor,
|
||
normal=normal,
|
||
outputDir=outputDir,
|
||
gc_wiggle=gc_wiggle,
|
||
codes_dir=codes_dir,
|
||
pileup=generater_mpileup.pileup
|
||
}
|
||
call cnvkit{
|
||
input:
|
||
tumor=tumor,
|
||
cancer=cancer,
|
||
outputDir=outputDir,
|
||
rmdupBam=rmdup.rmdup[1],
|
||
codes_dir=codes_dir,
|
||
project=project,
|
||
purity=tumor_content.purity
|
||
}
|
||
call dealwithsnvindel{
|
||
input:
|
||
codes_dir=codes_dir,
|
||
project=project,
|
||
outputDir=outputDir,
|
||
tumor=tumor,
|
||
cancer=cancer,
|
||
anno=annovar.anno
|
||
}
|
||
call MMR{
|
||
input:
|
||
codes_dir=codes_dir,
|
||
tumor=tumor,
|
||
outputDir=outputDir,
|
||
snv=dealwithsnvindel.snv
|
||
}
|
||
call HRR{
|
||
input:
|
||
codes_dir=codes_dir,
|
||
tumor=tumor,
|
||
outputDir=outputDir,
|
||
snv=dealwithsnvindel.snv
|
||
}
|
||
call HLA{
|
||
input:
|
||
inputDir=inputDir,
|
||
outputDir=outputDir,
|
||
normal=normal,
|
||
newdir=create_dir.newdir
|
||
}
|
||
call neoantigen{
|
||
input:
|
||
codes_dir=codes_dir,
|
||
outputDir=outputDir,
|
||
normal=normal,
|
||
tumor=tumor,
|
||
hla=HLA.hla,
|
||
vcf=mutation_calling.vcf
|
||
}
|
||
call MSI{
|
||
input:
|
||
bed=bed,
|
||
codes_dir=codes_dir,
|
||
tumor=tumor,
|
||
outputDir=outputDir,
|
||
rmdupBam=rmdup.rmdup[1]
|
||
}
|
||
call hereditary{
|
||
input:
|
||
codes_dir=codes_dir,
|
||
tumor=tumor,
|
||
outputDir=outputDir,
|
||
project=project,
|
||
snv=dealwithsnvindel.snv
|
||
}
|
||
call auto_report{
|
||
input:
|
||
outputDir=outputDir,
|
||
tumor=tumor,
|
||
normal=normal,
|
||
cancer=cancer,
|
||
codes_dir=codes_dir,
|
||
qc_2=qc_2.qc,
|
||
concordance=conpair.concordance,
|
||
snv_result=dealwithsnvindel.snv,
|
||
cnv_result=cnvkit.cnv,
|
||
fusion_result=fusion.fusion,
|
||
tmb=TMB.tmb
|
||
}
|
||
}
|
||
|
||
|
||
#create project directory
|
||
task create_dir{
|
||
String outputDir
|
||
command <<<
|
||
#创建目录
|
||
if [ ! -d ${outputDir} ];then
|
||
mkdir ${outputDir}
|
||
fi
|
||
|
||
#创建qc目录
|
||
if [ ! -d ${outputDir}/qc ];then
|
||
mkdir ${outputDir}/qc
|
||
fi
|
||
|
||
#创建alignment目录
|
||
if [ ! -d ${outputDir}/alignment ];then
|
||
mkdir ${outputDir}/alignment
|
||
fi
|
||
|
||
#创建mutation目录
|
||
if [ ! -d ${outputDir}/mutation ];then
|
||
mkdir ${outputDir}/mutation
|
||
fi
|
||
|
||
#创建cnv目录
|
||
if [ ! -d ${outputDir}/cnvkit ];then
|
||
mkdir ${outputDir}/cnvkit
|
||
fi
|
||
|
||
#创建chemo目录
|
||
if [ ! -d ${outputDir}/chemo ];then
|
||
mkdir ${outputDir}/chemo
|
||
fi
|
||
|
||
#创建fusion目录
|
||
if [ ! -d ${outputDir}/fusion ];then
|
||
mkdir ${outputDir}/fusion
|
||
fi
|
||
|
||
#创建MSI目录
|
||
if [ ! -d ${outputDir}/MSI ];then
|
||
mkdir ${outputDir}/MSI
|
||
fi
|
||
|
||
#创建MMR目录
|
||
if [ ! -d ${outputDir}/MMR ];then
|
||
mkdir ${outputDir}/MMR
|
||
fi
|
||
|
||
#创建HPD目录
|
||
if [ ! -d ${outputDir}/HPD ];then
|
||
mkdir ${outputDir}/HPD
|
||
fi
|
||
|
||
##创建HRR目录
|
||
if [ ! -d ${outputDir}/HRR ];then
|
||
mkdir ${outputDir}/HRR
|
||
fi
|
||
|
||
#创建neoantigen目录
|
||
if [ ! -d ${outputDir}/neoantigen/HLA ];then
|
||
mkdir -p ${outputDir}/neoantigen/HLA
|
||
fi
|
||
|
||
#创建hereditary目录
|
||
if [ ! -d ${outputDir}/hereditary ];then
|
||
mkdir ${outputDir}/hereditary
|
||
fi
|
||
|
||
#创建report目录
|
||
if [ ! -d ${outputDir}/report ];then
|
||
mkdir -p ${outputDir}/report
|
||
fi
|
||
|
||
>>>
|
||
output{
|
||
String newdir = "${outputDir}/report"
|
||
}
|
||
}
|
||
|
||
#generator raw fastq to clean fastq
|
||
task changehead{
|
||
String newdir
|
||
String tumor
|
||
String inputDir
|
||
String outputDir
|
||
String read
|
||
|
||
command <<<
|
||
seqkit replace -p "/${read}" -r " ${read}" -j 10 ${inputDir}/*_${tumor}_*${read}.fq.gz -o ${outputDir}/qc/${tumor}_changehead_R${read}.fq
|
||
>>>
|
||
|
||
output{
|
||
String outputFile = "${outputDir}/qc/${tumor}_changehead_R${read}.fq"
|
||
}
|
||
|
||
}
|
||
task qc_normal{
|
||
Array[String] data
|
||
String normal
|
||
String inputDir
|
||
String outputDir
|
||
|
||
command <<<
|
||
echo processing raw reads with fastp
|
||
fastp -i ${inputDir}/*_${normal}_*1.fq.gz -o ${outputDir}/qc/${normal}_clean_R1.fq.gz \
|
||
-I ${inputDir}/*_${normal}_*2.fq.gz -O ${outputDir}/qc/${normal}_clean_R2.fq.gz \
|
||
-w 10 \
|
||
--correction \
|
||
--overlap_len_require 20 \
|
||
--adapter_sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCA \
|
||
--adapter_sequence_r2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
|
||
-j ${outputDir}/qc/${normal}.json \
|
||
-h ${outputDir}/qc/${normal}.html --report_title ${normal} -e 20
|
||
|
||
>>>
|
||
|
||
output{
|
||
Array[String] outputFile = [
|
||
"${outputDir}/qc/${normal}_clean_R1.fq.gz",
|
||
"${outputDir}/qc/${normal}_clean_R2.fq.gz",
|
||
"${outputDir}/qc/${normal}.json",
|
||
"${outputDir}/qc/${normal}.html"
|
||
]
|
||
}
|
||
}
|
||
task qc{
|
||
String tumor
|
||
String inputDir
|
||
String outputDir
|
||
String codes_dir
|
||
Array[String] data
|
||
|
||
command <<<
|
||
echo processing raw reads with fastp
|
||
fastp -i ${outputDir}/qc/${tumor}_changehead_R1.fq -o ${outputDir}/qc/${tumor}_clean_R1.fq.gz \
|
||
-I ${outputDir}/qc/${tumor}_changehead_R2.fq -O ${outputDir}/qc/${tumor}_clean_R2.fq.gz \
|
||
-w 10 \
|
||
-U --umi_loc=per_read --umi_len=4 --umi_prefix=UMI --umi_skip=3 \
|
||
--disable_trim_poly_g \
|
||
--disable_quality_filtering \
|
||
--adapter_sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCA \
|
||
--adapter_sequence_r2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
|
||
--correction \
|
||
--overlap_len_require 10 \
|
||
-j ${outputDir}/qc/${tumor}.json \
|
||
-h ${outputDir}/qc/${tumor}.html --report_title ${tumor}
|
||
|
||
# UMI
|
||
python3 ${codes_dir}/UMI_Project.py -i ${inputDir} -o ${outputDir} -t ${tumor}
|
||
|
||
rm ${outputDir}/qc/${tumor}_changehead_R1.fq ${outputDir}/qc/${tumor}_changehead_R2.fq
|
||
>>>
|
||
|
||
output{
|
||
Array[String] outputFile = [
|
||
"${outputDir}/qc/${tumor}_clean_R1.fq.gz",
|
||
"${outputDir}/qc/${tumor}_clean_R2.fq.gz",
|
||
"${outputDir}/qc/${tumor}.json",
|
||
"${outputDir}/qc/${tumor}.html"
|
||
]
|
||
}
|
||
}
|
||
#alignment clean fastq to reference
|
||
task alignment_bwa{
|
||
String name
|
||
String ref
|
||
String outputDir
|
||
Array[String] qc_T
|
||
Array[String] qc_N
|
||
command<<<
|
||
|
||
bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} \
|
||
${outputDir}/qc/${name}_clean_R1.fq.gz ${outputDir}/qc/${name}_clean_R2.fq.gz | \
|
||
samtools view -@ 5 -bh -o - | samtools sort -@ 5 -o ${outputDir}/alignment/${name}.sorted.bam
|
||
|
||
samtools index ${outputDir}/alignment/${name}.sorted.bam
|
||
|
||
>>>
|
||
|
||
output{
|
||
String sorted = "${outputDir}/alignment/${name}.sorted.bam"
|
||
}
|
||
}
|
||
##rmdup
|
||
task rmdup{
|
||
String name
|
||
String outputDir
|
||
String ref
|
||
String bam
|
||
command<<<
|
||
gencore -i ${outputDir}/alignment/${name}.sorted.bam -o ${outputDir}/alignment/${name}.rmdup.bam \
|
||
-r ${ref} -j ${outputDir}/qc/${name}_rmdup.json -h ${outputDir}/qc/${name}_rmdup.html
|
||
|
||
samtools index ${outputDir}/alignment/${name}.rmdup.bam
|
||
>>>
|
||
output{
|
||
String rmdup = "${outputDir}/alignment/${name}.rmdup.bam"
|
||
}
|
||
}
|
||
# generater mpileup file
|
||
task generater_mpileup{
|
||
String rmdup
|
||
String name
|
||
String ref
|
||
String bed
|
||
String outputDir
|
||
|
||
command<<<
|
||
|
||
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} \
|
||
${outputDir}/alignment/${name}.rmdup.bam -o ${outputDir}/alignment/${name}.pileup
|
||
|
||
>>>
|
||
|
||
output{
|
||
String pileup = "${outputDir}/alignment/${name}.pileup"
|
||
}
|
||
}
|
||
task mutation_calling{
|
||
String tumor
|
||
String rmdup
|
||
String outputDir
|
||
String ref
|
||
String bed
|
||
String codes_dir
|
||
String normal
|
||
command<<<
|
||
##1条call
|
||
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
|
||
-f 0.002 -N ${tumor} -b ${outputDir}/alignment/${tumor}.rmdup.bam \
|
||
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
||
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${tumor} -E -f 0.002 >${outputDir}/mutation/${tumor}.1r.snp.indel.vcf
|
||
##提取2条矫正的序列和NM<4的序列
|
||
python3 ${codes_dir}/fetch_bam.py ${outputDir} ${tumor}
|
||
##2条矫正的call
|
||
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
|
||
-f 0.0001 -N ${tumor}_2r -b ${outputDir}/alignment/${tumor}.2r.rmdup.bam \
|
||
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
||
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${tumor} -E -f 0.001 >${outputDir}/mutation/${tumor}.2r.snp.indel.vcf
|
||
|
||
##merge突变,以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域(AF<0.01)进行矫正。
|
||
perl ${codes_dir}/1r_plus_2r.pl ${outputDir} ${tumor}
|
||
|
||
##去除control,control 5%以上的cfDNA都去除,1%-5%的,cfDNA/control<5 的都去除。
|
||
perl ${codes_dir}/t_subs_n_vcf.pl ${outputDir} ${tumor} ${normal}
|
||
|
||
##add msi and strandbias flag
|
||
perl ${codes_dir}/add_flag.pl ${outputDir} ${tumor} ${normal}
|
||
|
||
##add malt flag
|
||
grep -v ^# ${outputDir}/mutation/${tumor}_substract_${normal}.snp.indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - \
|
||
>${outputDir}/mutation/${tumor}_substract_${normal}.snp.indel.bed
|
||
|
||
samtools mpileup -aBq 20 -Q 20 -f ${ref} -l ${outputDir}/mutation/${tumor}_substract_${normal}.snp.indel.bed \
|
||
${outputDir}/alignment/${tumor}.2r.rmdup.bam -o ${outputDir}/alignment/${tumor}_substract_${normal}.snp.indel.pileup
|
||
|
||
python ${codes_dir}/add_malt.py ${outputDir} ${tumor} ${normal}
|
||
|
||
##filter flag
|
||
perl ${codes_dir}/filter_flag.pl ${outputDir} ${tumor} ${normal}
|
||
>>>
|
||
|
||
output{
|
||
String vcf = "${outputDir}/mutation/${tumor}_substract_${normal}.filter.flag.snp.indel.vcf"
|
||
}
|
||
}
|
||
task mutation_calling_normal{
|
||
String normal
|
||
String rmdup
|
||
String outputDir
|
||
String ref
|
||
String bed
|
||
|
||
command<<<
|
||
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
|
||
-f 0.01 -N ${normal} -b ${outputDir}/alignment/${normal}.rmdup.bam \
|
||
-Q 20 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|
||
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${normal} -E -f 0.01 >${outputDir}/mutation/${normal}.snp.indel.vcf
|
||
>>>
|
||
|
||
output{
|
||
String vcf = "${outputDir}/mutation/${normal}.snp.indel.vcf"
|
||
}
|
||
}
|
||
# hotspot
|
||
task hotspot{
|
||
String tumor
|
||
String outputDir
|
||
String ref
|
||
String rmdupBam
|
||
String codes_dir
|
||
command<<<
|
||
|
||
mkdir -p ${outputDir}/mutation/hotspot
|
||
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l /dataseq/jmdna/codes/public/hotspot.bed ${rmdupBam} -o ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup
|
||
java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.001 --min-avg-qual 20 --output-vcf 1 --variants --p-value 0.99 --min-reads2 2 --strand-filter 0 >${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf
|
||
# java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.002 --min-avg-qual 20 --output-vcf 1 --variants --p-value 0.99 --min-reads2 3 --strand-filter 1 >${outputDir}/mutation/hotspot/${tumor}.hotspot.H.snp.indel.vcf
|
||
# perl ${codes_dir}/hotspot.hvl.pl ${outputDir} ${tumor}
|
||
|
||
# if [ -e "${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf" ]; then
|
||
table_annovar.pl \
|
||
${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf \
|
||
/dataseq/jmdna/software/annovar/humandb/ \
|
||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
||
-protocol refGene \
|
||
-argument '-hgvs' \
|
||
-operation g \
|
||
--outfile ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno
|
||
|
||
# fi
|
||
perl ${codes_dir}/hotspot.filter.pl ${outputDir} ${tumor}
|
||
>>>
|
||
output{
|
||
String hotspot = "${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.filter.anno.hg19_multianno.txt"
|
||
}
|
||
}
|
||
task annovar{
|
||
String codes_dir
|
||
String tumor
|
||
String normal
|
||
String outputDir
|
||
String tvcf
|
||
String nvcf
|
||
command<<<
|
||
perl ${codes_dir}/t_subs_n_vcf.pl ${outputDir} ${tumor} ${normal}
|
||
|
||
table_annovar.pl \
|
||
${outputDir}/mutation/${tumor}_substract_${normal}.filter.flag.snp.indel.vcf \
|
||
/dataseq/jmdna/software/annovar/humandb/ \
|
||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
|
||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
|
||
--intronhgvs 50 \
|
||
-operation g,f,f,f,f,f,f,f,f,f,r \
|
||
--outfile ${outputDir}/mutation/${tumor}.snp.indel.Somatic.annoall
|
||
|
||
table_annovar.pl \
|
||
${outputDir}/mutation/${normal}.snp.indel.vcf \
|
||
/dataseq/jmdna/software/annovar/humandb/ \
|
||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
||
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
|
||
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
|
||
--intronhgvs 50 \
|
||
-operation g,f,f,f,f,f,f,f,f,f,r \
|
||
--outfile ${outputDir}/mutation/${tumor}.snp.indel.Germline.anno
|
||
|
||
>>>
|
||
output{
|
||
Array[String] anno = [
|
||
"${outputDir}/mutation/${tumor}.snp.indel.Germline.anno.hg19_multianno.txt",
|
||
"${outputDir}/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno.txt",
|
||
]
|
||
}
|
||
}
|
||
task qc_2{
|
||
String ref
|
||
String bed
|
||
String name
|
||
String tumor
|
||
String outputDir
|
||
String codes_dir
|
||
#for task chain
|
||
Array[String] anno
|
||
|
||
command <<<
|
||
|
||
samtools flagstat -@ 10 ${outputDir}/alignment/${name}.rmdup.bam >${outputDir}/qc/${name}.rmdup.flagstat
|
||
samtools stats --reference ${ref} -t ${bed} -@ 10 ${outputDir}/alignment/${name}.rmdup.bam > ${outputDir}/alignment/${name}.stat
|
||
mkdir ${outputDir}/qc/${name}_sorted_bamdst ${outputDir}/qc/${name}_rmdup_bamdst
|
||
bamdst -p ${bed} -o ${outputDir}/qc/${name}_sorted_bamdst ${outputDir}/alignment/${name}.sorted.bam
|
||
bamdst -p ${bed} -o ${outputDir}/qc/${name}_rmdup_bamdst ${outputDir}/alignment/${name}.rmdup.bam
|
||
Rscript ${codes_dir}/InsertAndDepthStat.R ${outputDir}/qc/${name}_InsertAndDepthStat ${outputDir}/qc/${name}_rmdup_bamdst/insertsize.plot ${outputDir}/qc/${name}_rmdup_bamdst/depth_distribution.plot
|
||
|
||
python3 ${codes_dir}/qc_stat_umi.py ${outputDir} ${name} ${tumor}
|
||
|
||
>>>
|
||
output{
|
||
String qc = "${outputDir}/qc/${name}_qc.txt"
|
||
}
|
||
}
|
||
task fusion{
|
||
String ref
|
||
String codes_dir
|
||
String tumor
|
||
String cancer
|
||
String outputDir
|
||
#for task chain
|
||
String Bam
|
||
String project
|
||
|
||
command<<<
|
||
java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \
|
||
I=${outputDir}/alignment/${tumor}.sorted.bam \
|
||
O=${outputDir}/alignment/${tumor}.picard.rmdup.bam \
|
||
CREATE_INDEX=true \
|
||
M=${outputDir}/alignment/${tumor}.picard.rmdup.metrics.txt \
|
||
R=${ref}
|
||
|
||
# Extract the discordant paired-end alignments.
|
||
samtools view -b -F 1294 ${outputDir}/alignment/${tumor}.picard.rmdup.bam > ${outputDir}/fusion/${tumor}.discordants.bam
|
||
|
||
# Extract the split-read alignments
|
||
samtools view -h ${outputDir}/alignment/${tumor}.picard.rmdup.bam \
|
||
| /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
|
||
| samtools view -Sb - \
|
||
> ${outputDir}/fusion/${tumor}.splitters.bam
|
||
|
||
lumpyexpress \
|
||
-B ${outputDir}/alignment/${tumor}.picard.rmdup.bam \
|
||
-S ${outputDir}/fusion/${tumor}.splitters.bam \
|
||
-D ${outputDir}/fusion/${tumor}.discordants.bam \
|
||
-o ${outputDir}/fusion/${tumor}.fusion.vcf
|
||
|
||
perl ${codes_dir}/fusion.filter.pl ${outputDir}/fusion/${tumor}.fusion.vcf ${outputDir}/fusion/${tumor}.fusion.filter.vcf
|
||
|
||
svtyper \
|
||
-B ${outputDir}/alignment/${tumor}.picard.rmdup.bam \
|
||
-i ${outputDir}/fusion/${tumor}.fusion.filter.vcf \
|
||
-T ${ref} \
|
||
-o ${outputDir}/fusion/${tumor}.fusion.gt.vcf
|
||
|
||
table_annovar.pl \
|
||
${outputDir}/fusion/${tumor}.fusion.gt.vcf \
|
||
/dataseq/jmdna/software/annovar/humandb/ \
|
||
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
|
||
-protocol refGene \
|
||
-operation g \
|
||
--outfile ${outputDir}/fusion/${tumor}.fusion
|
||
|
||
perl ${codes_dir}/fusion.reanno.624.pl ${outputDir}/qc/${tumor}_sorted_bamdst/depth.tsv.gz ${outputDir} ${tumor}
|
||
perl ${codes_dir}/fusion_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project} ${cancer}
|
||
|
||
>>>
|
||
|
||
output{
|
||
String fusion = "${outputDir}/fusion/${tumor}.fusion.pos.txt"
|
||
}
|
||
}
|
||
task TMB{
|
||
String codes_dir
|
||
String tumor
|
||
String outputDir
|
||
Array[String] anno
|
||
|
||
command <<<
|
||
perl ${codes_dir}/tmb_umi.pl ${outputDir} ${tumor}
|
||
python3 ${codes_dir}/correct_f1r1_tmb.py ${outputDir} ${tumor}
|
||
>>>
|
||
output{
|
||
String tmb="${outputDir}/mutation/${tumor}.tmb.txt"
|
||
}
|
||
}
|
||
|
||
task conpair{
|
||
String tumor
|
||
String normal
|
||
String outputDir
|
||
String ref
|
||
#for task chain
|
||
Array[String] rmdupBam
|
||
command <<<
|
||
|
||
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
|
||
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
|
||
-B ${outputDir}/alignment/${normal}.rmdup.bam \
|
||
-O ${outputDir}/alignment/${normal}.gatk.mpileup \
|
||
-R ${ref} \
|
||
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
|
||
|
||
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
|
||
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
|
||
-B ${outputDir}/alignment/${tumor}.rmdup.bam \
|
||
-O ${outputDir}/alignment/${tumor}.gatk.mpileup \
|
||
-R ${ref} \
|
||
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
|
||
|
||
sed -i 's/^chr//g' ${outputDir}/alignment/${normal}.gatk.mpileup
|
||
sed -i 's/^chr//g' ${outputDir}/alignment/${tumor}.gatk.mpileup
|
||
|
||
|
||
python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \
|
||
-T ${outputDir}/alignment/${tumor}.gatk.mpileup \
|
||
-N ${outputDir}/alignment/${normal}.gatk.mpileup \
|
||
-H \
|
||
-O ${outputDir}/qc/${tumor}_concordance.txt
|
||
|
||
python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \
|
||
-T ${outputDir}/alignment/${tumor}.gatk.mpileup \
|
||
-N ${outputDir}/alignment/${normal}.gatk.mpileup \
|
||
-O ${outputDir}/qc/${tumor}_contamination.txt
|
||
|
||
>>>
|
||
output{
|
||
Array[String] concordance = [
|
||
"${outputDir}/report/qc/${tumor}_concordance.txt",
|
||
"${outputDir}/report/qc/${tumor}_contamination.txt",
|
||
]
|
||
}
|
||
}
|
||
|
||
task chemoTherapy{
|
||
String codes_dir
|
||
String normal
|
||
String outputDir
|
||
String ref
|
||
String project
|
||
String rmdupBam
|
||
command <<<
|
||
${codes_dir}/chemo/chemo_panel.py -p ${project} -o ${outputDir} --n ${normal} --r ${ref}
|
||
>>>
|
||
}
|
||
|
||
task tumor_content{
|
||
String ref
|
||
String tumor
|
||
String normal
|
||
String outputDir
|
||
String gc_wiggle
|
||
String codes_dir
|
||
#for task chain
|
||
Array[String] pileup
|
||
|
||
command <<<
|
||
|
||
sequenza-utils bam2seqz -p -gc ${gc_wiggle} \
|
||
-F ${ref} \
|
||
-n ${outputDir}/alignment/${normal}.pileup \
|
||
-t ${outputDir}/alignment/${tumor}.pileup | gzip >${outputDir}/qc/target_${tumor}.200base.seqz.gz
|
||
sequenza-utils seqz_binning -w 200 -s ${outputDir}/qc/target_${tumor}.200base.seqz.gz | gzip > ${outputDir}/qc/target_${tumor}.200base.small.seqz.gz
|
||
|
||
Rscript ${codes_dir}/sequenza.R ${tumor} ${outputDir}/qc/target_${tumor}.200base.small.seqz.gz ${outputDir}/qc/sequenza || echo "sequenza failed!"
|
||
|
||
>>>
|
||
output{
|
||
String purity = "${outputDir}/qc/sequenza/${tumor}_CP_contours.pdf"
|
||
}
|
||
}
|
||
|
||
task cnvkit{
|
||
String tumor
|
||
String outputDir
|
||
String cancer
|
||
String codes_dir
|
||
String project
|
||
#for task chain
|
||
String rmdupBam
|
||
String purity
|
||
|
||
command <<<
|
||
|
||
echo run cnvkit batch to processing cnv calling
|
||
cnvkit.py batch \
|
||
${rmdupBam} \
|
||
-r ${codes_dir}/cnvkit/624gene_pool_normal_reference.cnn \
|
||
--drop-low-coverage --scatter --diagram --output-dir ${outputDir}/cnvkit
|
||
|
||
cnvkit.py scatter \
|
||
${outputDir}/cnvkit/${tumor}.rmdup.cnr -s ${outputDir}/cnvkit/${tumor}.rmdup.cns \
|
||
--y-max 3 --y-min -3 \
|
||
--title ${tumor}.cns \
|
||
-o ${outputDir}/cnvkit/${tumor}.cnv.png
|
||
|
||
if [ -e "${outputDir}/qc/sequenza/${tumor}_confints_CP.txt" ]; then
|
||
# absolute copy number
|
||
cnvkit.py call \
|
||
-m clonal \
|
||
${outputDir}/cnvkit/${tumor}.rmdup.cns \
|
||
-y \
|
||
--purity `head -n2 ${outputDir}/qc/sequenza/${tumor}_confints_CP.txt |tail -n1|cut -f1` \
|
||
--drop-low-coverage \
|
||
--filter ampdel \
|
||
-o ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn.hc
|
||
fi
|
||
perl ${codes_dir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn
|
||
perl ${codes_dir}/cnv_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project} ${cancer}
|
||
>>>
|
||
|
||
output{
|
||
String cnv = "${outputDir}/cnvkit/${tumor}.rmdup.cns"
|
||
}
|
||
}
|
||
|
||
task dealwithsnvindel{
|
||
String codes_dir
|
||
String project
|
||
String outputDir
|
||
String tumor
|
||
Array[String] anno
|
||
String cancer
|
||
|
||
command <<<
|
||
|
||
perl ${codes_dir}/pick_variant_control_umi.pl ${outputDir} ${tumor}
|
||
python3 ${codes_dir}/correct_f1r1.py ${outputDir} ${tumor}
|
||
perl ${codes_dir}/pick_mut_splice_promoter_control.pl ${codes_dir} ${tumor} ${outputDir} ${project}
|
||
perl ${codes_dir}/germline_targetTherapy.pl ${tumor} ${outputDir} ${project} ${cancer}
|
||
perl ${codes_dir}/targetTherapy.pl ${tumor} ${outputDir} ${project} ${cancer}
|
||
|
||
>>>
|
||
output{
|
||
String snv = "${outputDir}/mutation/${tumor}.snvindel.pos.txt"
|
||
}
|
||
}
|
||
|
||
task MMR{
|
||
String codes_dir
|
||
String tumor
|
||
String outputDir
|
||
String snv
|
||
command <<<
|
||
perl ${codes_dir}/mmr_controlsample.pl ${outputDir} ${tumor}
|
||
>>>
|
||
output{
|
||
String mmr_out = "${outputDir}/MMR/${tumor}_mmr.txt"
|
||
}
|
||
}
|
||
|
||
task HRR{
|
||
String codes_dir
|
||
String tumor
|
||
String outputDir
|
||
String snv
|
||
command <<<
|
||
perl ${codes_dir}/hrr_controlsample_cfDNA.pl ${outputDir} ${tumor}
|
||
>>>
|
||
output{
|
||
String mmr_out = "${outputDir}/HRR/${tumor}_hrr.txt"
|
||
}
|
||
}
|
||
|
||
task HLA{
|
||
String inputDir
|
||
String outputDir
|
||
String normal
|
||
String newdir
|
||
command <<<
|
||
|
||
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${outputDir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${inputDir}/*_${normal}_*1.fq.gz
|
||
samtools bam2fq ${outputDir}/neoantigen/HLA/fished_1.bam > ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq
|
||
rm ${outputDir}/neoantigen/HLA/fished_1.bam
|
||
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${outputDir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${inputDir}/*_${normal}_*2.fq.gz
|
||
samtools bam2fq ${outputDir}/neoantigen/HLA/fished_2.bam > ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq
|
||
rm ${outputDir}/neoantigen/HLA/fished_2.bam
|
||
/dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py -i ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq --dna -v --prefix ${normal} -o ${outputDir}/neoantigen/HLA/
|
||
|
||
>>>
|
||
output{
|
||
String hla = "${outputDir}/neoantigen/HLA/${normal}_result.tsv"
|
||
}
|
||
}
|
||
task neoantigen{
|
||
String codes_dir
|
||
String outputDir
|
||
String normal
|
||
String tumor
|
||
String hla
|
||
String vcf
|
||
command <<<
|
||
sh ${codes_dir}/predict_neoantigen.sh ${outputDir} ${normal} ${tumor} ${codes_dir}
|
||
>>>
|
||
output{
|
||
String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${tumor}.all_epitopes.netchop.txt"
|
||
}
|
||
}
|
||
|
||
task MSI{
|
||
String bed
|
||
String codes_dir
|
||
String tumor
|
||
String outputDir
|
||
String rmdupBam
|
||
|
||
command <<<
|
||
##msings
|
||
echo ${outputDir}/alignment/${tumor}.rmdup.bam >${outputDir}/MSI/bam.list
|
||
${codes_dir}/MSI/run_msings.sh ${outputDir}/MSI/bam.list
|
||
mv ${outputDir}/alignment/${tumor}.rmdup ${outputDir}/MSI/
|
||
mv ${outputDir}/alignment/msi_run_log.txt ${outputDir}/MSI/
|
||
mv ${outputDir}/alignment/Combined_MSI.txt ${outputDir}/MSI/
|
||
##msisensor2
|
||
msisensor2 msi -M /dataseq/jmdna/software/msisensor2/models_hg19_GRCh37 -t ${rmdupBam} -e ${bed} -b 10 -o ${outputDir}/MSI/${tumor}.msi
|
||
>>>
|
||
output{
|
||
String target="${outputDir}/MSI/${tumor}.msi"
|
||
}
|
||
}
|
||
|
||
task hereditary{
|
||
String codes_dir
|
||
String tumor
|
||
String outputDir
|
||
String project
|
||
String snv
|
||
command <<<
|
||
python3 /dataseq/jmdna/codes/624/hereditary/hereditary.py -p ${project} -o ${outputDir} --n ${tumor}
|
||
>>>
|
||
}
|
||
|
||
task auto_report{
|
||
String tumor
|
||
String normal
|
||
String cancer
|
||
String outputDir
|
||
String codes_dir
|
||
Array[String] qc_2
|
||
Array[String] concordance
|
||
String snv_result
|
||
String cnv_result
|
||
String fusion_result
|
||
String tmb
|
||
|
||
|
||
command <<<
|
||
perl ${codes_dir}/hpd_control_umi.pl ${outputDir} ${tumor}
|
||
python3 ${codes_dir}/sample_post.py -s ${tumor} -o ${outputDir} --n ${normal}
|
||
perl ${codes_dir}/indication.pl ${outputDir} ${cancer}
|
||
python3 ${codes_dir}/qc_check_control_cfdna.py ${outputDir} ${tumor} ${normal}
|
||
python3 ${codes_dir}/drug_dedup.py ${outputDir} ${tumor}
|
||
python3 ${codes_dir}/report_template/624gene_cfdna_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}
|
||
python3 ${codes_dir}/wdl_check.py -o ${outputDir}
|
||
|
||
perl ${codes_dir}/file_format_change.pl ${outputDir} ${tumor}
|
||
cp ${outputDir}/qc/qc_fail.txt ${outputDir}/report/qc_fail.txt
|
||
cp ${outputDir}/qc/${tumor}_qc.txt ${outputDir}/report/
|
||
cp ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn ${outputDir}/report/
|
||
cp ${outputDir}/cnvkit/${tumor}.cnv.png ${outputDir}/report/
|
||
cp ${outputDir}/fusion/${tumor}.fuison.vus.txt ${outputDir}/report/
|
||
cp ${outputDir}/fusion/${tumor}.fusion.reanno.vcf ${outputDir}/report/
|
||
# cp ${outputDir}/mutation/*filter* ${outputDir}/report/
|
||
cp ${outputDir}/mutation/${tumor}.snvindel.Germline.target.txt ${outputDir}/report/
|
||
cp ${outputDir}/mutation/${tumor}.nontarget.vus.txt ${outputDir}/report/
|
||
cp ${outputDir}/mutation/${tumor}.snvindel.vus.txt ${outputDir}/report/
|
||
cp ${outputDir}/mutation/${tumor}.target.splicing.txt ${outputDir}/report/
|
||
cp ${outputDir}/mutation/${tumor}.target.promoter.txt ${outputDir}/report/
|
||
cp ${outputDir}/mutation/${tumor}.tmb.txt ${outputDir}/report/
|
||
cp ${outputDir}/MMR/${tumor}.mmr.pre.txt ${outputDir}/report/
|
||
cp ${outputDir}/HRR/${tumor}.hrr.pre.txt ${outputDir}/report/
|
||
cp ${outputDir}/HPD/${tumor}.hpd.pre.txt ${outputDir}/report/
|
||
cp ${outputDir}/hereditary/${tumor}.hereditary.pre.txt ${outputDir}/report/
|
||
python3 ${codes_dir}/txt_xlsx.py ${outputDir} ${tumor}
|
||
>>>
|
||
}
|