pipeline/other/624.wdl

987 lines
30 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

workflow pancancer_singlesample_umi{
String normal
String tumor
String cancer
String project = "624gene"
String raw_bed = "/dataseq/jmdna/database/bed/624.merge.bed"
String bed = "/dataseq/jmdna/database/bed/624plus20bp_merge.bed"
String outputDir
String inputDir
String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
String codes_dir = "/dataseq/jmdna/codes/624"
String accessBed = "/dataseq/jmdna/software/cnvkit-0.9.7/data/access-5k-mappable.hg19.bed"
String annotateGene = "/dataseq/jmdna/software/cnvkit-0.9.7/data/refFlat.txt"
String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz"
#创建目录
call create_dir{
input:
outputDir=outputDir
}
scatter (read in ["1","2"]){
call changehead{
input:
newdir=create_dir.newdir,
tumor=tumor,
inputDir=inputDir,
outputDir=outputDir,
read=read
}
}
call qc{
input:
data=changehead.outputFile,
tumor=tumor,
inputDir=inputDir,
outputDir=outputDir,
codes_dir=codes_dir
}
call qc_normal{
input:
data=changehead.outputFile,
normal=normal,
inputDir=inputDir,
outputDir=outputDir
}
scatter (name in [normal,tumor]){
call alignment_bwa{
input:
name=name,
outputDir=outputDir,
ref=ref,
qc_T=qc.outputFile,
qc_N=qc_normal.outputFile
}
call rmdup{
input:
name=name,
outputDir=outputDir,
ref=ref,
bam=alignment_bwa.sorted
}
call generater_mpileup{
input:
rmdup=rmdup.rmdup,
name=name,
ref=ref,
bed=bed,
outputDir=outputDir
}
}
call mutation_calling{
input:
tumor=tumor,
outputDir=outputDir,
ref=ref,
bed=bed,
normal=normal,
codes_dir=codes_dir,
rmdup=rmdup.rmdup[1]
}
call mutation_calling_normal{
input:
normal=normal,
outputDir=outputDir,
ref=ref,
bed=bed,
rmdup=rmdup.rmdup[0]
}
call hotspot{
input:
rmdupBam=rmdup.rmdup[1],
tumor=tumor,
outputDir=outputDir,
ref=ref,
codes_dir=codes_dir
}
call annovar{
input:
codes_dir=codes_dir,
tumor=tumor,
normal=normal,
outputDir=outputDir,
tvcf=mutation_calling.vcf,
nvcf=mutation_calling_normal.vcf
}
scatter (name in [normal,tumor]){
call qc_2{
input:
ref=ref,
bed=raw_bed,
name=name,
tumor=tumor,
outputDir=outputDir,
codes_dir=codes_dir,
anno=annovar.anno
}
}
call fusion{
input:
ref=ref,
codes_dir=codes_dir,
tumor=tumor,
cancer=cancer,
outputDir=outputDir,
Bam=alignment_bwa.sorted[1],
project=project
}
call TMB{
input:
codes_dir=codes_dir,
tumor=tumor,
outputDir=outputDir,
anno=annovar.anno
}
call conpair{
input:
tumor=tumor,
normal=normal,
ref=ref,
outputDir=outputDir,
rmdupBam=rmdup.rmdup
}
call chemoTherapy{
input:
codes_dir=codes_dir,
normal=normal,
outputDir=outputDir,
ref=ref,
project=project,
rmdupBam=rmdup.rmdup[0]
}
call tumor_content{
input:
ref=ref,
tumor=tumor,
normal=normal,
outputDir=outputDir,
gc_wiggle=gc_wiggle,
codes_dir=codes_dir,
pileup=generater_mpileup.pileup
}
call cnvkit{
input:
tumor=tumor,
cancer=cancer,
outputDir=outputDir,
rmdupBam=rmdup.rmdup[1],
codes_dir=codes_dir,
project=project,
purity=tumor_content.purity
}
call dealwithsnvindel{
input:
codes_dir=codes_dir,
project=project,
outputDir=outputDir,
tumor=tumor,
cancer=cancer,
anno=annovar.anno
}
call MMR{
input:
codes_dir=codes_dir,
tumor=tumor,
outputDir=outputDir,
snv=dealwithsnvindel.snv
}
call HRR{
input:
codes_dir=codes_dir,
tumor=tumor,
outputDir=outputDir,
snv=dealwithsnvindel.snv
}
call HLA{
input:
inputDir=inputDir,
outputDir=outputDir,
normal=normal,
newdir=create_dir.newdir
}
call neoantigen{
input:
codes_dir=codes_dir,
outputDir=outputDir,
normal=normal,
tumor=tumor,
hla=HLA.hla,
vcf=mutation_calling.vcf
}
call MSI{
input:
bed=bed,
codes_dir=codes_dir,
tumor=tumor,
outputDir=outputDir,
rmdupBam=rmdup.rmdup[1]
}
call hereditary{
input:
codes_dir=codes_dir,
tumor=tumor,
outputDir=outputDir,
project=project,
snv=dealwithsnvindel.snv
}
call auto_report{
input:
outputDir=outputDir,
tumor=tumor,
normal=normal,
cancer=cancer,
codes_dir=codes_dir,
qc_2=qc_2.qc,
concordance=conpair.concordance,
snv_result=dealwithsnvindel.snv,
cnv_result=cnvkit.cnv,
fusion_result=fusion.fusion,
tmb=TMB.tmb
}
}
#create project directory
task create_dir{
String outputDir
command <<<
#创建目录
if [ ! -d ${outputDir} ];then
mkdir ${outputDir}
fi
#创建qc目录
if [ ! -d ${outputDir}/qc ];then
mkdir ${outputDir}/qc
fi
#创建alignment目录
if [ ! -d ${outputDir}/alignment ];then
mkdir ${outputDir}/alignment
fi
#创建mutation目录
if [ ! -d ${outputDir}/mutation ];then
mkdir ${outputDir}/mutation
fi
#创建cnv目录
if [ ! -d ${outputDir}/cnvkit ];then
mkdir ${outputDir}/cnvkit
fi
#创建chemo目录
if [ ! -d ${outputDir}/chemo ];then
mkdir ${outputDir}/chemo
fi
#创建fusion目录
if [ ! -d ${outputDir}/fusion ];then
mkdir ${outputDir}/fusion
fi
#创建MSI目录
if [ ! -d ${outputDir}/MSI ];then
mkdir ${outputDir}/MSI
fi
#创建MMR目录
if [ ! -d ${outputDir}/MMR ];then
mkdir ${outputDir}/MMR
fi
#创建HPD目录
if [ ! -d ${outputDir}/HPD ];then
mkdir ${outputDir}/HPD
fi
##创建HRR目录
if [ ! -d ${outputDir}/HRR ];then
mkdir ${outputDir}/HRR
fi
#创建neoantigen目录
if [ ! -d ${outputDir}/neoantigen/HLA ];then
mkdir -p ${outputDir}/neoantigen/HLA
fi
#创建hereditary目录
if [ ! -d ${outputDir}/hereditary ];then
mkdir ${outputDir}/hereditary
fi
#创建report目录
if [ ! -d ${outputDir}/report ];then
mkdir -p ${outputDir}/report
fi
>>>
output{
String newdir = "${outputDir}/report"
}
}
#generator raw fastq to clean fastq
task changehead{
String newdir
String tumor
String inputDir
String outputDir
String read
command <<<
seqkit replace -p "/${read}" -r " ${read}" -j 10 ${inputDir}/*_${tumor}_*${read}.fq.gz -o ${outputDir}/qc/${tumor}_changehead_R${read}.fq
>>>
output{
String outputFile = "${outputDir}/qc/${tumor}_changehead_R${read}.fq"
}
}
task qc_normal{
Array[String] data
String normal
String inputDir
String outputDir
command <<<
echo processing raw reads with fastp
fastp -i ${inputDir}/*_${normal}_*1.fq.gz -o ${outputDir}/qc/${normal}_clean_R1.fq.gz \
-I ${inputDir}/*_${normal}_*2.fq.gz -O ${outputDir}/qc/${normal}_clean_R2.fq.gz \
-w 10 \
--correction \
--overlap_len_require 20 \
--adapter_sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCA \
--adapter_sequence_r2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
-j ${outputDir}/qc/${normal}.json \
-h ${outputDir}/qc/${normal}.html --report_title ${normal} -e 20
>>>
output{
Array[String] outputFile = [
"${outputDir}/qc/${normal}_clean_R1.fq.gz",
"${outputDir}/qc/${normal}_clean_R2.fq.gz",
"${outputDir}/qc/${normal}.json",
"${outputDir}/qc/${normal}.html"
]
}
}
task qc{
String tumor
String inputDir
String outputDir
String codes_dir
Array[String] data
command <<<
echo processing raw reads with fastp
fastp -i ${outputDir}/qc/${tumor}_changehead_R1.fq -o ${outputDir}/qc/${tumor}_clean_R1.fq.gz \
-I ${outputDir}/qc/${tumor}_changehead_R2.fq -O ${outputDir}/qc/${tumor}_clean_R2.fq.gz \
-w 10 \
-U --umi_loc=per_read --umi_len=4 --umi_prefix=UMI --umi_skip=3 \
--disable_trim_poly_g \
--disable_quality_filtering \
--adapter_sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCA \
--adapter_sequence_r2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
--correction \
--overlap_len_require 10 \
-j ${outputDir}/qc/${tumor}.json \
-h ${outputDir}/qc/${tumor}.html --report_title ${tumor}
# UMI
python3 ${codes_dir}/UMI_Project.py -i ${inputDir} -o ${outputDir} -t ${tumor}
rm ${outputDir}/qc/${tumor}_changehead_R1.fq ${outputDir}/qc/${tumor}_changehead_R2.fq
>>>
output{
Array[String] outputFile = [
"${outputDir}/qc/${tumor}_clean_R1.fq.gz",
"${outputDir}/qc/${tumor}_clean_R2.fq.gz",
"${outputDir}/qc/${tumor}.json",
"${outputDir}/qc/${tumor}.html"
]
}
}
#alignment clean fastq to reference
task alignment_bwa{
String name
String ref
String outputDir
Array[String] qc_T
Array[String] qc_N
command<<<
bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} \
${outputDir}/qc/${name}_clean_R1.fq.gz ${outputDir}/qc/${name}_clean_R2.fq.gz | \
samtools view -@ 5 -bh -o - | samtools sort -@ 5 -o ${outputDir}/alignment/${name}.sorted.bam
samtools index ${outputDir}/alignment/${name}.sorted.bam
>>>
output{
String sorted = "${outputDir}/alignment/${name}.sorted.bam"
}
}
##rmdup
task rmdup{
String name
String outputDir
String ref
String bam
command<<<
gencore -i ${outputDir}/alignment/${name}.sorted.bam -o ${outputDir}/alignment/${name}.rmdup.bam \
-r ${ref} -j ${outputDir}/qc/${name}_rmdup.json -h ${outputDir}/qc/${name}_rmdup.html
samtools index ${outputDir}/alignment/${name}.rmdup.bam
>>>
output{
String rmdup = "${outputDir}/alignment/${name}.rmdup.bam"
}
}
# generater mpileup file
task generater_mpileup{
String rmdup
String name
String ref
String bed
String outputDir
command<<<
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} \
${outputDir}/alignment/${name}.rmdup.bam -o ${outputDir}/alignment/${name}.pileup
>>>
output{
String pileup = "${outputDir}/alignment/${name}.pileup"
}
}
task mutation_calling{
String tumor
String rmdup
String outputDir
String ref
String bed
String codes_dir
String normal
command<<<
##1条call
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
-f 0.002 -N ${tumor} -b ${outputDir}/alignment/${tumor}.rmdup.bam \
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${tumor} -E -f 0.002 >${outputDir}/mutation/${tumor}.1r.snp.indel.vcf
##提取2条矫正的序列和NM<4的序列
python3 ${codes_dir}/fetch_bam.py ${outputDir} ${tumor}
##2条矫正的call
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
-f 0.0001 -N ${tumor}_2r -b ${outputDir}/alignment/${tumor}.2r.rmdup.bam \
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${tumor} -E -f 0.001 >${outputDir}/mutation/${tumor}.2r.snp.indel.vcf
##merge突变以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域AF<0.01)进行矫正。
perl ${codes_dir}/1r_plus_2r.pl ${outputDir} ${tumor}
##去除controlcontrol 5%以上的cfDNA都去除1%-5%的cfDNA/control<5 的都去除。
perl ${codes_dir}/t_subs_n_vcf.pl ${outputDir} ${tumor} ${normal}
##add msi and strandbias flag
perl ${codes_dir}/add_flag.pl ${outputDir} ${tumor} ${normal}
##add malt flag
grep -v ^# ${outputDir}/mutation/${tumor}_substract_${normal}.snp.indel.vcf | awk '{OFS="\t"}{print $1,$2-1,$2}' - \
>${outputDir}/mutation/${tumor}_substract_${normal}.snp.indel.bed
samtools mpileup -aBq 20 -Q 20 -f ${ref} -l ${outputDir}/mutation/${tumor}_substract_${normal}.snp.indel.bed \
${outputDir}/alignment/${tumor}.2r.rmdup.bam -o ${outputDir}/alignment/${tumor}_substract_${normal}.snp.indel.pileup
python ${codes_dir}/add_malt.py ${outputDir} ${tumor} ${normal}
##filter flag
perl ${codes_dir}/filter_flag.pl ${outputDir} ${tumor} ${normal}
>>>
output{
String vcf = "${outputDir}/mutation/${tumor}_substract_${normal}.filter.flag.snp.indel.vcf"
}
}
task mutation_calling_normal{
String normal
String rmdup
String outputDir
String ref
String bed
command<<<
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
-f 0.01 -N ${normal} -b ${outputDir}/alignment/${normal}.rmdup.bam \
-Q 20 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${normal} -E -f 0.01 >${outputDir}/mutation/${normal}.snp.indel.vcf
>>>
output{
String vcf = "${outputDir}/mutation/${normal}.snp.indel.vcf"
}
}
# hotspot
task hotspot{
String tumor
String outputDir
String ref
String rmdupBam
String codes_dir
command<<<
mkdir -p ${outputDir}/mutation/hotspot
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l /dataseq/jmdna/codes/public/hotspot.bed ${rmdupBam} -o ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup
java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.001 --min-avg-qual 20 --output-vcf 1 --variants --p-value 0.99 --min-reads2 2 --strand-filter 0 >${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf
# java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.002 --min-avg-qual 20 --output-vcf 1 --variants --p-value 0.99 --min-reads2 3 --strand-filter 1 >${outputDir}/mutation/hotspot/${tumor}.hotspot.H.snp.indel.vcf
# perl ${codes_dir}/hotspot.hvl.pl ${outputDir} ${tumor}
# if [ -e "${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf" ]; then
table_annovar.pl \
${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene \
-argument '-hgvs' \
-operation g \
--outfile ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno
# fi
perl ${codes_dir}/hotspot.filter.pl ${outputDir} ${tumor}
>>>
output{
String hotspot = "${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.filter.anno.hg19_multianno.txt"
}
}
task annovar{
String codes_dir
String tumor
String normal
String outputDir
String tvcf
String nvcf
command<<<
perl ${codes_dir}/t_subs_n_vcf.pl ${outputDir} ${tumor} ${normal}
table_annovar.pl \
${outputDir}/mutation/${tumor}_substract_${normal}.filter.flag.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${outputDir}/mutation/${tumor}.snp.indel.Somatic.annoall
table_annovar.pl \
${outputDir}/mutation/${normal}.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${outputDir}/mutation/${tumor}.snp.indel.Germline.anno
>>>
output{
Array[String] anno = [
"${outputDir}/mutation/${tumor}.snp.indel.Germline.anno.hg19_multianno.txt",
"${outputDir}/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno.txt",
]
}
}
task qc_2{
String ref
String bed
String name
String tumor
String outputDir
String codes_dir
#for task chain
Array[String] anno
command <<<
samtools flagstat -@ 10 ${outputDir}/alignment/${name}.rmdup.bam >${outputDir}/qc/${name}.rmdup.flagstat
samtools stats --reference ${ref} -t ${bed} -@ 10 ${outputDir}/alignment/${name}.rmdup.bam > ${outputDir}/alignment/${name}.stat
mkdir ${outputDir}/qc/${name}_sorted_bamdst ${outputDir}/qc/${name}_rmdup_bamdst
bamdst -p ${bed} -o ${outputDir}/qc/${name}_sorted_bamdst ${outputDir}/alignment/${name}.sorted.bam
bamdst -p ${bed} -o ${outputDir}/qc/${name}_rmdup_bamdst ${outputDir}/alignment/${name}.rmdup.bam
Rscript ${codes_dir}/InsertAndDepthStat.R ${outputDir}/qc/${name}_InsertAndDepthStat ${outputDir}/qc/${name}_rmdup_bamdst/insertsize.plot ${outputDir}/qc/${name}_rmdup_bamdst/depth_distribution.plot
python3 ${codes_dir}/qc_stat_umi.py ${outputDir} ${name} ${tumor}
>>>
output{
String qc = "${outputDir}/qc/${name}_qc.txt"
}
}
task fusion{
String ref
String codes_dir
String tumor
String cancer
String outputDir
#for task chain
String Bam
String project
command<<<
java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \
I=${outputDir}/alignment/${tumor}.sorted.bam \
O=${outputDir}/alignment/${tumor}.picard.rmdup.bam \
CREATE_INDEX=true \
M=${outputDir}/alignment/${tumor}.picard.rmdup.metrics.txt \
R=${ref}
# Extract the discordant paired-end alignments.
samtools view -b -F 1294 ${outputDir}/alignment/${tumor}.picard.rmdup.bam > ${outputDir}/fusion/${tumor}.discordants.bam
# Extract the split-read alignments
samtools view -h ${outputDir}/alignment/${tumor}.picard.rmdup.bam \
| /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
| samtools view -Sb - \
> ${outputDir}/fusion/${tumor}.splitters.bam
lumpyexpress \
-B ${outputDir}/alignment/${tumor}.picard.rmdup.bam \
-S ${outputDir}/fusion/${tumor}.splitters.bam \
-D ${outputDir}/fusion/${tumor}.discordants.bam \
-o ${outputDir}/fusion/${tumor}.fusion.vcf
perl ${codes_dir}/fusion.filter.pl ${outputDir}/fusion/${tumor}.fusion.vcf ${outputDir}/fusion/${tumor}.fusion.filter.vcf
svtyper \
-B ${outputDir}/alignment/${tumor}.picard.rmdup.bam \
-i ${outputDir}/fusion/${tumor}.fusion.filter.vcf \
-T ${ref} \
-o ${outputDir}/fusion/${tumor}.fusion.gt.vcf
table_annovar.pl \
${outputDir}/fusion/${tumor}.fusion.gt.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene \
-operation g \
--outfile ${outputDir}/fusion/${tumor}.fusion
perl ${codes_dir}/fusion.reanno.624.pl ${outputDir}/qc/${tumor}_sorted_bamdst/depth.tsv.gz ${outputDir} ${tumor}
perl ${codes_dir}/fusion_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project} ${cancer}
>>>
output{
String fusion = "${outputDir}/fusion/${tumor}.fusion.pos.txt"
}
}
task TMB{
String codes_dir
String tumor
String outputDir
Array[String] anno
command <<<
perl ${codes_dir}/tmb_umi.pl ${outputDir} ${tumor}
python3 ${codes_dir}/correct_f1r1_tmb.py ${outputDir} ${tumor}
>>>
output{
String tmb="${outputDir}/mutation/${tumor}.tmb.txt"
}
}
task conpair{
String tumor
String normal
String outputDir
String ref
#for task chain
Array[String] rmdupBam
command <<<
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
-B ${outputDir}/alignment/${normal}.rmdup.bam \
-O ${outputDir}/alignment/${normal}.gatk.mpileup \
-R ${ref} \
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
-B ${outputDir}/alignment/${tumor}.rmdup.bam \
-O ${outputDir}/alignment/${tumor}.gatk.mpileup \
-R ${ref} \
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
sed -i 's/^chr//g' ${outputDir}/alignment/${normal}.gatk.mpileup
sed -i 's/^chr//g' ${outputDir}/alignment/${tumor}.gatk.mpileup
python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \
-T ${outputDir}/alignment/${tumor}.gatk.mpileup \
-N ${outputDir}/alignment/${normal}.gatk.mpileup \
-H \
-O ${outputDir}/qc/${tumor}_concordance.txt
python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \
-T ${outputDir}/alignment/${tumor}.gatk.mpileup \
-N ${outputDir}/alignment/${normal}.gatk.mpileup \
-O ${outputDir}/qc/${tumor}_contamination.txt
>>>
output{
Array[String] concordance = [
"${outputDir}/report/qc/${tumor}_concordance.txt",
"${outputDir}/report/qc/${tumor}_contamination.txt",
]
}
}
task chemoTherapy{
String codes_dir
String normal
String outputDir
String ref
String project
String rmdupBam
command <<<
${codes_dir}/chemo/chemo_panel.py -p ${project} -o ${outputDir} --n ${normal} --r ${ref}
>>>
}
task tumor_content{
String ref
String tumor
String normal
String outputDir
String gc_wiggle
String codes_dir
#for task chain
Array[String] pileup
command <<<
sequenza-utils bam2seqz -p -gc ${gc_wiggle} \
-F ${ref} \
-n ${outputDir}/alignment/${normal}.pileup \
-t ${outputDir}/alignment/${tumor}.pileup | gzip >${outputDir}/qc/target_${tumor}.200base.seqz.gz
sequenza-utils seqz_binning -w 200 -s ${outputDir}/qc/target_${tumor}.200base.seqz.gz | gzip > ${outputDir}/qc/target_${tumor}.200base.small.seqz.gz
Rscript ${codes_dir}/sequenza.R ${tumor} ${outputDir}/qc/target_${tumor}.200base.small.seqz.gz ${outputDir}/qc/sequenza || echo "sequenza failed!"
>>>
output{
String purity = "${outputDir}/qc/sequenza/${tumor}_CP_contours.pdf"
}
}
task cnvkit{
String tumor
String outputDir
String cancer
String codes_dir
String project
#for task chain
String rmdupBam
String purity
command <<<
echo run cnvkit batch to processing cnv calling
cnvkit.py batch \
${rmdupBam} \
-r ${codes_dir}/cnvkit/624gene_pool_normal_reference.cnn \
--drop-low-coverage --scatter --diagram --output-dir ${outputDir}/cnvkit
cnvkit.py scatter \
${outputDir}/cnvkit/${tumor}.rmdup.cnr -s ${outputDir}/cnvkit/${tumor}.rmdup.cns \
--y-max 3 --y-min -3 \
--title ${tumor}.cns \
-o ${outputDir}/cnvkit/${tumor}.cnv.png
if [ -e "${outputDir}/qc/sequenza/${tumor}_confints_CP.txt" ]; then
# absolute copy number
cnvkit.py call \
-m clonal \
${outputDir}/cnvkit/${tumor}.rmdup.cns \
-y \
--purity `head -n2 ${outputDir}/qc/sequenza/${tumor}_confints_CP.txt |tail -n1|cut -f1` \
--drop-low-coverage \
--filter ampdel \
-o ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn.hc
fi
perl ${codes_dir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn
perl ${codes_dir}/cnv_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project} ${cancer}
>>>
output{
String cnv = "${outputDir}/cnvkit/${tumor}.rmdup.cns"
}
}
task dealwithsnvindel{
String codes_dir
String project
String outputDir
String tumor
Array[String] anno
String cancer
command <<<
perl ${codes_dir}/pick_variant_control_umi.pl ${outputDir} ${tumor}
python3 ${codes_dir}/correct_f1r1.py ${outputDir} ${tumor}
perl ${codes_dir}/pick_mut_splice_promoter_control.pl ${codes_dir} ${tumor} ${outputDir} ${project}
perl ${codes_dir}/germline_targetTherapy.pl ${tumor} ${outputDir} ${project} ${cancer}
perl ${codes_dir}/targetTherapy.pl ${tumor} ${outputDir} ${project} ${cancer}
>>>
output{
String snv = "${outputDir}/mutation/${tumor}.snvindel.pos.txt"
}
}
task MMR{
String codes_dir
String tumor
String outputDir
String snv
command <<<
perl ${codes_dir}/mmr_controlsample.pl ${outputDir} ${tumor}
>>>
output{
String mmr_out = "${outputDir}/MMR/${tumor}_mmr.txt"
}
}
task HRR{
String codes_dir
String tumor
String outputDir
String snv
command <<<
perl ${codes_dir}/hrr_controlsample_cfDNA.pl ${outputDir} ${tumor}
>>>
output{
String mmr_out = "${outputDir}/HRR/${tumor}_hrr.txt"
}
}
task HLA{
String inputDir
String outputDir
String normal
String newdir
command <<<
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${outputDir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${inputDir}/*_${normal}_*1.fq.gz
samtools bam2fq ${outputDir}/neoantigen/HLA/fished_1.bam > ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq
rm ${outputDir}/neoantigen/HLA/fished_1.bam
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${outputDir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${inputDir}/*_${normal}_*2.fq.gz
samtools bam2fq ${outputDir}/neoantigen/HLA/fished_2.bam > ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq
rm ${outputDir}/neoantigen/HLA/fished_2.bam
/dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py -i ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq --dna -v --prefix ${normal} -o ${outputDir}/neoantigen/HLA/
>>>
output{
String hla = "${outputDir}/neoantigen/HLA/${normal}_result.tsv"
}
}
task neoantigen{
String codes_dir
String outputDir
String normal
String tumor
String hla
String vcf
command <<<
sh ${codes_dir}/predict_neoantigen.sh ${outputDir} ${normal} ${tumor} ${codes_dir}
>>>
output{
String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${tumor}.all_epitopes.netchop.txt"
}
}
task MSI{
String bed
String codes_dir
String tumor
String outputDir
String rmdupBam
command <<<
##msings
echo ${outputDir}/alignment/${tumor}.rmdup.bam >${outputDir}/MSI/bam.list
${codes_dir}/MSI/run_msings.sh ${outputDir}/MSI/bam.list
mv ${outputDir}/alignment/${tumor}.rmdup ${outputDir}/MSI/
mv ${outputDir}/alignment/msi_run_log.txt ${outputDir}/MSI/
mv ${outputDir}/alignment/Combined_MSI.txt ${outputDir}/MSI/
##msisensor2
msisensor2 msi -M /dataseq/jmdna/software/msisensor2/models_hg19_GRCh37 -t ${rmdupBam} -e ${bed} -b 10 -o ${outputDir}/MSI/${tumor}.msi
>>>
output{
String target="${outputDir}/MSI/${tumor}.msi"
}
}
task hereditary{
String codes_dir
String tumor
String outputDir
String project
String snv
command <<<
python3 /dataseq/jmdna/codes/624/hereditary/hereditary.py -p ${project} -o ${outputDir} --n ${tumor}
>>>
}
task auto_report{
String tumor
String normal
String cancer
String outputDir
String codes_dir
Array[String] qc_2
Array[String] concordance
String snv_result
String cnv_result
String fusion_result
String tmb
command <<<
perl ${codes_dir}/hpd_control_umi.pl ${outputDir} ${tumor}
python3 ${codes_dir}/sample_post.py -s ${tumor} -o ${outputDir} --n ${normal}
perl ${codes_dir}/indication.pl ${outputDir} ${cancer}
python3 ${codes_dir}/qc_check_control_cfdna.py ${outputDir} ${tumor} ${normal}
python3 ${codes_dir}/drug_dedup.py ${outputDir} ${tumor}
python3 ${codes_dir}/report_template/624gene_cfdna_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}
python3 ${codes_dir}/wdl_check.py -o ${outputDir}
perl ${codes_dir}/file_format_change.pl ${outputDir} ${tumor}
cp ${outputDir}/qc/qc_fail.txt ${outputDir}/report/qc_fail.txt
cp ${outputDir}/qc/${tumor}_qc.txt ${outputDir}/report/
cp ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn ${outputDir}/report/
cp ${outputDir}/cnvkit/${tumor}.cnv.png ${outputDir}/report/
cp ${outputDir}/fusion/${tumor}.fuison.vus.txt ${outputDir}/report/
cp ${outputDir}/fusion/${tumor}.fusion.reanno.vcf ${outputDir}/report/
# cp ${outputDir}/mutation/*filter* ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.snvindel.Germline.target.txt ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.nontarget.vus.txt ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.snvindel.vus.txt ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.target.splicing.txt ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.target.promoter.txt ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.tmb.txt ${outputDir}/report/
cp ${outputDir}/MMR/${tumor}.mmr.pre.txt ${outputDir}/report/
cp ${outputDir}/HRR/${tumor}.hrr.pre.txt ${outputDir}/report/
cp ${outputDir}/HPD/${tumor}.hpd.pre.txt ${outputDir}/report/
cp ${outputDir}/hereditary/${tumor}.hereditary.pre.txt ${outputDir}/report/
python3 ${codes_dir}/txt_xlsx.py ${outputDir} ${tumor}
>>>
}