pipeline/other/pancancer_controlsample_682...

968 lines
28 KiB
Plaintext
Raw Normal View History

2023-08-29 17:46:31 +08:00
workflow pancancer_controlsample{
String normal
String tumor
String cancer
String project = "650gene"
String bed = "/dataseq/jmdna/database/bed/650.bed"
String bed_plus = "/dataseq/jmdna/database/bed/650plus50bp.bed"
String name
String outputDir
String need_TMB_test = "yes"
String need_MSI_test = "yes"
String need_neoantigen_test = "yes"
2023-10-10 11:09:16 +08:00
String inputDir
2023-08-29 17:46:31 +08:00
String ref = "/dataseq/jmdna/database/genome/hg19/hg19.fa"
String codes_dir = "/dataseq/jmdna/codes/pancancer_controlsample"
String accessBed = "/dataseq/jmdna/software/cnvkit-0.9.7/data/access-5k-mappable.hg19.bed"
String annotateGene = "/dataseq/jmdna/software/cnvkit-0.9.7/data/refFlat.txt"
String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz"
#创建目录
call create_dir{
input:
outputDir=outputDir,
need_neoantigen_test=need_neoantigen_test,
need_MSI_test=need_MSI_test
}
2023-10-10 11:09:16 +08:00
scatter (name in [normal,tumor]){
call qc{
2023-08-29 17:46:31 +08:00
input:
2023-10-10 11:09:16 +08:00
name=name,
inputDir=inputDir,
outputDir=outputDir
2023-08-29 17:46:31 +08:00
}
2023-10-10 11:09:16 +08:00
call alignment_bwa{
2023-08-29 17:46:31 +08:00
input:
2023-10-10 11:09:16 +08:00
name=name,
2023-08-29 17:46:31 +08:00
outputDir=outputDir,
ref=ref,
2023-10-10 11:09:16 +08:00
read1=qc.outputFile[0],
read2=qc.outputFile[1]
2023-08-29 17:46:31 +08:00
}
2023-10-10 11:09:16 +08:00
call markduplicates{
2023-08-29 17:46:31 +08:00
input:
name=name,
outputDir=outputDir,
ref=ref,
2023-10-10 11:09:16 +08:00
sortedBam=alignment_bwa.sorted
}
call generater_mpileup{
input:
name=name,
ref=ref,
bed=bed_plus,
outputDir=outputDir,
rmdupBam=markduplicates.rmdup
}
}
call mutation_calling{
2023-08-29 17:46:31 +08:00
input:
2023-10-10 11:09:16 +08:00
codes_dir=codes_dir,
tumor=tumor,
outputDir=outputDir,
pileup=generater_mpileup.pileup
2023-08-29 17:46:31 +08:00
}
2023-10-10 11:09:16 +08:00
call annovar{
input:
tumor=tumor,
normal=normal,
outputDir=outputDir,
ref=ref,
vcf=mutation_calling.vcf
2023-08-29 17:46:31 +08:00
}
2023-10-10 11:09:16 +08:00
scatter (name in [normal,tumor]){
2023-08-29 17:46:31 +08:00
call qc_2{
input:
ref=ref,
bed=bed,
codes_dir=codes_dir,
name=name,
tumor=tumor,
normal=normal,
outputDir=outputDir,
2023-10-10 11:09:16 +08:00
anno=annovar.anno
2023-08-29 17:46:31 +08:00
}
2023-10-10 11:09:16 +08:00
}
call dealwithsnvindel{
input:
codes_dir=codes_dir,
project=project,
outputDir=outputDir,
tumor=tumor,
cancer=cancer,
anno=annovar.anno
}
call fusion{
input:
ref=ref,
codes_dir=codes_dir,
project=project,
tumor=tumor,
outputDir=outputDir,
cancer=cancer,
rmdupBam=markduplicates.rmdup
}
if (need_TMB_test=="yes"){
call TMB{
input:
codes_dir=codes_dir,
tumor=tumor,
outputDir=outputDir,
anno=annovar.anno,
concordance=conpair.concordance,
snv_result=dealwithsnvindel.snv,
cnv_result=cnvkit.cnv,
fusion_result=fusion.fusion
}
}
if (need_MSI_test=="yes"){
call MSI{
input:
bed=bed,
tumor=tumor,
normal=normal,
outputDir=outputDir,
rmdupBam=markduplicates.rmdup
}
2023-08-29 17:46:31 +08:00
}
2023-10-10 11:09:16 +08:00
call MMR{
input:
codes_dir=codes_dir,
tumor=tumor,
outputDir=outputDir,
snv=dealwithsnvindel.snv
}
call HRR{
input:
codes_dir=codes_dir,
tumor=tumor,
outputDir=outputDir,
snv=dealwithsnvindel.snv
}
call chemoTherapy{
input:
codes_dir=codes_dir,
normal=normal,
tumor=tumor,
outputDir=outputDir,
ref=ref,
project=project,
rmdupBam=markduplicates.rmdup
}
call hereditary{
input:
codes_dir=codes_dir,
tumor=tumor,
outputDir=outputDir,
project=project,
snv=dealwithsnvindel.snv
}
call conpair{
input:
name=name,
codes_dir=codes_dir,
tumor=tumor,
normal=normal,
ref=ref,
outputDir=outputDir,
rmdupBam=markduplicates.rmdup
}
call cnvkit{
input:
name=name,
normal=normal,
tumor=tumor,
ref=ref,
bed=bed,
cancer=cancer,
project=project,
codes_dir=codes_dir,
outputDir=outputDir,
rmdupBam=markduplicates.rmdup,
accessBed=accessBed,
annotateGene=annotateGene,
purity=tumor_content.purity
}
call tumor_content{
input:
name=name,
ref=ref,
tumor=tumor,
normal=normal,
outputDir=outputDir,
gc_wiggle=gc_wiggle,
codes_dir=codes_dir,
pileup=generater_mpileup.pileup
}
if (need_neoantigen_test=="yes"){
call HLA{
input:
inputDir=inputDir,
outputDir=outputDir,
normal=normal,
newdir=create_dir.newdir
}
call neoantigen{
input:
codes_dir=codes_dir,
outputDir=outputDir,
name=name,
normal=normal,
tumor=tumor,
hla=HLA.hla,
vcf=mutation_calling.vcf
}
}
2023-08-29 17:46:31 +08:00
call auto_report{
input:
cancer=cancer,
codes_dir=codes_dir,
outputDir=outputDir,
name=name,
normal=normal,
2023-10-10 11:09:16 +08:00
tumor=tumor,
2023-08-29 17:46:31 +08:00
qc_2=qc_2.qc,
concordance=conpair.concordance,
snv_result=dealwithsnvindel.snv,
cnv_result=cnvkit.cnv,
fusion_result=fusion.fusion,
tmb=TMB.tmb
}
call hotspot{
input:
tumor=tumor,
outputDir=outputDir,
ref=ref,
rmdupBam=markduplicates.rmdup,
codes_dir=codes_dir
}
}
#create project directory
task create_dir{
String outputDir
String need_MSI_test
String need_neoantigen_test
command <<<
#创建目录
if [ ! -d ${outputDir} ];then
mkdir ${outputDir}
fi
#创建qc目录
if [ ! -d ${outputDir}/qc ];then
mkdir ${outputDir}/qc
fi
#创建alignment目录
if [ ! -d ${outputDir}/alignment ];then
mkdir ${outputDir}/alignment
fi
#创建mutation目录
if [ ! -d ${outputDir}/mutation ];then
mkdir ${outputDir}/mutation
fi
#创建cnv目录
if [ ! -d ${outputDir}/cnvkit ];then
mkdir ${outputDir}/cnvkit
fi
#创建report目录
if [ ! -d ${outputDir}/report ];then
mkdir -p ${outputDir}/report
fi
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
#创建chemo目录
if [ ! -d ${outputDir}/chemo ];then
mkdir ${outputDir}/chemo
fi
#创建fusion目录
if [ ! -d ${outputDir}/fusion ];then
mkdir ${outputDir}/fusion
fi
#创建MSI目录
if [ ${need_MSI_test} = "yes" ] && [ ! -d ${outputDir}/MSI ];then
mkdir ${outputDir}/MSI
fi
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
if [ ${need_neoantigen_test} = "yes" ] && [ ! -d ${outputDir}/neoantigen/HLA ];then
mkdir -p ${outputDir}/neoantigen/HLA
fi
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
#创建MMR目录
if [ ! -d ${outputDir}/MMR ];then
mkdir ${outputDir}/MMR
fi
##创建HRR目录
if [ ! -d ${outputDir}/HRR ];then
mkdir ${outputDir}/HRR
fi
#创建HPD目录
if [ ! -d ${outputDir}/HPD ];then
mkdir ${outputDir}/HPD
fi
>>>
output{
String newdir = "${outputDir}/report"
}
}
#generator raw fastq to clean fastq
task qc{
String name
String inputDir
String outputDir
command <<<
echo processing raw reads with fastp
fastp -i ${inputDir}/*_${name}_*1.fq.gz -o ${outputDir}/qc/${name}_clean_R1.fq.gz \
-I ${inputDir}/*_${name}_*2.fq.gz -O ${outputDir}/qc/${name}_clean_R2.fq.gz \
-w 10 \
--correction \
--overlap_len_require 10 \
-j ${outputDir}/qc/${name}.json \
-h ${outputDir}/qc/${name}.html --report_title ${name} -e 20
# --adapter_sequence AAGTCGGAGGCCAAGCGGTCTTAGGAAGACAA \
# --adapter_sequence_r2 AAGTCGGATCGTAGCCATGTCGTTCTGTGAGCCAAGGAGTTG \
>>>
output{
Array[String] outputFile = [
"${outputDir}/qc/${name}_clean_R1.fq.gz ",
"${outputDir}/qc/${name}_clean_R2.fq.gz",
"${outputDir}/qc/${name}.json",
"${outputDir}/qc/${name}.html"
]
}
}
#alignment clean fastq to reference
task alignment_bwa{
String name
String ref
String outputDir
String read1
String read2
command<<<
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
bwa mem -R '@RG\tID:group_n\tLB:library_n\tPL:BGI\tPU:unit1\tSM:${name}' -M -t 10 ${ref} \
${outputDir}/qc/${name}_clean_R1.fq.gz ${outputDir}/qc/${name}_clean_R2.fq.gz | \
samtools view -@ 5 -bh -o - | samtools sort -@ 5 -o ${outputDir}/alignment/${name}.sorted.bam
samtools index ${outputDir}/alignment/${name}.sorted.bam
>>>
output{
String sorted = "${outputDir}/alignment/${name}.sorted.bam"
}
}
2023-10-10 11:09:16 +08:00
#remove PCR duplicates
2023-08-29 17:46:31 +08:00
task markduplicates{
String name
String ref
String outputDir
String sortedBam
command<<<
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
java -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Xmx12G -jar $PICARD MarkDuplicates \
I=${outputDir}/alignment/${name}.sorted.bam \
O=${outputDir}/alignment/${name}.rmdup.bam \
CREATE_INDEX=true \
M=${outputDir}/alignment/${name}.rmdup.metrics.txt \
R=${ref}
>>>
output{
String rmdup = "${outputDir}/alignment/${name}.rmdup.bam"
}
}
# generater mpileup file
task generater_mpileup{
String name
String ref
String bed
String outputDir
String rmdupBam
command<<<
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${bed} \
${outputDir}/alignment/${name}.rmdup.bam -o ${outputDir}/alignment/${name}.pileup
>>>
output{
String pileup = "${outputDir}/alignment/${name}.pileup"
}
}
task mutation_calling{
String codes_dir
String tumor
Array[String] pileup
String outputDir
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
command<<<
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
sh ${codes_dir}/mpileup2cns.sh ${sep=" " pileup} ${outputDir} ${tumor}
java -jar $GATK MergeVcfs -I ${outputDir}/mutation/${tumor}.snp.Somatic.hc.vcf -I ${outputDir}/mutation/${tumor}.indel.Somatic.hc.vcf \
-O ${outputDir}/mutation/${tumor}.snp.indel.Somatic.hc.vcf -D /dataseq/jmdna/database/genome/hg19/hg19.dict
java -jar $GATK MergeVcfs -I ${outputDir}/mutation/${tumor}.snp.Germline.vcf -I ${outputDir}/mutation/${tumor}.indel.Germline.vcf \
-O ${outputDir}/mutation/${tumor}.snp.indel.Germline.vcf -D /dataseq/jmdna/database/genome/hg19/hg19.dict
java -jar $GATK MergeVcfs -I ${outputDir}/mutation/${tumor}.snp.LOH.hc.vcf -I ${outputDir}/mutation/${tumor}.indel.LOH.hc.vcf \
-O ${outputDir}/mutation/${tumor}.snp.indel.LOH.hc.vcf -D /dataseq/jmdna/database/genome/hg19/hg19.dict
>>>
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
output{
Array[String] vcf = [
"${outputDir}/mutation/${tumor}.snp.indel.Somatic.hc.vcf",
"${outputDir}/mutation/${tumor}.snp.indel.Germline.vcf",
"${outputDir}/mutation/${tumor}.snp.indel.LOH.hc.vcf"
]
}
}
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
task annovar{
String tumor
String normal
String outputDir
String ref
Array[String] vcf
command<<<
table_annovar.pl \
${outputDir}/mutation/${tumor}.snp.indel.Somatic.hc.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${outputDir}/mutation/${tumor}.snp.indel.Somatic.anno
table_annovar.pl \
${outputDir}/mutation/${tumor}.snp.indel.Germline.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${outputDir}/mutation/${tumor}.snp.indel.Germline.anno
table_annovar.pl \
${outputDir}/mutation/${tumor}.snp.indel.LOH.hc.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${outputDir}/mutation/${tumor}.snp.indel.LOH.anno
java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -T VariantAnnotator \
-R ${ref} \
-I ${outputDir}/alignment/${tumor}.rmdup.bam \
-V ${outputDir}/mutation/${tumor}.snp.indel.Somatic.hc.vcf \
-o ${outputDir}/mutation/${tumor}.TandemRepeatAnnotator.vcf \
--annotation TandemRepeatAnnotator
# -nt 10
grep -v "^##" ${outputDir}/mutation/${tumor}.TandemRepeatAnnotator.vcf | cut -f8| paste ${outputDir}/mutation/${tumor}.snp.indel.Somatic.anno.hg19_multianno.txt - >${outputDir}/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno.txt
>>>
output{
Array[String] anno = [
"${outputDir}/mutation/${tumor}.snp.indel.Somatic.anno.hg19_multianno.txt",
"${outputDir}/mutation/${tumor}.snp.indel.Germline.anno.hg19_multianno.txt",
"${outputDir}/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno.txt",
]
}
}
task dealwithsnvindel{
String codes_dir
String project
String outputDir
String tumor
Array[String] anno
String cancer
command <<<
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
perl ${codes_dir}/pick_variant.pl ${outputDir} ${tumor}
perl ${codes_dir}/pick_mut_splice_promoter.pl ${codes_dir} ${tumor} ${outputDir} ${project}
perl /home/jm001/test_duantao/database_update/codes/682/targetTherapy.pl ${tumor} ${outputDir} ${project} ${cancer}
perl /home/jm001/test_duantao/database_update/codes/682/germline_targetTherapy.pl ${tumor} ${outputDir} ${project} ${cancer}
>>>
output{
String snv = "${outputDir}/mutation/${tumor}.snvindel.pos.txt"
}
}
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
task qc_2{
String ref
String codes_dir
String name
String tumor
String normal
String bed
String outputDir
#for task chain
Array[String] anno
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
command <<<
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
samtools flagstat -@ 10 ${outputDir}/alignment/${name}.rmdup.bam >${outputDir}/qc/${name}.rmdup.flagstat
samtools stats --reference ${ref} -t ${bed} -@ 10 ${outputDir}/alignment/${name}.rmdup.bam > ${outputDir}/alignment/${name}.stat
mkdir ${outputDir}/qc/${name}_bamdst
bamdst -p ${bed} -o ${outputDir}/qc/${name}_bamdst ${outputDir}/alignment/${name}.rmdup.bam
Rscript ${codes_dir}/InsertAndDepthStat.R ${outputDir}/qc/${name}_InsertAndDepthStat ${outputDir}/qc/${name}_bamdst/insertsize.plot ${outputDir}/qc/${name}_bamdst/depth_distribution.plot
2023-10-10 11:09:16 +08:00
python3 ${codes_dir}/sample_post.py -s ${tumor} -o ${outputDir} --n ${normal}
2023-08-29 17:46:31 +08:00
python3 ${codes_dir}/qc_stat.py ${outputDir}/qc/${name}.json ${outputDir}/qc/${name}_bamdst/coverage.report ${outputDir}/qc/${name}_bamdst/depth_distribution.plot ${outputDir} ${name} ${tumor}
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
>>>
output{
String qc = "${outputDir}/qc/${name}_qc.txt"
}
}
task fusion{
String ref
String codes_dir
String tumor
String outputDir
String cancer
String project
#for task chain
Array[String] rmdupBam
command<<<
# Extract the discordant paired-end alignments.
samtools view -b -F 1294 ${outputDir}/alignment/${tumor}.rmdup.bam > ${outputDir}/fusion/${tumor}.discordants.bam
# Extract the split-read alignments
samtools view -h ${outputDir}/alignment/${tumor}.rmdup.bam \
| /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
| samtools view -Sb - \
> ${outputDir}/fusion/${tumor}.splitters.bam
lumpyexpress \
-B ${outputDir}/alignment/${tumor}.rmdup.bam \
-S ${outputDir}/fusion/${tumor}.splitters.bam \
-D ${outputDir}/fusion/${tumor}.discordants.bam \
-o ${outputDir}/fusion/${tumor}.fusion.vcf
perl ${codes_dir}/fusion.filter.pl ${outputDir}/fusion/${tumor}.fusion.vcf ${outputDir}/fusion/${tumor}.fusion.filter.vcf
svtyper \
-B ${outputDir}/alignment/${tumor}.rmdup.bam \
-i ${outputDir}/fusion/${tumor}.fusion.filter.vcf \
-T ${ref} \
-o ${outputDir}/fusion/${tumor}.fusion.gt.vcf
table_annovar.pl \
${outputDir}/fusion/${tumor}.fusion.gt.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene \
-operation g \
--outfile ${outputDir}/fusion/${tumor}.fusion
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
perl ${codes_dir}/fusion.reanno.pl ${outputDir}/qc/${tumor}_bamdst/depth.tsv.gz ${outputDir} ${tumor}
perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project} ${cancer}
>>>
output{
String fusion = "${outputDir}/fusion/${tumor}.fusion.pos.txt"
}
}
task chemoTherapy{
String codes_dir
String normal
String tumor
String outputDir
String ref
String project
Array[String] rmdupBam
command <<<
# perl ${codes_dir}/chemo/singlecancer_chemo_2.pl ${codes_dir} ${outputDir} ${normal} ${ref} ${project}
${codes_dir}/chemo/chemo_panel.py -p ${project} -o ${outputDir} --n ${normal}
>>>
}
task hereditary{
String codes_dir
String tumor
String outputDir
String project
String snv
command <<<
${codes_dir}/hereditary/hereditary.py -p ${project} -o ${outputDir} --n ${tumor}
>>>
}
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
task conpair{
String name
String tumor
String normal
String outputDir
String codes_dir
String ref
#for task chain
Array[String] rmdupBam
command <<<
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
-B ${outputDir}/alignment/${normal}.rmdup.bam \
-O ${outputDir}/alignment/${normal}.gatk.mpileup \
-R ${ref} \
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \
-M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \
-B ${outputDir}/alignment/${tumor}.rmdup.bam \
-O ${outputDir}/alignment/${tumor}.gatk.mpileup \
-R ${ref} \
-G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
sed -i 's/^chr//g' ${outputDir}/alignment/${normal}.gatk.mpileup
sed -i 's/^chr//g' ${outputDir}/alignment/${tumor}.gatk.mpileup
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \
-T ${outputDir}/alignment/${tumor}.gatk.mpileup \
-N ${outputDir}/alignment/${normal}.gatk.mpileup \
-H \
-O ${outputDir}/qc/${name}_concordance.txt
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \
-T ${outputDir}/alignment/${tumor}.gatk.mpileup \
-N ${outputDir}/alignment/${normal}.gatk.mpileup \
2023-10-10 11:09:16 +08:00
-O ${outputDir}/qc/${name}_contamination.txt
2023-08-29 17:46:31 +08:00
>>>
output{
Array[String] concordance = [
"${outputDir}/report/qc/${name}_concordance.txt",
"${outputDir}/report/qc/${name}_contamination.txt",
]
}
}
task MSI{
String bed
String tumor
String normal
String outputDir
Array[String] rmdupBam
command <<<
msisensor2 msi -d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list -n ${outputDir}/alignment/${normal}.rmdup.bam -t ${outputDir}/alignment/${tumor}.rmdup.bam -e ${bed} -b 10 -o ${outputDir}/MSI/${tumor}.msi
>>>
output{
String target="${outputDir}/MSI/${tumor}.msi"
}
}
task MMR{
String codes_dir
String tumor
String outputDir
String snv
command <<<
perl ${codes_dir}/mmr_controlsample.pl ${outputDir} ${tumor}
>>>
output{
String mmr_out = "${outputDir}/MMR/${tumor}_mmr.txt"
}
}
task HRR{
String codes_dir
String tumor
String outputDir
String snv
command <<<
perl ${codes_dir}/hrr_controlsample_tissue.pl ${outputDir} ${tumor}
>>>
output{
String mmr_out = "${outputDir}/HRR/${tumor}_hrr.txt"
}
}
task TMB{
String codes_dir
String tumor
String outputDir
Array[String] anno
Array[String] concordance
String snv_result
String cnv_result
String fusion_result
2023-10-10 11:09:16 +08:00
command <<<
2023-08-29 17:46:31 +08:00
perl ${codes_dir}/tmb.pl ${outputDir} ${tumor}
>>>
output{
String tmb="${outputDir}/mutation/${tumor}.tmb.txt"
}
}
task cnvkit{
String ref
String bed
String name
String tumor
String normal
String outputDir
String cancer
String accessBed
2023-10-10 11:09:16 +08:00
String annotateGene
2023-08-29 17:46:31 +08:00
String codes_dir
String project
#for task chain
Array[String] rmdupBam
String purity
command <<<
echo run cnvkit batch to processing cnv calling
cnvkit.py batch \
${outputDir}/alignment/${tumor}.rmdup.bam \
--normal ${outputDir}/alignment/${normal}.rmdup.bam \
--targets ${bed} \
--fasta ${ref} \
--access ${accessBed} \
--output-reference ${outputDir}/cnvkit/${normal}_reference.cnn \
--annotate ${annotateGene} \
--drop-low-coverage --scatter --output-dir ${outputDir}/cnvkit
cnvkit.py scatter \
${outputDir}/cnvkit/${tumor}.rmdup.cnr -s ${outputDir}/cnvkit/${tumor}.rmdup.cns \
--y-max 3 --y-min -3 \
--title ${tumor}.cns \
-o ${outputDir}/cnvkit/${tumor}.cnv.png
if [ -e "${outputDir}/qc/sequenza/${name}_confints_CP.txt" ]; then
# absolute copy number
cnvkit.py call \
-m clonal \
${outputDir}/cnvkit/${tumor}.rmdup.cns \
-y \
--purity `head -n2 ${outputDir}/qc/sequenza/${name}_confints_CP.txt |tail -n1|cut -f1` \
--drop-low-coverage \
--filter ampdel \
-o ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn.hc
2023-10-10 11:09:16 +08:00
fi
perl ${codes_dir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn
2023-08-29 17:46:31 +08:00
perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codes_dir} ${tumor} ${outputDir} ${project} ${cancer}
>>>
output{
String cnv = "${outputDir}/cnvkit/${tumor}.rmdup.cns"
}
}
task tumor_content{
String name
String ref
String tumor
String normal
String outputDir
String gc_wiggle
String codes_dir
#for task chain
Array[String] pileup
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
command <<<
sequenza-utils bam2seqz -p -gc ${gc_wiggle} \
-F ${ref} \
-n ${outputDir}/alignment/${normal}.pileup \
-t ${outputDir}/alignment/${tumor}.pileup | gzip >${outputDir}/qc/target_${name}.200base.seqz.gz
sequenza-utils seqz_binning -w 200 -s ${outputDir}/qc/target_${name}.200base.seqz.gz | gzip > ${outputDir}/qc/target_${name}.200base.small.seqz.gz
Rscript ${codes_dir}/sequenza.R ${name} ${outputDir}/qc/target_${name}.200base.small.seqz.gz ${outputDir}/qc/sequenza || echo "sequenza failed!"
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
# Rscript ${codes_dir}/qc_stat.R ${outputDir} ${name}
# cp ${outputDir}/qc/${name}_qcstat.txt ${outputDir}/report/qc/${name}_qcstat.txt
>>>
output{
String purity = "${outputDir}/qc/sequenza/${name}_CP_contours.pdf"
}
}
task HLA{
String inputDir
String outputDir
String normal
String newdir
command <<<
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${outputDir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${inputDir}/*_${normal}_*1.fq.gz
samtools bam2fq ${outputDir}/neoantigen/HLA/fished_1.bam > ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq
2023-10-10 11:09:16 +08:00
rm ${outputDir}/neoantigen/HLA/fished_1.bam
2023-08-29 17:46:31 +08:00
razers3 -tc 10 -i 95 -m 1 -dr 0 -o ${outputDir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta ${inputDir}/*_${normal}_*2.fq.gz
samtools bam2fq ${outputDir}/neoantigen/HLA/fished_2.bam > ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq
rm ${outputDir}/neoantigen/HLA/fished_2.bam
/dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py -i ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq --dna -v --prefix ${normal} -o ${outputDir}/neoantigen/HLA/
>>>
output{
String hla = "${outputDir}/neoantigen/HLA/${normal}_result.tsv"
}
}
task neoantigen{
String codes_dir
String outputDir
String name
String normal
String tumor
String hla
2023-10-10 11:09:16 +08:00
Array[String] vcf
2023-08-29 17:46:31 +08:00
command <<<
2023-10-10 11:09:16 +08:00
sh /home/jm001/test_duantao/database_update/test_project/t20230818/predict_neoantigen.sh ${outputDir} ${name} ${tumor} ${codes_dir}
2023-08-29 17:46:31 +08:00
>>>
output{
String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${tumor}.all_epitopes.netchop.txt"
}
}
task auto_report{
String cancer
String codes_dir
String outputDir
String name
String normal
String tumor
Array[String] qc_2
Array[String] concordance
# String purity
String snv_result
String cnv_result
2023-10-10 11:09:16 +08:00
String fusion_result
2023-08-29 17:46:31 +08:00
String tmb
command <<<
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer}
python3 ${codes_dir}/qc_check_tissue.py ${outputDir} ${tumor} ${normal} ${name}
python3 ${codes_dir}/drug_dedup.py ${outputDir} ${tumor}
perl ${codes_dir}/hpd.pl ${outputDir} ${tumor}
python3 ${codes_dir}/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer}
python3 ${codes_dir}/wdl_check.py -o ${outputDir}
perl ${codes_dir}/file_format_change.pl ${outputDir} ${tumor}
# cp ${outputDir}/qc/sequenza/${name}_confints_CP.txt ${outputDir}/report/${name}_confints_CP.txt
cp ${outputDir}/qc/qc_fail.txt ${outputDir}/report/qc_fail.txt
cp ${outputDir}/qc/${tumor}_qc.txt ${outputDir}/report/
cp ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn ${outputDir}/report/
cp ${outputDir}/cnvkit/${tumor}.cnv.png ${outputDir}/report/
cp ${outputDir}/fusion/${tumor}.fuison.vus.txt ${outputDir}/report/
cp ${outputDir}/fusion/${tumor}.fusion.reanno.vcf ${outputDir}/report/
# cp ${outputDir}/mutation/*filter* ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.snvindel.Germline.target.txt ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.nontarget.vus.txt ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.snvindel.vus.txt ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.target.splicing.txt ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.target.promoter.txt ${outputDir}/report/
cp ${outputDir}/mutation/${tumor}.tmb.txt ${outputDir}/report/
2023-10-10 11:09:16 +08:00
cp ${outputDir}/MMR/${tumor}.mmr.pre.txt ${outputDir}/report/
2023-08-29 17:46:31 +08:00
cp ${outputDir}/HRR/${tumor}.hrr.pre.txt ${outputDir}/report/
cp ${outputDir}/HPD/${tumor}.hpd.pre.txt ${outputDir}/report/
cp ${outputDir}/hereditary/${tumor}.hereditary.pre.txt ${outputDir}/report/
python3 ${codes_dir}/txt_xlsx.py ${outputDir} ${tumor}
>>>
}
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
# hotspot
task hotspot{
String tumor
String outputDir
String ref
Array[String] rmdupBam
String codes_dir
command<<<
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
mkdir -p ${outputDir}/mutation/hotspot
samtools mpileup -Bq 20 -Q 20 -f ${ref} -l ${codes_dir}/hotspot.bed ${outputDir}/alignment/${tumor}.rmdup.bam -o ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup
java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.005 --min-avg-qual 20 --output-vcf 1 --variants 1 --p-value 0.99 --min-reads2 2 --strand-filter 0 > ${outputDir}/mutation/hotspot/${tumor}.hotspot.L.snp.indel.vcf
java -jar $VARSCAN mpileup2cns ${outputDir}/mutation/hotspot/${tumor}.hotspot.pileup --min-var-freq 0.01 --min-avg-qual 20 --output-vcf 1 --variants 1 --p-value 0.05 --min-reads2 3 --strand-filter 1 > ${outputDir}/mutation/hotspot/${tumor}.hotspot.H.snp.indel.vcf
2023-10-10 11:09:16 +08:00
perl ${codes_dir}/hotspot.hvl.pl ${outputDir} ${tumor}
2023-08-29 17:46:31 +08:00
if [ -e "${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf" ]; then
table_annovar.pl \
${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene \
-argument '-hgvs' \
-operation g \
--outfile ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno
# java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -T VariantAnnotator \
# -R ${ref} \
# -I ${outputDir}/alignment/${tumor}.rmdup.bam \
# -V ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.vcf \
# -o ${outputDir}/mutation/hotspot/${tumor}.hotspot.TandemRepeatAnnotator.vcf \
# --annotation TandemRepeatAnnotator
2023-10-10 11:09:16 +08:00
# grep -v "^##" ${outputDir}/mutation/hotspot/${tumor}.hotspot.TandemRepeatAnnotator.vcf | cut -f8| paste ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno.hg19_multianno.txt - >${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.Somatic.annoall.hg19_multianno.txt
2023-08-29 17:46:31 +08:00
# cp ${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno.hg19_multianno.txt ${outputDir}/report/${tumor}.hotspot.snp.indel.anno.hg19_multianno.txt
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
perl ${codes_dir}/hotspot.filter.pl ${outputDir} ${tumor}
fi
2023-10-10 11:09:16 +08:00
2023-08-29 17:46:31 +08:00
>>>
output{
String hotspot = "${outputDir}/mutation/hotspot/${tumor}.hotspot.H.snp.indel.vcf"
}
}