Difference between revisions of "Tassel 5 GBS v2 Pipeline sample script for calling SNPs using reference genome"
Line 1: | Line 1: | ||
==== For this script to work properly, you should have <code>gbs</code> folder in your home directory, and <code>jobs</code> and <code>projects</code> folders inside <code>gbs</code> ==== | ==== For this script to work properly, you should have <code>gbs</code> folder in your home directory, and <code>jobs</code> and <code>projects</code> folders inside <code>gbs</code> ==== | ||
− | + | Once you have the full script copied in a simple text file with extension <code>.sh</code>, the <code>name</code> variable should be replaced with something more explicit, such as your project name. | |
+ | |||
+ | Keyfile should have these 4 mandatory headers: 'Flowcell' 'Lane' 'Barcode' 'FullSampleName' | ||
<blockquote> | <blockquote> | ||
− | '''''To keep everything organized, I recommend using the same name for the project and the files, for example''''' | + | '''''To keep everything organized, I recommend using the same base name for the project and the files, for example''''' |
− | + | <code>name</code> - projectName | |
− | Keyfile - | + | Keyfile name - projectName.txt |
− | Shell script - | + | Shell script name - projectName.sh |
</blockquote> | </blockquote> | ||
− | + | Once the Keyfile and shell script are ready, put them in <code>jobs</code> folder and run <code> qsub projectName.sh </code> | |
− | |||
− | |||
− | |||
<syntaxhighlight lang="bash"> | <syntaxhighlight lang="bash"> | ||
#!/bin/bash | #!/bin/bash | ||
− | |||
− | |||
− | + | # Update the user and name variables, user is your K-State eID | |
− | + | ||
− | + | ||
user=eID | user=eID | ||
− | + | name=projectName | |
+ | # Update beocat resources request | ||
+ | #$ -l h_rt=72:00:00 -l mem=64G -cwd | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
+ | ## NO NEED TO CHANGE ANYTHING FROM HERE ON ## | ||
− | + | keyFile=/homes/$user/gbs/jobs/${name}.txt | |
− | + | ||
− | + | ||
− | keyFile=/homes/$user/gbs/jobs/name.txt | + | |
− | + | ||
− | |||
− | |||
− | |||
seqDir=/bulk/jpoland/sequence | seqDir=/bulk/jpoland/sequence | ||
− | |||
− | |||
− | |||
− | |||
− | |||
dbPath=/bulk/jpoland/genome/ChineseSpring/pseudomolecules_v1.0/161010_Chinese_Spring_v1.0_pseudomolecules_parts | dbPath=/bulk/jpoland/genome/ChineseSpring/pseudomolecules_v1.0/161010_Chinese_Spring_v1.0_pseudomolecules_parts | ||
− | |||
− | |||
− | |||
− | |||
− | |||
tasselPath=/homes/nss470/softwares/tassel5/run_pipeline.pl | tasselPath=/homes/nss470/softwares/tassel5/run_pipeline.pl | ||
− | |||
+ | mkdir /homes/$user/gbs/projects/${name} | ||
+ | mkdir /homes/$user/gbs/projects/${name}/keyFileSh | ||
+ | cd /homes/$user/gbs/projects/${name} | ||
− | + | # Path for required software | |
− | + | export PATH=$PATH:/homes/nss470/usr/bin:/homes/nss470/usr/bin/bin | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
+ | # Set JAVA VM Version | ||
+ | eselect java-vm set user 5 | ||
− | |||
− | |||
## GBSSeqToTagDBPlugin - RUN Tags to DB | ## GBSSeqToTagDBPlugin - RUN Tags to DB | ||
$tasselPath -Xms64G -Xmx64G -fork1 -GBSSeqToTagDBPlugin -e PstI-MspI \ | $tasselPath -Xms64G -Xmx64G -fork1 -GBSSeqToTagDBPlugin -e PstI-MspI \ | ||
-i $seqDir \ | -i $seqDir \ | ||
− | -db name.db \ | + | -db ${name}.db \ |
-k $keyFile \ | -k $keyFile \ | ||
-kmerLength 64 -minKmerL 20 -mnQS 20 -mxKmerNum 250000000 \ | -kmerLength 64 -minKmerL 20 -mnQS 20 -mxKmerNum 250000000 \ | ||
− | -endPlugin -runfork1 >> | + | -endPlugin -runfork1 >> z_${name}_GBSSeqToTagDB.out |
## TagExportToFastqPlugin - export Tags | ## TagExportToFastqPlugin - export Tags | ||
$tasselPath -fork1 -TagExportToFastqPlugin \ | $tasselPath -fork1 -TagExportToFastqPlugin \ | ||
− | -db name.db \ | + | -db ${name}.db \ |
− | -o | + | -o ${name}_tagsForAlign.fq -c 1 \ |
− | -endPlugin -runfork1 >> | + | -endPlugin -runfork1 >> z_${name}_TagExportToFastq.out |
## RUN BOWTIE | ## RUN BOWTIE | ||
bowtie2 -p 15 --very-sensitive-local \ | bowtie2 -p 15 --very-sensitive-local \ | ||
-x $dbPath \ | -x $dbPath \ | ||
− | -U | + | -U ${name}_tagsForAlign.fq \ |
− | -S name.sam >> | + | -S ${name}.sam >> z_${name}_bowtie2.out |
## SAMToGBSdbPlugin - SAM to DB | ## SAMToGBSdbPlugin - SAM to DB | ||
$tasselPath -Xms64G -Xmx64G -fork1 -SAMToGBSdbPlugin \ | $tasselPath -Xms64G -Xmx64G -fork1 -SAMToGBSdbPlugin \ | ||
− | -i name.sam \ | + | -i ${name}.sam \ |
− | -db name.db \ | + | -db ${name}.db \ |
-aProp 0.0 -aLen 0 \ | -aProp 0.0 -aLen 0 \ | ||
− | -endPlugin -runfork1 >> | + | -endPlugin -runfork1 >> z_${name}_SAMToGBSdb.out |
## DiscoverySNPCallerPluginV2 - RUN DISCOVERY SNP CALLER | ## DiscoverySNPCallerPluginV2 - RUN DISCOVERY SNP CALLER | ||
$tasselPath -Xms64G -Xmx64G -fork1 -DiscoverySNPCallerPluginV2 \ | $tasselPath -Xms64G -Xmx64G -fork1 -DiscoverySNPCallerPluginV2 \ | ||
− | -db name.db \ | + | -db ${name}.db \ |
-mnLCov 0.1 -mnMAF 0.01 -deleteOldData true \ | -mnLCov 0.1 -mnMAF 0.01 -deleteOldData true \ | ||
− | -endPlugin -runfork1 >> | + | -endPlugin -runfork1 >> z_${name}_DiscoverySNPCaller.out |
## SNPQualityProfilerPlugin - RUN QUALITY PROFILER | ## SNPQualityProfilerPlugin - RUN QUALITY PROFILER | ||
$tasselPath -Xms64G -Xmx64G -fork1 -SNPQualityProfilerPlugin \ | $tasselPath -Xms64G -Xmx64G -fork1 -SNPQualityProfilerPlugin \ | ||
− | -db name.db \ | + | -db ${name}.db \ |
− | -statFile | + | -statFile ${name}_SNPqual_stats.txt \ |
− | -endPlugin -runfork1 >> | + | -endPlugin -runfork1 >> z_${name}_SNPQualityProfiler.out |
## UpdateSNPPositionQualityPlugin - UPDATE DATABASE WITH QUALITY SCORE | ## UpdateSNPPositionQualityPlugin - UPDATE DATABASE WITH QUALITY SCORE | ||
$tasselPath -Xms64G -Xmx64G -fork1 -UpdateSNPPositionQualityPlugin \ | $tasselPath -Xms64G -Xmx64G -fork1 -UpdateSNPPositionQualityPlugin \ | ||
− | -db name.db \ | + | -db ${name}.db \ |
− | -qsFile | + | -qsFile ${name}_SNPqual_stats.txt \ |
− | -endPlugin -runfork1 >> | + | -endPlugin -runfork1 >> z_${name}_UpdateSNPPositionQuality.out |
## ProductionSNPCallerPluginV2 - RUN PRODUCTION PIPELINE - output .h5 | ## ProductionSNPCallerPluginV2 - RUN PRODUCTION PIPELINE - output .h5 | ||
$tasselPath -Xms64G -Xmx64G -fork1 -ProductionSNPCallerPluginV2 \ | $tasselPath -Xms64G -Xmx64G -fork1 -ProductionSNPCallerPluginV2 \ | ||
− | -db name.db \ | + | -db ${name}.db \ |
-i $seqDir \ | -i $seqDir \ | ||
-k $keyFile \ | -k $keyFile \ | ||
− | -o name.h5 \ | + | -o ${name}.h5 \ |
-e PstI-MspI -kmerLength 64 \ | -e PstI-MspI -kmerLength 64 \ | ||
− | -endPlugin -runfork1 >> | + | -endPlugin -runfork1 >> z_${name}_ProductionSNPCaller.out |
## ProductionSNPCallerPluginV2 - RUN PRODUCTION PIPELINE - output .vcf | ## ProductionSNPCallerPluginV2 - RUN PRODUCTION PIPELINE - output .vcf | ||
$tasselPath -Xms64G -Xmx64G -fork1 -ProductionSNPCallerPluginV2 \ | $tasselPath -Xms64G -Xmx64G -fork1 -ProductionSNPCallerPluginV2 \ | ||
− | -db name.db \ | + | -db ${name}.db \ |
-i $seqDir \ | -i $seqDir \ | ||
-k $keyFile \ | -k $keyFile \ | ||
− | -o name.vcf \ | + | -o ${name}.vcf \ |
-e PstI-MspI -kmerLength 64 \ | -e PstI-MspI -kmerLength 64 \ | ||
− | -endPlugin -runfork1 >> | + | -endPlugin -runfork1 >> z_${name}_ProductionSNPCaller.out |
## Convert to Hapmap format | ## Convert to Hapmap format | ||
− | $tasselPath -Xms64G -Xmx64G -fork1 -vcf name.vcf \ | + | $tasselPath -Xms64G -Xmx64G -fork1 -vcf ${name}.vcf \ |
− | -export name -exportType Hapmap | + | -export ${name} -exportType Hapmap |
− | + | ||
− | |||
− | |||
− | |||
mv /homes/$user/gbs/jobs/name.* keyFileSh/ | mv /homes/$user/gbs/jobs/name.* keyFileSh/ | ||
</syntaxhighlight> | </syntaxhighlight> |
Revision as of 22:13, 24 October 2016
For this script to work properly, you should have gbs
folder in your home directory, and jobs
and projects
folders inside gbs
Once you have the full script copied in a simple text file with extension .sh
, the name
variable should be replaced with something more explicit, such as your project name.
Keyfile should have these 4 mandatory headers: 'Flowcell' 'Lane' 'Barcode' 'FullSampleName'
To keep everything organized, I recommend using the same base name for the project and the files, for example
name
- projectNameKeyfile name - projectName.txt
Shell script name - projectName.sh
Once the Keyfile and shell script are ready, put them in jobs
folder and run qsub projectName.sh
#!/bin/bash # Update the user and name variables, user is your K-State eID user=eID name=projectName # Update beocat resources request #$ -l h_rt=72:00:00 -l mem=64G -cwd ## NO NEED TO CHANGE ANYTHING FROM HERE ON ## keyFile=/homes/$user/gbs/jobs/${name}.txt seqDir=/bulk/jpoland/sequence dbPath=/bulk/jpoland/genome/ChineseSpring/pseudomolecules_v1.0/161010_Chinese_Spring_v1.0_pseudomolecules_parts tasselPath=/homes/nss470/softwares/tassel5/run_pipeline.pl mkdir /homes/$user/gbs/projects/${name} mkdir /homes/$user/gbs/projects/${name}/keyFileSh cd /homes/$user/gbs/projects/${name} # Path for required software export PATH=$PATH:/homes/nss470/usr/bin:/homes/nss470/usr/bin/bin # Set JAVA VM Version eselect java-vm set user 5 ## GBSSeqToTagDBPlugin - RUN Tags to DB $tasselPath -Xms64G -Xmx64G -fork1 -GBSSeqToTagDBPlugin -e PstI-MspI \ -i $seqDir \ -db ${name}.db \ -k $keyFile \ -kmerLength 64 -minKmerL 20 -mnQS 20 -mxKmerNum 250000000 \ -endPlugin -runfork1 >> z_${name}_GBSSeqToTagDB.out ## TagExportToFastqPlugin - export Tags $tasselPath -fork1 -TagExportToFastqPlugin \ -db ${name}.db \ -o ${name}_tagsForAlign.fq -c 1 \ -endPlugin -runfork1 >> z_${name}_TagExportToFastq.out ## RUN BOWTIE bowtie2 -p 15 --very-sensitive-local \ -x $dbPath \ -U ${name}_tagsForAlign.fq \ -S ${name}.sam >> z_${name}_bowtie2.out ## SAMToGBSdbPlugin - SAM to DB $tasselPath -Xms64G -Xmx64G -fork1 -SAMToGBSdbPlugin \ -i ${name}.sam \ -db ${name}.db \ -aProp 0.0 -aLen 0 \ -endPlugin -runfork1 >> z_${name}_SAMToGBSdb.out ## DiscoverySNPCallerPluginV2 - RUN DISCOVERY SNP CALLER $tasselPath -Xms64G -Xmx64G -fork1 -DiscoverySNPCallerPluginV2 \ -db ${name}.db \ -mnLCov 0.1 -mnMAF 0.01 -deleteOldData true \ -endPlugin -runfork1 >> z_${name}_DiscoverySNPCaller.out ## SNPQualityProfilerPlugin - RUN QUALITY PROFILER $tasselPath -Xms64G -Xmx64G -fork1 -SNPQualityProfilerPlugin \ -db ${name}.db \ -statFile ${name}_SNPqual_stats.txt \ -endPlugin -runfork1 >> z_${name}_SNPQualityProfiler.out ## UpdateSNPPositionQualityPlugin - UPDATE DATABASE WITH QUALITY SCORE $tasselPath -Xms64G -Xmx64G -fork1 -UpdateSNPPositionQualityPlugin \ -db ${name}.db \ -qsFile ${name}_SNPqual_stats.txt \ -endPlugin -runfork1 >> z_${name}_UpdateSNPPositionQuality.out ## ProductionSNPCallerPluginV2 - RUN PRODUCTION PIPELINE - output .h5 $tasselPath -Xms64G -Xmx64G -fork1 -ProductionSNPCallerPluginV2 \ -db ${name}.db \ -i $seqDir \ -k $keyFile \ -o ${name}.h5 \ -e PstI-MspI -kmerLength 64 \ -endPlugin -runfork1 >> z_${name}_ProductionSNPCaller.out ## ProductionSNPCallerPluginV2 - RUN PRODUCTION PIPELINE - output .vcf $tasselPath -Xms64G -Xmx64G -fork1 -ProductionSNPCallerPluginV2 \ -db ${name}.db \ -i $seqDir \ -k $keyFile \ -o ${name}.vcf \ -e PstI-MspI -kmerLength 64 \ -endPlugin -runfork1 >> z_${name}_ProductionSNPCaller.out ## Convert to Hapmap format $tasselPath -Xms64G -Xmx64G -fork1 -vcf ${name}.vcf \ -export ${name} -exportType Hapmap mv /homes/$user/gbs/jobs/name.* keyFileSh/
If everything ran correctly, you should get a folder named hessian_F3 in your gbs/projects
folder with all the output files including SNPs containing .vcf and .hmp.txt files that can be used for further analyses