Difference between revisions of "Tassel 5 GBS v2 Pipeline sample script for calling SNPs using reference genome"
From Poland Lab Wiki
(17 intermediate revisions by the same user not shown) | |||
Line 15: | Line 15: | ||
</blockquote> | </blockquote> | ||
− | * Once the Keyfile and shell script are ready, put them in <code>jobs</code> folder and run <code> | + | * Once the Keyfile and shell script are ready, put them in <code>jobs</code> folder and run <code> sbatch projectName.sh </code> |
<syntaxhighlight lang="bash"> | <syntaxhighlight lang="bash"> | ||
− | #!/bin/bash | + | #!/bin/bash -l |
− | # Update the user and name variables, | + | # Update beocat resources request |
+ | #SBATCH --mem-per-cpu=64G # Memory per core, use --mem= for memory per node | ||
+ | #SBATCH --time=02-00:00:00 # Use the form DD-HH:MM:SS | ||
+ | #SBATCH --mail-user=eID@ksu.edu --mail-type=ALL # same as =BEGIN,FAIL,END | ||
+ | #SBATCH --job-name=projectName | ||
+ | |||
+ | # Update the 'user' and 'name' variables, 'eID' is your K-State eID. | ||
user=eID | user=eID | ||
name=projectName | name=projectName | ||
− | |||
− | # | + | ## ####################################### |
− | # | + | ## NO NEED TO CHANGE ANYTHING FROM HERE ON |
+ | ## ####################################### | ||
− | + | # Load Java module and set JAVA VM Version to 1.8 (if default is different) | |
− | + | module load Java | |
keyFile=/homes/$user/gbs/jobs/${name}.txt | keyFile=/homes/$user/gbs/jobs/${name}.txt | ||
seqDir=/bulk/jpoland/sequence | seqDir=/bulk/jpoland/sequence | ||
− | dbPath=/bulk/ | + | dbPath=/bulk/nss470/genomes/CS_NRGene/pseudomolecules_v1.0/161010_Chinese_Spring_v1.0_pseudomolecules_parts |
tasselPath=/homes/nss470/softwares/tassel5/run_pipeline.pl | tasselPath=/homes/nss470/softwares/tassel5/run_pipeline.pl | ||
Line 44: | Line 50: | ||
# Path for required software | # Path for required software | ||
export PATH=$PATH:/homes/nss470/usr/bin:/homes/nss470/usr/bin/bin | export PATH=$PATH:/homes/nss470/usr/bin:/homes/nss470/usr/bin/bin | ||
− | |||
− | |||
− | |||
## GBSSeqToTagDBPlugin - RUN Tags to DB | ## GBSSeqToTagDBPlugin - RUN Tags to DB | ||
Line 59: | Line 62: | ||
$tasselPath -fork1 -TagExportToFastqPlugin \ | $tasselPath -fork1 -TagExportToFastqPlugin \ | ||
-db ${name}.db \ | -db ${name}.db \ | ||
− | -o ${name}_tagsForAlign. | + | -o ${name}_tagsForAlign.fa.gz -c 10 \ |
-endPlugin -runfork1 >> z_pipeline.out | -endPlugin -runfork1 >> z_pipeline.out | ||
Line 65: | Line 68: | ||
bowtie2 -p 20 --end-to-end \ | bowtie2 -p 20 --end-to-end \ | ||
-x $dbPath \ | -x $dbPath \ | ||
− | -U ${name}_tagsForAlign. | + | -U ${name}_tagsForAlign.fa.gz \ |
-S ${name}.sam >> z_pipeline.out | -S ${name}.sam >> z_pipeline.out | ||
Line 93: | Line 96: | ||
-endPlugin -runfork1 >> z_pipeline.out | -endPlugin -runfork1 >> z_pipeline.out | ||
− | ## ProductionSNPCallerPluginV2 - RUN PRODUCTION PIPELINE - output . | + | ## ProductionSNPCallerPluginV2 - RUN PRODUCTION PIPELINE - output .vcf |
$tasselPath -Xms64G -Xmx64G -fork1 -ProductionSNPCallerPluginV2 \ | $tasselPath -Xms64G -Xmx64G -fork1 -ProductionSNPCallerPluginV2 \ | ||
-db ${name}.db \ | -db ${name}.db \ | ||
-i $seqDir \ | -i $seqDir \ | ||
-k $keyFile \ | -k $keyFile \ | ||
− | -o ${name}. | + | -o ${name}.vcf \ |
-e PstI-MspI -kmerLength 64 \ | -e PstI-MspI -kmerLength 64 \ | ||
-endPlugin -runfork1 >> z_pipeline.out | -endPlugin -runfork1 >> z_pipeline.out | ||
## Convert to Hapmap format | ## Convert to Hapmap format | ||
− | $tasselPath -Xms64G -Xmx64G -fork1 - | + | $tasselPath -Xms64G -Xmx64G -fork1 -vcf ${name}.vcf \ |
-export ${name} -exportType Hapmap | -export ${name} -exportType Hapmap | ||
Line 110: | Line 113: | ||
− | If everything ran correctly, you should get a folder named ''' | + | If everything ran correctly, you should get a folder named '''projectName''' in your <code>gbs/projects</code> folder with all the output files including SNPs containing '''projectName.vcf''' and '''projectName.hmp.txt''' files that can be used for further analyses |
If you have any question or it doesn't work, please contact me [mailto:nss470@ksu.edu?Subject=TASSEL5%20GBS%20v2%20pipeline%20question here]. | If you have any question or it doesn't work, please contact me [mailto:nss470@ksu.edu?Subject=TASSEL5%20GBS%20v2%20pipeline%20question here]. |
Latest revision as of 20:01, 14 March 2018
For this script to work properly, you should have gbs
folder in your home directory, and jobs
and projects
folders inside gbs
- Once you have the full script copied in a simple text file with extension
.sh
, thename
variable should be replaced with something more explicit, such as your project name.
- Keyfile should have these 4 mandatory headers: 'Flowcell' 'Lane' 'Barcode' 'FullSampleName'
To keep everything organized, I recommend using the same base name for the project and the files, for example
name
= projectNameKeyfile name = projectName.txt
Shell script name = projectName.sh
- Once the Keyfile and shell script are ready, put them in
jobs
folder and runsbatch projectName.sh
#!/bin/bash -l # Update beocat resources request #SBATCH --mem-per-cpu=64G # Memory per core, use --mem= for memory per node #SBATCH --time=02-00:00:00 # Use the form DD-HH:MM:SS #SBATCH --mail-user=eID@ksu.edu --mail-type=ALL # same as =BEGIN,FAIL,END #SBATCH --job-name=projectName # Update the 'user' and 'name' variables, 'eID' is your K-State eID. user=eID name=projectName ## ####################################### ## NO NEED TO CHANGE ANYTHING FROM HERE ON ## ####################################### # Load Java module and set JAVA VM Version to 1.8 (if default is different) module load Java keyFile=/homes/$user/gbs/jobs/${name}.txt seqDir=/bulk/jpoland/sequence dbPath=/bulk/nss470/genomes/CS_NRGene/pseudomolecules_v1.0/161010_Chinese_Spring_v1.0_pseudomolecules_parts tasselPath=/homes/nss470/softwares/tassel5/run_pipeline.pl mkdir /homes/$user/gbs/projects/${name} mkdir /homes/$user/gbs/projects/${name}/keyFileSh cd /homes/$user/gbs/projects/${name} # Path for required software export PATH=$PATH:/homes/nss470/usr/bin:/homes/nss470/usr/bin/bin ## GBSSeqToTagDBPlugin - RUN Tags to DB $tasselPath -Xms64G -Xmx64G -fork1 -GBSSeqToTagDBPlugin -e PstI-MspI \ -i $seqDir \ -db ${name}.db \ -k $keyFile \ -kmerLength 64 -minKmerL 20 -mnQS 20 -mxKmerNum 250000000 \ -endPlugin -runfork1 >> z_pipeline.out ## TagExportToFastqPlugin - export Tags $tasselPath -fork1 -TagExportToFastqPlugin \ -db ${name}.db \ -o ${name}_tagsForAlign.fa.gz -c 10 \ -endPlugin -runfork1 >> z_pipeline.out ## RUN BOWTIE bowtie2 -p 20 --end-to-end \ -x $dbPath \ -U ${name}_tagsForAlign.fa.gz \ -S ${name}.sam >> z_pipeline.out ## SAMToGBSdbPlugin - SAM to DB $tasselPath -Xms64G -Xmx64G -fork1 -SAMToGBSdbPlugin \ -i ${name}.sam \ -db ${name}.db \ -aProp 0.0 -aLen 0 \ -endPlugin -runfork1 >> z_pipeline.out ## DiscoverySNPCallerPluginV2 - RUN DISCOVERY SNP CALLER $tasselPath -Xms64G -Xmx64G -fork1 -DiscoverySNPCallerPluginV2 \ -db ${name}.db \ -mnLCov 0.1 -mnMAF 0.01 -deleteOldData true \ -endPlugin -runfork1 >> z_pipeline.out ## SNPQualityProfilerPlugin - RUN QUALITY PROFILER $tasselPath -Xms64G -Xmx64G -fork1 -SNPQualityProfilerPlugin \ -db ${name}.db \ -statFile ${name}_SNPqual_stats.txt \ -endPlugin -runfork1 >> z_pipeline.out ## UpdateSNPPositionQualityPlugin - UPDATE DATABASE WITH QUALITY SCORE $tasselPath -Xms64G -Xmx64G -fork1 -UpdateSNPPositionQualityPlugin \ -db ${name}.db \ -qsFile ${name}_SNPqual_stats.txt \ -endPlugin -runfork1 >> z_pipeline.out ## ProductionSNPCallerPluginV2 - RUN PRODUCTION PIPELINE - output .vcf $tasselPath -Xms64G -Xmx64G -fork1 -ProductionSNPCallerPluginV2 \ -db ${name}.db \ -i $seqDir \ -k $keyFile \ -o ${name}.vcf \ -e PstI-MspI -kmerLength 64 \ -endPlugin -runfork1 >> z_pipeline.out ## Convert to Hapmap format $tasselPath -Xms64G -Xmx64G -fork1 -vcf ${name}.vcf \ -export ${name} -exportType Hapmap mv /homes/$user/gbs/jobs/${name}.* keyFileSh/
If everything ran correctly, you should get a folder named projectName in your gbs/projects
folder with all the output files including SNPs containing projectName.vcf and projectName.hmp.txt files that can be used for further analyses
If you have any question or it doesn't work, please contact me here.