Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
7657016c
Commit
7657016c
authored
Jun 20, 2016
by
Peter van 't Hof
Browse files
Added open reference method
parent
7c40ad23
Changes
4
Hide whitespace changes
Inline
Side-by-side
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickOpenReferenceOtus.scala
0 → 100644
View file @
7657016c
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.extensions.qiime
import
java.io.File
import
nl.lumc.sasc.biopet.core.
{
BiopetCommandLineFunction
,
Version
}
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
/**
* Created by pjvan_thof on 12/4/15.
*/
class
PickOpenReferenceOtus
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
with
Version
{
executable
=
config
(
"exe"
,
default
=
"pick_open_reference_otus.py"
)
@Input
(
required
=
true
)
var
inputFasta
:
File
=
_
var
outputDir
:
File
=
null
override
def
defaultThreads
=
1
override
def
defaultCoreMemory
=
20.0
def
versionCommand
=
executable
+
" --version"
def
versionRegex
=
"""Version: (.*)"""
.
r
@Input
(
required
=
false
)
var
parameterFp
:
Option
[
File
]
=
config
(
"parameter_fp"
)
@Input
(
required
=
false
)
var
referenceFp
:
Option
[
File
]
=
config
(
"reference_fp"
)
@Input
(
required
=
false
)
var
taxonomyFp
:
Option
[
File
]
=
config
(
"taxonomy_fp"
)
var
force
:
Boolean
=
config
(
"force"
,
default
=
false
)
var
printOnly
:
Boolean
=
config
(
"print_only"
,
default
=
false
)
var
suppressTaxonomyAssignment
:
Boolean
=
config
(
"suppress_taxonomy_assignment"
,
default
=
false
)
var
percentSubsample
:
Option
[
Double
]
=
config
(
"percent_subsample"
)
var
prefilterPercentId
:
Option
[
Double
]
=
config
(
"prefilter_percent_id"
)
var
suppressStep4
:
Boolean
=
config
(
"suppress_step4"
,
default
=
false
)
var
minOtuSize
:
Option
[
Int
]
=
config
(
"min_otu_size"
)
var
suppressAlignAndTree
:
Boolean
=
config
(
"suppress_taxonomy_assignment"
,
default
=
false
)
def
otuTable
=
new
File
(
outputDir
,
"otu_table_mc2.biom"
)
def
failedOtuTable
=
new
File
(
outputDir
,
"otu_table_mc2_w_tax_no_pynast_failures.biom"
)
def
otuMap
=
new
File
(
outputDir
,
"final_otu_map.txt"
)
@Output
private
var
outputFiles
:
List
[
File
]
=
Nil
override
def
beforeGraph
()
:
Unit
=
{
super
.
beforeGraph
()
jobOutputFile
=
new
File
(
outputDir
,
".std.out"
)
outputFiles
::=
otuTable
outputFiles
::=
failedOtuTable
outputFiles
::=
otuMap
}
def
cmdLine
=
executable
+
required
(
"-f"
)
+
required
(
"-i"
,
inputFasta
)
+
required
(
"-o"
,
outputDir
)
+
optional
(
"--reference_fp"
,
referenceFp
)
+
optional
(
"--parameter_fp"
,
parameterFp
)
+
optional
(
"--taxonomy_fp"
,
taxonomyFp
)
+
conditional
(
force
,
"--force"
)
+
conditional
(
printOnly
,
"--print_only"
)
+
conditional
(
suppressTaxonomyAssignment
,
"--suppress_taxonomy_assignment"
)
+
(
if
(
threads
>
1
)
required
(
"-a"
)
+
required
(
"-O"
,
threads
)
else
""
)
+
optional
(
"--percent_subsample"
,
percentSubsample
)
+
optional
(
"--prefilter_percent_id"
,
prefilterPercentId
)
+
conditional
(
suppressStep4
,
"--suppress_step4"
)
+
optional
(
"-min_otu_size"
,
minOtuSize
)
+
conditional
(
suppressAlignAndTree
,
"--suppress_align_and_tree"
)
}
gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala
View file @
7657016c
...
...
@@ -53,22 +53,30 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
}
def
qiimeClosedDir
:
Option
[
File
]
=
{
if
(
samples
.
values
.
flatMap
(
_
.
g
s
.
qiimeClosed
).
nonEmpty
)
{
if
(
samples
.
values
.
flatMap
(
_
.
g
earsSingle
.
qiimeClosed
).
nonEmpty
)
{
Some
(
new
File
(
outputDir
,
"qiime_closed_reference"
))
}
else
None
}
def
qiimeOpenDir
:
Option
[
File
]
=
{
if
(
samples
.
values
.
flatMap
(
_
.
gearsSingle
.
qiimeOpen
).
nonEmpty
)
{
Some
(
new
File
(
outputDir
,
"qiime_open_reference"
))
}
else
None
}
def
qiimeClosedOtuTable
:
Option
[
File
]
=
qiimeClosedDir
.
map
(
new
File
(
_
,
"otu_table.biom"
))
def
qiimeClosedOtuMap
:
Option
[
File
]
=
qiimeClosedDir
.
map
(
new
File
(
_
,
"otu_map.txt"
))
def
qiimeOpenOtuTable
:
Option
[
File
]
=
qiimeOpenDir
.
map
(
new
File
(
_
,
"otu_table.biom"
))
def
qiimeOpenOtuMap
:
Option
[
File
]
=
qiimeOpenDir
.
map
(
new
File
(
_
,
"otu_map.txt"
))
/**
* Method where the multisample jobs should be added, this will be executed only when running the -sample argument is not given.
*/
def
addMultiSampleJobs
()
:
Unit
=
{
val
gs
s
=
samples
.
values
.
flatMap
(
_
.
g
s
.
qiimeClosed
).
toList
val
closedOtuTables
=
gs
s
.
map
(
_
.
otuTable
)
val
closedOtuMaps
=
gs
s
.
map
(
_
.
otuMap
)
val
qiimeClosed
s
=
samples
.
values
.
flatMap
(
_
.
g
earsSingle
.
qiimeClosed
).
toList
val
closedOtuTables
=
qiimeClosed
s
.
map
(
_
.
otuTable
)
val
closedOtuMaps
=
qiimeClosed
s
.
map
(
_
.
otuMap
)
require
(
closedOtuTables
.
size
==
closedOtuMaps
.
size
)
if
(
closedOtuTables
.
nonEmpty
)
{
if
(
closedOtuTables
.
size
>
1
)
{
...
...
@@ -86,14 +94,35 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
add
(
Ln
(
qscript
,
closedOtuMaps
.
head
,
qiimeClosedOtuMap
.
get
))
add
(
Ln
(
qscript
,
closedOtuTables
.
head
,
qiimeClosedOtuTable
.
get
))
}
}
val
qiimeOpens
=
samples
.
values
.
flatMap
(
_
.
gearsSingle
.
qiimeOpen
).
toList
val
openOtuTables
=
qiimeOpens
.
map
(
_
.
otuTable
)
val
openOtuMaps
=
qiimeOpens
.
map
(
_
.
otuMap
)
require
(
openOtuTables
.
size
==
openOtuMaps
.
size
)
if
(
openOtuTables
.
nonEmpty
)
{
if
(
openOtuTables
.
size
>
1
)
{
val
mergeTables
=
new
MergeOtuTables
(
qscript
)
mergeTables
.
input
=
openOtuTables
mergeTables
.
outputFile
=
qiimeOpenOtuTable
.
get
add
(
mergeTables
)
val
mergeMaps
=
new
MergeOtuMaps
(
qscript
)
mergeMaps
.
input
=
openOtuMaps
mergeMaps
.
output
=
qiimeOpenOtuMap
.
get
add
(
mergeMaps
)
//TODO: Plots
}
else
{
add
(
Ln
(
qscript
,
openOtuMaps
.
head
,
qiimeOpenOtuMap
.
get
))
add
(
Ln
(
qscript
,
openOtuTables
.
head
,
qiimeOpenOtuTable
.
get
))
}
}
}
/**
* Factory method for Sample class
*
* @param id SampleId
* @return Sample class
*/
...
...
@@ -102,7 +131,8 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
class
Sample
(
sampleId
:
String
)
extends
AbstractSample
(
sampleId
)
{
/**
* Factory method for Library class
* @param id SampleId
*
* @param id SampleId
* @return Sample class
*/
def
makeLibrary
(
id
:
String
)
:
Library
=
new
Library
(
id
)
...
...
@@ -144,9 +174,9 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
def
summaryStats
=
Map
()
}
lazy
val
g
s
=
new
GearsSingle
(
qscript
)
g
s
.
sampleId
=
Some
(
sampleId
)
g
s
.
outputDir
=
sampleDir
lazy
val
g
earsSingle
=
new
GearsSingle
(
qscript
)
g
earsSingle
.
sampleId
=
Some
(
sampleId
)
g
earsSingle
.
outputDir
=
sampleDir
/** Function to add sample jobs */
protected
def
addJobs
()
:
Unit
=
{
...
...
@@ -162,9 +192,9 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
add
(
Zcat
(
qscript
,
flexipreps
.
flatMap
(
_
.
fastqR2Qc
))
|
new
Gzip
(
qscript
)
>
file
)
}
g
s
.
fastqR1
=
Some
(
addDownsample
(
mergeR1
,
g
s
.
outputDir
))
g
s
.
fastqR2
=
mergeR2
.
map
(
addDownsample
(
_
,
g
s
.
outputDir
))
add
(
g
s
)
g
earsSingle
.
fastqR1
=
Some
(
addDownsample
(
mergeR1
,
g
earsSingle
.
outputDir
))
g
earsSingle
.
fastqR2
=
mergeR2
.
map
(
addDownsample
(
_
,
g
earsSingle
.
outputDir
))
add
(
g
earsSingle
)
}
/** Must return files to store into summary */
...
...
gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeOpen.scala
0 → 100644
View file @
7657016c
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.pipelines.gears
import
nl.lumc.sasc.biopet.core.SampleLibraryTag
import
nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.extensions.qiime._
import
nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSample
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvan_thof on 12/4/15.
*/
class
GearsQiimeOpen
(
val
root
:
Configurable
)
extends
QScript
with
SummaryQScript
with
SampleLibraryTag
{
var
fastqInput
:
File
=
_
override
def
defaults
=
Map
(
"splitlibrariesfastq"
->
Map
(
"barcode_type"
->
"not-barcoded"
)
)
def
init
()
=
{
require
(
fastqInput
!=
null
)
require
(
sampleId
.
isDefined
)
}
private
var
_otuMap
:
File
=
_
def
otuMap
=
_otuMap
private
var
_otuTable
:
File
=
_
def
otuTable
=
_otuTable
def
biopetScript
()
=
{
val
splitLib
=
new
SplitLibrariesFastq
(
this
)
splitLib
.
input
:+=
fastqInput
splitLib
.
outputDir
=
new
File
(
outputDir
,
"split_libraries_fastq"
)
sampleId
.
foreach
(
splitLib
.
sampleIds
:+=
_
.
replaceAll
(
"_"
,
"-"
))
add
(
splitLib
)
val
openReference
=
new
PickOpenReferenceOtus
(
this
)
openReference
.
inputFasta
=
addDownsample
(
splitLib
.
outputSeqs
,
new
File
(
splitLib
.
outputDir
,
s
"${sampleId.get}.downsample.fna"
))
openReference
.
outputDir
=
new
File
(
outputDir
,
"pick_open_reference_otus"
)
add
(
openReference
)
_otuMap
=
openReference
.
otuMap
_otuTable
=
openReference
.
otuTable
addSummaryJobs
()
}
/** Must return a map with used settings for this pipeline */
def
summarySettings
:
Map
[
String
,
Any
]
=
Map
()
/** File to put in the summary for thie pipeline */
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
(
"otu_table"
->
otuTable
,
"otu_map"
->
otuMap
)
/** Name of summary output file */
def
summaryFile
:
File
=
new
File
(
outputDir
,
"summary.open_reference.json"
)
val
downSample
:
Option
[
Double
]
=
config
(
"downsample"
)
def
addDownsample
(
input
:
File
,
output
:
File
)
:
File
=
{
downSample
match
{
case
Some
(
x
)
=>
val
seqtk
=
new
SeqtkSample
(
this
)
seqtk
.
input
=
input
seqtk
.
sample
=
x
seqtk
.
output
=
output
add
(
seqtk
)
output
case
_
=>
input
}
}
}
gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingle.scala
View file @
7657016c
...
...
@@ -43,6 +43,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
lazy
val
krakenScript
=
if
(
config
(
"gears_use_kraken"
,
default
=
true
))
Some
(
new
GearsKraken
(
this
))
else
None
lazy
val
qiimeRatx
=
if
(
config
(
"gears_use_qiime_rtax"
,
default
=
false
))
Some
(
new
GearsQiimeRtax
(
this
))
else
None
lazy
val
qiimeClosed
=
if
(
config
(
"gears_use_qiime_closed"
,
default
=
false
))
Some
(
new
GearsQiimeClosed
(
this
))
else
None
lazy
val
qiimeOpen
=
if
(
config
(
"gears_use_qiime_open"
,
default
=
false
))
Some
(
new
GearsQiimeOpen
(
this
))
else
None
lazy
val
seqCount
=
if
(
config
(
"gears_use_seq_count"
,
default
=
false
))
Some
(
new
GearsSeqCount
(
this
))
else
None
/** Executed before running the script */
...
...
@@ -138,6 +139,12 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
add
(
qiimeClosed
)
}
qiimeOpen
foreach
{
qiimeOpen
=>
qiimeOpen
.
outputDir
=
new
File
(
outputDir
,
"qiime_open"
)
qiimeOpen
.
fastqInput
=
combinedFastq
add
(
qiimeOpen
)
}
seqCount
.
foreach
{
seqCount
=>
seqCount
.
fastqInput
=
combinedFastq
seqCount
.
outputDir
=
new
File
(
outputDir
,
"seq_count"
)
...
...
@@ -155,7 +162,8 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
"skip_flexiprep"
->
skipFlexiprep
,
"gears_use_kraken"
->
krakenScript
.
isDefined
,
"gear_use_qiime_rtax"
->
qiimeRatx
.
isDefined
,
"gear_use_qiime_closed"
->
qiimeClosed
.
isDefined
"gear_use_qiime_closed"
->
qiimeClosed
.
isDefined
,
"gear_use_qiime_open"
->
qiimeOpen
.
isDefined
)
/** Statistics shown in the summary file */
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment