Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
c6c7c899
Commit
c6c7c899
authored
Sep 23, 2015
by
Peter van 't Hof
Browse files
Refactor flexiprep, more pipes are used now
parent
8e950ef8
Changes
2
Hide whitespace changes
Inline
Side-by-side
public/biopet-extentsions/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala
View file @
c6c7c899
...
...
@@ -24,7 +24,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for zcat */
class
Zcat
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
{
@Input
(
doc
=
"Zipped file"
,
required
=
true
)
var
input
:
File
=
_
var
input
:
List
[
File
]
=
_
@Output
(
doc
=
"Unzipped file"
,
required
=
true
)
var
output
:
File
=
_
...
...
@@ -46,6 +46,13 @@ object Zcat {
/** Returns Zcat with input and output files */
def
apply
(
root
:
Configurable
,
input
:
File
,
output
:
File
)
:
Zcat
=
{
val
zcat
=
new
Zcat
(
root
)
zcat
.
input
=
input
::
Nil
zcat
.
output
=
output
zcat
}
def
apply
(
root
:
Configurable
,
input
:
List
[
File
],
output
:
File
)
:
Zcat
=
{
val
zcat
=
new
Zcat
(
root
)
zcat
.
input
=
input
zcat
.
output
=
output
...
...
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
View file @
c6c7c899
...
...
@@ -15,6 +15,8 @@
*/
package
nl.lumc.sasc.biopet.pipelines.flexiprep
import
java.io.File
import
nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.core.
{
PipelineCommand
,
SampleLibraryTag
}
import
nl.lumc.sasc.biopet.extensions.
{
Pbzip2
,
Zcat
,
Gzip
,
Sickle
}
...
...
@@ -120,8 +122,8 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
/** Add init non chunkable jobs */
def
runInitialJobs
()
{
outputFiles
+=
(
"fastq_input_R1"
->
extractIfNeeded
(
input_R1
,
outputDir
)
)
if
(
paired
)
outputFiles
+=
(
"fastq_input_R2"
->
extractIfNeeded
(
input_R2
.
get
,
outputDir
)
)
outputFiles
+=
(
"fastq_input_R1"
->
input_R1
)
if
(
paired
)
outputFiles
+=
(
"fastq_input_R2"
->
input_R2
.
get
)
fastqc_R1
=
Fastqc
(
this
,
input_R1
,
new
File
(
outputDir
,
R1_name
+
".fastqc/"
))
add
(
fastqc_R1
)
...
...
@@ -148,7 +150,12 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
}
}
//TODO: Refactor need to combine all this functions
def
fastqR1Qc
=
if
(
paired
)
new
File
(
outputDir
,
s
"${sampleId.getOrElse("
x
")}-${libId.getOrElse("
x
")}.R1.qc.sync.fq.gz"
)
else
new
File
(
outputDir
,
s
"${sampleId.getOrElse("
x
")}-${libId.getOrElse("
x
")}.R1.qc.fq.gz"
)
def
fastqR2Qc
=
if
(
paired
)
Some
(
new
File
(
outputDir
,
s
"${sampleId.getOrElse("
x
")}-${libId.getOrElse("
x
")}.R2.qc.sync.fq.gz"
))
else
None
/** Adds all chunkable jobs of flexiprep */
def
runTrimClip
(
R1_in
:
File
,
outDir
:
File
,
chunk
:
String
)
:
(
File
,
Option
[
File
],
List
[
File
])
=
...
...
@@ -169,92 +176,42 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
var
R1
=
R1_in
var
R2
=
R2_in
var
deps_R1
=
R1
::
Nil
var
deps_R2
=
if
(
paired
)
R2
.
get
::
Nil
else
Nil
def
deps
:
List
[
File
]
=
deps_R1
:::
deps_R2
def
deps
:
List
[
File
]
=
Nil
val
seqtkSeq_R1
=
SeqtkSeq
(
this
,
R1
,
swapExt
(
outDir
,
R1
,
R1_ext
,
".sanger"
+
R1_ext
)
,
fastqc_R1
)
seqtkSeq_R1
.
isIntermediate
=
true
add
(
seqtkSeq_R1
)
R1
=
seqtkSeq_R1
.
output
deps_R1
::=
R1
val
qcCmdR1
=
new
QcCommand
(
this
,
fastqc_R1
)
qcCmdR1
.
input
=
R1_in
qcCmdR1
.
output
=
new
File
(
outDir
,
s
"${sampleId}-${libId}.R1.qc.fq.gz"
)
qcCmdR1
.
isIntermediate
=
paired
||
!
keepQcFastqFiles
add
(
qcCmd
R1
)
if
(
paired
)
{
val
seqtkSeq_R2
=
SeqtkSeq
(
this
,
R2
.
get
,
swapExt
(
outDir
,
R2
.
get
,
R2_ext
,
".sanger"
+
R2_ext
),
fastqc_R2
)
seqtkSeq_R2
.
isIntermediate
=
true
add
(
seqtkSeq_R2
)
R2
=
Some
(
seqtkSeq_R2
.
output
)
deps_R2
::=
R2
.
get
}
if
(!
skipClip
)
{
// Adapter clipping
val
cutadapt_R1
=
Cutadapt
(
this
,
R1
,
swapExt
(
outDir
,
R1
,
R1_ext
,
".clip"
+
R1_ext
))
cutadapt_R1
.
fastqc
=
fastqc_R1
cutadapt_R1
.
isIntermediate
=
true
cutadapt_R1
.
deps
=
deps_R1
add
(
cutadapt_R1
)
addSummarizable
(
cutadapt_R1
,
"clipping_R1"
)
R1
=
cutadapt_R1
.
fastq_output
deps_R1
::=
R1
outputFiles
+=
(
"cutadapt_R1_stats"
->
cutadapt_R1
.
stats_output
)
if
(
paired
)
{
val
cutadapt_R2
=
Cutadapt
(
this
,
R2
.
get
,
swapExt
(
outDir
,
R2
.
get
,
R2_ext
,
".clip"
+
R2_ext
))
outputFiles
+=
(
"cutadapt_R2_stats"
->
cutadapt_R2
.
stats_output
)
cutadapt_R2
.
fastqc
=
fastqc_R2
cutadapt_R2
.
isIntermediate
=
true
cutadapt_R2
.
deps
=
deps_R2
add
(
cutadapt_R2
)
addSummarizable
(
cutadapt_R2
,
"clipping_R2"
)
R2
=
Some
(
cutadapt_R2
.
fastq_output
)
deps_R2
::=
R2
.
get
val
fqSync
=
new
FastqSync
(
this
)
fqSync
.
refFastq
=
cutadapt_R1
.
fastq_input
fqSync
.
inputFastq1
=
cutadapt_R1
.
fastq_output
fqSync
.
inputFastq2
=
cutadapt_R2
.
fastq_output
fqSync
.
outputFastq1
=
swapExt
(
outDir
,
R1
,
R1_ext
,
".sync"
+
R1_ext
)
fqSync
.
outputFastq2
=
swapExt
(
outDir
,
R2
.
get
,
R2_ext
,
".sync"
+
R2_ext
)
fqSync
.
outputStats
=
swapExt
(
outDir
,
R1
,
R1_ext
,
".sync.stats"
)
fqSync
.
isIntermediate
=
true
fqSync
.
deps
:::=
deps
add
(
fqSync
)
addSummarizable
(
fqSync
,
"fastq_sync"
)
outputFiles
+=
(
"syncStats"
->
fqSync
.
outputStats
)
R1
=
fqSync
.
outputFastq1
R2
=
Some
(
fqSync
.
outputFastq2
)
deps_R1
::=
R1
deps_R2
::=
R2
.
get
}
}
if
(!
skipTrim
)
{
// Quality trimming
val
sickle
=
new
Sickle
(
this
)
sickle
.
input_R1
=
R1
sickle
.
output_R1
=
swapExt
(
outDir
,
R1
,
R1_ext
,
".trim"
+
R1_ext
)
if
(
paired
)
{
sickle
.
input_R2
=
R2
.
get
sickle
.
output_R2
=
swapExt
(
outDir
,
R2
.
get
,
R2_ext
,
".trim"
+
R2_ext
)
sickle
.
output_singles
=
swapExt
(
outDir
,
R2
.
get
,
R2_ext
,
".trim.singles"
+
R1_ext
)
}
sickle
.
output_stats
=
swapExt
(
outDir
,
R1
,
R1_ext
,
".trim.stats"
)
sickle
.
deps
=
deps
sickle
.
isIntermediate
=
true
add
(
sickle
)
addSummarizable
(
sickle
,
"trimming"
)
R1
=
sickle
.
output_R1
if
(
paired
)
R2
=
Some
(
sickle
.
output_R2
)
}
val
qcCmdR2
=
new
QcCommand
(
this
,
fastqc_R2
)
qcCmdR2
.
input
=
R2_in
.
get
qcCmdR2
.
output
=
new
File
(
outDir
,
s
"${sampleId}-${libId}.R2.qc.fq.gz"
)
qcCmdR2
.
isIntermediate
=
true
add
(
qcCmdR2
)
val
fqSync
=
new
FastqSync
(
this
)
fqSync
.
refFastq
=
R1_in
fqSync
.
inputFastq1
=
qcCmdR1
.
output
fqSync
.
inputFastq2
=
qcCmdR2
.
output
fqSync
.
outputFastq1
=
fastqR1Qc
fqSync
.
outputFastq2
=
fastqR2Qc
.
get
fqSync
.
outputStats
=
new
File
(
outDir
,
s
"${sampleId}-${libId}.sync.stats"
)
fqSync
.
isIntermediate
=
!
keepQcFastqFiles
add
(
fqSync
)
addSummarizable
(
fqSync
,
"fastq_sync"
)
outputFiles
+=
(
"syncStats"
->
fqSync
.
outputStats
)
R1
=
fqSync
.
outputFastq1
R2
=
Some
(
fqSync
.
outputFastq2
)
}
else
R1
=
qcCmdR1
.
output
val
seqstat_R1_after
=
SeqStat
(
this
,
R1
,
outDir
)
seqstat_R1_after
.
deps
=
deps_R1
add
(
seqstat_R1_after
)
addSummarizable
(
seqstat_R1_after
,
"seqstat_R1_qc"
)
if
(
paired
)
{
val
seqstat_R2_after
=
SeqStat
(
this
,
R2
.
get
,
outDir
)
seqstat_R2_after
.
deps
=
deps_R2
add
(
seqstat_R2_after
)
addSummarizable
(
seqstat_R2_after
,
"seqstat_R2_qc"
)
}
...
...
@@ -266,23 +223,24 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
/** Adds last non chunkable jobs */
def
runFinalize
(
fastq_R1
:
List
[
File
],
fastq_R2
:
List
[
File
])
{
if
(
fastq_R1
.
length
!=
fastq_R2
.
length
&&
paired
)
throw
new
IllegalStateException
(
"R1 and R2 file number is not the same"
)
val
R1
=
new
File
(
outputDir
,
R1_name
+
".qc"
+
R1_ext
+
".gz"
)
val
R2
=
new
File
(
outputDir
,
R2_name
+
".qc"
+
R2_ext
+
".gz"
)
if
(
fastq_R1
.
length
!=
fastq_R2
.
length
&&
paired
)
throw
new
IllegalStateException
(
"R1 and R2 file number is not the same"
)
if
(!
skipTrim
||
!
skipClip
)
{
add
(
Gzip
(
this
,
fastq_R1
,
R1
),
!
keepQcFastqFiles
)
if
(
paired
)
add
(
Gzip
(
this
,
fastq_R2
,
R2
),
!
keepQcFastqFiles
)
if
(
fastq_R1
.
length
>
1
)
{
add
(
Zcat
(
this
,
fastq_R1
,
fastqR1Qc
)
|
new
Gzip
(
this
)
>
fastqR1Qc
)
if
(
paired
)
add
(
Zcat
(
this
,
fastq_R2
,
fastqR2Qc
.
get
)
|
new
Gzip
(
this
)
>
fastqR2Qc
.
get
)
}
outputFiles
+=
(
"output_R1_gzip"
->
R1
)
if
(
paired
)
outputFiles
+=
(
"output_R2_gzip"
->
R2
)
outputFiles
+=
(
"output_R1_gzip"
->
fastqR1Qc
)
if
(
paired
)
outputFiles
+=
(
"output_R2_gzip"
->
fastqR2Qc
.
get
)
fastqc_R1_after
=
Fastqc
(
this
,
R1
,
new
File
(
outputDir
,
R1_name
+
".qc.fastqc/"
))
fastqc_R1_after
=
Fastqc
(
this
,
fastqR1Qc
,
new
File
(
outputDir
,
R1_name
+
".qc.fastqc/"
))
add
(
fastqc_R1_after
)
addSummarizable
(
fastqc_R1_after
,
"fastqc_R1_qc"
)
if
(
paired
)
{
fastqc_R2_after
=
Fastqc
(
this
,
R2
,
new
File
(
outputDir
,
R2_name
+
".qc.fastqc/"
))
fastqc_R2_after
=
Fastqc
(
this
,
fastqR2Qc
.
get
,
new
File
(
outputDir
,
R2_name
+
".qc.fastqc/"
))
add
(
fastqc_R2_after
)
addSummarizable
(
fastqc_R2_after
,
"fastqc_R2_qc"
)
}
...
...
@@ -290,25 +248,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
addSummaryJobs
()
}
/** Extracts file if file is compressed */
def
extractIfNeeded
(
file
:
File
,
runDir
:
File
)
:
File
=
{
if
(
file
==
null
)
file
else
if
(
file
.
getName
.
endsWith
(
".gz"
)
||
file
.
getName
.
endsWith
(
".gzip"
))
{
var
newFile
:
File
=
swapExt
(
runDir
,
file
,
".gz"
,
""
)
if
(
file
.
getName
.
endsWith
(
".gzip"
))
newFile
=
swapExt
(
runDir
,
file
,
".gzip"
,
""
)
val
zcatCommand
=
Zcat
(
this
,
file
,
newFile
)
zcatCommand
.
isIntermediate
=
true
add
(
zcatCommand
)
newFile
}
else
if
(
file
.
getName
.
endsWith
(
".bz2"
))
{
val
newFile
=
swapExt
(
runDir
,
file
,
".bz2"
,
""
)
val
pbzip2
=
Pbzip2
(
this
,
file
,
newFile
)
pbzip2
.
isIntermediate
=
true
add
(
pbzip2
)
newFile
}
else
file
}
}
object
Flexiprep
extends
PipelineCommand
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment