Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
13430b2e
Commit
13430b2e
authored
Nov 22, 2014
by
Peter van 't Hof
Browse files
Added flexiprep as separated artifact
parent
e671341e
Changes
8
Hide whitespace changes
Inline
Side-by-side
public/flexiprep/.gitignore
0 → 100644
View file @
13430b2e
/target/
\ No newline at end of file
public/flexiprep/pom.xml
0 → 100644
View file @
13430b2e
<project
xmlns=
"http://maven.apache.org/POM/4.0.0"
xmlns:xsi=
"http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation=
"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
nl.lumc.sasc
</groupId>
<artifactId>
Flexiprep
</artifactId>
<version>
0.2.0-DEV
</version>
<packaging>
jar
</packaging>
<inceptionYear>
2014
</inceptionYear>
<name>
Flexiprep
</name>
<url>
http://maven.apache.org
</url>
<properties>
<project.build.sourceEncoding>
UTF-8
</project.build.sourceEncoding>
<sting.unpack.phase>
prepare-package
</sting.unpack.phase>
<sting.shade.phase>
package
</sting.shade.phase>
<app.main.class>
nl.lumc.sasc.biopet.core.BiopetExecutable
</app.main.class>
</properties>
<dependencies>
<dependency>
<groupId>
nl.lumc.sasc
</groupId>
<artifactId>
BiopetFramework
</artifactId>
<version>
0.2.0-DEV
</version>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>
src/main/resources
</directory>
<includes>
<include>
**/*
</include>
</includes>
</resource>
</resources>
<plugins>
<plugin>
<groupId>
org.scala-tools
</groupId>
<artifactId>
maven-scala-plugin
</artifactId>
<version>
2.15.2
</version>
<executions>
<execution>
<id>
scala-compile
</id>
<goals>
<goal>
compile
</goal>
<goal>
testCompile
</goal>
</goals>
<configuration>
<args>
<arg>
-dependencyfile
</arg>
<arg>
${project.build.directory}/.scala_dependencies
</arg>
<arg>
-deprecation
</arg>
<arg>
-feature
</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-jar-plugin
</artifactId>
<version>
2.5
</version>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>
true
</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>
true
</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-compiler-plugin
</artifactId>
<version>
2.3.2
</version>
<configuration>
<showDeprecation>
true
</showDeprecation>
</configuration>
</plugin>
</plugins>
</build>
</project>
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala
0 → 100644
View file @
13430b2e
package
nl.lumc.sasc.biopet.pipelines.flexiprep
import
scala.io.Source
import
nl.lumc.sasc.biopet.extensions.Ln
import
org.broadinstitute.gatk.utils.commandline.
{
Input
}
import
argonaut._
,
Argonaut
.
_
import
scalaz._
,
Scalaz
.
_
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
import
scala.collection.mutable.Map
class
Cutadapt
(
root
:
Configurable
)
extends
nl
.
lumc
.
sasc
.
biopet
.
extensions
.
Cutadapt
(
root
)
{
@Input
(
doc
=
"Fastq contams file"
,
required
=
false
)
var
contams_file
:
File
=
_
override
def
beforeCmd
()
{
super
.
beforeCmd
getContamsFromFile
}
override
def
cmdLine
=
{
if
(!
opt_adapter
.
isEmpty
||
!
opt_anywhere
.
isEmpty
||
!
opt_front
.
isEmpty
)
{
analysisName
=
getClass
.
getSimpleName
super
.
cmdLine
}
else
{
analysisName
=
getClass
.
getSimpleName
+
"-ln"
Ln
(
this
,
fastq_input
,
fastq_output
,
relative
=
true
).
cmd
}
}
def
getContamsFromFile
{
if
(
contams_file
!=
null
)
{
if
(
contams_file
.
exists
())
{
for
(
line
<-
Source
.
fromFile
(
contams_file
).
getLines
)
{
var
s
:
String
=
line
.
substring
(
line
.
lastIndexOf
(
"\t"
)
+
1
,
line
.
size
)
if
(
default_clip_mode
==
"3"
)
opt_adapter
+=
s
else
if
(
default_clip_mode
==
"5"
)
opt_front
+=
s
else
if
(
default_clip_mode
==
"both"
)
opt_anywhere
+=
s
else
{
opt_adapter
+=
s
logger
.
warn
(
"Option default_clip_mode should be '3', '5' or 'both', falling back to default: '3'"
)
}
logger
.
info
(
"Adapter: "
+
s
+
" found in: "
+
fastq_input
)
}
}
else
logger
.
warn
(
"File : "
+
contams_file
+
" does not exist"
)
}
}
def
getSummary
:
Json
=
{
val
trimR
=
""".*Trimmed reads: *(\d*) .*"""
.
r
val
tooShortR
=
""".*Too short reads: *(\d*) .*"""
.
r
val
tooLongR
=
""".*Too long reads: *(\d*) .*"""
.
r
val
adapterR
=
"""Adapter '([C|T|A|G]*)'.*trimmed (\d*) times."""
.
r
var
stats
:
Map
[
String
,
Int
]
=
Map
(
"trimmed"
->
0
,
"tooshort"
->
0
,
"toolong"
->
0
)
var
adapter_stats
:
Map
[
String
,
Int
]
=
Map
()
if
(
stats_output
.
exists
)
for
(
line
<-
Source
.
fromFile
(
stats_output
).
getLines
)
{
line
match
{
case
trimR
(
m
)
=>
stats
+=
(
"trimmed"
->
m
.
toInt
)
case
tooShortR
(
m
)
=>
stats
+=
(
"tooshort"
->
m
.
toInt
)
case
tooLongR
(
m
)
=>
stats
+=
(
"toolong"
->
m
.
toInt
)
case
adapterR
(
adapter
,
count
)
=>
adapter_stats
+=
(
adapter
->
count
.
toInt
)
case
_
=>
}
}
return
(
"num_reads_affected"
:=
stats
(
"trimmed"
))
->:
(
"num_reads_discarded_too_short"
:=
stats
(
"tooshort"
))
->:
(
"num_reads_discarded_too_long"
:=
stats
(
"toolong"
))
->:
(
"adapters"
:=
adapter_stats
.
toMap
)
->:
jEmptyObject
}
}
object
Cutadapt
{
def
apply
(
root
:
Configurable
,
input
:
File
,
output
:
File
)
:
Cutadapt
=
{
val
cutadapt
=
new
Cutadapt
(
root
)
cutadapt
.
fastq_input
=
input
cutadapt
.
fastq_output
=
output
cutadapt
.
stats_output
=
new
File
(
output
.
getAbsolutePath
.
substring
(
0
,
output
.
getAbsolutePath
.
lastIndexOf
(
"."
))
+
".stats"
)
return
cutadapt
}
def
mergeSummaries
(
jsons
:
List
[
Json
])
:
Json
=
{
var
affected
=
0
var
tooShort
=
0
var
tooLong
=
0
var
adapter_stats
:
Map
[
String
,
Int
]
=
Map
()
for
(
json
<-
jsons
)
{
affected
+=
json
.
field
(
"num_reads_affected"
).
get
.
numberOrZero
.
toInt
tooShort
+=
json
.
field
(
"num_reads_discarded_too_short"
).
get
.
numberOrZero
.
toInt
tooLong
+=
json
.
field
(
"num_reads_discarded_too_long"
).
get
.
numberOrZero
.
toInt
val
adapters
=
json
.
fieldOrEmptyObject
(
"adapters"
)
for
(
key
<-
adapters
.
objectFieldsOrEmpty
)
{
val
number
=
adapters
.
field
(
key
).
get
.
numberOrZero
.
toInt
if
(
adapter_stats
.
contains
(
key
))
adapter_stats
(
key
)
+=
number
else
adapter_stats
+=
(
key
->
number
)
}
}
return
(
"num_reads_affected"
:=
affected
)
->:
(
"num_reads_discarded_too_short"
:=
tooShort
)
->:
(
"num_reads_discarded_too_long"
:=
tooLong
)
->:
(
"adapters"
:=
adapter_stats
.
toMap
)
->:
jEmptyObject
}
}
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala
0 → 100644
View file @
13430b2e
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package
nl.lumc.sasc.biopet.pipelines.flexiprep
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
import
scala.io.Source
import
argonaut._
,
Argonaut
.
_
import
scalaz._
,
Scalaz
.
_
class
Fastqc
(
root
:
Configurable
)
extends
nl
.
lumc
.
sasc
.
biopet
.
extensions
.
Fastqc
(
root
)
{
def
getDataBlock
(
name
:
String
)
:
Array
[
String
]
=
{
// Based on Fastqc v0.10.1
val
outputDir
=
output
.
getAbsolutePath
.
stripSuffix
(
".zip"
)
val
dataFile
=
new
File
(
outputDir
+
"/fastqc_data.txt"
)
if
(!
dataFile
.
exists
)
return
null
val
data
=
Source
.
fromFile
(
dataFile
).
mkString
for
(
block
<-
data
.
split
(
">>END_MODULE\n"
))
{
val
b
=
if
(
block
.
startsWith
(
"##FastQC"
))
block
.
substring
(
block
.
indexOf
(
"\n"
)
+
1
)
else
block
if
(
b
.
startsWith
(
">>"
+
name
))
return
for
(
line
<-
b
.
split
(
"\n"
))
yield
line
}
return
null
}
def
getEncoding
:
String
=
{
val
block
=
getDataBlock
(
"Basic Statistics"
)
if
(
block
==
null
)
return
null
for
(
line
<-
block
if
(
line
.
startsWith
(
"Encoding"
))
)
return
line
.
stripPrefix
(
"Encoding\t"
)
return
null
// Could be default Sanger with a warning in the log
}
def
getSummary
:
Json
=
{
val
subfixs
=
Map
(
"plot_duplication_levels"
->
"Images/duplication_levels.png"
,
"plot_kmer_profiles"
->
"Images/kmer_profiles.png"
,
"plot_per_base_gc_content"
->
"Images/per_base_gc_content.png"
,
"plot_per_base_n_content"
->
"Images/per_base_n_content.png"
,
"plot_per_base_quality"
->
"Images/per_base_quality.png"
,
"plot_per_base_sequence_content"
->
"Images/per_base_sequence_content.png"
,
"plot_per_sequence_gc_content"
->
"Images/per_sequence_gc_content.png"
,
"plot_per_sequence_quality"
->
"Images/per_sequence_quality.png"
,
"plot_sequence_length_distribution"
->
"Images/sequence_length_distribution.png"
,
"fastqc_data"
->
"fastqc_data.txt"
)
val
dir
=
output
.
getAbsolutePath
.
stripSuffix
(
".zip"
)
+
"/"
var
outputMap
:
Map
[
String
,
Map
[
String
,
String
]]
=
Map
()
for
((
k
,
v
)
<-
subfixs
)
outputMap
+=
(
k
->
Map
(
"path"
->
(
dir
+
v
)))
val
temp
=
(
""
:=
outputMap
)
->:
jEmptyObject
return
temp
.
fieldOrEmptyObject
(
""
)
}
}
object
Fastqc
{
def
apply
(
root
:
Configurable
,
fastqfile
:
File
,
outDir
:
String
)
:
Fastqc
=
{
val
fastqcCommand
=
new
Fastqc
(
root
)
fastqcCommand
.
fastqfile
=
fastqfile
var
filename
:
String
=
fastqfile
.
getName
()
if
(
filename
.
endsWith
(
".gz"
))
filename
=
filename
.
substring
(
0
,
filename
.
size
-
3
)
if
(
filename
.
endsWith
(
".gzip"
))
filename
=
filename
.
substring
(
0
,
filename
.
size
-
5
)
if
(
filename
.
endsWith
(
".fastq"
))
filename
=
filename
.
substring
(
0
,
filename
.
size
-
6
)
//if (filename.endsWith(".fq")) filename = filename.substring(0,filename.size - 3)
fastqcCommand
.
output
=
new
File
(
outDir
+
"/"
+
filename
+
"_fastqc.zip"
)
fastqcCommand
.
afterGraph
return
fastqcCommand
}
}
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
0 → 100644
View file @
13430b2e
package
nl.lumc.sasc.biopet.pipelines.flexiprep
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Argument
}
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.
{
Gzip
,
Pbzip2
,
Md5sum
,
Zcat
,
Seqstat
}
import
nl.lumc.sasc.biopet.scripts.
{
FastqSync
,
FastqcToContams
}
class
Flexiprep
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
null
)
@Input
(
doc
=
"R1 fastq file (gzipped allowed)"
,
shortName
=
"R1"
,
required
=
true
)
var
input_R1
:
File
=
_
@Input
(
doc
=
"R2 fastq file (gzipped allowed)"
,
shortName
=
"R2"
,
required
=
false
)
var
input_R2
:
File
=
_
@Argument
(
doc
=
"Skip Trim fastq files"
,
shortName
=
"skiptrim"
,
required
=
false
)
var
skipTrim
:
Boolean
=
config
(
"skiptrim"
,
default
=
false
)
@Argument
(
doc
=
"Skip Clip fastq files"
,
shortName
=
"skipclip"
,
required
=
false
)
var
skipClip
:
Boolean
=
config
(
"skipclip"
,
default
=
false
)
@Argument
(
doc
=
"Sample name"
,
shortName
=
"sample"
,
required
=
true
)
var
sampleName
:
String
=
_
@Argument
(
doc
=
"Library name"
,
shortName
=
"library"
,
required
=
true
)
var
libraryName
:
String
=
_
var
paired
:
Boolean
=
(
input_R2
!=
null
)
var
R1_ext
:
String
=
_
var
R2_ext
:
String
=
_
var
R1_name
:
String
=
_
var
R2_name
:
String
=
_
var
fastqc_R1
:
Fastqc
=
_
var
fastqc_R2
:
Fastqc
=
_
var
fastqc_R1_after
:
Fastqc
=
_
var
fastqc_R2_after
:
Fastqc
=
_
val
summary
=
new
FlexiprepSummary
(
this
)
def
init
()
{
if
(
input_R1
==
null
)
throw
new
IllegalStateException
(
"Missing R1 on flexiprep module"
)
if
(
outputDir
==
null
)
throw
new
IllegalStateException
(
"Missing Output directory on flexiprep module"
)
if
(
sampleName
==
null
)
throw
new
IllegalStateException
(
"Missing Sample name on flexiprep module"
)
if
(
libraryName
==
null
)
throw
new
IllegalStateException
(
"Missing Library name on flexiprep module"
)
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
paired
=
(
input_R2
!=
null
)
if
(
input_R1
.
endsWith
(
".gz"
))
R1_name
=
input_R1
.
getName
.
substring
(
0
,
input_R1
.
getName
.
lastIndexOf
(
".gz"
))
else
if
(
input_R1
.
endsWith
(
".gzip"
))
R1_name
=
input_R1
.
getName
.
substring
(
0
,
input_R1
.
getName
.
lastIndexOf
(
".gzip"
))
else
R1_name
=
input_R1
.
getName
R1_ext
=
R1_name
.
substring
(
R1_name
.
lastIndexOf
(
"."
),
R1_name
.
size
)
R1_name
=
R1_name
.
substring
(
0
,
R1_name
.
lastIndexOf
(
R1_ext
))
if
(
paired
)
{
if
(
input_R2
.
endsWith
(
".gz"
))
R2_name
=
input_R2
.
getName
.
substring
(
0
,
input_R2
.
getName
.
lastIndexOf
(
".gz"
))
else
if
(
input_R2
.
endsWith
(
".gzip"
))
R2_name
=
input_R2
.
getName
.
substring
(
0
,
input_R2
.
getName
.
lastIndexOf
(
".gzip"
))
else
R2_name
=
input_R2
.
getName
R2_ext
=
R2_name
.
substring
(
R2_name
.
lastIndexOf
(
"."
),
R2_name
.
size
)
R2_name
=
R2_name
.
substring
(
0
,
R2_name
.
lastIndexOf
(
R2_ext
))
}
summary
.
out
=
outputDir
+
sampleName
+
"-"
+
libraryName
+
".qc.summary.json"
}
def
biopetScript
()
{
runInitialJobs
()
val
out
=
if
(
paired
)
runTrimClip
(
outputFiles
(
"fastq_input_R1"
),
outputFiles
(
"fastq_input_R2"
),
outputDir
)
else
runTrimClip
(
outputFiles
(
"fastq_input_R1"
),
outputDir
)
val
R1_files
=
for
((
k
,
v
)
<-
outputFiles
if
k
.
endsWith
(
"output_R1"
))
yield
v
val
R2_files
=
for
((
k
,
v
)
<-
outputFiles
if
k
.
endsWith
(
"output_R2"
))
yield
v
runFinalize
(
R1_files
.
toList
,
R2_files
.
toList
)
}
def
runInitialJobs
()
{
outputFiles
+=
(
"fastq_input_R1"
->
extractIfNeeded
(
input_R1
,
outputDir
))
if
(
paired
)
outputFiles
+=
(
"fastq_input_R2"
->
extractIfNeeded
(
input_R2
,
outputDir
))
fastqc_R1
=
Fastqc
(
this
,
input_R1
,
outputDir
+
"/"
+
R1_name
+
".fastqc/"
)
add
(
fastqc_R1
)
summary
.
addFastqc
(
fastqc_R1
)
outputFiles
+=
(
"fastqc_R1"
->
fastqc_R1
.
output
)
outputFiles
+=
(
"contams_R1"
->
getContams
(
fastqc_R1
,
R1_name
))
val
md5sum_R1
=
Md5sum
(
this
,
input_R1
,
outputDir
)
add
(
md5sum_R1
)
summary
.
addMd5sum
(
md5sum_R1
,
R2
=
false
,
after
=
false
)
if
(
paired
)
{
fastqc_R2
=
Fastqc
(
this
,
input_R2
,
outputDir
+
"/"
+
R2_name
+
".fastqc/"
)
add
(
fastqc_R2
)
summary
.
addFastqc
(
fastqc_R2
,
R2
=
true
)
outputFiles
+=
(
"fastqc_R2"
->
fastqc_R2
.
output
)
outputFiles
+=
(
"contams_R2"
->
getContams
(
fastqc_R2
,
R2_name
))
val
md5sum_R2
=
Md5sum
(
this
,
input_R2
,
outputDir
)
add
(
md5sum_R2
)
summary
.
addMd5sum
(
md5sum_R2
,
R2
=
true
,
after
=
false
)
}
}
def
getContams
(
fastqc
:
Fastqc
,
pairname
:
String
)
:
File
=
{
val
fastqcToContams
=
new
FastqcToContams
(
this
)
fastqcToContams
.
fastqc_output
=
fastqc
.
output
fastqcToContams
.
out
=
new
File
(
outputDir
+
pairname
+
".contams.txt"
)
fastqcToContams
.
contams_file
=
fastqc
.
contaminants
add
(
fastqcToContams
)
return
fastqcToContams
.
out
}
def
runTrimClip
(
R1_in
:
File
,
outDir
:
String
,
chunk
:
String
)
:
(
File
,
File
,
List
[
File
])
=
{
runTrimClip
(
R1_in
,
new
File
(
""
),
outDir
,
chunk
)
}
def
runTrimClip
(
R1_in
:
File
,
outDir
:
String
)
:
(
File
,
File
,
List
[
File
])
=
{
runTrimClip
(
R1_in
,
new
File
(
""
),
outDir
,
""
)
}
def
runTrimClip
(
R1_in
:
File
,
R2_in
:
File
,
outDir
:
String
)
:
(
File
,
File
,
List
[
File
])
=
{
runTrimClip
(
R1_in
,
R2_in
,
outDir
,
""
)
}
def
runTrimClip
(
R1_in
:
File
,
R2_in
:
File
,
outDir
:
String
,
chunkarg
:
String
)
:
(
File
,
File
,
List
[
File
])
=
{
val
chunk
=
if
(
chunkarg
.
isEmpty
||
chunkarg
.
endsWith
(
"_"
))
chunkarg
else
chunkarg
+
"_"
var
results
:
Map
[
String
,
File
]
=
Map
()
var
R1
:
File
=
new
File
(
R1_in
)
var
R2
:
File
=
new
File
(
R2_in
)
var
deps
:
List
[
File
]
=
if
(
paired
)
List
(
R1
,
R2
)
else
List
(
R1
)
val
seqtkSeq_R1
=
SeqtkSeq
.
apply
(
this
,
R1
,
swapExt
(
outDir
,
R1
,
R1_ext
,
".sanger"
+
R1_ext
),
fastqc_R1
)
add
(
seqtkSeq_R1
,
isIntermediate
=
true
)
R1
=
seqtkSeq_R1
.
output
deps
::=
R1
if
(
paired
)
{
val
seqtkSeq_R2
=
SeqtkSeq
.
apply
(
this
,
R2
,
swapExt
(
outDir
,
R2
,
R2_ext
,
".sanger"
+
R2_ext
),
fastqc_R2
)
add
(
seqtkSeq_R2
,
isIntermediate
=
true
)
R2
=
seqtkSeq_R2
.
output
deps
::=
R2
}
val
seqstat_R1
=
Seqstat
(
this
,
R1
,
outDir
)
add
(
seqstat_R1
,
isIntermediate
=
true
)
summary
.
addSeqstat
(
seqstat_R1
,
R2
=
false
,
after
=
false
,
chunk
)
if
(
paired
)
{
val
seqstat_R2
=
Seqstat
(
this
,
R2
,
outDir
)
add
(
seqstat_R2
,
isIntermediate
=
true
)
summary
.
addSeqstat
(
seqstat_R2
,
R2
=
true
,
after
=
false
,
chunk
)
}
if
(!
skipClip
)
{
// Adapter clipping
val
cutadapt_R1
=
Cutadapt
(
this
,
R1
,
swapExt
(
outDir
,
R1
,
R1_ext
,
".clip"
+
R1_ext
))
if
(
outputFiles
.
contains
(
"contams_R1"
))
cutadapt_R1
.
contams_file
=
outputFiles
(
"contams_R1"
)
add
(
cutadapt_R1
,
isIntermediate
=
true
)
summary
.
addCutadapt
(
cutadapt_R1
,
R2
=
false
,
chunk
)
R1
=
cutadapt_R1
.
fastq_output
deps
::=
R1
outputFiles
+=
(
"cutadapt_R1_stats"
->
cutadapt_R1
.
stats_output
)
if
(
paired
)
{
val
cutadapt_R2
=
Cutadapt
(
this
,
R2
,
swapExt
(
outDir
,
R2
,
R2_ext
,
".clip"
+
R2_ext
))
outputFiles
+=
(
"cutadapt_R2_stats"
->
cutadapt_R2
.
stats_output
)
if
(
outputFiles
.
contains
(
"contams_R2"
))
cutadapt_R2
.
contams_file
=
outputFiles
(
"contams_R2"
)
add
(
cutadapt_R2
,
isIntermediate
=
true
)
summary
.
addCutadapt
(
cutadapt_R2
,
R2
=
true
,
chunk
)
R2
=
cutadapt_R2
.
fastq_output
deps
::=
R2
val
fastqSync
=
FastqSync
(
this
,
cutadapt_R1
.
fastq_input
,
cutadapt_R1
.
fastq_output
,
cutadapt_R2
.
fastq_output
,
swapExt
(
outDir
,
R1
,
R1_ext
,
".sync"
+
R1_ext
),
swapExt
(
outDir
,
R2
,
R2_ext
,
".sync"
+
R2_ext
),
swapExt
(
outDir
,
R1
,
R1_ext
,
".sync.stats"
))
fastqSync
.
deps
:::=
deps
add
(
fastqSync
,
isIntermediate
=
true
)
summary
.
addFastqcSync
(
fastqSync
,
chunk
)
outputFiles
+=
(
"syncStats"
->
fastqSync
.
output_stats
)
R1
=
fastqSync
.
output_R1
R2
=
fastqSync
.
output_R2
deps
:::=
R1
::
R2
::
Nil
}
}
if
(!
skipTrim
)
{
// Quality trimming
val
sickle
=
new
Sickle
(
this
)
sickle
.
input_R1
=
R1
sickle
.
output_R1
=
swapExt
(
outDir
,
R1
,
R1_ext
,
".trim"
+
R1_ext
)
if
(
paired
)
{
sickle
.
input_R2
=
R2
sickle
.
output_R2
=
swapExt
(
outDir
,
R2
,
R2_ext
,
".trim"
+
R2_ext
)
sickle
.
output_singles
=
swapExt
(
outDir
,
R2
,
R2_ext
,
".trim.singles"
+
R1_ext
)
}
sickle
.
output_stats
=
swapExt
(
outDir
,
R1
,
R1_ext
,
".trim.stats"
)
sickle
.
deps
=
deps
add
(
sickle
,
isIntermediate
=
true
)
summary
.
addSickle
(
sickle
,
chunk
)
R1
=
sickle
.
output_R1
if
(
paired
)
R2
=
sickle
.
output_R2
}
val
seqstat_R1_after
=
Seqstat
(
this
,
R1
,
outDir
)
seqstat_R1_after
.
deps
=
deps
add
(
seqstat_R1_after
)
summary
.
addSeqstat
(
seqstat_R1_after
,
R2
=
false
,
after
=
true
,
chunk
)
if
(
paired
)
{
val
seqstat_R2_after
=
Seqstat
(
this
,
R2
,
outDir
)
seqstat_R2_after
.
deps
=
deps
add
(
seqstat_R2_after
)
summary
.
addSeqstat
(
seqstat_R2_after
,
R2
=
true
,
after
=
true
,
chunk
)
}
outputFiles
+=
(
chunk
+
"output_R1"
->
R1
)
if
(
paired
)
outputFiles
+=
(
chunk
+
"output_R2"
->
R2
)
return
(
R1
,
R2
,
deps
)
}
def
runFinalize
(
fastq_R1
:
List
[
File
],
fastq_R2
:
List
[
File
])
{
if
(
fastq_R1
.
length
!=
fastq_R2
.
length
&&
paired
)
throw
new
IllegalStateException
(
"R1 and R2 file number is not the same"
)
val
R1
=
new
File
(
outputDir
+
R1_name
+
".qc"
+
R1_ext
+
".gz"
)
val
R2
=
new
File
(
outputDir
+
R2_name
+
".qc"
+
R2_ext
+
".gz"
)
add
(
Gzip
(
this
,
fastq_R1
,
R1
))
if
(
paired
)
add
(
Gzip
(
this
,
fastq_R2
,
R2
))
outputFiles
+=
(
"output_R1_gzip"
->
R1
)
if
(
paired
)
outputFiles
+=
(
"output_R2_gzip"
->
R2
)
if
(!
skipTrim
||
!
skipClip
)
{
val
md5sum_R1
=
Md5sum
(
this
,
R1
,
outputDir
)
add
(
md5sum_R1
)
summary
.
addMd5sum
(
md5sum_R1
,
R2
=
false
,
after
=
true
)
if
(
paired
)
{
val
md5sum_R2
=
Md5sum
(
this
,
R2
,
outputDir
)
add
(
md5sum_R2
)
summary
.
addMd5sum
(
md5sum_R2
,
R2
=
true
,
after
=
true
)
}
fastqc_R1_after
=
Fastqc
(
this
,
R1
,
outputDir
+
"/"
+
R1_name
+
".qc.fastqc/"
)
add
(
fastqc_R1_after
)
summary
.
addFastqc
(
fastqc_R1_after
,
after
=
true
)
if
(
paired
)
{
fastqc_R2_after
=
Fastqc
(
this
,
R2
,
outputDir
+
"/"
+
R2_name
+
".qc.fastqc/"
)
add
(
fastqc_R2_after
)
summary
.
addFastqc
(
fastqc_R2_after
,
R2
=
true
,
after
=
true
)
}
}
add
(
summary
)
}
def
extractIfNeeded
(
file
:
File
,
runDir
:
String
)
:
File
=
{
if
(
file
.
getName
().
endsWith
(
".gz"
)
||
file
.
getName
().
endsWith
(
".gzip"
))
{
var
newFile
:
File
=
swapExt
(
runDir
,
file
,
".gz"
,
""
)
if
(
file
.
getName
().
endsWith
(
".gzip"
))
newFile
=
swapExt
(
runDir
,
file
,
".gzip"
,
""
)
val
zcatCommand
=
Zcat
(
this
,
file
,
newFile
)
zcatCommand
.
isIntermediate
=
true
add
(
zcatCommand
)
return
newFile
}
else
if
(
file
.
getName
().
endsWith
(
".bz2"
))
{
var
newFile
=
swapExt
(
runDir
,
file
,
".bz2"
,
""
)
val
pbzip2
=
Pbzip2
(
this
,
file
,
newFile
)
pbzip2
.
isIntermediate
=
true
add
(
pbzip2
)
return
newFile
}
else
return
file
}
}
object
Flexiprep
extends
PipelineCommand
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala
0 → 100644
View file @
13430b2e
package
nl.lumc.sasc.biopet.pipelines.flexiprep
import
java.io.PrintWriter
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.
{
Md5sum
,
Seqstat
}
import
nl.lumc.sasc.biopet.scripts.
{
FastqSync
}
import
org.broadinstitute.gatk.queue.function.InProcessFunction
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
java.io.File
import
argonaut._
,
Argonaut
.
_
import
scalaz._
,
Scalaz
.
_