Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
7d8f335d
Commit
7d8f335d
authored
Aug 29, 2014
by
Peter van 't Hof
Browse files
Added tagcounting to Sage pipeline
parent
ce8bb5c9
Changes
6
Hide whitespace changes
Inline
Side-by-side
biopet-framework/nbactions.xml
View file @
7d8f335d
...
...
@@ -10,7 +10,7 @@
<goal>
org.codehaus.mojo:exec-maven-plugin:1.2.1:exec
</goal>
</goals>
<properties>
<exec.args>
-classpath %classpath nl.lumc.sasc.biopet.
core.apps.BedToInterval /data/DIV5/SASC/project-062-snake/analysis/deps/custom_region.bed /data/DIV5/SASC/project-062-snake/analysis/runs/biopet/110/run_1/110-1.dedup.bam /home/pjvan_thof/pipelines/test/test.interval
</exec.args>
<exec.args>
-classpath %classpath nl.lumc.sasc.biopet.
pipelines.sage.CreateTagCounts -I raw.count -taglib tag.lib -sense sense.counts -allsense all.sense.counts -antisense antisense.counts -allantisense all.antisense.counts
</exec.args>
<exec.executable>
java
</exec.executable>
<exec.workingdir>
/home/pjvan_thof/pipelines/test
</exec.workingdir>
</properties>
...
...
@@ -25,7 +25,7 @@
<goal>
org.codehaus.mojo:exec-maven-plugin:1.2.1:exec
</goal>
</goals>
<properties>
<exec.args>
-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.
core.apps.BedToInterval /data/DIV5/SASC/project-062-snake/analysis/deps/custom_region.bed /data/DIV5/SASC/project-062-snake/analysis/runs/biopet/110/run_1/110-1.dedup.bam /home/pjvan_thof/pipelines/test/test.interval
</exec.args>
<exec.args>
-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.
pipelines.sage.CreateTagCounts -I raw.count -taglib tag.lib -sense sense.counts -allsense all.sense.counts -antisense antisense.counts -allantisense all.antisense.counts
</exec.args>
<exec.executable>
java
</exec.executable>
<jpda.listen>
true
</jpda.listen>
<exec.workingdir>
/home/pjvan_thof/pipelines/test
</exec.workingdir>
...
...
@@ -41,7 +41,7 @@
<goal>
org.codehaus.mojo:exec-maven-plugin:1.2.1:exec
</goal>
</goals>
<properties>
<exec.args>
-classpath %classpath nl.lumc.sasc.biopet.
core.apps.BedToInterval /data/DIV5/SASC/project-062-snake/analysis/deps/custom_region.bed /data/DIV5/SASC/project-062-snake/analysis/runs/biopet/110/run_1/110-1.dedup.bam /home/pjvan_thof/pipelines/test/test.interval
</exec.args>
<exec.args>
-classpath %classpath nl.lumc.sasc.biopet.
pipelines.sage.CreateTagCounts -I raw.count -taglib tag.lib -sense sense.counts -allsense all.sense.counts -antisense antisense.counts -allantisense all.antisense.counts
</exec.args>
<exec.executable>
java
</exec.executable>
<exec.workingdir>
/home/pjvan_thof/pipelines/test
</exec.workingdir>
</properties>
...
...
biopet-framework/pom.xml
View file @
7d8f335d
...
...
@@ -17,7 +17,13 @@
<sting.binary-dist.name>
SASC-Pipelines
</sting.binary-dist.name>
<app.main.class>
nl.lumc.sasc.biopet.core.BiopetExecutable
</app.main.class>
</properties>
<repositories>
<repository>
<id>
biojava-maven-repo
</id>
<name>
BioJava repository
</name>
<url>
http://www.biojava.org/download/maven/
</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>
org.testng
</groupId>
...
...
@@ -40,6 +46,16 @@
<artifactId>
argonaut_2.11
</artifactId>
<version>
6.1-M4
</version>
</dependency>
<dependency>
<groupId>
org.biojava
</groupId>
<artifactId>
biojava3-core
</artifactId>
<version>
3.1.0
</version>
</dependency>
<dependency>
<groupId>
org.biojava
</groupId>
<artifactId>
biojava3-sequencing
</artifactId>
<version>
3.1.0
</version>
</dependency>
</dependencies>
<build>
<resources>
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/CountFastq.scala
0 → 100644
View file @
7d8f335d
package
nl.lumc.sasc.biopet.pipelines.sage
import
java.io.File
import
java.io.PrintWriter
import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
org.biojava3.sequencing.io.fastq.
{
SangerFastqReader
,
StreamListener
,
Fastq
}
import
scala.collection.JavaConversions._
import
scala.collection.SortedMap
import
scala.collection.mutable.Map
import
java.io.FileReader
class
CountFastq
(
val
root
:
Configurable
)
extends
BiopetJavaCommandLineFunction
{
javaMainClass
=
getClass
.
getName
@Input
(
doc
=
"Input fasta"
,
shortName
=
"input"
,
required
=
true
)
var
input
:
File
=
_
@Output
(
doc
=
"Output tag library"
,
shortName
=
"output"
,
required
=
true
)
var
output
:
File
=
_
override
val
defaultVmem
=
"8G"
memoryLimit
=
Option
(
4.0
)
override
def
commandLine
=
super
.
commandLine
+
required
(
"-I"
,
input
)
+
required
(
"-o"
,
output
)
}
object
CountFastq
{
var
input
:
File
=
_
var
output
:
File
=
_
/**
* @param args the command line arguments
*/
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
for
(
t
<-
0
until
args
.
size
)
{
args
(
t
)
match
{
case
"-I"
=>
input
=
new
File
(
args
(
t
+
1
))
case
"-o"
=>
output
=
new
File
(
args
(
t
+
1
))
case
_
=>
}
}
if
(
input
==
null
||
!
input
.
exists
)
throw
new
IllegalStateException
(
"Input file not found, use -I"
)
if
(
output
==
null
)
throw
new
IllegalStateException
(
"Output file not found, use -o"
)
val
counts
:
Map
[
String
,
Long
]
=
Map
()
val
reader
=
new
SangerFastqReader
var
count
=
0
System
.
err
.
println
(
"Reading fastq file: "
+
input
)
val
fileReader
=
new
FileReader
(
input
)
reader
.
stream
(
fileReader
,
new
StreamListener
{
def
fastq
(
fastq
:
Fastq
)
{
val
seq
=
fastq
.
getSequence
if
(
counts
.
contains
(
seq
))
counts
(
seq
)
+=
1
else
counts
+=
(
seq
->
1
)
count
+=
1
if
(
count
%
1000000
==
0
)
System
.
err
.
println
(
count
+
" sequences done"
)
}
})
System
.
err
.
println
(
count
+
" sequences done"
)
System
.
err
.
println
(
"Sorting"
)
val
sortedCounts
:
SortedMap
[
String
,
Long
]
=
SortedMap
(
counts
.
toArray
:_
*
)
System
.
err
.
println
(
"Writting outputfile: "
+
output
)
val
writer
=
new
PrintWriter
(
output
)
for
((
seq
,
count
)
<-
sortedCounts
)
{
writer
.
println
(
seq
+
"\t"
+
count
)
}
writer
.
close
}
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/CreateDeepsageLibrary.scala
0 → 100644
View file @
7d8f335d
package
nl.lumc.sasc.biopet.pipelines.sage
import
java.io.File
import
java.io.PrintWriter
import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.biojava3.core.sequence.DNASequence
import
org.biojava3.core.sequence.io.FastaReaderHelper
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
scala.collection.SortedMap
import
scala.collection.mutable.
{
Map
,
Set
}
import
scala.collection.JavaConversions._
class
CreateDeepsageLibrary
(
val
root
:
Configurable
)
extends
BiopetJavaCommandLineFunction
{
javaMainClass
=
getClass
.
getName
@Input
(
doc
=
"Input fasta"
,
shortName
=
"input"
,
required
=
true
)
var
input
:
File
=
_
@Output
(
doc
=
"Output tag library"
,
shortName
=
"output"
,
required
=
true
)
var
output
:
File
=
_
@Output
(
doc
=
"Output no tags"
,
shortName
=
"noTagsOutput"
,
required
=
false
)
var
noTagsOutput
:
File
=
_
@Output
(
doc
=
"Output no anti tags library"
,
shortName
=
"noAntiTagsOutput"
,
required
=
false
)
var
noAntiTagsOutput
:
File
=
_
@Output
(
doc
=
"Output file all genes"
,
shortName
=
"allGenes"
,
required
=
false
)
var
allGenesOutput
:
File
=
_
var
tag
:
String
=
config
(
"tag"
,
default
=
"CATG"
)
var
length
:
Option
[
Int
]
=
config
(
"length"
,
default
=
17
)
override
val
defaultVmem
=
"8G"
memoryLimit
=
Option
(
4.0
)
override
def
commandLine
=
super
.
commandLine
+
required
(
"-I"
,
input
)
+
optional
(
"-tag"
,
tag
)
+
optional
(
"-length"
,
length
)
+
optional
(
"-notag"
,
noTagsOutput
)
+
optional
(
"-noantitag"
,
noAntiTagsOutput
)
+
required
(
"-o"
,
output
)
}
object
CreateDeepsageLibrary
{
var
tag
=
"CATG"
var
length
=
17
var
input
:
File
=
_
var
noTagsOutput
:
File
=
_
var
noAntiTagsOutput
:
File
=
_
var
allGenesOutput
:
File
=
_
var
output
:
File
=
_
lazy
val
tagRegex
=
(
tag
+
"[CATG]{"
+
length
+
"}"
).
r
var
geneRegex
=
"""ENSG[0-9]{11}"""
.
r
val
tagGenesMap
:
Map
[
String
,
TagGenes
]
=
Map
()
val
allGenes
:
Set
[
String
]
=
Set
()
val
tagGenes
:
Set
[
String
]
=
Set
()
val
antiTagGenes
:
Set
[
String
]
=
Set
()
class
TagGenes
{
val
firstTag
:
Set
[
String
]
=
Set
()
val
allTags
:
Set
[
String
]
=
Set
()
val
firstAntiTag
:
Set
[
String
]
=
Set
()
val
allAntiTags
:
Set
[
String
]
=
Set
()
}
class
TagResult
(
val
firstTag
:
String
,
val
allTags
:
List
[
String
],
val
firstAntiTag
:
String
,
val
allAntiTags
:
List
[
String
]
)
/**
* @param args the command line arguments
*/
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
for
(
t
<-
0
until
args
.
size
)
{
args
(
t
)
match
{
case
"-I"
=>
input
=
new
File
(
args
(
t
+
1
))
case
"-tag"
=>
tag
=
args
(
t
+
1
)
case
"-length"
=>
length
=
args
(
t
+
1
).
toInt
case
"-o"
=>
output
=
new
File
(
args
(
t
+
1
))
case
"-notag"
=>
noTagsOutput
=
new
File
(
args
(
t
+
1
))
case
"-noantitag"
=>
noAntiTagsOutput
=
new
File
(
args
(
t
+
1
))
case
"-allgenes"
=>
allGenesOutput
=
new
File
(
args
(
t
+
1
))
case
_
=>
}
}
if
(
input
==
null
||
!
input
.
exists
)
throw
new
IllegalStateException
(
"Input file not found, use -I"
)
if
(
output
==
null
)
throw
new
IllegalStateException
(
"Output file not found, use -o"
)
var
count
=
0
System
.
err
.
println
(
"Reading fasta file"
)
val
reader
=
FastaReaderHelper
.
readFastaDNASequence
(
input
)
System
.
err
.
println
(
"Finding tags"
)
for
((
name
,
seq
)
<-
reader
)
{
getTags
(
name
,
seq
)
count
+=
1
if
(
count
%
10000
==
0
)
System
.
err
.
println
(
count
+
" transcripts done"
)
}
System
.
err
.
println
(
count
+
" transcripts done"
)
System
.
err
.
println
(
"Start sorting tags"
)
val
tagGenesMapSorted
:
SortedMap
[
String
,
TagGenes
]
=
SortedMap
(
tagGenesMap
.
toArray
:_
*
)
System
.
err
.
println
(
"Writting output files"
)
val
writer
=
new
PrintWriter
(
output
)
writer
.
println
(
"#tag\tfirstTag\tAllTags\tFirstAntiTag\tAllAntiTags"
)
for
((
tag
,
genes
)
<-
tagGenesMapSorted
)
{
val
line
=
tag
+
"\t"
+
genes
.
firstTag
.
mkString
(
","
)
+
"\t"
+
genes
.
allTags
.
mkString
(
","
)
+
"\t"
+
genes
.
firstAntiTag
.
mkString
(
","
)
+
"\t"
+
genes
.
allAntiTags
.
mkString
(
","
)
writer
.
println
(
line
)
}
writer
.
close
()
if
(
noTagsOutput
!=
null
)
{
val
writer
=
new
PrintWriter
(
noTagsOutput
)
for
(
gene
<-
allGenes
if
!
tagGenes
.
contains
(
gene
))
{
writer
.
println
(
gene
)
}
writer
.
close
}
if
(
noAntiTagsOutput
!=
null
)
{
val
writer
=
new
PrintWriter
(
noAntiTagsOutput
)
for
(
gene
<-
allGenes
if
!
antiTagGenes
.
contains
(
gene
))
{
writer
.
println
(
gene
)
}
writer
.
close
}
if
(
allGenesOutput
!=
null
)
{
val
writer
=
new
PrintWriter
(
allGenesOutput
)
for
(
gene
<-
allGenes
)
{
writer
.
println
(
gene
)
}
writer
.
close
}
}
def
addTagresultToTaglib
(
name
:
String
,
tagResult
:
TagResult
)
{
val
id
=
name
.
split
(
" "
).
head
//.stripPrefix("hg19_ensGene_")
val
geneID
=
geneRegex
.
findFirstIn
(
name
).
getOrElse
(
"unknown_gene"
)
allGenes
.
add
(
geneID
)
if
(
tagResult
.
firstTag
!=
null
)
{
if
(!
tagGenesMap
.
contains
(
tagResult
.
firstTag
))
tagGenesMap
+=
(
tagResult
.
firstTag
->
new
TagGenes
)
tagGenesMap
(
tagResult
.
firstTag
).
firstTag
.
add
(
geneID
)
tagGenes
.
add
(
geneID
)
}
for
(
tag
<-
tagResult
.
allTags
)
{
if
(!
tagGenesMap
.
contains
(
tag
))
tagGenesMap
+=
(
tag
->
new
TagGenes
)
tagGenesMap
(
tag
).
allTags
.
add
(
geneID
)
}
if
(
tagResult
.
firstAntiTag
!=
null
)
{
if
(!
tagGenesMap
.
contains
(
tagResult
.
firstAntiTag
))
tagGenesMap
+=
(
tagResult
.
firstAntiTag
->
new
TagGenes
)
tagGenesMap
(
tagResult
.
firstAntiTag
).
firstAntiTag
.
add
(
geneID
)
antiTagGenes
.
add
(
geneID
)
}
for
(
tag
<-
tagResult
.
allAntiTags
)
{
if
(!
tagGenesMap
.
contains
(
tag
))
tagGenesMap
+=
(
tag
->
new
TagGenes
)
tagGenesMap
(
tag
).
allAntiTags
.
add
(
geneID
)
}
}
def
getTags
(
name
:
String
,
seq
:
DNASequence
)
:
TagResult
=
{
val
allTags
:
List
[
String
]
=
for
(
tag
<-
tagRegex
.
findAllMatchIn
(
seq
.
getSequenceAsString
).
toList
)
yield
tag
.
toString
val
firstTag
=
if
(
allTags
.
isEmpty
)
null
else
allTags
.
last
val
allAntiTags
:
List
[
String
]
=
for
(
tag
<-
tagRegex
.
findAllMatchIn
(
seq
.
getReverseComplement
.
getSequenceAsString
).
toList
)
yield
tag
.
toString
val
firstAntiTag
=
if
(
allAntiTags
.
isEmpty
)
null
else
allAntiTags
.
head
val
result
=
new
TagResult
(
firstTag
,
allTags
,
firstAntiTag
,
allAntiTags
)
addTagresultToTaglib
(
name
,
result
)
return
result
}
}
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/CreateTagCounts.scala
0 → 100644
View file @
7d8f335d
package
nl.lumc.sasc.biopet.pipelines.sage
import
java.io.File
import
java.io.PrintWriter
import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
scala.io.Source
import
scala.collection.mutable.Map
import
scala.collection.SortedMap
class
CreateTagCounts
(
val
root
:
Configurable
)
extends
BiopetJavaCommandLineFunction
{
javaMainClass
=
getClass
.
getName
@Input
(
doc
=
"Raw count file"
,
shortName
=
"input"
,
required
=
true
)
var
input
:
File
=
_
@Input
(
doc
=
"tag library"
,
shortName
=
"taglib"
,
required
=
true
)
var
tagLib
:
File
=
_
@Output
(
doc
=
"Sense count file"
,
shortName
=
"sense"
,
required
=
true
)
var
countSense
:
File
=
_
@Output
(
doc
=
"Sense all coun filet"
,
shortName
=
"allsense"
,
required
=
true
)
var
countAllSense
:
File
=
_
@Output
(
doc
=
"AntiSense count file"
,
shortName
=
"antisense"
,
required
=
true
)
var
countAntiSense
:
File
=
_
@Output
(
doc
=
"AntiSense all count file"
,
shortName
=
"allantisense"
,
required
=
true
)
var
countAllAntiSense
:
File
=
_
override
val
defaultVmem
=
"8G"
memoryLimit
=
Option
(
4.0
)
override
def
commandLine
=
super
.
commandLine
+
required
(
"-I"
,
input
)
+
required
(
"-taglib"
,
tagLib
)
+
optional
(
"-sense"
,
countSense
)
+
optional
(
"-allsense"
,
countAllSense
)
+
optional
(
"-antisense"
,
countAntiSense
)
+
optional
(
"-allantisense"
,
countAllAntiSense
)
}
object
CreateTagCounts
{
var
input
:
File
=
_
var
tagLib
:
File
=
_
var
countSense
:
File
=
_
var
countAllSense
:
File
=
_
var
countAntiSense
:
File
=
_
var
countAllAntiSense
:
File
=
_
/**
* @param args the command line arguments
*/
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
for
(
t
<-
0
until
args
.
size
)
{
args
(
t
)
match
{
case
"-I"
=>
input
=
new
File
(
args
(
t
+
1
))
case
"-taglib"
=>
tagLib
=
new
File
(
args
(
t
+
1
))
case
"-sense"
=>
countSense
=
new
File
(
args
(
t
+
1
))
case
"-allsense"
=>
countAllSense
=
new
File
(
args
(
t
+
1
))
case
"-antisense"
=>
countAntiSense
=
new
File
(
args
(
t
+
1
))
case
"-allantisense"
=>
countAllAntiSense
=
new
File
(
args
(
t
+
1
))
case
_
=>
}
}
if
(
input
==
null
||
!
input
.
exists
)
throw
new
IllegalStateException
(
"Input file not found, use -I"
)
if
(
tagLib
==
null
)
throw
new
IllegalStateException
(
"Output file not found, use -o"
)
val
rawCounts
:
Map
[
String
,
Long
]
=
Map
()
for
(
line
<-
Source
.
fromFile
(
input
).
getLines
)
{
val
values
=
line
.
split
(
"\t"
)
val
gene
=
values
(
0
)
val
count
=
values
(
1
).
toLong
if
(
rawCounts
.
contains
(
gene
))
rawCounts
(
gene
)
+=
count
else
rawCounts
+=
gene
->
count
}
val
senseCounts
:
Map
[
String
,
Long
]
=
Map
()
val
allSenseCounts
:
Map
[
String
,
Long
]
=
Map
()
val
antiSenseCounts
:
Map
[
String
,
Long
]
=
Map
()
val
allAntiSenseCounts
:
Map
[
String
,
Long
]
=
Map
()
for
(
line
<-
Source
.
fromFile
(
tagLib
).
getLines
if
!
line
.
startsWith
(
"#"
))
{
val
values
=
line
.
split
(
"\t"
)
val
tag
=
values
(
0
)
val
sense
=
values
(
1
)
val
allSense
=
values
(
2
)
val
antiSense
=
if
(
values
.
size
>
3
)
values
(
3
)
else
""
val
allAntiSense
=
if
(
values
.
size
>
4
)
values
(
4
)
else
""
if
(!
sense
.
isEmpty
&&
!
sense
.
contains
(
","
))
{
val
count
=
if
(
rawCounts
.
contains
(
tag
))
rawCounts
(
tag
)
else
0
if
(
senseCounts
.
contains
(
sense
))
senseCounts
(
sense
)
+=
count
else
senseCounts
+=
sense
->
count
}
if
(!
allSense
.
isEmpty
&&
!
allSense
.
contains
(
","
))
{
val
count
=
if
(
rawCounts
.
contains
(
tag
))
rawCounts
(
tag
)
else
0
if
(
allSenseCounts
.
contains
(
allSense
))
allSenseCounts
(
allSense
)
+=
count
else
allSenseCounts
+=
allSense
->
count
}
if
(!
antiSense
.
isEmpty
&&
!
antiSense
.
contains
(
","
))
{
val
count
=
if
(
rawCounts
.
contains
(
tag
))
rawCounts
(
tag
)
else
0
if
(
antiSenseCounts
.
contains
(
antiSense
))
antiSenseCounts
(
antiSense
)
+=
count
else
antiSenseCounts
+=
antiSense
->
count
}
if
(!
allAntiSense
.
isEmpty
&&
!
allAntiSense
.
contains
(
","
))
{
val
count
=
if
(
rawCounts
.
contains
(
tag
))
rawCounts
(
tag
)
else
0
if
(
allAntiSenseCounts
.
contains
(
allAntiSense
))
allAntiSenseCounts
(
allAntiSense
)
+=
count
else
allAntiSenseCounts
+=
allAntiSense
->
count
}
}
def
writeFile
(
file
:
File
,
counts
:
Map
[
String
,
Long
])
{
val
sorted
:
SortedMap
[
String
,
Long
]
=
SortedMap
(
counts
.
toArray
:_
*
)
if
(
file
!=
null
)
{
val
writer
=
new
PrintWriter
(
file
)
for
((
gene
,
count
)
<-
sorted
)
{
if
(
count
>
0
)
writer
.
println
(
gene
+
"\t"
+
count
)
}
writer
.
close
}
}
writeFile
(
countSense
,
senseCounts
)
writeFile
(
countAllSense
,
allSenseCounts
)
writeFile
(
countAntiSense
,
antiSenseCounts
)
writeFile
(
countAllAntiSense
,
allAntiSenseCounts
)
}
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala
View file @
7d8f335d
...
...
@@ -21,9 +21,22 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
@Input
(
doc
=
"squishedCountBed, by suppling this file the auto squish job will be skipped"
,
required
=
false
)
var
squishedCountBed
:
File
=
_
@Input
(
doc
=
"Transcriptome, used for generation of tag library"
,
required
=
false
)
var
transcriptome
:
File
=
_
var
tagsLibrary
:
File
=
_
def
init
()
{
for
(
file
<-
configfiles
)
globalConfig
.
loadConfigFile
(
file
)
if
(
countBed
==
null
&&
squishedCountBed
==
null
)
throw
new
IllegalStateException
(
"No bedfile supplied, please add a countBed or squishedCountBed"
)
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
if
(
countBed
==
null
)
countBed
=
config
(
"count_bed"
)
if
(
squishedCountBed
==
null
)
squishedCountBed
=
config
(
"squished_count_bed"
)
if
(
tagsLibrary
==
null
)
tagsLibrary
=
config
(
"tags_library"
)
if
(
transcriptome
==
null
)
transcriptome
=
config
(
"transcriptome"
)
if
(
transcriptome
==
null
&&
tagsLibrary
==
null
)
throw
new
IllegalStateException
(
"No transcriptome or taglib found"
)
if
(
countBed
==
null
&&
squishedCountBed
==
null
)
throw
new
IllegalStateException
(
"No bedfile supplied, please add a countBed or squishedCountBed"
)
}
def
biopetScript
()
{
...
...
@@ -33,7 +46,16 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
squishedCountBed
=
squishBed
.
output
}
// Tag library creation
if
(
tagsLibrary
==
null
)
{
val
cdl
=
new
CreateDeepsageLibrary
(
this
)
cdl
.
input
=
transcriptome
cdl
.
output
=
outputDir
+
"tablib/tab.lib"
cdl
.
noAntiTagsOutput
=
outputDir
+
"tablib/no_antisense_genes.txt"
cdl
.
noTagsOutput
=
outputDir
+
"tablib/no_sense_genes.txt"
cdl
.
allGenesOutput
=
outputDir
+
"tablib/all_genes.txt"
add
(
cdl
)
tagsLibrary
=
cdl
.
output
}
runSamplesJobs
}
...
...
@@ -56,14 +78,15 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
add
(
mergeSamFiles
)
mergeSamFiles
.
output
}
else
null
val
fastqFile
:
File
=
if
(
library
Bamf
iles
.
size
==
1
)
library
Bamf
iles
.
head
else
if
(
library
Bamf
iles
.
size
>
1
)
{
val
cat
=
Cat
.
apply
(
this
,
library
Bamf
iles
,
sampleDir
+
sampleID
+
".fastq"
)
val
fastqFile
:
File
=
if
(
library
FastqF
iles
.
size
==
1
)
library
FastqF
iles
.
head
else
if
(
library
FastqF
iles
.
size
>
1
)
{
val
cat
=
Cat
.
apply
(
this
,
library
FastqF
iles
,
sampleDir
+
sampleID
+
".fastq"
)
add
(
cat
)
cat
.
output
}
else
null
this
.
addBedtoolsCounts
(
bamFile
,
sampleID
,
sampleDir
)
addBedtoolsCounts
(
bamFile
,
sampleID
,
sampleDir
)
addTablibCounts
(
fastqFile
,
sampleID
,
sampleDir
)
return
outputFiles
}
...
...
@@ -109,9 +132,8 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
addAll
(
mapping
.
functions
)
if
(
config
(
"library_counts"
,
default
=
false
).
getBoolean
)
{
this
.
addBedtoolsCounts
(
mapping
.
outputFiles
(
"finalBamFile"
),
sampleID
+
"-"
+
runID
,
runDir
)
// TODO: Tag counts
addBedtoolsCounts
(
mapping
.
outputFiles
(
"finalBamFile"
),
sampleID
+
"-"
+
runID
,
runDir
)
addTablibCounts
(
prefixFastq
.
output
,
sampleID
+
"-"
+
runID
,
runDir
)
}
outputFiles
+=
(
"FinalBam"
->
mapping
.
outputFiles
(
"finalBamFile"
))
...
...
@@ -120,13 +142,29 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
}
def
addBedtoolsCounts
(
bamFile
:
File
,
outputPrefix
:
String
,
outputDir
:
String
)
{
add
(
BedtoolsCoverage
(
this
,
bamFile
,
squishedCountBed
,
outputDir
+
outputPrefix
+
".sense.count"
,
add
(
BedtoolsCoverage
(
this
,
bamFile
,
squishedCountBed
,
outputDir
+
outputPrefix
+
".
genome.
sense.count
s
"
,
depth
=
false
,
sameStrand
=
true
,
diffStrand
=
false
))
add
(
BedtoolsCoverage
(
this
,
bamFile
,
squishedCountBed
,
outputDir
+
outputPrefix
+
".antisense.count"
,
add
(
BedtoolsCoverage
(
this
,
bamFile
,
squishedCountBed
,
outputDir
+
outputPrefix
+
".
genome.
antisense.count
s
"
,
depth
=
false
,
sameStrand
=
false
,
diffStrand
=
true
))
add
(
BedtoolsCoverage
(
this
,
bamFile
,
squishedCountBed
,
outputDir
+
outputPrefix
+
".count"
,
add
(
BedtoolsCoverage
(
this
,
bamFile
,
squishedCountBed
,
outputDir
+
outputPrefix
+
".
genome.
count
s
"
,
depth
=
false
,
sameStrand
=
false
,
diffStrand
=
false
))
}
def
addTablibCounts
(
fastq
:
File
,
outputPrefix
:
String
,
outputDir
:
String
)
{
val
countFastq
=
new
CountFastq
(
this
)
countFastq
.
input
=
fastq
countFastq
.
output
=
outputDir
+
outputPrefix
+
".raw.counts"
add
(
countFastq
)
val
createTagCounts
=
new
CreateTagCounts
(
this
)
createTagCounts
.
input
=
countFastq
.
output
createTagCounts
.
tagLib
=
tagsLibrary
createTagCounts
.
countSense
=
outputDir
+
outputPrefix
+
".tagcount.sense.counts"
createTagCounts
.
countAllSense
=
outputDir
+
outputPrefix
+
".tagcount.all.sense.counts"
createTagCounts
.
countAntiSense
=
outputDir
+
outputPrefix
+
".tagcount.antisense.counts"
createTagCounts
.
countAllAntiSense
=
outputDir
+
outputPrefix
+
".tagcount.all.antisense.counts"
add
(
createTagCounts
)
}
}
object
Sage
extends
PipelineCommand
{
...
...
Write
Preview