Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
01f3bf84
Commit
01f3bf84
authored
Feb 11, 2016
by
Wai Yi Leung
Browse files
Merge from develop
parents
e1042d1d
2ab5616f
Changes
66
Expand all
Hide whitespace changes
Inline
Side-by-side
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala
View file @
01f3bf84
...
...
@@ -75,7 +75,7 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait {
}
}
override
def
keepMergedFiles
:
Boolean
=
config
(
"keep_merged_files"
,
default
=
false
)
override
def
keepMergedFiles
:
Boolean
=
config
(
"keep_merged_files"
,
default
=
!
useIndelRealigner
)
override
def
summarySettings
=
super
.
summarySettings
+
(
"use_indel_realigner"
->
useIndelRealigner
)
...
...
public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
View file @
01f3bf84
...
...
@@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics
import
java.io.File
import
nl.lumc.sasc.biopet.core.annotations.
{
RibosomalRefFlat
,
AnnotationRefFlat
}
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.core.
{
Reference
,
BiopetFifoPipe
,
PipelineCommand
,
SampleLibraryTag
}
...
...
@@ -31,16 +32,15 @@ class BamMetrics(val root: Configurable) extends QScript
with
SummaryQScript
with
SampleLibraryTag
with
Reference
with
TargetRegions
{
with
TargetRegions
with
AnnotationRefFlat
with
RibosomalRefFlat
{
def
this
()
=
this
(
null
)
@Input
(
doc
=
"Bam File"
,
shortName
=
"BAM"
,
required
=
true
)
var
inputBam
:
File
=
_
/** Settings for CollectRnaSeqMetrics */
var
transcriptRefFlatFile
:
Option
[
File
]
=
config
(
"transcript_refflat"
)
/** return location of summary file */
def
summaryFile
=
(
sampleId
,
libId
)
match
{
case
(
Some
(
s
),
Some
(
l
))
=>
new
File
(
outputDir
,
s
+
"-"
+
l
+
".BamMetrics.summary.json"
)
...
...
@@ -92,7 +92,7 @@ class BamMetrics(val root: Configurable) extends QScript
add
(
gcBiasMetrics
)
addSummarizable
(
gcBiasMetrics
,
"gc_bias"
)
if
(
transcriptRefFlatFile
.
isEmpty
)
{
if
(
config
(
"wgs_metrics"
,
default
=
true
)
)
{
val
wgsMetrics
=
new
CollectWgsMetrics
(
this
)
wgsMetrics
.
input
=
inputBam
wgsMetrics
.
output
=
swapExt
(
outputDir
,
inputBam
,
".bam"
,
".wgs.metrics"
)
...
...
@@ -100,12 +100,13 @@ class BamMetrics(val root: Configurable) extends QScript
addSummarizable
(
wgsMetrics
,
"wgs"
)
}
if
(
transcriptRefFlatFile
.
isDefined
)
{
if
(
config
(
"rna_metrics"
,
default
=
false
)
)
{
val
rnaMetrics
=
new
CollectRnaSeqMetrics
(
this
)
rnaMetrics
.
input
=
inputBam
rnaMetrics
.
output
=
swapExt
(
outputDir
,
inputBam
,
".bam"
,
".rna.metrics"
)
rnaMetrics
.
chartOutput
=
Some
(
swapExt
(
outputDir
,
inputBam
,
".bam"
,
".rna.metrics.pdf"
))
rnaMetrics
.
refFlat
=
transcriptRefFlatFile
.
get
rnaMetrics
.
refFlat
=
annotationRefFlat
()
rnaMetrics
.
ribosomalIntervals
=
ribosomalRefFlat
()
add
(
rnaMetrics
)
addSummarizable
(
rnaMetrics
,
"rna"
)
}
...
...
public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala
View file @
01f3bf84
...
...
@@ -61,6 +61,12 @@ object BammetricsReport extends ReportBuilder {
val
wgsExecuted
=
summary
.
getValue
(
sampleId
,
libId
,
metricsTag
,
"stats"
,
"wgs"
).
isDefined
val
rnaExecuted
=
summary
.
getValue
(
sampleId
,
libId
,
metricsTag
,
"stats"
,
"rna"
).
isDefined
val
insertsizeMetrics
=
summary
.
getValue
(
sampleId
,
libId
,
metricsTag
,
"stats"
,
"CollectInsertSizeMetrics"
,
"metrics"
)
match
{
case
Some
(
None
)
=>
false
case
Some
(
_
)
=>
true
case
_
=>
false
}
val
targets
=
(
summary
.
getValue
(
sampleId
,
libId
,
metricsTag
,
"settings"
,
"amplicon_name"
),
summary
.
getValue
(
sampleId
,
libId
,
metricsTag
,
"settings"
,
"roi_name"
)
...
...
@@ -77,11 +83,10 @@ object BammetricsReport extends ReportBuilder {
targets
.
map
(
t
=>
t
->
ReportSection
(
"/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp"
,
Map
(
"target"
->
Some
(
t
)))),
Map
())),
List
(
"Summary"
->
ReportSection
(
"/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"
),
// FIXME: the insert size block, should only displayed when we have paired end data.
"Insert Size"
->
ReportSection
(
"/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp"
,
Map
(
"showPlot"
->
true
))
)
++
(
if
(
wgsExecuted
)
List
(
"Whole genome coverage"
->
ReportSection
(
"/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp"
,
"Summary"
->
ReportSection
(
"/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"
))
++
(
if
(
insertsizeMetrics
)
List
(
"Insert Size"
->
ReportSection
(
"/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp"
,
Map
(
"showPlot"
->
true
))
)
else
Nil
)
++
(
if
(
wgsExecuted
)
List
(
"Whole genome coverage"
->
ReportSection
(
"/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp"
,
Map
(
"showPlot"
->
true
)))
else
Nil
)
++
(
if
(
rnaExecuted
)
List
(
"Rna coverage"
->
ReportSection
(
"/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp"
,
...
...
public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala
View file @
01f3bf84
...
...
@@ -50,22 +50,22 @@ class BamMetricsTest extends TestNGSuite with Matchers {
@DataProvider
(
name
=
"bammetricsOptions"
)
def
bammetricsOptions
=
{
val
rois
=
Array
(
0
,
1
,
2
,
3
)
val
amplicon
=
Array
(
true
,
false
)
val
rna
=
Array
(
true
,
false
)
val
bool
=
Array
(
true
,
false
)
for
(
rois
<-
rois
;
amplicon
<-
amplicon
;
rna
<-
rna
)
yield
Array
(
rois
,
amplicon
,
rna
)
amplicon
<-
bool
;
rna
<-
bool
;
wgs
<-
bool
)
yield
Array
(
rois
,
amplicon
,
rna
,
wgs
)
}
@Test
(
dataProvider
=
"bammetricsOptions"
)
def
testBamMetrics
(
rois
:
Int
,
amplicon
:
Boolean
,
rna
:
Boolean
)
=
{
val
map
=
ConfigUtils
.
mergeMaps
(
Map
(
"output_dir"
->
BamMetricsTest
.
outputDir
),
def
testBamMetrics
(
rois
:
Int
,
amplicon
:
Boolean
,
rna
:
Boolean
,
wgs
:
Boolean
)
=
{
val
map
=
ConfigUtils
.
mergeMaps
(
Map
(
"output_dir"
->
BamMetricsTest
.
outputDir
,
"rna_metrics"
->
rna
,
"wgs_metrics"
->
wgs
),
Map
(
BamMetricsTest
.
executables
.
toSeq
:
_
*
))
++
(
if
(
amplicon
)
Map
(
"amplicon_bed"
->
"amplicon.bed"
)
else
Map
())
++
(
if
(
rna
)
Map
(
"
transcript
_refflat"
->
"transcripts.refFlat"
)
else
Map
())
++
(
if
(
rna
)
Map
(
"
annotation
_refflat"
->
"transcripts.refFlat"
)
else
Map
())
++
Map
(
"regions_of_interest"
->
(
1
to
rois
).
map
(
"roi_"
+
_
+
".bed"
).
toList
)
val
bammetrics
:
BamMetrics
=
initPipeline
(
map
)
...
...
@@ -77,7 +77,7 @@ class BamMetricsTest extends TestNGSuite with Matchers {
var
regions
:
Int
=
rois
+
(
if
(
amplicon
)
1
else
0
)
bammetrics
.
functions
.
count
(
_
.
isInstanceOf
[
CollectRnaSeqMetrics
])
shouldBe
(
if
(
rna
)
1
else
0
)
bammetrics
.
functions
.
count
(
_
.
isInstanceOf
[
CollectWgsMetrics
])
shouldBe
(
if
(
rna
)
0
else
1
)
bammetrics
.
functions
.
count
(
_
.
isInstanceOf
[
CollectWgsMetrics
])
shouldBe
(
if
(
wgs
)
1
else
0
)
bammetrics
.
functions
.
count
(
_
.
isInstanceOf
[
CollectMultipleMetrics
])
shouldBe
1
bammetrics
.
functions
.
count
(
_
.
isInstanceOf
[
CalculateHsMetrics
])
shouldBe
(
if
(
amplicon
)
1
else
0
)
bammetrics
.
functions
.
count
(
_
.
isInstanceOf
[
CollectTargetedPcrMetrics
])
shouldBe
(
if
(
amplicon
)
1
else
0
)
...
...
public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala
View file @
01f3bf84
...
...
@@ -103,7 +103,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
functions
.
filter
(
_
.
jobOutputFile
==
null
).
foreach
(
f
=>
{
try
{
f
.
jobOutputFile
=
new
File
(
f
.
firstOutput
.
getAbsoluteFile
.
getParent
,
"."
+
f
.
firstOutput
.
getName
+
"."
+
config
Name
+
".out"
)
f
.
jobOutputFile
=
new
File
(
f
.
firstOutput
.
getAbsoluteFile
.
getParent
,
"."
+
f
.
firstOutput
.
getName
+
"."
+
f
.
getClass
.
getSimple
Name
+
".out"
)
}
catch
{
case
e
:
NullPointerException
=>
logger
.
warn
(
s
"Can't generate a jobOutputFile for $f"
)
}
...
...
public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala
View file @
01f3bf84
...
...
@@ -71,7 +71,7 @@ trait Reference extends Configurable {
val
file
:
File
=
config
(
"reference_fasta"
)
checkFasta
(
file
)
val
dict
=
new
File
(
file
.
getAbsolutePath
.
stripSuffix
(
".fa"
).
stripSuffix
(
".fasta"
)
+
".dict"
)
val
dict
=
new
File
(
file
.
getAbsolutePath
.
stripSuffix
(
".fa"
).
stripSuffix
(
".fasta"
)
.
stripSuffix
(
".fna"
)
+
".dict"
)
val
fai
=
new
File
(
file
.
getAbsolutePath
+
".fai"
)
this
match
{
...
...
public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/annotations/Annotations.scala
0 → 100644
View file @
01f3bf84
package
nl.lumc.sasc.biopet.core.annotations
import
nl.lumc.sasc.biopet.core.BiopetQScript
import
nl.lumc.sasc.biopet.core.BiopetQScript.InputFile
import
nl.lumc.sasc.biopet.utils.LazyCheck
import
org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvan_thof on 1/12/16.
*/
trait
AnnotationGtf
extends
BiopetQScript
{
qscript
:
QScript
=>
/** GTF reference file */
lazy
val
annotationGtf
:
File
=
{
val
file
:
File
=
config
(
"annotation_gtf"
,
freeVar
=
true
)
inputFiles
:+
InputFile
(
file
,
config
(
"annotation_gtf_md5"
,
freeVar
=
true
))
file
}
}
trait
AnnotationRefFlat
extends
BiopetQScript
{
qscript
:
QScript
=>
/** GTF reference file */
lazy
val
annotationRefFlat
=
new
LazyCheck
({
val
file
:
File
=
config
(
"annotation_refflat"
,
freeVar
=
true
)
inputFiles
:+
InputFile
(
file
,
config
(
"annotation_refflat_md5"
,
freeVar
=
true
))
file
})
}
trait
RibosomalRefFlat
extends
BiopetQScript
{
qscript
:
QScript
=>
/** GTF reference file */
lazy
val
ribosomalRefFlat
=
new
LazyCheck
({
val
file
:
Option
[
File
]
=
config
(
"ribosome_refflat"
,
freeVar
=
true
)
file
match
{
case
Some
(
f
)
=>
inputFiles
:+
InputFile
(
f
,
config
(
"ribosome_refflat_md5"
,
freeVar
=
true
))
case
_
=>
}
file
})
}
public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala
View file @
01f3bf84
...
...
@@ -15,6 +15,8 @@
*/
package
nl.lumc.sasc.biopet.core.extensions
import
java.io.File
import
nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import
nl.lumc.sasc.biopet.utils.rscript.Rscript
...
...
@@ -28,7 +30,7 @@ trait RscriptCommandLineFunction extends BiopetCommandLineFunction with Rscript
executable
=
rscriptExecutable
override
def
beforeGraph
()
:
Unit
=
{
checkScript
(
Some
(
jobTempDir
))
checkScript
(
Some
(
new
File
(
".queue"
+
File
.
separator
+
"tmp"
)
))
}
def
cmdLine
:
String
=
repeat
(
cmd
)
...
...
public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/varscan/fix_mpileup.py
View file @
01f3bf84
...
...
@@ -21,6 +21,9 @@ from __future__ import print_function
__author__
=
"Wai Yi Leung"
import
sys
import
re
upacPatern
=
re
.
compile
(
r
'[RYKMSWBDHV]'
)
if
__name__
==
"__main__"
:
"""
...
...
@@ -46,4 +49,5 @@ if __name__ == "__main__":
if
new_size
==
0
:
l
[
5
]
=
""
l
[
2
]
=
upacPatern
.
sub
(
"N"
,
l
[
2
])
print
(
"
\t
"
.
join
(
map
(
str
,
l
)))
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectRnaSeqMetrics.scala
View file @
01f3bf84
...
...
@@ -36,7 +36,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
var
refFlat
:
File
=
null
@Input
(
doc
=
"Location of rRNA sequences in interval list format"
,
required
=
false
)
var
ribosomalIntervals
:
Option
[
File
]
=
c
on
fig
(
"ribosomal_intervals"
)
var
ribosomalIntervals
:
Option
[
File
]
=
N
on
e
@Output
(
doc
=
"Output metrics file"
,
required
=
true
)
var
output
:
File
=
null
...
...
public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BaseCounter.scala
0 → 100644
View file @
01f3bf84
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.extensions.tools
import
java.io.
{
PrintWriter
,
File
}
import
nl.lumc.sasc.biopet.core.ToolCommandFunction
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
/**
*
*/
class
BaseCounter
(
val
root
:
Configurable
)
extends
ToolCommandFunction
{
def
toolObject
=
nl
.
lumc
.
sasc
.
biopet
.
tools
.
BaseCounter
@Input
(
doc
=
"Input Bed file"
,
required
=
true
)
var
refFlat
:
File
=
_
@Input
(
doc
=
"Bam File"
,
required
=
true
)
var
bamFile
:
File
=
_
var
outputDir
:
File
=
_
var
prefix
:
String
=
"output"
override
def
defaultCoreMemory
=
3.0
override
def
defaultThreads
=
4
def
transcriptTotalCounts
=
new
File
(
outputDir
,
s
"$prefix.base.transcript.counts"
)
def
transcriptTotalSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.transcript.sense.counts"
)
def
transcriptTotalAntiSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.transcript.antisense.counts"
)
def
transcriptExonicCounts
=
new
File
(
outputDir
,
s
"$prefix.base.transcript.exonic.counts"
)
def
transcriptExonicSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.transcript.exonic.sense.counts"
)
def
transcriptExonicAntiSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.transcript.exonic.antisense.counts"
)
def
transcriptIntronicCounts
=
new
File
(
outputDir
,
s
"$prefix.base.transcript.intronic.counts"
)
def
transcriptIntronicSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.transcript.intronic.sense.counts"
)
def
transcriptIntronicAntiSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.transcript.intronic.antisense.counts"
)
def
exonCounts
=
new
File
(
outputDir
,
s
"$prefix.base.exon.counts"
)
def
exonSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.exon.sense.counts"
)
def
exonAntiSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.exon.antisense.counts"
)
def
intronCounts
=
new
File
(
outputDir
,
s
"$prefix.base.intron.counts"
)
def
intronSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.intron.sense.counts"
)
def
intronAntiSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.intron.antisense.counts"
)
def
geneTotalCounts
=
new
File
(
outputDir
,
s
"$prefix.base.gene.counts"
)
def
geneTotalSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.gene.sense.counts"
)
def
geneTotalAntiSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.gene.antisense.counts"
)
def
geneExonicCounts
=
new
File
(
outputDir
,
s
"$prefix.base.gene.exonic.counts"
)
def
geneExonicSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.gene.exonic.sense.counts"
)
def
geneExonicAntiSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.gene.exonic.antisense.counts"
)
def
geneIntronicCounts
=
new
File
(
outputDir
,
s
"$prefix.base.gene.intronic.counts"
)
def
geneIntronicSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.gene.intronic.sense.counts"
)
def
geneIntronicAntiSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.gene.intronic.antisense.counts"
)
def
mergeExonCounts
=
new
File
(
outputDir
,
s
"$prefix.base.exon.merge.counts"
)
def
mergeExonSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.exon.merge.sense.counts"
)
def
mergeExonAntiSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.exon.merge.antisense.counts"
)
def
mergeIntronCounts
=
new
File
(
outputDir
,
s
"$prefix.base.intron.merge.counts"
)
def
mergeIntronSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.intron.merge.sense.counts"
)
def
mergeIntronAntiSenseCounts
=
new
File
(
outputDir
,
s
"$prefix.base.intron.merge.antisense.counts"
)
def
nonStrandedMetaExonCounts
=
new
File
(
outputDir
,
s
"$prefix.base.metaexons.non_stranded.counts"
)
def
strandedMetaExonCounts
=
new
File
(
outputDir
,
s
"$prefix.base.metaexons.stranded.counts"
)
def
strandedSenseMetaExonCounts
=
new
File
(
outputDir
,
s
"$prefix.base.metaexons.stranded.sense.counts"
)
def
strandedAntiSenseMetaExonCounts
=
new
File
(
outputDir
,
s
"$prefix.base.metaexons.stranded.antisense.counts"
)
override
def
beforeGraph
()
:
Unit
=
{
outputFiles
++=
List
(
transcriptTotalCounts
,
transcriptTotalSenseCounts
,
transcriptTotalAntiSenseCounts
,
transcriptExonicCounts
,
transcriptExonicSenseCounts
,
transcriptExonicAntiSenseCounts
,
transcriptIntronicCounts
,
transcriptIntronicSenseCounts
,
transcriptIntronicAntiSenseCounts
,
exonCounts
,
exonSenseCounts
,
exonAntiSenseCounts
,
intronCounts
,
intronSenseCounts
,
intronAntiSenseCounts
,
geneTotalCounts
,
geneTotalSenseCounts
,
geneTotalAntiSenseCounts
,
geneExonicCounts
,
geneExonicSenseCounts
,
geneExonicAntiSenseCounts
,
geneIntronicCounts
,
geneIntronicSenseCounts
,
geneIntronicAntiSenseCounts
,
mergeExonCounts
,
mergeExonSenseCounts
,
mergeExonAntiSenseCounts
,
mergeIntronCounts
,
mergeIntronSenseCounts
,
mergeIntronAntiSenseCounts
,
nonStrandedMetaExonCounts
,
strandedMetaExonCounts
,
strandedSenseMetaExonCounts
,
strandedAntiSenseMetaExonCounts
)
jobOutputFile
=
new
File
(
outputDir
,
s
".$prefix.basecounter.out"
)
super
.
beforeGraph
()
}
override
def
cmdLine
=
super
.
cmdLine
+
required
(
"--refFlat"
,
refFlat
)
+
required
(
"-b"
,
bamFile
)
+
required
(
"-o"
,
outputDir
)
+
optional
(
"--prefix"
,
prefix
)
}
public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala
View file @
01f3bf84
...
...
@@ -77,3 +77,24 @@ class MergeTables(val root: Configurable) extends ToolCommandFunction {
required
(
"-o"
,
output
)
+
required
(
""
,
repeat
(
inputTables
),
escape
=
false
)
}
object
MergeTables
{
def
apply
(
root
:
Configurable
,
tables
:
List
[
File
],
outputFile
:
File
,
idCols
:
List
[
Int
],
valCol
:
Int
,
numHeaderLines
:
Int
=
0
,
fallback
:
String
=
"-"
,
fileExtension
:
Option
[
String
]
=
None
)
:
MergeTables
=
{
val
job
=
new
MergeTables
(
root
)
job
.
inputTables
=
tables
job
.
output
=
outputFile
job
.
idColumnIndices
=
idCols
.
map
(
_
.
toString
)
job
.
valueColumnIndex
=
valCol
job
.
fallbackString
=
Option
(
fallback
)
job
.
numHeaderLines
=
Option
(
numHeaderLines
)
job
.
fileExtension
=
fileExtension
job
}
}
\ No newline at end of file
public/biopet-tools/pom.xml
View file @
01f3bf84
...
...
@@ -70,5 +70,10 @@
<artifactId>
biojava3-sequencing
</artifactId>
<version>
3.1.0
</version>
</dependency>
<dependency>
<groupId>
com.github.broadinstitute
</groupId>
<artifactId>
picard
</artifactId>
<version>
1.141
</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala
0 → 100644
View file @
01f3bf84
This diff is collapsed.
Click to expand it.
public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala
View file @
01f3bf84
...
...
@@ -108,6 +108,7 @@ object MergeTables extends ToolCommand {
idColumnIndices
:
Seq
[
Int
]
=
Seq
.
empty
[
Int
],
valueColumnIndex
:
Int
=
-
1
,
fileExtension
:
String
=
""
,
columnNames
:
Option
[
Seq
[
String
]]
=
None
,
numHeaderLines
:
Int
=
0
,
fallbackString
:
String
=
"-"
,
delimiter
:
Char
=
'\t'
,
...
...
@@ -146,6 +147,10 @@ object MergeTables extends ToolCommand {
c
.
copy
(
idColumnName
=
x
)
}
text
"Name of feature ID column in the output merged file (default: feature)"
opt
[
String
](
'N'
,
"column_names"
)
optional
()
valueName
"<name>"
action
{
(
x
,
c
)
=>
c
.
copy
(
columnNames
=
Some
(
x
.
split
(
","
)))
}
text
"Name of feature ID column in the output merged file (default: feature)"
opt
[
String
](
'e'
,
"strip_extension"
)
optional
()
valueName
"<ext>"
action
{
(
x
,
c
)
=>
c
.
copy
(
fileExtension
=
x
)
}
text
"Common extension of all input tables to strip (default: empty string)"
...
...
@@ -186,10 +191,17 @@ object MergeTables extends ToolCommand {
.
getOrElse
(
sys
.
exit
(
1
))
/** Transforms the input file seq into a seq of [[InputTable]] objects */
def
prepInput
(
inFiles
:
Seq
[
File
],
ext
:
String
=
""
)
:
Seq
[
InputTable
]
=
{
require
(
inFiles
.
map
(
_
.
getName
.
stripSuffix
(
ext
)).
distinct
.
size
==
inFiles
.
size
,
"Duplicate samples exist in inputs"
)
inFiles
.
map
(
tableFile
=>
InputTable
(
tableFile
.
getName
.
stripSuffix
(
ext
),
Source
.
fromFile
(
tableFile
)))
def
prepInput
(
inFiles
:
Seq
[
File
],
ext
:
String
,
columnNames
:
Option
[
Seq
[
String
]])
:
Seq
[
InputTable
]
=
{
(
ext
,
columnNames
)
match
{
case
(
_
,
Some
(
names
))
=>
require
(
names
.
size
==
inFiles
.
size
,
"columnNames are not the same number as input Files"
)
names
.
zip
(
inFiles
).
map
{
case
(
name
,
tableFile
)
=>
InputTable
(
name
,
Source
.
fromFile
(
tableFile
))
}
case
_
=>
require
(
inFiles
.
map
(
_
.
getName
.
stripSuffix
(
ext
)).
distinct
.
size
==
inFiles
.
size
,
"Duplicate samples exist in inputs"
)
inFiles
.
map
(
tableFile
=>
InputTable
(
tableFile
.
getName
.
stripSuffix
(
ext
),
Source
.
fromFile
(
tableFile
)))
}
}
/** Creates the output writer object */
...
...
@@ -205,7 +217,7 @@ object MergeTables extends ToolCommand {
import
commandArgs._
val
outStream
=
prepOutput
(
out
)
val
merged
=
mergeTables
(
prepInput
(
inputTables
,
fileExtension
),
val
merged
=
mergeTables
(
prepInput
(
inputTables
,
fileExtension
,
columnNames
),
idColumnIndices
,
valueColumnIndex
,
numHeaderLines
,
delimiter
)
writeOutput
(
merged
,
outStream
,
fallbackString
,
idColumnName
)
outStream
.
close
()
...
...
public/biopet-tools/src/test/resources/chrQ.refflat
0 → 100644
View file @
01f3bf84
geneA t1 chrQ + 200 500 225 475 3 200,300,400 250,350,500
public/biopet-tools/src/test/resources/empty.bai
0 → 100644
View file @
01f3bf84
File added
public/biopet-tools/src/test/resources/empty.bam
0 → 100644
View file @
01f3bf84
File added
public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BaseCounterTest.scala
0 → 100644
View file @
01f3bf84
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
java.nio.file.Paths
import
com.google.common.io.Files
import
htsjdk.samtools.
{
SAMReadGroupRecord
,
SAMSequenceRecord
,
SAMLineParser
,
SAMFileHeader
}
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
import
picard.annotation.Gene
import
scala.collection.JavaConversions._
/**
* Created by pjvan_thof on 1/29/16.
*/
class
BaseCounterTest
extends
TestNGSuite
with
Matchers
{
import
BaseCounter._
import
BaseCounterTest._
@Test
def
testCountsClass
()
:
Unit
=
{
val
counts
=
new
Counts
counts
.
antiSenseBases
shouldBe
0
counts
.
antiSenseReads
shouldBe
0
counts
.
senseBases
shouldBe
0
counts
.
senseReads
shouldBe
0
counts
.
totalBases
shouldBe
0
counts
.
totalReads
shouldBe
0
counts
.
antiSenseBases
=
1
counts
.
senseBases
=
2
counts
.
totalBases
shouldBe
3
counts
.
antiSenseReads
=
1
counts
.
senseReads
=
2
counts
.
totalReads
shouldBe
3
}
@Test
def
testBamRecordBasesOverlapBlocks
()
:
Unit
=
{
val
read
=
BaseCounterTest
.
lineParser
.
parseLine
(
"r02\t0\tchrQ\t50\t60\t4M2D4M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
)
bamRecordBasesOverlap
(
read
,
40
,
70
)
shouldBe
8
bamRecordBasesOverlap
(
read
,
50
,
59
)
shouldBe
8
bamRecordBasesOverlap
(
read
,
50
,
55
)
shouldBe
4
bamRecordBasesOverlap
(
read
,
55
,
60
)
shouldBe
4
bamRecordBasesOverlap
(
read
,
10
,
20
)
shouldBe
0
bamRecordBasesOverlap
(
read
,
40
,
49
)
shouldBe
0
bamRecordBasesOverlap
(
read
,
40
,
50
)
shouldBe
1
bamRecordBasesOverlap
(
read
,
40
,
51
)
shouldBe
2
bamRecordBasesOverlap
(
read
,
58
,
70
)
shouldBe
2
bamRecordBasesOverlap
(
read
,
59
,
70
)
shouldBe
1
bamRecordBasesOverlap
(
read
,
60
,
70
)
shouldBe
0
}
@Test
def
testBamRecordBasesOverlap
()
:
Unit
=
{
val
read
=
BaseCounterTest
.
lineParser
.
parseLine
(
"r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
)
bamRecordBasesOverlap
(
read
,
40
,
70
)
shouldBe
10
bamRecordBasesOverlap
(
read
,
50
,
59
)
shouldBe
10
bamRecordBasesOverlap
(
read
,
50
,
55
)
shouldBe
6
bamRecordBasesOverlap
(
read
,
55
,
60
)
shouldBe
5
bamRecordBasesOverlap
(
read
,
10
,
20
)
shouldBe
0
bamRecordBasesOverlap
(
read
,
40
,
49
)
shouldBe
0
bamRecordBasesOverlap
(
read
,
40
,
50
)
shouldBe
1
bamRecordBasesOverlap
(
read
,
40
,
51
)
shouldBe
2
bamRecordBasesOverlap
(
read
,
58
,
70
)
shouldBe
2
bamRecordBasesOverlap
(
read
,
59
,
70
)
shouldBe
1
bamRecordBasesOverlap
(
read
,
60
,
70
)
shouldBe
0
val
counts
=
new
Counts
bamRecordBasesOverlap
(
read
,
40
,
70
,
counts
,
true
)
counts
.
senseBases
shouldBe
10
counts
.
antiSenseBases
shouldBe
0
counts
.
senseReads
shouldBe
1
counts
.
antiSenseReads
shouldBe
0
bamRecordBasesOverlap
(
read
,
50
,
54
,
counts
,
false
)
counts
.
senseBases
shouldBe
10
counts
.
antiSenseBases
shouldBe
5
counts
.
senseReads
shouldBe
1
counts
.
antiSenseReads
shouldBe
1
}
@Test
def
testSamRecordStrand
:
Unit
=
{
val
readPlusUnpaired
=
BaseCounterTest
.
lineParser
.
parseLine
(
"r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
)
val
readMinUnpaired
=
BaseCounterTest
.
lineParser
.
parseLine
(
"r02\t16\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
)
val
readPlusPairedR1
=
BaseCounterTest
.
lineParser
.
parseLine
(
"r02\t73\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
)
val
readMinPairedR1
=
BaseCounterTest
.
lineParser
.
parseLine
(
"r02\t89\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
)
val
readPlusPairedR2
=
BaseCounterTest
.
lineParser
.
parseLine
(
"r02\t137\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
)
val
readMinPairedR2
=
BaseCounterTest
.
lineParser
.
parseLine
(
"r02\t153\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
)
samRecordStrand
(
readPlusUnpaired
,
true
)
shouldBe
false
samRecordStrand
(
readMinUnpaired
,
true
)
shouldBe
true
samRecordStrand
(
readPlusPairedR1
,
true
)
shouldBe
false
samRecordStrand
(
readMinPairedR1
,
true
)
shouldBe
true
samRecordStrand
(
readPlusPairedR2
,
true
)
shouldBe
true
samRecordStrand
(
readMinPairedR2
,
true
)
shouldBe
false
samRecordStrand
(
readPlusUnpaired
,
false
)
shouldBe
true
samRecordStrand
(
readMinUnpaired
,
false
)
shouldBe
false
samRecordStrand
(
readPlusPairedR1
,
false
)
shouldBe
true
samRecordStrand
(
readMinPairedR1
,
false
)
shouldBe
false
samRecordStrand
(
readPlusPairedR2
,
false
)
shouldBe
false
samRecordStrand
(
readMinPairedR2
,
false
)
shouldBe
true
samRecordStrand
(
readPlusUnpaired
,
geneA
)
shouldBe
false
samRecordStrand
(
readMinUnpaired
,
geneA
)
shouldBe
true