Commit 1d4a2df0 authored by bow's avatar bow
Browse files

Flatten library docs into its own MongoDB collection

parent 6f0fdc53
......@@ -175,6 +175,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponse.body must not /("annotIds" -> ".+".r)
priorResponse.body must not /("refId" -> ".+".r)
priorResponse.body must not /("sampleIds" -> ".+".r)
priorResponse.body must not /("libIds" -> ".+".r)
}
}
}
......@@ -203,6 +204,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponse.body must not /("annotIds" -> ".+".r)
priorResponse.body must not /("refId" -> ".+".r)
priorResponse.body must not /("sampleIds" -> ".+".r)
priorResponse.body must not /("libIds" -> ".+".r)
}
}
}
......@@ -234,6 +236,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponses.head.body must not /("annotIds" -> ".+".r)
priorResponses.head.body must not /("refId" -> ".+".r)
priorResponses.head.body must not /("sampleIds" -> ".+".r)
priorResponses.head.body must not /("libIds" -> ".+".r)
}
"return status 201 for the second upload" in {
......@@ -251,6 +254,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponses.last.body must not /("annotIds" -> ".+".r)
priorResponses.last.body must not /("refId" -> ".+".r)
priorResponses.last.body must not /("sampleIds" -> ".+".r)
priorResponses.last.body must not /("libIds" -> ".+".r)
}
}
}
......@@ -341,6 +345,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponses.head.body must not /("annotIds" -> ".+".r)
priorResponses.head.body must not /("refId" -> ".+".r)
priorResponses.head.body must not /("sampleIds" -> ".+".r)
priorResponses.head.body must not /("libIds" -> ".+".r)
}
"return status 409 for the second upload" in {
......@@ -385,6 +390,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponses.head.body must not /("annotIds" -> ".+".r)
priorResponses.head.body must not /("refId" -> ".+".r)
priorResponses.head.body must not /("sampleIds" -> ".+".r)
priorResponses.head.body must not /("libIds" -> ".+".r)
}
"return status 409 for the second upload" in {
......@@ -531,6 +537,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponse.body must /("nLibs" -> 1)
priorResponse.body must /("runId" -> """\S+""".r)
priorResponse.body must not /("sampleIds" -> ".+".r)
priorResponse.body must not /("libIds" -> ".+".r)
// TODO: use raw JSON matchers when we upgrade specs2
priorResponse.jsonBody must beSome.like { case json => (json \ "annotIds") must haveSize(3) }
}
......@@ -706,6 +713,7 @@ class RunsControllerSpec extends SentinelServletSpec {
body must /#(0) */("nSamples" -> 0)
body must /#(0) */("nLibs" -> 0)
body must not /("sampleIds" -> ".+".r)
body must not /("libIds" -> ".+".r)
body must not /# 0 */ "refId"
body must not /# 0 */ "annotIds"
}
......@@ -843,6 +851,7 @@ class RunsControllerSpec extends SentinelServletSpec {
body must /("nLibs" -> 0)
body must /("pipeline" -> "plain")
body must not /("sampleIds" -> ".+".r)
body must not /("libIds" -> ".+".r)
}
}
}
......@@ -891,6 +900,7 @@ class RunsControllerSpec extends SentinelServletSpec {
body must /("runId" -> userRunId)
body must /("uploaderId" -> user.id)
body must not /("sampleIds" -> ".+".r)
body must not /("libIds" -> ".+".r)
body must /("nSamples" -> 0)
body must /("nLibs" -> 0)
body must /("pipeline" -> "plain")
......@@ -960,6 +970,7 @@ class RunsControllerSpec extends SentinelServletSpec {
body must /("nLibs" -> 0)
body must /("pipeline" -> "plain")
body must not /("sampleIds" -> ".+".r)
body must not /("libIds" -> ".+".r)
}
}
}
......@@ -1008,6 +1019,7 @@ class RunsControllerSpec extends SentinelServletSpec {
body must /("runId" -> userRunId)
body must /("uploaderId" -> user.id)
body must not /("sampleIds" -> ".+".r)
body must not /("libIds" -> ".+".r)
body must /("nSamples" -> 0)
body must /("nLibs" -> 0)
body must /("pipeline" -> "plain")
......@@ -1209,6 +1221,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponses.last.body must /("runId" -> userRunId)
priorResponses.last.body must /("uploaderId" -> user.id)
priorResponses.last.body must not /("sampleIds" -> ".+".r)
priorResponses.last.body must not /("libIds" -> ".+".r)
priorResponses.last.body must /("nSamples" -> 0)
priorResponses.last.body must /("nLibs" -> 0)
priorResponses.last.body must /("pipeline" -> "plain")
......@@ -1279,6 +1292,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponses.last.body must /("runId" -> userRunId)
priorResponses.last.body must /("uploaderId" -> user.id)
priorResponses.last.body must not /("sampleIds" -> ".+".r)
priorResponses.last.body must not /("libIds" -> ".+".r)
priorResponses.last.body must /("annotIds" -> ".+".r)
priorResponses.last.body must /("refId" -> """\S+""".r)
priorResponses.last.body must /("nSamples" -> 1)
......@@ -1352,6 +1366,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponses.last.body must /("runId" -> userRunId)
priorResponses.last.body must /("uploaderId" -> user.id)
priorResponses.last.body must not /("sampleIds" -> ".+".r)
priorResponses.last.body must not /("libIds" -> ".+".r)
priorResponses.last.body must /("nSamples" -> 0)
priorResponses.last.body must /("nLibs" -> 0)
priorResponses.last.body must /("pipeline" -> "plain")
......@@ -1422,6 +1437,7 @@ class RunsControllerSpec extends SentinelServletSpec {
priorResponses.last.body must /("runId" -> userRunId)
priorResponses.last.body must /("uploaderId" -> user.id)
priorResponses.last.body must not /("sampleIds" -> ".+".r)
priorResponses.last.body must not /("libIds" -> ".+".r)
priorResponses.last.body must /("annotIds" -> ".+".r)
priorResponses.last.body must /("refId" -> """\S+""".r)
priorResponses.last.body must /("nSamples" -> 1)
......
......@@ -125,7 +125,7 @@ class RunsController(implicit val swagger: Swagger, mongo: MongodbAccessObject)
delete("/?") { halt(400, CommonMessages.UnspecifiedRunId) }
// format: OFF
val runsRunIdGetOperation = (apiOperation[File]("runsRunIdGet")
val runsRunIdGetOperation = (apiOperation[RunRecord]("runsRunIdGet")
summary "Retrieves single run summaries."
notes
"""This endpoint retrieves the a single record of an uploaded summary. Optionally, you can also download the
......
......@@ -25,7 +25,7 @@ import org.scalatra.json.JacksonJsonSupport
import org.scalatra.swagger.{ DataType, Model, SwaggerSupport }
import org.slf4j.LoggerFactory
import nl.lumc.sasc.sentinel.models.ApiMessage
import nl.lumc.sasc.sentinel.models.{ ApiMessage, RunRecord }
import nl.lumc.sasc.sentinel.utils.{ SentinelJsonFormats, separateObjectIds, splitParam }
/** Base servlet for all Sentinel controllers. */
......@@ -69,9 +69,9 @@ abstract class SentinelServlet extends ScalatraServlet
(propName, interceptedProp)
}
val newModel =
if (model.id == "RunDocument")
if (model.id == "RunRecord")
model.copy(properties = interceptedProp.filter {
case (propName, prop) => propName != "sampleIds" || propName != "samples"
case (propName, prop) => !RunRecord.hiddenAttributes.contains(propName)
})
else
model.copy(properties = interceptedProp)
......
......@@ -245,7 +245,7 @@ class StatsController(implicit val swagger: Swagger, mongo: MongodbAccessObject)
}
}
gentrap.getAlignmentAggregateStats(accLevel, libType, runIds, refIds, annotIds) match {
gentrap.getAlignmentAggr(accLevel, libType, runIds, refIds, annotIds) match {
case None => NotFound(CommonMessages.MissingDataPoints)
case Some(res) => Ok(transformMapReduceResult(res))
}
......@@ -322,12 +322,17 @@ class StatsController(implicit val swagger: Swagger, mongo: MongodbAccessObject)
.getOrElse(halt(400, CommonMessages.InvalidLibType))
}
Ok(gentrap.getSeqStats(libType, qcPhase, user, runIds, refIds, annotIds, sorted))
val queryFunc = qcPhase match {
case SeqQcPhase.Raw => gentrap.getSeqStatsRaw
case SeqQcPhase.Processed => gentrap.getSeqStatsProcessed
}
Ok(queryFunc(libType, user, runIds, refIds, annotIds, sorted))
}
// format: OFF
val statsGentrapSequencesAggregateGetOperation =
(apiOperation[SeqStatsAggr]("statsGentrapSequencesAggregationsGet")
(apiOperation[SeqStatsAggr[ReadStatsAggr]]("statsGentrapSequencesAggregationsGet")
summary "Retrieves the aggregate sequencing statistics of Gentrap pipeline runs."
parameters (
queryParam[Seq[String]]("runIds")
......@@ -382,7 +387,12 @@ class StatsController(implicit val swagger: Swagger, mongo: MongodbAccessObject)
.getOrElse(halt(400, CommonMessages.InvalidLibType))
}
gentrap.getSeqAggregateStats(libType, qcPhase, runIds, refIds, annotIds) match {
val queryFunc = qcPhase match {
case SeqQcPhase.Raw => gentrap.getSeqStatsAggrRaw
case SeqQcPhase.Processed => gentrap.getSeqStatsAggrProcessed
}
queryFunc(libType, runIds, refIds, annotIds) match {
case None => NotFound(CommonMessages.MissingDataPoints)
case Some(res) => Ok(transformMapReduceResult(res))
}
......
......@@ -168,7 +168,7 @@ trait RunsAdapter extends MongodbConnector {
*
* When a run record is deleted, the following happens:
* - The underlying run summary file is removed from the database.
* - All sample documents created from the run summary file is removed from the database.
* - All sample and library documents created from the run summary file is removed from the database.
* - The run record itself is *not* removed from the database, but it is marked with a `deletionTimeUtc` attribute
* to mark when the delete request was made.
*
......@@ -209,12 +209,17 @@ trait RunsAdapter extends MongodbConnector {
.map { case dbo => (grater[RunRecord].asObject(dbo), true) }
docToDelete.foreach {
case (doc, _) =>
val collSamples = mongo.db(collectionNames.pipelineSamples(doc.pipeline))
val samplesColl = mongo.db(collectionNames.pipelineSamples(doc.pipeline))
val libsColl = mongo.db(collectionNames.pipelineLibs(doc.pipeline))
// remove the GridFS entry
mongo.gridfs.remove(doc.runId)
// and all samples linked to this run
doc.sampleIds.foreach {
case oid => collSamples.remove(MongoDBObject("_id" -> oid))
case oid => samplesColl.remove(MongoDBObject("_id" -> oid))
}
// and all libs linked to this run
doc.libIds.foreach {
case oid => libsColl.remove(MongoDBObject("_id" -> oid))
}
}
docToDelete
......
......@@ -20,20 +20,24 @@ import com.mongodb.casbah.BulkWriteResult
import com.novus.salat._
import com.novus.salat.global._
import nl.lumc.sasc.sentinel.models.BaseSampleDocument
import nl.lumc.sasc.sentinel.models.{ BaseLibDocument, BaseSampleDocument }
/**
* Trait for storing samples from run summaries.
* Trait for storing samples and libraries from run summaries.
*
* @tparam T Subclass of [[nl.lumc.sasc.sentinel.models.BaseSampleDocument]] representing a sample run by a pipeline.
* @tparam S Subclass of [[nl.lumc.sasc.sentinel.models.BaseSampleDocument]] representing a sample run by a pipeline.
* @tparam L Subclass of [[nl.lumc.sasc.sentinel.models.BaseLibDocument]] representing a library run by a pipeline.
*/
trait SamplesAdapter[T <: BaseSampleDocument] extends MongodbConnector { this: RunsAdapter =>
trait UnitsAdapter[S <: BaseSampleDocument, L <: BaseLibDocument] extends MongodbConnector { this: RunsAdapter =>
/** Name of the pipeline that produces the run summary file. */
def pipelineName: String
/** Collection used by this adapter. */
private lazy val coll = mongo.db(collectionNames.pipelineSamples(pipelineName))
/** Collection for the samples. */
private lazy val samplesColl = mongo.db(collectionNames.pipelineSamples(pipelineName))
/** Collection for the libraries. */
private lazy val libsColl = mongo.db(collectionNames.pipelineLibs(pipelineName))
/**
* Stores the given sequence of samples into the sample collection.
......@@ -41,10 +45,24 @@ trait SamplesAdapter[T <: BaseSampleDocument] extends MongodbConnector { this: R
* @param samples Samples to store.
* @return Bulk write operation result.
*/
def storeSamples(samples: Seq[T])(implicit m: Manifest[T]): BulkWriteResult = {
def storeSamples(samples: Seq[S])(implicit m: Manifest[S]): BulkWriteResult = {
// TODO: refactor to use Futures instead
val builder = samplesColl.initializeUnorderedBulkOperation
val docs = samples.map { case sample => grater[S].asDBObject(sample) }
docs.foreach { case doc => builder.insert(doc) }
builder.execute()
}
/**
* Stores the given sequence of libraries into the sample collection.
*
* @param libs Libraries to store.
* @return Bulk write operation result.
*/
def storeLibs(libs: Seq[L])(implicit m: Manifest[L]): BulkWriteResult = {
// TODO: refactor to use Futures instead
val builder = coll.initializeUnorderedBulkOperation
val docs = samples.map { case sample => grater[T].asDBObject(sample) }
val builder = libsColl.initializeUnorderedBulkOperation
val docs = libs.map { case lib => grater[L].asDBObject(lib) }
docs.foreach { case doc => builder.insert(doc) }
builder.execute()
}
......
......@@ -79,10 +79,18 @@ package object db {
/**
* Retrieves the sample collection name for the given pipeline.
*
* @param name pipeline name.
* @return collection name for the samples parsed from the pipeline's run summary file.
* @param name Pipeline name.
* @return Collection name for the samples parsed from the pipeline's run summary file.
*/
def pipelineSamples(name: String) = s"$name.samples" // TODO: use enums instead
/**
* Retrieves the library collection name for the given pipeline.
*
* @param name Pipeline name.
* @return Collection name for the libraries parsed from the pipeline's run summary file.
*/
def pipelineLibs(name: String) = s"$name.libs"
}
}
}
/*
* Copyright (c) 2015 Leiden University Medical Center and contributors
* (see AUTHORS.md file for details).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.lumc.sasc.sentinel.models
import com.novus.salat.annotations.Salat
/** Representation of a library within a sample. */
@Salat abstract class BaseLibDocument {
/** Name of the run that produced this library. */
def runName: Option[String]
/** Name of the sample which this library belongs to. */
def sampleName: Option[String]
/** Library name. */
def libName: Option[String]
}
......@@ -66,5 +66,11 @@ case class RunRecord(
runName: Option[String] = None,
deletionTimeUtc: Option[Date] = None,
sampleIds: Seq[ObjectId] = Seq(),
libIds: Seq[ObjectId] = Seq(),
refId: Option[ObjectId] = None,
annotIds: Option[Seq[ObjectId]] = None) extends BaseRunRecord
object RunRecord {
/** Attributes that is hidden when this object is serialized into JSON. */
val hiddenAttributes = Set("sampleIds", "libIds")
}
......@@ -18,27 +18,44 @@ package nl.lumc.sasc.sentinel.models
import java.util.Date
import com.novus.salat.annotations.Salat
import com.novus.salat.annotations.{ Persist, Salat }
import org.bson.types.ObjectId
/** Representation of a sample within a run. */
@Salat abstract class BaseSampleDocument {
/** Representation of a sequencing accumulation level unit. */
@Salat abstract class BaseUnitDocument {
/** Internal database ID for the library document. */
def id: ObjectId
/** Name of the run summary file uploader. */
/** Name of the uploader of the run summary which contains this library. */
def uploaderId: String
/** Name of the run which this sample belongs to. */
/** Database sample ID. */
def runId: ObjectId
/** Name of the run that produced this library. */
def runName: Option[String]
/** UTC time when the sample document was created. */
def creationTimeUtc: Date
}
/** Representation of a sample within a run. */
@Salat abstract class BaseSampleDocument extends BaseUnitDocument {
/** Sample name. */
def sampleName: Option[String]
}
/** Database sample ID. */
def runId: ObjectId
/** Representation of a library within a sample. */
@Salat abstract class BaseLibDocument extends BaseUnitDocument {
/** Libraries belonging to this sample. */
def libs: Seq[BaseLibDocument]
/** Name of the sample which this library belongs to. */
def sampleName: Option[String]
/** UTC time when the sample document was created. */
def creationTimeUtc: Date
/** Library name. */
def libName: Option[String]
/** Short hand attribute that returns true if the library was create from a paired-end sequence. */
@Persist def isPaired: Boolean
}
......@@ -16,23 +16,38 @@
*/
package nl.lumc.sasc.sentinel.models
import com.novus.salat.annotations.Persist
import com.novus.salat.annotations.{ Persist, Salat }
import nl.lumc.sasc.sentinel.utils.pctOf
/**
* Sequencing input statistics.
* Trait for sequence statistics container.
*
* @param read1 Statistics of the first read (if paired-end) or the only read (if single-end).
* @param read2 Statistics of the second read. Only defined for paired-end inputs.
* @param labels Data point labels.
* @tparam T Container for read-level statistics.
*/
case class SeqStats(read1: ReadStats, read2: Option[ReadStats] = None, labels: Option[DataPointLabels] = None) {
@Salat trait SeqStatsLike[T] {
/** Statistics of the first read. */
def read1: T
/** Statistics of the second read. */
def read2: Option[T]
/** Combined statistics of the first and second read. */
def readAll: Option[_]
/** Data points labels. */
def labels: Option[DataPointLabels]
}
/** Sequencing input statistics.*/
case class SeqStats(read1: ReadStats, read2: Option[ReadStats] = None, labels: Option[DataPointLabels] = None)
extends SeqStatsLike[ReadStats] {
/** Combined counts for both read1 and read2 (if present). */
@Persist lazy val readAll: ReadStats = read2 match {
@Persist lazy val readAll: Option[ReadStats] = read2 match {
case Some(r2) =>
ReadStats(
Option(ReadStats(
nBases = read1.nBases + r2.nBases,
nBasesA = read1.nBasesA + r2.nBasesA,
nBasesT = read1.nBasesT + r2.nBasesT,
......@@ -41,8 +56,8 @@ case class SeqStats(read1: ReadStats, read2: Option[ReadStats] = None, labels: O
nBasesN = read1.nBasesN + r2.nBasesN,
nReads = read1.nReads, // nReads for each pair is equal
nBasesByQual = Seq.empty[Long],
medianQualByPosition = Seq.empty[Double])
case otherwise => read1
medianQualByPosition = Seq.empty[Double]))
case otherwise => Option(read1)
}
}
......@@ -55,8 +70,7 @@ case class SeqStats(read1: ReadStats, read2: Option[ReadStats] = None, labels: O
* @param readAll Aggregated statistics of both reads. Only defined if there is at least a paired-end data point
* in aggregation.
*/
case class SeqStatsAggr(read1: ReadStatsAggr, read2: Option[ReadStatsAggr] = None,
readAll: Option[ReadStatsAggr] = None)
case class SeqStatsAggr[T <: AnyRef](read1: T, read2: Option[T] = None, readAll: Option[T] = None)
/**
* Statistics of a single read file.
......
/*
* Copyright (c) 2015 Leiden University Medical Center and contributors
* (see AUTHORS.md file for details).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.lumc.sasc.sentinel.processors
import scala.collection.mutable.ListBuffer
import scala.util.Random.shuffle
import com.novus.salat._
import com.novus.salat.global._
import com.mongodb.casbah.Imports._
import nl.lumc.sasc.sentinel.{ AccLevel, LibType, SeqQcPhase }
import nl.lumc.sasc.sentinel.db.MongodbConnector
import nl.lumc.sasc.sentinel.models.{ SeqStatsAggr, User }
/**
* Base trait that provides support for querying and aggregating metrics for a pipeline.
*/
trait OutputProcessor extends MongodbConnector {
// TODO: refactor functions in here ~ we can do with less duplication
/** Name of the pipeline that produces the metrics to query. */
def pipelineName: String
/** Name of the unit attribute that denotes whether it comes from a paired-end library or not. */
protected implicit val pairAttrib = "isPaired"
/** MongoDB samples collection name of the pipeline. */
protected lazy val samplesColl = mongo.db(collectionNames.pipelineSamples(pipelineName))
/** MongoDB libraries collection name of the pipeline. */
protected lazy val libsColl = mongo.db(collectionNames.pipelineLibs(pipelineName))
/**
* Match operation builder for collection aggregations.
*
* @param runs Run IDs to filter in. If empty, no run ID filtering is done.
* @param references Reference IDs to filter in. If empty, no reference ID filtering is done.
* @param annotations Annotation IDs to filter in. If empty, no annotation ID filtering is done.
* @param paired If defined, a boolean denoting whether the unit is paired-end or not. If not defined, both paired-end
* and single-end are included in the match.
* @param withKey Whether to return only the `query` object with the `$match` key or not.
* @return a [[DBObject]] representing the `$match` aggregation operation.
*/
private[processors] def buildMatchOp(runs: Seq[ObjectId], references: Seq[ObjectId], annotations: Seq[ObjectId],
paired: Option[Boolean] = None, withKey: Boolean = true): DBObject = {
val matchBuffer = new ListBuffer[MongoDBObject]()
if (runs.nonEmpty)
matchBuffer += MongoDBObject("runId" -> MongoDBObject("$in" -> runs))
if (references.nonEmpty)
matchBuffer += MongoDBObject("referenceId" -> MongoDBObject("$in" -> references))
if (annotations.nonEmpty)
matchBuffer += MongoDBObject("annotationIds" ->
MongoDBObject("$elemMatch" -> MongoDBObject("$in" -> annotations)))
paired match {
case Some(isPaired) => matchBuffer += MongoDBObject(pairAttrib -> isPaired)
case None => ;
}
val query =
if (matchBuffer.nonEmpty) MongoDBObject("$and" -> matchBuffer.toSeq)
else MongoDBObject.empty
if (withKey) MongoDBObject("$match" -> query)
else query
}
/** Sort operation for unit documents */
private[processors] val opSortUnit = MongoDBObject("$sort" -> MongoDBObject("creationTimeUtc" -> -1))
/** Raw string of the map function for mapReduce. */
private[processors] def mapFunc(metricName: String,
libType: Option[LibType.Value])(implicit pairAttrib: String): JSFunction = {
val isPaired = libType match {
case None => "undefined" // don't check for pairs
case Some(LibType.Paired) => "true" // check for isPaired === true
case Some(LibType.Single) => "false" // check for isPaired === false
case otherwise => throw new NotImplementedError
}