Commit 7d6e11c2 authored by bow's avatar bow
Browse files

Update gentrap v0.4 schema and parsing: reference section

parent 17e3c5c7
......@@ -118,6 +118,16 @@
"type": "object",
"minItems": 1,
"additionalProperties": { "$ref": "#/definitions/referenceContig" }
},
"species": {
"description": "Reference sequence species name",
"type": "string"
},
"name": {
"description": "Build name of the reference sequence. For example, hg38 for the human genome.",
"type": "string"
}
}
}
......@@ -515,7 +525,7 @@
"referenceContig": {
"description": "Entry for a single contig in a reference sequence",
"type": "object",
"required": [ "md5", "size" ],
"required": [ "md5", "length" ],
"properties": {
......@@ -524,8 +534,8 @@
"type": "string"
},
"size": {
"description": "Size of the contig in base pairs",
"length": {
"description": "Length of the contig in base pairs",
"type": "integer"
}
}
......
......@@ -25,14 +25,23 @@ import org.bson.types.ObjectId
* Representation of an alignment reference sequence.
*
* @param refId Database ID of the reference record.
* @param contigMd5s MD5 checksums of all contigs / chromosomes in this reference sequence.
* @param contigs Record of all contigs / chromosomes in this reference sequence.
* @param combinedMd5 MD5 checksum of the concatenated string of all contig MD5 checksums, sorted alphabetically.
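* @param name Build name of the reference sequence (for example, hg38 for the human genome).
* @param species Species name of the reference sequence.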
* @param creationTimeUtc UTC time when the reference record was created.
*/
case class ReferenceRecord(
// NOTE(review): annotated with @Key("_id"); presumably this maps refId to the stored document's
// `_id` field -- confirm against the persistence layer.
@Key("_id") refId: ObjectId,
// NOTE(review): both `contigMd5s` and `contigs` are listed here. Each contig record already
// carries its own md5, so confirm whether `contigMd5s` is still meant to be a separate field.
contigMd5s: Seq[String],
contigs: Seq[ReferenceContigRecord],
combinedMd5: String,
// The remaining fields are optional and default to None when not supplied.
name: Option[String] = None,
species: Option[String] = None,
creationTimeUtc: Option[Date] = None)
/**
 * A single contig / chromosome that belongs to a reference sequence.
 *
 * @param md5 MD5 checksum of the contig sequence.
 * @param length Length of the contig sequence.
 */
case class ReferenceContigRecord(
  md5: String,
  length: Long)
......@@ -49,15 +49,16 @@ class GentrapV04InputProcessor(protected val mongo: MongodbAccessObject)
/**
 * Extracts a reference record from a Gentrap summary.
 *
 * Reads the `gentrap` / `settings` / `reference` section of the given JSON and builds a
 * [[ReferenceRecord]] with a newly created ID and the current UTC time as its creation time.
 *
 * @param runJson Parsed JSON of the run summary.
 * @return Reference record built from the summary's reference section.
 */
private[processors] def extractReference(runJson: JValue): ReferenceRecord = {
// NOTE(review): this body appears to interleave two versions of the extraction logic (one that
// collects `contigMd5s` directly, one that builds `contigs`). `combinedMd5` is passed twice as a
// named argument in the constructor call below, which would not compile as written. Confirm
// which lines are current.

// Collects every `md5` value found under the reference section (`\\` searches recursively) and
// sorts them.
val contigMd5s = (runJson \ "gentrap" \ "settings" \ "reference" \\ "md5")
.children
.map(_.extract[String])
.sorted
// Combined checksum computed by `calcMd5` from the sorted contig checksums.
val combinedMd5 = calcMd5(contigMd5s)
// The reference section itself; reused for the contigs, species, and name lookups below.
val refJson = runJson \ "gentrap" \ "settings" \ "reference"
// `contigs` is decoded as a map of contig records; only the values are kept, the keys are dropped.
val contigs = (refJson \ "contigs")
.extract[Map[String, ReferenceContigRecord]]
.values.toSeq
ReferenceRecord(
// A new ObjectId is created on every call.
refId = new ObjectId,
combinedMd5 = combinedMd5,
contigMd5s = contigMd5s,
// The MD5s are sorted before being combined, so the result does not depend on the order in
// which the map yields its values.
combinedMd5 = calcMd5(contigs.map(_.md5).sorted),
contigs = contigs,
// `species` and `name` are read with extractOpt, so they are Options rather than required values.
species = (refJson \ "species").extractOpt[String],
name = (refJson \ "name").extractOpt[String],
creationTimeUtc = Option(getUtcTimeNow))
}
......
......@@ -22,5 +22,6 @@ import org.json4s.jackson.JsonMethods.parse
import nl.lumc.sasc.sentinel.utils.getResourceStream
trait JsonLoader {

  /**
   * Opens the resource at the given URL and parses its contents as JSON.
   *
   * @param url URL of the resource to load.
   * @return Parsed contents as a JValue object.
   */
  def loadJson(url: String): JValue = {
    val resourceStream = getResourceStream(url)
    parse(resourceStream)
  }
}
......@@ -16,11 +16,10 @@
*/
package nl.lumc.sasc.sentinel.processors.gentrap
import org.specs2.mutable.Specification
import org.specs2.mock.Mockito
import nl.lumc.sasc.sentinel.JsonLoader
import nl.lumc.sasc.sentinel.db.MongodbAccessObject
import org.specs2.mock.Mockito
import org.specs2.mutable.Specification
class GentrapValidationSpec extends Specification with JsonLoader with Mockito {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment