Commit b0678894 authored by bow's avatar bow
Browse files

Add reference contig checksums to Gentrap summary

parent af7ae98e
......@@ -16,8 +16,10 @@
package nl.lumc.sasc.biopet.pipelines.gentrap
import java.io.File
import scala.collection.JavaConverters._
import scala.language.reflectiveCalls
import htsjdk.samtools.reference._
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.function.QFunction
import picard.analysis.directed.RnaSeqMetricsCollector.StrandSpecificity
......@@ -114,6 +116,17 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
dictFile
}
/**
 * Information about the reference file: maps the MD5 checksum of each reference contig to the
 * name of that contig.
 *
 * The contigs are taken from the sequence dictionary exposed by the `FastaSequenceFile` opened on
 * `reference`. Being a lazy value, the lookup is performed once, on first access.
 *
 * NOTE(review): the map is keyed by checksum, so contigs that share a checksum collapse into a
 * single entry -- confirm this is the intended orientation.
 *
 * @throws IllegalArgumentException if the reference has no sequence dictionary, or if any contig
 *                                  in the dictionary lacks an MD5 checksum (the "M5" attribute)
 */
private lazy val refInfo: Map[String, String] = {
  val fastaFile = new FastaSequenceFile(reference, false)
  // everything below is materialized into an immutable Map before the finally clause runs, so the
  // file handle can (and must) be released as soon as the dictionary has been read
  try {
    // htsjdk signals a missing dictionary with null; fail with a clear message instead of an NPE
    val seqDict = Option(fastaFile.getSequenceDictionary)
      .getOrElse(throw new IllegalArgumentException(s"Reference file '$reference' does not have a sequence dictionary"))
    seqDict.getSequences.asScala
      .map { samrec =>
        val name = samrec.getSequenceName
        // getAttribute returns null when the tag is absent
        val md5 = Option(samrec.getAttribute("M5"))
          .getOrElse(throw new IllegalArgumentException(s"Reference sequence '$name' does not have an MD5 checksum"))
        md5 -> name
      }.toMap
  } finally fastaFile.close()
}
/** Adds output merge jobs for the given expression mode */
// TODO: can we combine the enum with the file extension (to reduce duplication and potential errors)?
private def makeMergeTableJob(inFunc: (Sample => Option[File]), ext: String, idCols: List[Int], valCol: Int,
......@@ -291,6 +304,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
"strand_protocol" -> strandProtocol.toString,
"call_variants" -> callVariants,
"remove_ribosomal_reads" -> removeRibosomalReads,
"reference" -> refInfo,
"version" -> FullVersion
)
......@@ -338,6 +352,9 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
def init(): Unit = {
checkDictFile()
// force evaluation of the lazy refInfo value so that a missing MD5 checksum is reported here, during initialization
refInfo
// TODO: validate that exons are flattened or not (depending on another option flag?)
// validate required annotation files
if (expMeasures.contains(FragmentsPerGene))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment