diff --git a/docs/config.md b/docs/config.md index c10419859bed6d3f510c80afe0b27fea913d774f..de3342b195b1dc7acb338729792d9dabc59c5228 100644 --- a/docs/config.md +++ b/docs/config.md @@ -72,16 +72,16 @@ Global setting examples are: #### Example settings config ~~~ { - "reference": "/data/LGTC/projects/vandoorn-melanoma/data/references/hg19_nohap/ucsc.hg19_nohap.fasta", - "dbsnp": "/data/LGTC/projects/vandoorn-melanoma/data/references/hg19_nohap/dbsnp_137.hg19_nohap.vcf", + "reference": "/references/hg19_nohap/ucsc.hg19_nohap.fasta", + "dbsnp": "/references/hg19_nohap/dbsnp_137.hg19_nohap.vcf", "joint_variantcalling": false, "haplotypecaller": { "scattercount": 100 }, "multisample": { "haplotypecaller": { "scattercount": 1000 } }, "picard": { "validationstringency": "LENIENT" }, "library_variantcalling_temp": true, - "target_bed_temp": "/data/LGTC/projects/vandoorn-melanoma/analysis/target.bed", + "target_bed_temp": "analysis/target.bed", "min_dp": 5, - "bedtools": {"exe":"/share/isilon/system/local/BEDtools/bedtools-2.17.0/bin/bedtools"}, + "bedtools": {"exe":"/BEDtools/bedtools-2.17.0/bin/bedtools"}, "bam_to_fastq": true, "baserecalibrator": { "memory_limit": 8, "vmem":"16G" }, "samtofastq": {"memory_limit": 8, "vmem": "16G"}, @@ -95,4 +95,4 @@ Global setting examples are: ### JSON validation To check if the JSON file created is correct we can use multiple options the simplest way is using [this](http://jsonformatter.curiousconcept.com/) -website. It is also possible to use Python or Scala for validating but this requires some more knowledge. \ No newline at end of file +website. It is also possible to use Python or Scala for validating but this requires some more knowledge. 
diff --git a/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala b/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala index f71dfefb75ea9cd38ed2415161d1e794ec9ae755..5088e44c2eca6f54b23104c7695c3a03d5f762c8 100644 --- a/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala +++ b/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala @@ -33,7 +33,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript { def makeSample(id: String) = new Sample(id) class Sample(sampleId: String) extends AbstractSample(sampleId) { def makeLibrary(id: String) = new Library(id) - class Library(libraryId: String) extends AbstractLibrary(libraryId) { + class Library(libId: String) extends AbstractLibrary(libId) { protected def addJobs(): Unit = {} } diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala index 147398ac798c077da0722b2078f7ea21c666b7d1..3d1b5585028f69c01934f0ddfc0d54c6a9c25169 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala @@ -13,13 +13,13 @@ trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction { override def subPath = "gatk" :: super.subPath - jarFile = config("gatk_jar", required = true) + jarFile = config("gatk_jar") override val defaultVmem = "7G" if (config.contains("intervals")) intervals = config("intervals").asFileList if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList - reference_sequence = config("reference", required = true) + reference_sequence = config("reference") if (config.contains("gatk_key")) gatk_key = config("gatk_key") if 
(config.contains("pedigree")) pedigree = config("pedigree").asFileList } diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala index 0c88447906c21a55a1e398a3c8c4786f18bebbf1..be4566211351a3d077877d1a99cf8be6ba0f58cd 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala @@ -35,17 +35,17 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri var jointGenotyping: Boolean = config("joint_genotyping", default = false) var singleSampleCalling = config("single_sample_calling", default = true) - var reference: File = config("reference", required = true) + var reference: File = config("reference") var useAllelesOption: Boolean = config("use_alleles_option", default = false) val externalGvcfs = config("external_gvcfs_files", default = Nil).asFileList def makeSample(id: String) = new Sample(id) class Sample(sampleId: String) extends AbstractSample(sampleId) { def makeLibrary(id: String) = new Library(id) - class Library(libraryId: String) extends AbstractLibrary(libraryId) { + class Library(libId: String) extends AbstractLibrary(libId) { val mapping = new Mapping(qscript) mapping.sampleId = sampleId - mapping.libraryId = libraryId + mapping.libId = libId mapping.outputDir = libDir + "/variantcalling/" /** Library variantcalling */ @@ -66,8 +66,8 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile) if (config("bam_to_fastq", default = false).asBoolean) { - val samToFastq = SamToFastq(qscript, bamFile, libDir + sampleId + "-" + libraryId + ".R1.fastq", - libDir + sampleId + "-" + libraryId + 
".R2.fastq") + val samToFastq = SamToFastq(qscript, bamFile, libDir + sampleId + "-" + libId + ".R1.fastq", + libDir + sampleId + "-" + libId + ".R2.fastq") samToFastq.isIntermediate = true qscript.add(samToFastq) mapping.input_R1 = samToFastq.fastqR1 @@ -82,17 +82,17 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri val header = inputSam.getFileHeader.getReadGroups for (readGroup <- inputSam.getFileHeader.getReadGroups) { if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same") - if (readGroup.getLibrary != libraryId) logger.warn("Library ID readgroup in bam file is not the same") - if (readGroup.getSample != sampleId || readGroup.getLibrary != libraryId) readGroupOke = false + if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same") + if (readGroup.getSample != sampleId || readGroup.getLibrary != libId) readGroupOke = false } inputSam.close if (!readGroupOke) { if (config("correct_readgroups", default = false)) { logger.info("Correcting readgroups, file:" + bamFile) - val aorrg = AddOrReplaceReadGroups(qscript, bamFile, new File(libDir + sampleId + "-" + libraryId + ".bam")) - aorrg.RGID = sampleId + "-" + libraryId - aorrg.RGLB = libraryId + val aorrg = AddOrReplaceReadGroups(qscript, bamFile, new File(libDir + sampleId + "-" + libId + ".bam")) + aorrg.RGID = sampleId + "-" + libId + aorrg.RGLB = libId aorrg.RGSM = sampleId aorrg.isIntermediate = true qscript.add(aorrg) @@ -105,7 +105,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri Some(bamFile) } } else { - logger.error("Sample: " + sampleId + ": No R1 found for run: " + libraryId) + logger.error("Sample: " + sampleId + ": No R1 found for run: " + libId) None } diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala 
b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala index 8bac4aaf68c33a245da877d460bc26abb9ebe564..47715e26adb11f0ed73520d2769ca55120a58d93 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala @@ -30,7 +30,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr var rawVcfInput: File = _ @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = config("reference", required = true) + var reference: File = config("reference") @Argument(doc = "OutputName", required = false) var outputName: String = _ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala index b6aba52ca8f6119f3bdbaea9fe0797f61ff6d0f5..2e391520b498205c3230653d4fdbca904ef2217e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala @@ -78,7 +78,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab /** * Checks executable. 
Follow full CanonicalPath, checks if it is existing and do a md5sum on it to store in job report */ - protected def checkExecutable { + protected[core] def checkExecutable { if (!BiopetCommandLineFunctionTrait.executableMd5Cache.contains(executable)) { try if (executable != null) { if (!BiopetCommandLineFunctionTrait.executableCache.contains(executable)) { @@ -91,8 +91,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab val file = new File(executable) executable = file.getCanonicalPath } else { - logger.error("executable: '" + executable + "' not found, please check config") - throw new QException("executable: '" + executable + "' not found, please check config") + BiopetQScript.addError("executable: '" + executable + "' not found, please check config") } BiopetCommandLineFunctionTrait.executableCache += oldExecutable -> executable BiopetCommandLineFunctionTrait.executableCache += executable -> executable @@ -113,9 +112,8 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab case ioe: java.io.IOException => logger.warn("Could not use 'which', check on executable skipped: " + ioe) } } - val md5 = BiopetCommandLineFunctionTrait.executableMd5Cache(executable) - if (md5 == null) addJobReportBinding("md5sum_exe", md5) - else addJobReportBinding("md5sum_exe", "None") + val md5 = BiopetCommandLineFunctionTrait.executableMd5Cache.get(executable) + addJobReportBinding("md5sum_exe", md5.getOrElse("None")) } /** @@ -145,6 +143,8 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab /** Executes the version command */ private def getVersionInternal: String = { if (versionCommand == null || versionRegex == null) return "N/A" + val exe = new File(versionCommand.trim.split(" ")(0)) + if (!exe.exists()) return "N/A" val stdout = new StringBuffer() val stderr = new StringBuffer() def outputLog = "Version command: \n" + versionCommand + @@ -206,4 +206,4 @@ object 
BiopetCommandLineFunctionTrait { private val versionCache: Map[String, String] = Map() private val executableMd5Cache: Map[String, String] = Map() private val executableCache: Map[String, String] = Map() -} \ No newline at end of file +} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index 11f40ddf484eaacac7be762d0dae11771494e1a3..3ca783580b5f45475d79271b8f14d33b3248f3d4 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -23,6 +23,7 @@ import org.broadinstitute.gatk.queue.QSettings import org.broadinstitute.gatk.queue.function.QFunction import org.broadinstitute.gatk.queue.function.scattergather.ScatterGatherableFunction import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging } +import scala.collection.mutable.ListBuffer /** * Base for biopet pipeline @@ -33,8 +34,8 @@ trait BiopetQScript extends Configurable with GatkLogging { val configfiles: List[File] = Nil var outputDir: String = { - val temp = Config.getValueFromMap(Config.global.map, ConfigValueIndex(this.configName, configPath, "output_dir")) - if (temp.isEmpty) throw new IllegalArgumentException("No output_dir defined in config") + val temp = Config.getValueFromMap(globalConfig.map, ConfigValueIndex(this.configName, configPath, "output_dir")) + if (temp.isEmpty) "" else { val t = temp.get.value.toString if (!t.endsWith("/")) t + "/" else t @@ -62,8 +63,9 @@ trait BiopetQScript extends Configurable with GatkLogging { * Script from queue itself, final to force some checks for each pipeline and write report */ final def script() { - outputDir = config("output_dir", required = true) - if (!outputDir.endsWith("/")) outputDir += "/" + outputDir = config("output_dir") + if (outputDir.isEmpty) outputDir = new 
File(".").getAbsolutePath() + if (!outputDir.endsWith("/")) outputDir += "/" init biopetScript @@ -72,11 +74,17 @@ trait BiopetQScript extends Configurable with GatkLogging { case _ => } for (function <- functions) function match { - case f: BiopetCommandLineFunctionTrait => f.afterGraph - case _ => + case f: BiopetCommandLineFunctionTrait => { + f.checkExecutable + f.afterGraph + } + case _ => } - Config.global.writeReport(qSettings.runName, outputDir + ".log/" + qSettings.runName) + if (new File(outputDir).canWrite) globalConfig.writeReport(qSettings.runName, outputDir + ".log/" + qSettings.runName) + else BiopetQScript.addError("Output dir: '" + outputDir + "' is not writeable") + + BiopetQScript.checkErrors } /** Get implemented from org.broadinstitute.gatk.queue.QScript */ @@ -92,3 +100,28 @@ trait BiopetQScript extends Configurable with GatkLogging { add(function) } } + +object BiopetQScript extends Logging { + private val errors: ListBuffer[Exception] = ListBuffer() + + def addError(error: String, debug: String = null): Unit = { + val msg = error + (if (debug != null && logger.isDebugEnabled) "; " + debug else "") + errors.append(new Exception(msg)) + } + + protected def checkErrors: Unit = { + if (!errors.isEmpty) { + logger.error("*************************") + logger.error("Biopet found some errors:") + if (logger.isDebugEnabled) { + for (e <- errors) { + logger.error(e.getMessage) + logger.debug(e.getStackTrace.mkString("Stack trace:\n", "\n", "\n")) + } + } else { + errors.map(_.getMessage).sorted.distinct.foreach(logger.error(_)) + } + throw new IllegalStateException("Biopet found errors") + } + } +} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala index 0217615d1bedf61bdac37f878f7f1fcdf698b0ec..4184f29afacc7f6afcf6dc9d0eb45f9eddfc1993 100644 --- 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala @@ -28,7 +28,7 @@ trait MultiSampleQScript extends BiopetQScript { @Argument(doc = "Only Sample", shortName = "sample", required = false) private val onlySamples: List[String] = Nil - require(Config.global.map.contains("samples"), "No Samples found in config") + require(globalConfig.map.contains("samples"), "No Samples found in config") /** * Sample class with basic functions build in @@ -40,26 +40,26 @@ trait MultiSampleQScript extends BiopetQScript { /** * Library class with basic functions build in - * @param libraryId + * @param libId */ - abstract class AbstractLibrary(val libraryId: String) { + abstract class AbstractLibrary(val libId: String) { /** Overrules config of qscript with default sample and default library */ - val config = new ConfigFunctions(defaultSample = sampleId, defaultLibrary = libraryId) + val config = new ConfigFunctions(defaultSample = sampleId, defaultLibrary = libId) /** Adds the library jobs */ final def addAndTrackJobs(): Unit = { currentSample = Some(sampleId) - currentLib = Some(libraryId) + currentLib = Some(libId) addJobs() currentLib = None currentSample = None } /** Creates a library file with given suffix */ - def createFile(suffix: String): File = new File(libDir, sampleId + "-" + libraryId + suffix) + def createFile(suffix: String): File = new File(libDir, sampleId + "-" + libId + suffix) /** Returns library directory */ - def libDir = sampleDir + "lib_" + libraryId + File.separator + def libDir = sampleDir + "lib_" + libId + File.separator /** Function that add library jobs */ protected def addJobs() @@ -80,7 +80,7 @@ trait MultiSampleQScript extends BiopetQScript { /** returns a set with library names */ protected def libIds: Set[String] = { - ConfigUtils.getMapFromPath(Config.global.map, List("samples", sampleId, 
"libraries")).getOrElse(Map()).keySet + ConfigUtils.getMapFromPath(globalConfig.map, List("samples", sampleId, "libraries")).getOrElse(Map()).keySet } /** Adds sample jobs */ @@ -95,7 +95,7 @@ trait MultiSampleQScript extends BiopetQScript { /** function add all libraries in one call */ protected final def addPerLibJobs(): Unit = { - for ((libraryId, library) <- libraries) { + for ((libId, library) <- libraries) { library.addAndTrackJobs() } } @@ -125,7 +125,7 @@ trait MultiSampleQScript extends BiopetQScript { val samples: Map[String, Sample] = sampleIds.map(id => id -> makeSample(id)).toMap /** Returns a list of all sampleIDs */ - protected def sampleIds: Set[String] = ConfigUtils.any2map(Config.global.map("samples")).keySet + protected def sampleIds: Set[String] = ConfigUtils.any2map(globalConfig.map("samples")).keySet /** Runs addAndTrackJobs method for each sample */ final def addSamplesJobs() { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala index 4ea65cb5a7fc7ceb1226853d47687a0865c818d0..e25e8ef508188adbd5f767b20ca1f688c4eed922 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala @@ -133,11 +133,11 @@ class Config(var map: Map[String, Any]) extends Logging { */ protected[config] def apply(module: String, path: List[String], key: String, default: Any = null, freeVar: Boolean = true): ConfigValue = { val requestedIndex = ConfigValueIndex(module, path, key, freeVar) - if (contains(requestedIndex)) return foundCache(requestedIndex) + if (contains(requestedIndex)) foundCache(requestedIndex) else if (default != null) { defaultCache += (requestedIndex -> ConfigValue(requestedIndex, null, default, freeVar)) - return defaultCache(requestedIndex) - } else throw new IllegalStateException("Value in config 
could not be found but it seems required, index: " + requestedIndex) + defaultCache(requestedIndex) + } else ConfigValue(requestedIndex, null, null, freeVar) } def writeReport(id: String, directory: String): Unit = { @@ -174,7 +174,7 @@ class Config(var map: Map[String, Any]) extends Logging { val fullEffective = ConfigUtils.mergeMaps(effectiveFound, effectiveDefaultFound) val fullEffectiveWithNotFound = ConfigUtils.mergeMaps(fullEffective, notFound) - writeMapToJsonFile(Config.global.map, "input") + writeMapToJsonFile(this.map, "input") writeMapToJsonFile(found, "found") writeMapToJsonFile(effectiveFound, "effective.found") writeMapToJsonFile(effectiveDefaultFound, "effective.defaults") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala index 54e6dfd170a8cb2e68f5c63dd8ca2675513cee7d..51997ccb8a12fb5c3a234e5e7d95c001dd704b4f 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala @@ -21,6 +21,7 @@ import nl.lumc.sasc.biopet.utils.ConfigUtils.ImplicitConversions trait Configurable extends ImplicitConversions { /** Should be object of parant object */ val root: Configurable + val globalConfig: Config = if (root != null) root.globalConfig else Config.global /** subfix to the path */ def subPath: List[String] = Nil @@ -79,7 +80,6 @@ trait Configurable extends ImplicitConversions { * @param key Name of value * @param default Default value if not found * @param submodule Adds to the path - * @param required Default false, if true and value is not found this function will raise an exception * @param freeVar Default true, if set false value must exist in module * @param sample Default null, when set path is prefixed with "samples" -> "sampleID" * @param library Default null, when set path is 
prefixed with "libraries" -> "libraryID" @@ -88,7 +88,6 @@ trait Configurable extends ImplicitConversions { def apply(key: String, default: Any = null, submodule: String = null, - required: Boolean = false, freeVar: Boolean = true, sample: String = null, library: String = null): ConfigValue = { @@ -100,14 +99,8 @@ trait Configurable extends ImplicitConversions { val value = Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar)) if (value.isDefined) value.get.value else default } - if (!contains(key, submodule, freeVar, sample = s, library = l) && d == null) { - if (required) { - Logging.logger.error("Value in config could not be found but it is required, key: " + key + " module: " + m + " path: " + p) - throw new IllegalStateException("Value in config could not be found but it is required, key: " + key + " module: " + m + " path: " + p) - } else return null - } - if (d == null) return Config.global(m, p, key, freeVar = freeVar) - else return Config.global(m, p, key, d, freeVar) + if (d == null) globalConfig(m, p, key, freeVar = freeVar) + else globalConfig(m, p, key, d, freeVar) } /** @@ -129,7 +122,7 @@ trait Configurable extends ImplicitConversions { val m = if (submodule != null) submodule else configName val p = path(s, l, submodule) - Config.global.contains(m, p, key, freeVar) || !(Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar)) == None) + globalConfig.contains(m, p, key, freeVar) || !(Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar)) == None) } } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala index 6e1a86cd7f3aaa3d56d10263de84f4bca41a0c32..3aaab82b6bad27c00a94f572eaf024c61913ceb6 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala +++ 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala @@ -83,19 +83,15 @@ object BiopetQCommandLine extends GatkLogging { Runtime.getRuntime.addShutdownHook(shutdownHook) + CommandLineProgram.start(qCommandLine, argv) try { - CommandLineProgram.start(qCommandLine, argv) - try { - Runtime.getRuntime.removeShutdownHook(shutdownHook) - qCommandLine.shutdown() - } catch { - case e: Exception => /* ignore, example 'java.lang.IllegalStateException: Shutdown in progress' */ - } - if (CommandLineProgram.result != 0) - System.exit(CommandLineProgram.result) + Runtime.getRuntime.removeShutdownHook(shutdownHook) + qCommandLine.shutdown() } catch { - case e: Exception => CommandLineProgram.exitSystemWithError(e) + case e: Exception => /* ignore, example 'java.lang.IllegalStateException: Shutdown in progress' */ } + if (CommandLineProgram.result != 0) + System.exit(CommandLineProgram.result) } } @@ -116,21 +112,28 @@ class BiopetQCommandLine extends CommandLineProgram with Logging { private var qScriptClasses: File = _ private var shuttingDown = false + /** + * we modified this in Biopet to skip compiling and show full stacktrace again + */ private lazy val qScriptPluginManager = { qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory) - //qScriptManager.loadScripts(scripts, qScriptClasses) - //var temp: Seq[URL] = Seq() for (t <- scripts) { val is = getClass.getResourceAsStream(t.getAbsolutePath) val os = new FileOutputStream(qScriptClasses.getAbsolutePath + "/" + t.getName) org.apache.commons.io.IOUtils.copy(is, os) os.close() - //temp :+= this.getClass.getResource(t.toString) - //logger.info(this.getClass.getResource(t.toString)) val s = if (t.getName.endsWith("/")) t.getName.substring(0, t.getName.length - 1) else t.getName pipelineName = s.substring(0, s.lastIndexOf(".")) + "." 
+ System.currentTimeMillis } - new PluginManager[QScript](qPluginType, List(qScriptClasses.toURI.toURL)) + + // override createByType to pass the correct exceptions + new PluginManager[QScript](qPluginType, List(qScriptClasses.toURI.toURL)) { + override def createByType(plugintype: Class[_ <: QScript]) = { + val noArgsConstructor = plugintype.getDeclaredConstructor() + noArgsConstructor.setAccessible(true) + noArgsConstructor.newInstance() + } + } } private lazy val qCommandPlugin = { @@ -188,12 +191,7 @@ class BiopetQCommandLine extends CommandLineProgram with Logging { //if (settings.run) script.pullInputs() script.qSettings = settings.qSettings - try { - script.script() - } catch { - case e: Exception => - throw new UserException.CannotExecuteQScript(script.getClass.getSimpleName + ".script() threw the following exception: " + e, e) - } + script.script() if (remoteFileConverter != null) { if (remoteFileConverter.convertToRemoteEnabled) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala index e192a845be6db96d2a754d2d2ff6a5e393d136ca..9ac74448e880d7be01063a3f12bdd613141b34a4 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala @@ -29,7 +29,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction { var R2: File = _ @Input(doc = "The reference file for the bam files.", shortName = "R") - var reference: File = config("reference", required = true) + var reference: File = config("reference") @Output(doc = "Output file SAM", shortName = "output") var output: File = _ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala index 
6c36a4f0f4bb5bb24dc4fca1fbca78ad35936356..7bda8792544eea2d863ab06192b51b55ae900077 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala @@ -25,16 +25,16 @@ import nl.lumc.sasc.biopet.core.config.Configurable class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Contaminants", required = false) - var contaminants: File = _ + var contaminants: Option[File] = None @Input(doc = "Adapters", required = false) - var adapters: File = _ + var adapters: Option[File] = None @Input(doc = "Fastq file", shortName = "FQ") - var fastqfile: File = _ + var fastqfile: File = null @Output(doc = "Output", shortName = "out") - var output: File = _ + var output: File = null executable = config("exe", default = "fastqc") var java_exe: String = config("exe", default = "java", submodule = "java", freeVar = false) @@ -50,17 +50,31 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { override def afterGraph { this.checkExecutable - if (contaminants == null) { - val fastqcDir = executable.substring(0, executable.lastIndexOf("/")) - val defaultContams = getVersion match { - case "v0.11.2" => new File(fastqcDir + "/Configuration/contaminant_list.txt") - case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt") - } - val defaultAdapters = getVersion match { - case "v0.11.2" => new File(fastqcDir + "/Configuration/adapter_list.txt") - case _ => null - } - contaminants = config("contaminants", default = defaultContams) + + val fastqcDir = new File(executable).getParent + + contaminants = contaminants match { + // user-defined contaminants file takes precedence + case userDefinedValue @ Some(_) => userDefinedValue + // otherwise, use default contaminants file (depending on FastQC version) + case None => + val defaultContams = getVersion match { + case "v0.11.2" => new File(fastqcDir + 
"/Configuration/contaminant_list.txt") + case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt") + } + config("contaminants", default = defaultContams) + } + + adapters = adapters match { + // user-defined adapters file takes precedence + case userDefinedValue @ Some(_) => userDefinedValue + // otherwise, check if adapters are already present (depending on FastQC version) + case None => + val defaultAdapters = getVersion match { + case "v0.11.2" => Option(new File(fastqcDir + "/Configuration/adapter_list.txt")) + case _ => None + } + defaultAdapters.collect { case adp => config("adapters", default = adp) } } } @@ -74,6 +88,6 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { conditional(noextract, "--noextract") + conditional(extract, "--extract") + conditional(quiet, "--quiet") + - required("-o", output.getParent()) + + required("-o", output.getParent) + required(fastqfile) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala index 4f6236179755659b3b42d74cef6c722713fbf4c9..1d7b45ec9617091a457f073e4110ff5e01932e24 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala @@ -50,6 +50,8 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction { @Argument(doc = "Output directory", required = true) var w: String = _ + var noBfgs: Boolean = config("no_bfgs", default = false) + @Input(required = false) var t: File = _ @@ -101,5 +103,6 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction { optional("-f", f) + optional("-t", t) + optional("-z", z) + + conditional(noBfgs, "--no-bfgs") + required("-T", threads) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala index a7bfeab78447e62eee1634c73ca09c880679327e..1db8f6d67c8d892e6188aa750efb40b814fbbf1e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala @@ -29,13 +29,13 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction { var R2: File = _ @Input(doc = "The reference file for the bam files.", shortName = "ref") - var reference: File = config("reference", required = true) + var reference: File = config("reference") @Input(doc = "The genome prefix.") - var genome: File = config("genome", required = true) + var genome: File = config("genome") @Input(doc = "The hash prefix") - var hash: File = config("hash", required = true) + var hash: File = config("hash") @Output(doc = "Output file SAM", shortName = "output") var output: File = _ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala index e0b8bd68bda46a6b3dc6915a54482b093cfd43d6..98ee6de521d499710e186bb2c196d9d471c6aa5d 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala @@ -29,7 +29,7 @@ class TopHat(val root: Configurable) extends BiopetCommandLineFunction { var R2: File = _ @Input(doc = "Bowtie index", shortName = "bti") - var bowtie_index: File = config("bowtie_index", required = true) + var bowtie_index: File = config("bowtie_index") @Argument(doc = "Output Directory") var outputDir: String = _ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala index 
bfd0a5846e6531b47f285453a2d848b1b7f1bafe..3298702fcfd5cd98e1eaf0bae8f6b059d353d2c9 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala @@ -13,7 +13,7 @@ class BwaAln(val root: Configurable) extends Bwa { var fastq: File = _ @Input(doc = "The reference file for the bam files.", required = true) - var reference: File = config("reference", required = true) + var reference: File = config("reference") @Output(doc = "Output file SAM", required = false) var output: File = _ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala index fc790b183b5bae2922a1b5f89ec66fcf4f5b85b2..74cdad0cea290e5a8cdcc2b360fa3fa32444791c 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala @@ -29,7 +29,7 @@ class BwaMem(val root: Configurable) extends Bwa { var R2: File = _ @Input(doc = "The reference file for the bam files.", shortName = "R") - var reference: File = config("reference", required = true) + var reference: File = config("reference") @Output(doc = "Output file SAM", shortName = "output") var output: File = _ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala index b857eea014ac52acb9608debb94db9cd75cef929..255811561e732b229b1417c95d866fad4694c0c9 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala @@ -26,7 +26,7 @@ class BwaSampe(val root: Configurable) extends Bwa { var saiR2: File = _ 
@Input(doc = "The reference file for the bam files.", required = true) - var reference: File = config("reference", required = true) + var reference: File = config("reference") @Output(doc = "Output file SAM", required = false) var output: File = _ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala index 51f9a0f30eaf73a897fefaa2b6cf69d4e3386b62..8bbf918474e21d2e2837ce01d5bc4490e13bd087 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala @@ -16,7 +16,7 @@ class BwaSamse(val root: Configurable) extends Bwa { var sai: File = _ @Input(doc = "The reference file for the bam files.", required = true) - var reference: File = config("reference", required = true) + var reference: File = config("reference") @Output(doc = "Output file SAM", required = false) var output: File = _ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala new file mode 100644 index 0000000000000000000000000000000000000000..d017864f6988828100f6bbda421fbf734a9a5878 --- /dev/null +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala @@ -0,0 +1,14 @@ +/** + * Created by wyleung on 5-1-15. 
+ */ + +package nl.lumc.sasc.biopet.extensions.igvtools + +import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction + +abstract class IGVTools extends BiopetCommandLineFunction { + executable = config("exe", default = "igvtools", submodule = "igvtools", freeVar = false) + override def versionCommand = executable + " version" + override val versionRegex = """IGV Version: ([\d\.]) .*""".r + override val versionExitcode = List(0) +} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala new file mode 100644 index 0000000000000000000000000000000000000000..8037616834ecd4de02e9949883b75d20b45c7347 --- /dev/null +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala @@ -0,0 +1,105 @@ + +package nl.lumc.sasc.biopet.extensions.igvtools + +import java.nio.file.InvalidPathException + +import nl.lumc.sasc.biopet.core.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } +import java.io.{ FileNotFoundException, File } + +/** + * IGVTools `count` wrapper + * + * @constructor create a new IGVTools instance from a `.bam` file + * + */ + +class IGVToolsCount(val root: Configurable) extends IGVTools { + @Input(doc = "Bam File") + var input: File = _ + + @Input(doc = "<genome>.chrom.sizes File") + var genomeChromSizes: File = _ + + @Output + var tdf: Option[File] = _ + + @Output + var wig: Option[File] = _ + + var maxZoom: Option[Int] = config("maxZoom") + var windowSize: Option[Int] = config("windowSize") + var extFactor: Option[Int] = config("extFactor") + + var preExtFactor: Option[Int] = config("preExtFactor") + var postExtFactor: Option[Int] = config("postExtFactor") + + var windowFunctions: Option[String] = config("windowFunctions") + var strands: Option[String] = config("strands") + var bases: Boolean 
= config("bases", default = false) + + var query: Option[String] = config("query") + var minMapQuality: Option[Int] = config("minMapQuality") + var includeDuplicates: Boolean = config("includeDuplicates", default = false) + + var pairs: Boolean = config("pairs", default = false) + + override def afterGraph { + super.afterGraph + if (!input.exists()) throw new FileNotFoundException("Input bam is required for IGVToolsCount") + + if (!wig.isEmpty && !wig.get.getAbsolutePath.endsWith(".wig")) throw new IllegalArgumentException("Wiggle file should have a .wig file-extension") + if (!tdf.isEmpty && !tdf.get.getAbsolutePath.endsWith(".tdf")) throw new IllegalArgumentException("TDF file should have a .tdf file-extension") + } + + def cmdLine = { + required(executable) + + required("count") + + optional("--maxZoom", maxZoom) + + optional("--windowSize", windowSize) + + optional("--extFactor", extFactor) + + optional("--preExtFactor", preExtFactor) + + optional("--postExtFactor", postExtFactor) + + optional("--windowFunctions", windowFunctions) + + optional("--strands", strands) + + conditional(bases, "--bases") + + optional("--query", query) + + optional("--minMapQuality", minMapQuality) + + conditional(includeDuplicates, "--includeDuplicates") + + conditional(pairs, "--pairs") + + required(input) + + required(outputArg) + + required(genomeChromSizes) + } + + /** + * This part should never fail, these values are set within this wrapper + * + */ + private def outputArg: String = { + (tdf, wig) match { + case (None, None) => throw new IllegalArgumentException("Either TDF or WIG should be supplied"); + case (Some(a), None) => a.getAbsolutePath; + case (None, Some(b)) => b.getAbsolutePath; + case (Some(a), Some(b)) => a.getAbsolutePath + "," + b.getAbsolutePath; + } + } +} + +object IGVToolsCount { + /** + * Create an object by specifying the `input` (.bam), + * and the `genomename` (hg18,hg19,mm10) + * + * @param input Bamfile to count reads from + * @return a new 
IGVToolsCount instance + * @throws FileNotFoundException bam File is not found + * @throws IllegalArgumentException tdf or wig not supplied + */ + def apply(root: Configurable, input: File, genomeChromSizes: File): IGVToolsCount = { + val counting = new IGVToolsCount(root) + counting.input = input + counting.genomeChromSizes = genomeChromSizes + return counting + } +} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala index 7ffcb50d8fec02138c07e4fc95d7608e8bfba72e..9cab9bfc90298f243f6cfdc8e051e7d451a4cf90 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala @@ -35,7 +35,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard { var outputSummary: File = _ @Argument(doc = "Reference file", required = false) - var reference: File = config("reference", required = true) + var reference: File = config("reference") @Argument(doc = "Window size", required = false) var windowSize: Option[Int] = config("windowsize") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/MultisamplePipelineTemplate.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/MultisamplePipelineTemplate.scala index 271c06085ddd086a5f015c88d84e29a008272c89..7d46c4b8fb910398f61e9fb3b873f682efaf023f 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/MultisamplePipelineTemplate.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/MultisamplePipelineTemplate.scala @@ -26,7 +26,7 @@ class MultisamplePipelineTemplate(val root: Configurable) extends QScript with M class Sample(sampleId: String) extends 
AbstractSample(sampleId) { def makeLibrary(id: String) = new Library(id) - class Library(libraryId: String) extends AbstractLibrary(libraryId) { + class Library(libId: String) extends AbstractLibrary(libId) { protected def addJobs(): Unit = { // Library jobs } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala index 24ddb994e641914e0de9fab3d21e56c9d05d7f02..76f8887cced08a6c2ef0e16ca469f1700a7ee208 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala @@ -26,6 +26,7 @@ import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Output, Input } import scala.collection.JavaConversions._ +import scala.io.Source class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction { javaMainClass = getClass.getName @@ -58,6 +59,7 @@ class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction { object VcfFilter extends ToolCommand { case class Args(inputVcf: File = null, outputVcf: File = null, + invertedOutputVcf: Option[File] = None, minQualscore: Option[Double] = None, minSampleDepth: Int = -1, minTotalDepth: Int = -1, @@ -69,7 +71,8 @@ object VcfFilter extends ToolCommand { diffGenotype: List[(String, String)] = Nil, filterHetVarToHomVar: List[(String, String)] = Nil, filterRefCalls: Boolean = false, - filterNoCalls: Boolean = false) extends AbstractArgs + filterNoCalls: Boolean = false, + iDset: Set[String] = Set()) extends AbstractArgs class OptParser extends AbstractOptParser { opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) => @@ -78,6 +81,9 @@ object VcfFilter extends ToolCommand { opt[File]('o', "outputVcf") required () maxOccurs (1) valueName ("<file>") action 
{ (x, c) => c.copy(outputVcf = x) } text ("Output vcf file") + opt[File]("invertedOutputVcf") maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(invertedOutputVcf = Some(x)) + } text ("inverted output vcf file") opt[Int]("minSampleDepth") unbounded () valueName ("<int>") action { (x, c) => c.copy(minSampleDepth = x) } text ("Min value for DP in genotype fields") @@ -116,6 +122,12 @@ object VcfFilter extends ToolCommand { opt[Double]("minQualscore") unbounded () action { (x, c) => c.copy(minQualscore = Some(x)) } text ("Min qual score") + opt[String]("id") unbounded () action { (x, c) => + c.copy(iDset = c.iDset + x) + } text ("Id that may pass the filter") + opt[File]("id-file") unbounded () action { (x, c) => + c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines()) + } text ("File that contain list of IDs to get from vcf file") } var commandArgs: Args = _ @@ -124,6 +136,7 @@ object VcfFilter extends ToolCommand { * @param args the command line arguments */ def main(args: Array[String]): Unit = { + logger.info("Start") val argsParser = new OptParser commandArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1) @@ -132,6 +145,11 @@ object VcfFilter extends ToolCommand { val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build) writer.writeHeader(header) + val invertedWriter = commandArgs.invertedOutputVcf.collect { case x => new VariantContextWriterBuilder().setOutputFile(x).build } + invertedWriter.foreach(_.writeHeader(header)) + + var counterTotal = 0 + var counterLeft = 0 for (record <- reader) { if (minQualscore(record) && filterRefCalls(record) && @@ -143,12 +161,20 @@ object VcfFilter extends ToolCommand { mustHaveVariant(record) && notSameGenotype(record) && filterHetVarToHomVar(record) && - denovoInSample(record)) { + denovoInSample(record) && + inIdSet(record)) { writer.add(record) - } + counterLeft += 1 + } else + invertedWriter.foreach(_.add(record)) + counterTotal += 1 + 
if (counterTotal % 100000 == 0) logger.info(counterTotal + " variants processed, " + counterLeft + " left") } + logger.info(counterTotal + " variants processed, " + counterLeft + " left") reader.close writer.close + invertedWriter.foreach(_.close()) + logger.info("Done") } def minQualscore(record: VariantContext): Boolean = { @@ -241,4 +267,9 @@ object VcfFilter extends ToolCommand { } return true } + + def inIdSet(record: VariantContext): Boolean = { + if (commandArgs.iDset.isEmpty) true + else record.getID.split(",").exists(commandArgs.iDset.contains(_)) + } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala index 9f1cddb7913cf7846daa72c7b9e695988b3a14c6..2c9165036824f135f9552b69fa3b4085be25fc93 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala @@ -16,6 +16,7 @@ package nl.lumc.sasc.biopet.utils import java.io.File +import nl.lumc.sasc.biopet.core.BiopetQScript import nl.lumc.sasc.biopet.core.Logging import nl.lumc.sasc.biopet.core.config.ConfigValue import argonaut._, Argonaut._ @@ -330,14 +331,27 @@ object ConfigUtils extends Logging { trait ImplicitConversions { import scala.language.implicitConversions + private def requiredValue(value: ConfigValue): Boolean = { + val exist = valueExists(value) + if (!exist) + BiopetQScript.addError("Value does not exist but is required, key: " + value.requestIndex.key + + " module: " + value.requestIndex.module, + (if (value.requestIndex.path != Nil) " path: " + value.requestIndex.path.mkString("->") else null)) + exist + } + + private def valueExists(value: ConfigValue): Boolean = { + value != null && value.value != null && value.value != None + } + /** * Convert ConfigValue to File * @param value Input ConfigValue * @return */ 
implicit def configValue2file(value: ConfigValue): File = { - if (value != null && value.value != null && value.value != None) new File(any2string(value.value)) - else throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) new File(any2string(value.value)) + else new File("") } /** @@ -346,7 +360,7 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2optionFile(value: ConfigValue): Option[File] = { - if (value != null && value.value != null && value.value != None) Some(new File(any2string(value.value))) + if (valueExists(value)) Some(new File(any2string(value.value))) else None } @@ -356,8 +370,8 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2string(value: ConfigValue): String = { - if (value != null && value.value != null && value.value != None) any2string(value.value) - else throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) any2string(value.value) + else "" } /** @@ -366,7 +380,7 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2optionString(value: ConfigValue): Option[String] = { - if (value != null && value.value != null && value.value != None) Some(any2string(value.value)) + if (valueExists(value)) Some(any2string(value.value)) else None } @@ -376,8 +390,8 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2long(value: ConfigValue): Long = { - if (value != null && value.value != null && value.value != None) any2long(value.value) - else throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) any2long(value.value) + else 0L } /** @@ -386,7 +400,7 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2optionLong(value: ConfigValue): Option[Long] = { - if (value != null && value.value != null && value.value != None) Option(any2long(value.value)) + if (valueExists(value)) Option(any2long(value.value)) else None } @@ -396,8 +410,8 @@ object 
ConfigUtils extends Logging { * @return */ implicit def configValue2int(value: ConfigValue): Int = { - if (value != null && value.value != null && value.value != None) any2int(value.value) - else throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) any2int(value.value) + else 0 } /** @@ -406,7 +420,7 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2optionInt(value: ConfigValue): Option[Int] = { - if (value != null && value.value != null && value.value != None) Option(any2int(value.value)) + if (valueExists(value)) Option(any2int(value.value)) else None } @@ -416,8 +430,8 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2double(value: ConfigValue): Double = { - if (value != null && value.value != null && value.value != None) any2double(value.value) - else throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) any2double(value.value) + else 0.0 } /** @@ -426,7 +440,7 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2optionDouble(value: ConfigValue): Option[Double] = { - if (value != null && value.value != null && value.value != None) Option(any2double(value.value)) + if (valueExists(value)) Option(any2double(value.value)) else None } @@ -436,8 +450,8 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2float(value: ConfigValue): Float = { - if (value != null && value.value != null && value.value != None) any2float(value.value) - else throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) any2float(value.value) + else 0f } /** @@ -446,7 +460,7 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2optionFloat(value: ConfigValue): Option[Float] = { - if (value != null && value.value != null && value.value != None) Option(any2float(value.value)) + if (valueExists(value)) Option(any2float(value.value)) else None } @@ -456,8 +470,8 @@ object ConfigUtils 
extends Logging { * @return */ implicit def configValue2boolean(value: ConfigValue): Boolean = { - if (value != null && value.value != null && value.value != None) any2boolean(value.value) - else throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) any2boolean(value.value) + else false } /** @@ -466,7 +480,7 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2optionBoolean(value: ConfigValue): Option[Boolean] = { - if (value != null && value.value != null && value.value != None) Option(any2boolean(value.value)) + if (valueExists(value)) Option(any2boolean(value.value)) else None } @@ -476,8 +490,8 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2list(value: ConfigValue): List[Any] = { - if (value != null && value.value != null && value.value != None) any2list(value.value) - else throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) any2list(value.value) + else Nil } /** @@ -486,8 +500,8 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2stringList(value: ConfigValue): List[String] = { - if (value != null && value.value != null && value.value != None) any2stringList(value.value) - else throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) any2stringList(value.value) + else Nil } /** @@ -496,8 +510,8 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2stringSet(value: ConfigValue): Set[String] = { - if (value != null && value.value != null && value.value != None) any2stringList(value.value).toSet - else throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) any2stringList(value.value).toSet + else Set() } /** @@ -506,8 +520,8 @@ object ConfigUtils extends Logging { * @return */ implicit def configValue2map(value: ConfigValue): Map[String, Any] = { - if (value != null && value.value != null && value.value != None) any2map(value.value) - else 
throw new IllegalStateException("Value does not exist") + if (requiredValue(value)) any2map(value.value) + else Map() } } } diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigTest.scala index 92d0b9f63f60ad1a6c9489ba1d71fd3f25ab6828..b7353b2ad1d4a91cd05340edbd7a61a51e64e266 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigTest.scala @@ -30,9 +30,6 @@ class ConfigTest extends TestNGSuite with Matchers with ConfigUtils.ImplicitConv @Test def testApply: Unit = { ConfigTest.config("m1", Nil, "k1").asString shouldBe "v2" ConfigTest.config("m1", Nil, "notexist", default = "default").asString shouldBe "default" - intercept[IllegalStateException] { - ConfigTest.config("m1", Nil, "notexist") - } } @Test def testMergeConfigs: Unit = { diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigurableTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigurableTest.scala index 1b493f2e14df822b75534fed3722f88917882fe1..19940fde2ecb2c923cdbed79b9fcb85bd1ab4e39 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigurableTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigurableTest.scala @@ -11,10 +11,10 @@ import org.testng.annotations.Test */ class ConfigurableTest extends TestNGSuite with Matchers { @Test def testConfigurable: Unit = { - Config.global.map = Map() - Config.global.loadConfigFile(ConfigurableTest.file) - - val classC = new ClassC + val classC = new ClassC { + override def configName = "classc" + override val globalConfig = new Config(ConfigurableTest.map) + } classC.configPath shouldBe Nil classC.configFullPath shouldBe List("classc") @@ -27,10 +27,6 @@ class 
ConfigurableTest extends TestNGSuite with Matchers { classC.classB.get("k1").asString shouldBe "c1" classC.classB.classA.get("k1").asString shouldBe "c1" - classC.get("notexist") shouldBe null - intercept[IllegalStateException] { - classC.get("notexist", required = true) - } classC.get("notexist", default = "default").asString shouldBe "default" classC.get("k1", freeVar = false).asString shouldBe "c1" @@ -47,23 +43,24 @@ abstract class Cfg extends Configurable { def get(key: String, default: String = null, submodule: String = null, - required: Boolean = false, freeVar: Boolean = true, sample: String = null, library: String = null) = { - config(key, default, submodule, required, freeVar = freeVar, sample = sample, library = library) + config(key, default, submodule, freeVar = freeVar, sample = sample, library = library) } } class ClassA(val root: Configurable) extends Cfg class ClassB(val root: Configurable) extends Cfg { - val classA = new ClassA(this) + lazy val classA = new ClassA(this) + // Why this needs to be lazy? } class ClassC(val root: Configurable) extends Cfg { def this() = this(null) - val classB = new ClassB(this) + lazy val classB = new ClassB(this) + // Why this needs to be lazy? 
} object ConfigurableTest { @@ -85,6 +82,4 @@ object ConfigurableTest { ) ) ) - - val file = ConfigUtilsTest.writeTemp(ConfigUtils.mapToJson(map).spaces2) } diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala index 26f989c5369c4e138ded82620d1d03f0c076ce7c..ddcf887d819eb0edc09bfc5b6a42954115e62192 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala @@ -4,6 +4,7 @@ import java.io.{ PrintWriter, File } import argonaut.Argonaut._ import argonaut.Json +import nl.lumc.sasc.biopet.core.BiopetQScript import nl.lumc.sasc.biopet.core.config.{ ConfigValueIndex, ConfigValue } import org.scalatest.Matchers import org.scalatest.mock.MockitoSugar @@ -170,79 +171,23 @@ class ConfigUtilsTest extends TestNGSuite with Matchers { val index = ConfigValueIndex("test", Nil, "test") new ImplicitConversions { configValue2list(ConfigValue(index, index, List(""))) shouldBe List("") - intercept[IllegalStateException] { - configValue2list(ConfigValue(index, index, null)) - } - configValue2stringList(ConfigValue(index, index, List(""))) shouldBe List("") - intercept[IllegalStateException] { - configValue2stringList(ConfigValue(index, index, null)) - } - configValue2stringSet(ConfigValue(index, index, List(""))) shouldBe Set("") - intercept[IllegalStateException] { - configValue2stringSet(ConfigValue(index, index, null)) - } var int: Int = ConfigValue(index, index, 1) - intercept[IllegalStateException] { - int = ConfigValue(index, index, null) - } - var long: Long = ConfigValue(index, index, 1) - intercept[IllegalStateException] { - long = ConfigValue(index, index, null) - } - var double: Double = ConfigValue(index, index, 1) - intercept[IllegalStateException] { - double = ConfigValue(index, index, null) - } - var float: 
Float = ConfigValue(index, index, 1) - intercept[IllegalStateException] { - float = ConfigValue(index, index, null) - } - var boolean: Boolean = ConfigValue(index, index, true) - intercept[IllegalStateException] { - boolean = ConfigValue(index, index, null) - } - var intOption: Option[Int] = ConfigValue(index, index, 1) - intercept[IllegalStateException] { - int = ConfigValue(index, index, null) - } - var longOption: Option[Long] = ConfigValue(index, index, 1) - intercept[IllegalStateException] { - long = ConfigValue(index, index, null) - } - var doubleOption: Option[Double] = ConfigValue(index, index, 1) - intercept[IllegalStateException] { - double = ConfigValue(index, index, null) - } - var floatOption: Option[Float] = ConfigValue(index, index, 1) - intercept[IllegalStateException] { - float = ConfigValue(index, index, null) - } - var booleanOption: Option[Boolean] = ConfigValue(index, index, true) - intercept[IllegalStateException] { - boolean = ConfigValue(index, index, null) - } - var string: String = ConfigValue(index, index, "test") - intercept[IllegalStateException] { - string = ConfigValue(index, index, null) - } - var file: File = ConfigValue(index, index, "test") - intercept[IllegalStateException] { - file = ConfigValue(index, index, null) - } + + //TODO: test BiopetQScript error message } } } diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala index f0ff3f4eb54cc109ebe2e2b7bb7659cc99b7c76b..578f3afc79f6cac8d2621399bb577222383ee2bb 100644 --- a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala +++ b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala @@ -44,14 +44,14 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript { def makeSample(id: String) = new Sample(id) class Sample(sampleId: String) extends AbstractSample(sampleId) { def makeLibrary(id: String) = new Library(id) - 
class Library(libraryId: String) extends AbstractLibrary(libraryId) { + class Library(libId: String) extends AbstractLibrary(libId) { val mapping = new Mapping(qscript) def addJobs(): Unit = { if (config.contains("R1")) { mapping.input_R1 = config("R1") if (config.contains("R2")) mapping.input_R2 = config("R2") - mapping.libraryId = libraryId + mapping.libId = libId mapping.sampleId = sampleId mapping.outputDir = libDir @@ -59,7 +59,7 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript { mapping.biopetScript addAll(mapping.functions) - } else logger.error("Sample: " + sampleId + ": No R1 found for library: " + libraryId) + } else logger.error("Sample: " + sampleId + ": No R1 found for library: " + libId) } } diff --git a/public/flexiprep/pom.xml b/public/flexiprep/pom.xml index e9b58ab28a615ac8ce4e76063285b125f2b66b1b..86666db29645cd35f9d8f71c6b1aa775d0f22444 100644 --- a/public/flexiprep/pom.xml +++ b/public/flexiprep/pom.xml @@ -39,5 +39,17 @@ <artifactId>BiopetFramework</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> + <version>6.8</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.11</artifactId> + <version>2.2.1</version> + <scope>test</scope> + </dependency> </dependencies> </project> diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala index 1bd84bb36e21c8e577adaf5e9ad33d02b1db47fa..9aaca5f66336e38b16b215a9c175781fafc97fe0 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala @@ -33,14 +33,14 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada override def 
beforeCmd() { super.beforeCmd - val foundAdapters = fastqc.getFoundAdapters.map(_.seq) + val foundAdapters = fastqc.foundAdapters.map(_.seq) if (default_clip_mode == "3") opt_adapter ++= foundAdapters else if (default_clip_mode == "5") opt_front ++= foundAdapters else if (default_clip_mode == "both") opt_anywhere ++= foundAdapters } override def cmdLine = { - if (!opt_adapter.isEmpty || !opt_anywhere.isEmpty || !opt_front.isEmpty) { + if (opt_adapter.nonEmpty || opt_anywhere.nonEmpty || opt_front.nonEmpty) { analysisName = getClass.getSimpleName super.cmdLine } else { diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala index 4ee01c2605d5449dac33b19ac9c2ab360b383d45..21cb069d658623ab5a7b8986e454db6000f927fa 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -16,82 +16,154 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable +import java.io.{ File, FileNotFoundException } + import scala.io.Source import argonaut._, Argonaut._ import scalaz._, Scalaz._ +import nl.lumc.sasc.biopet.core.config.Configurable +import nl.lumc.sasc.biopet.utils.ConfigUtils + +/** + * FastQC wrapper with added functionality for the Flexiprep pipeline + * + * This wrapper implements additional methods for parsing FastQC output files and aggregating everything in a summary + * object. The current implementation is based on FastQC v0.10.1. 
+ */ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(root) { - def getDataBlock(name: String): Array[String] = { // Based on Fastqc v0.10.1 - val outputDir = output.getAbsolutePath.stripSuffix(".zip") - val dataFile = new File(outputDir + "/fastqc_data.txt") - if (!dataFile.exists) return null - val data = Source.fromFile(dataFile).mkString - for (block <- data.split(">>END_MODULE\n")) { - val b = if (block.startsWith("##FastQC")) block.substring(block.indexOf("\n") + 1) else block - if (b.startsWith(">>" + name)) - return for (line <- b.split("\n")) - yield line - } - return null - } - def getEncoding: String = { - val block = getDataBlock("Basic Statistics") - if (block == null) return null - for ( - line <- block if (line.startsWith("Encoding")) - ) return line.stripPrefix("Encoding\t") - return null // Could be default Sanger with a warning in the log + /** Class for storing a single FastQC module result */ + protected case class FastQCModule(name: String, status: String, lines: Seq[String]) + + /** Default FastQC output directory containing actual results */ + // this is a def instead of a val since the value depends on the variable `output`, which is null on class creation + def outputDir: File = new File(output.getAbsolutePath.stripSuffix(".zip")) + + /** Default FastQC output data file */ + // this is a def instead of a val since the value depends on the variable `output`, which is null on class creation + def dataFile: File = new File(outputDir, "fastqc_data.txt") + + /** + * FastQC QC modules. + * + * @return Mapping of FastQC module names and its contents as array of strings (one item per line) + * @throws FileNotFoundException if the FastQC data file can not be found. + * @throws IllegalStateException if the module lines have no content or mapping is empty. + */ + def qcModules: Map[String, FastQCModule] = { + val fqModules = Source.fromFile(dataFile) + // drop all the characters before the first module delimiter (i.e. 
'>>') + .dropWhile(_ != '>') + // pull everything into a string + .mkString + // split into modules + .split(">>END_MODULE\n") + // make map of module name -> module lines + .map { + case (modString) => + // module name is in the first line, without '>>' and before the tab character + val Array(firstLine, otherLines) = modString + // drop all '>>' character (start of module) + .dropWhile(_ == '>') + // split first line and others + .split("\n", 2) + // and slice them + .slice(0, 2) + // extract module name and module status + val Array(modName, modStatus) = firstLine + .split("\t", 2) + .slice(0, 2) + modName -> FastQCModule(modName, modStatus, otherLines.split("\n").toSeq) + } + .toMap + + if (fqModules.isEmpty) throw new IllegalStateException("Empty FastQC data file " + dataFile.toString) + else fqModules } - protected case class Sequence(name: String, seq: String) - def getFoundAdapters: List[Sequence] = { - def getSeqs(file: File) = { - if (file != null) { - (for ( - line <- Source.fromFile(file).getLines(); if line.startsWith("#"); - values = line.split("\t*") if values.size >= 2 - ) yield Sequence(values(0), values(1))).toList - } else Nil - } + /** + * Retrieves the FASTQ file encoding as computed by FastQC. + * + * @return encoding name + * @throws NoSuchElementException when the "Basic Statistics" key does not exist in the mapping or + * when a line starting with "Encoding" does not exist. 
+ */ + def encoding: String = { + if (dataFile.exists) // On a dry run this file does not yet exist + qcModules("Basic Statistics") //FIXME: not safe, throws if the module or the Encoding line is missing + .lines + .dropWhile(!_.startsWith("Encoding")) + .head + .stripPrefix("Encoding\t") + .stripSuffix("\t") + else "" + } - val seqs = getSeqs(adapters) ::: getSeqs(contaminants) + /** Case class representing a known adapter sequence */ + protected case class AdapterSequence(name: String, seq: String) - val block = getDataBlock("Overrepresented sequences") - if (block == null) return Nil + /** + * Retrieves overrepresented sequences found by FastQC. + * + * @return a [[Set]] of [[AdapterSequence]] objects. + */ + def foundAdapters: Set[AdapterSequence] = { + if (dataFile.exists) { // On a dry run this file does not yet exist + /** Returns the set of adapter and/or contaminant sequences known to FastQC */ + def getFastqcSeqs(file: Option[File]): Set[AdapterSequence] = file match { + case None => Set.empty[AdapterSequence] + case Some(f) => + (for { + line <- Source.fromFile(f).getLines() + if !line.startsWith("#") + values = line.split("\t+") + if values.size >= 2 + } yield AdapterSequence(values(0), values(1))).toSet + } - val found = for ( - line <- block if !line.startsWith("#"); - values = line.split("\t") if values.size >= 4 - ) yield values(3) + val found = qcModules.get("Overrepresented sequences") match { + case None => Seq.empty[String] + case Some(qcModule) => + for ( + line <- qcModule.lines if !(line.startsWith("#") || line.startsWith(">")); + values = line.split("\t") if values.size >= 4 + ) yield values(3) + } - seqs.filter(x => found.exists(_.startsWith(x.name))) + // select full sequences from known adapters and contaminants + // based on overrepresented sequences results + (getFastqcSeqs(adapters) ++ getFastqcSeqs(contaminants)) + .filter(x => found.exists(_.startsWith(x.name))) + } else Set() } - def getSummary: Json = { - val subfixs = Map("plot_duplication_levels" -> "Images/duplication_levels.png", - 
"plot_kmer_profiles" -> "Images/kmer_profiles.png", - "plot_per_base_gc_content" -> "Images/per_base_gc_content.png", - "plot_per_base_n_content" -> "Images/per_base_n_content.png", - "plot_per_base_quality" -> "Images/per_base_quality.png", - "plot_per_base_sequence_content" -> "Images/per_base_sequence_content.png", - "plot_per_sequence_gc_content" -> "Images/per_sequence_gc_content.png", - "plot_per_sequence_quality" -> "Images/per_sequence_quality.png", - "plot_sequence_length_distribution" -> "Images/sequence_length_distribution.png", - "fastqc_data" -> "fastqc_data.txt") - val dir = output.getAbsolutePath.stripSuffix(".zip") + "/" - var outputMap: Map[String, Map[String, String]] = Map() - for ((k, v) <- subfixs) outputMap += (k -> Map("path" -> (dir + v))) - - val temp = ("" := outputMap) ->: jEmptyObject - return temp.fieldOrEmptyObject("") + /** Summary of the FastQC run, stored in a [[Json]] object */ + def summary: Json = { + + val outputMap = + Map("plot_duplication_levels" -> "Images/duplication_levels.png", + "plot_kmer_profiles" -> "Images/kmer_profiles.png", + "plot_per_base_gc_content" -> "Images/per_base_gc_content.png", + "plot_per_base_n_content" -> "Images/per_base_n_content.png", + "plot_per_base_quality" -> "Images/per_base_quality.png", + "plot_per_base_sequence_content" -> "Images/per_base_sequence_content.png", + "plot_per_sequence_gc_content" -> "Images/per_sequence_gc_content.png", + "plot_per_sequence_quality" -> "Images/per_sequence_quality.png", + "plot_sequence_length_distribution" -> "Images/sequence_length_distribution.png", + "fastqc_data" -> "fastqc_data.txt") + .map { + case (name, relPath) => + name -> Map("path" -> (outputDir + File.separator + relPath)) + } + + ConfigUtils.mapToJson(outputMap) } } object Fastqc { + def apply(root: Configurable, fastqfile: File, outDir: String): Fastqc = { val fastqcCommand = new Fastqc(root) fastqcCommand.fastqfile = fastqfile @@ -102,6 +174,6 @@ object Fastqc { //if 
(filename.endsWith(".fq")) filename = filename.substring(0,filename.size - 3) fastqcCommand.output = new File(outDir + "/" + filename + "_fastqc.zip") fastqcCommand.afterGraph - return fastqcCommand + fastqcCommand } } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 42ea3b6d9d9f407476d359268b5ac3c40798b745..9ab16032bd095d40728e91f92a611366b400af2f 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -30,7 +30,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { var input_R1: File = _ @Input(doc = "R2 fastq file (gzipped allowed)", shortName = "R2", required = false) - var input_R2: Option[File] = _ + var input_R2: Option[File] = None /** Skip Trim fastq files */ var skipTrim: Boolean = config("skip_trim", default = false) @@ -38,13 +38,17 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { /** Skip Clip fastq files */ var skipClip: Boolean = config("skip_clip", default = false) + // TODO: hide sampleId and libId from the command line so they do not interfere with our config values + /** Sample name */ + @Argument(doc = "Sample ID", shortName = "sample", required = true) var sampleId: String = _ /** Library name */ - var libraryId: String = _ + @Argument(doc = "Library ID", shortName = "library", required = true) + var libId: String = _ - var paired: Boolean = (input_R2 != null) + var paired: Boolean = input_R2.isDefined var R1_ext: String = _ var R2_ext: String = _ var R1_name: String = _ @@ -58,11 +62,12 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { val summary = new FlexiprepSummary(this) def init() { - if (input_R1 == null) throw new IllegalStateException("Missing R1 on flexiprep 
module") - if (outputDir == null) throw new IllegalStateException("Missing Output directory on flexiprep module") - if (sampleId == null) throw new IllegalStateException("Missing Sample name on flexiprep module") - if (libraryId == null) throw new IllegalStateException("Missing Library name on flexiprep module") - else if (!outputDir.endsWith("/")) outputDir += "/" + require(outputDir != null, "Missing output directory on flexiprep module") + require(input_R1 != null, "Missing input R1 on flexiprep module") + require(sampleId != null, "Missing sample ID on flexiprep module") + require(libId != null, "Missing library ID on flexiprep module") + + paired = input_R2.isDefined if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz")) else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip")) @@ -82,7 +87,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { case _ => } - summary.out = outputDir + sampleId + "-" + libraryId + ".qc.summary.json" + summary.out = outputDir + sampleId + "-" + libId + ".qc.summary.json" } def biopetScript() { diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala index 84acd18206b1f44e37e60b7c6d3b703e1b0d2d21..4ff18fb7cc90d7c3a90255c3c9a49ffb0a191eda 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala @@ -121,7 +121,7 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co md5Summary() val summary = ("samples" := ( flexiprep.sampleId := - ("libraries" := ( flexiprep.libraryId := ( + ("libraries" := ( flexiprep.libId := ( ("flexiprep" := ( ("clipping" := !flexiprep.skipClip) ->: 
("trimming" := !flexiprep.skipTrim) ->: @@ -201,7 +201,7 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co def fastqcSummary(fastqc: Fastqc): Option[Json] = { if (fastqc == null) return None - else return Option(fastqc.getSummary) + else return Option(fastqc.summary) } def clipstatSummary(): Option[Json] = { diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala index f6d6ac9d7c2727445723bd26c61d77398d081e95..05236e13cacae24e5b5dffb408e1aa01b1c297e4 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala @@ -25,13 +25,14 @@ class SeqtkSeq(root: Configurable) extends nl.lumc.sasc.biopet.extensions.seqtk. override def beforeCmd { super.beforeCmd if (fastqc != null && Q == None) { - val encoding = fastqc.getEncoding + val encoding = fastqc.encoding Q = encoding match { - case null => None + case null => None case s if (s.contains("Sanger / Illumina 1.9")) => None - case s if (s.contains("Illumina <1.3")) => Option(64) - case s if (s.contains("Illumina 1.3")) => Option(64) - case s if (s.contains("Illumina 1.5")) => Option(64) + case s if (s.contains("Illumina <1.3")) => Option(64) + case s if (s.contains("Illumina 1.3")) => Option(64) + case s if (s.contains("Illumina 1.5")) => Option(64) + case _ => None } if (Q != None) V = true } diff --git a/public/flexiprep/src/test/resources/fqc_contaminants_v0101.txt b/public/flexiprep/src/test/resources/fqc_contaminants_v0101.txt new file mode 100644 index 0000000000000000000000000000000000000000..13c6a999940201a402c3a7f9dd931ab9102de360 --- /dev/null +++ b/public/flexiprep/src/test/resources/fqc_contaminants_v0101.txt @@ -0,0 +1,170 @@ +# This file contains a list of potential contaminants which are +# frequently found in high 
throughput sequencing reactions. These +# are mostly sequences of adapters / primers used in the various +# sequencing chemistries. +# +# Please DO NOT rely on these sequences to design your own oligos, some +# of them are truncated at ambiguous positions, and none of them are +# definitive sequences from the manufacturers so don't blame us if you +# try to use them and they don't work. +# +# You can add more sequences to the file by putting one line per entry +# and specifying a name[tab]sequence. If the contaminant you add is +# likely to be of use to others please consider sending it to the FastQ +# authors, either via a bug report at www.bioinformatics.bbsrc.ac.uk/bugzilla/ +# or by directly emailing simon.andrews@bbsrc.ac.uk so other users of +# the program can benefit. + +Illumina Single End Adapter 1 GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG +Illumina Single End Adapter 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT +Illumina Single End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Single End PCR Primer 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT +Illumina Single End Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT + +Illumina Paired End Adapter 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Paired End Adapter 2 GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG +Illumina Paried End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Paired End PCR Primer 2 CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT +Illumina Paried End Sequencing Primer 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Paired End Sequencing Primer 2 CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT + +Illumina DpnII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGAC +Illumina DpnII expression Adapter 2 CAAGCAGAAGACGGCATACGA +Illumina DpnII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina DpnII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina DpnII expression Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC + +Illumina NlaIII 
expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGACATG +Illumina NlaIII expression Adapter 2 CAAGCAGAAGACGGCATACGA +Illumina NlaIII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina NlaIII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina NlaIII expression Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG + +Illumina Small RNA Adapter 1 GTTCAGAGTTCTACAGTCCGACGATC +Illumina Small RNA Adapter 2 TGGAATTCTCGGGTGCCAAGG +Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA +Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC + +Illumina Multiplexing Adapter 1 GATCGGAAGAGCACACGTCT +Illumina Multiplexing Adapter 2 ACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Multiplexing PCR Primer 1.01 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Multiplexing PCR Primer 2.01 GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT +Illumina Multiplexing Read1 Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Multiplexing Index Sequencing Primer GATCGGAAGAGCACACGTCTGAACTCCAGTCAC +Illumina Multiplexing Read2 Sequencing Primer GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT + +Illumina PCR Primer Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTC +Illumina PCR Primer Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTC +Illumina PCR Primer Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTC +Illumina PCR Primer Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTC +Illumina PCR Primer Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTC +Illumina PCR Primer Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTC +Illumina PCR Primer Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTC +Illumina PCR Primer Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTC +Illumina PCR Primer Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTC +Illumina PCR Primer Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTC +Illumina PCR Primer 
Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTC +Illumina PCR Primer Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTC + +Illumina DpnII Gex Adapter 1 GATCGTCGGACTGTAGAACTCTGAAC +Illumina DpnII Gex Adapter 1.01 ACAGGTTCAGAGTTCTACAGTCCGAC +Illumina DpnII Gex Adapter 2 CAAGCAGAAGACGGCATACGA +Illumina DpnII Gex Adapter 2.01 TCGTATGCCGTCTTCTGCTTG +Illumina DpnII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina DpnII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina DpnII Gex Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC + +Illumina NlaIII Gex Adapter 1.01 TCGGACTGTAGAACTCTGAAC +Illumina NlaIII Gex Adapter 1.02 ACAGGTTCAGAGTTCTACAGTCCGACATG +Illumina NlaIII Gex Adapter 2.01 CAAGCAGAAGACGGCATACGA +Illumina NlaIII Gex Adapter 2.02 TCGTATGCCGTCTTCTGCTTG +Illumina NlaIII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina NlaIII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina NlaIII Gex Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG + +Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA +Illumina 5p RNA Adapter GTTCAGAGTTCTACAGTCCGACGATC +Illumina RNA Adapter1 TGGAATTCTCGGGTGCCAAGG + +Illumina Small RNA 3p Adapter 1 ATCTCGTATGCCGTCTTCTGCTTG +Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC + +TruSeq Universal Adapter AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +TruSeq Adapter, Index 1 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 2 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 3 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 4 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 5 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 6 
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 7 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 8 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 9 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 10 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 11 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 12 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG + +Illumina RNA RT Primer GCCTTGGCACCCGAGAATTCCA +Illumina RNA PCR Primer AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGA + +RNA PCR Primer, Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 13 CAAGCAGAAGACGGCATACGAGATTTGACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 14 CAAGCAGAAGACGGCATACGAGATGGAACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, 
Index 15 CAAGCAGAAGACGGCATACGAGATTGACATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 16 CAAGCAGAAGACGGCATACGAGATGGACGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 17 CAAGCAGAAGACGGCATACGAGATCTCTACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 18 CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 19 CAAGCAGAAGACGGCATACGAGATTTTCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 20 CAAGCAGAAGACGGCATACGAGATGGCCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 21 CAAGCAGAAGACGGCATACGAGATCGAAACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 22 CAAGCAGAAGACGGCATACGAGATCGTACGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 23 CAAGCAGAAGACGGCATACGAGATCCACTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 24 CAAGCAGAAGACGGCATACGAGATGCTACCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 25 CAAGCAGAAGACGGCATACGAGATATCAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 26 CAAGCAGAAGACGGCATACGAGATGCTCATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 27 CAAGCAGAAGACGGCATACGAGATAGGAATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 28 CAAGCAGAAGACGGCATACGAGATCTTTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 29 CAAGCAGAAGACGGCATACGAGATTAGTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 30 CAAGCAGAAGACGGCATACGAGATCCGGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 31 CAAGCAGAAGACGGCATACGAGATATCGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 32 CAAGCAGAAGACGGCATACGAGATTGAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 33 CAAGCAGAAGACGGCATACGAGATCGCCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 34 CAAGCAGAAGACGGCATACGAGATGCCATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 35 CAAGCAGAAGACGGCATACGAGATAAAATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 36 CAAGCAGAAGACGGCATACGAGATTGTTGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 37 
CAAGCAGAAGACGGCATACGAGATATTCCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 38 CAAGCAGAAGACGGCATACGAGATAGCTAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 39 CAAGCAGAAGACGGCATACGAGATGTATAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 40 CAAGCAGAAGACGGCATACGAGATTCTGAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 41 CAAGCAGAAGACGGCATACGAGATGTCGTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 42 CAAGCAGAAGACGGCATACGAGATCGATTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 43 CAAGCAGAAGACGGCATACGAGATGCTGTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 44 CAAGCAGAAGACGGCATACGAGATATTATAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 45 CAAGCAGAAGACGGCATACGAGATGAATGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 46 CAAGCAGAAGACGGCATACGAGATTCGGGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 47 CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 48 CAAGCAGAAGACGGCATACGAGATTGCCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA + +ABI Dynabead EcoP Oligo CTGATCTAGAGGTACCGGATCCCAGCAGT +ABI Solid3 Adapter A CTGCCCCGGGTTCCTCATTCTCTCAGCAGCATG +ABI Solid3 Adapter B CCACTACGCCTCCGCTTTCCTCTCTATGGGCAGTCGGTGAT +ABI Solid3 5' AMP Primer CCACTACGCCTCCGCTTTCCTCTCTATG +ABI Solid3 3' AMP Primer CTGCCCCGGGTTCCTCATTCT +ABI Solid3 EF1 alpha Sense Primer CATGTGTGTTGAGAGCTTC +ABI Solid3 EF1 alpha Antisense Primer GAAAACCAAAGTGGTCCAC +ABI Solid3 GAPDH Forward Primer TTAGCACCCCTGGCCAAGG +ABI Solid3 GAPDH Reverse Primer CTTACTCCTTGGAGGCCATG diff --git a/public/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt b/public/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt new file mode 100644 index 0000000000000000000000000000000000000000..6d44bfae6fa962cd5d3e88084107b22efed3b025 --- /dev/null +++ b/public/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt @@ -0,0 +1,838 @@ +##FastQC 0.10.1 +>>Basic Statistics pass +#Measure Value +Filename ct_r1.fq 
+File type Conventional base calls +Encoding Sanger / Illumina 1.9 +Total Sequences 1000 +Filtered Sequences 0 +Sequence length 100 +%GC 53 +>>END_MODULE +>>Per base sequence quality fail +#Base Mean Median Lower Quartile Upper Quartile 10th Percentile 90th Percentile +1 32.244 33.0 31.0 34.0 30.0 34.0 +2 32.589 34.0 31.0 34.0 31.0 34.0 +3 32.814 34.0 31.0 34.0 31.0 34.0 +4 36.231 37.0 35.0 37.0 35.0 37.0 +5 35.907 37.0 35.0 37.0 35.0 37.0 +6 35.934 37.0 35.0 37.0 35.0 37.0 +7 35.783 37.0 35.0 37.0 35.0 37.0 +8 36.008 37.0 35.0 37.0 35.0 37.0 +9 37.706 39.0 37.0 39.0 35.0 39.0 +10-14 37.857600000000005 39.2 37.2 39.4 34.8 39.4 +15-19 38.9788 40.2 38.0 41.0 35.0 41.0 +20-24 38.8246 40.0 38.0 41.0 34.8 41.0 +25-29 38.589600000000004 40.0 38.0 41.0 34.4 41.0 +30-34 38.3568 40.0 38.0 41.0 33.8 41.0 +35-39 38.1592 40.0 37.4 41.0 33.6 41.0 +40-44 37.4808 39.8 36.0 41.0 32.6 41.0 +45-49 36.9478 39.0 35.0 40.8 31.2 41.0 +50-54 35.845600000000005 37.8 34.6 40.0 29.4 41.0 +55-59 34.739 36.6 33.6 40.0 27.4 41.0 +60-64 34.1336 35.4 33.4 38.6 27.2 40.2 +65-69 32.7464 35.0 32.6 37.2 24.6 39.6 +70-74 29.3478 34.0 29.6 35.6 2.0 38.6 +75-79 27.4908 33.2 26.4 35.0 2.0 36.6 +80-84 25.893000000000008 33.0 21.8 35.0 2.0 35.4 +85-89 25.031799999999997 32.4 16.2 34.6 2.0 35.0 +90-94 23.9446 31.4 6.4 34.0 2.0 35.0 +95-99 22.9358 30.4 2.0 34.0 2.0 35.0 +100 21.984 30.0 2.0 34.0 2.0 35.0 +>>END_MODULE +>>Per sequence quality scores pass +#Quality Count +11 1.0 +12 4.0 +13 3.0 +14 1.0 +15 4.0 +16 4.0 +17 6.0 +18 7.0 +19 4.0 +20 2.0 +21 7.0 +22 9.0 +23 9.0 +24 17.0 +25 23.0 +26 30.0 +27 52.0 +28 39.0 +29 28.0 +30 23.0 +31 33.0 +32 43.0 +33 47.0 +34 74.0 +35 88.0 +36 148.0 +37 202.0 +38 89.0 +39 3.0 +>>END_MODULE +>>Per base sequence content fail +#Base G A T C +1 52.35707121364093 17.251755265797392 11.735205616850552 18.655967903711137 +2 34.300000000000004 11.1 24.8 29.799999999999997 +3 41.0 6.5 20.200000000000003 32.300000000000004 +4 37.5 8.7 26.0 27.800000000000004 +5 35.4 12.4 31.8 
20.4 +6 57.3 11.1 1.6 30.0 +7 20.9 24.7 32.6 21.8 +8 20.0 27.200000000000003 30.0 22.8 +9 24.5 21.5 27.800000000000004 26.200000000000003 +10-14 25.22 23.28 26.26 25.240000000000002 +15-19 26.44 21.34 26.1 26.119999999999997 +20-24 25.240000000000002 22.1 24.6 28.060000000000002 +25-29 24.62 22.06 25.119999999999997 28.199999999999996 +30-34 26.240000000000002 21.44 24.279999999999998 28.04 +35-39 24.8 22.439999999999998 24.34 28.42 +40-44 25.8 22.84 23.9 27.46 +45-49 26.26 22.64 23.66 27.439999999999998 +50-54 26.72 22.58 23.18 27.52 +55-59 25.019999999999996 22.58 24.38 28.02 +60-64 26.251501802162597 22.00640768922707 23.28794553464157 28.454144973968766 +65-69 25.683829444891394 23.873692679002414 23.049074818986323 27.39340305711987 +70-74 25.554134697357206 25.44757033248082 21.717817561807333 27.28047740835465 +75-79 25.818501428257523 23.643155350472423 23.071852340145025 27.466490881125026 +80-84 26.973532796317606 23.95857307249712 21.74913693901036 27.318757192174914 +85-89 25.452016689847014 24.849327770050998 22.624014835419565 27.07464070468243 +90-94 24.547101449275363 22.35054347826087 24.139492753623188 28.962862318840582 +95-99 25.318837549655026 24.231653773782146 23.186284758519758 27.263223918043067 +100 24.0 26.0 21.9 28.1 +>>END_MODULE +>>Per base GC content fail +#Base %GC +1 71.01303911735206 +2 64.1 +3 73.3 +4 65.3 +5 55.800000000000004 +6 87.3 +7 42.699999999999996 +8 42.8 +9 50.7 +10-14 50.46000000000001 +15-19 52.559999999999995 +20-24 53.300000000000004 +25-29 52.82 +30-34 54.279999999999994 +35-39 53.22 +40-44 53.26 +45-49 53.7 +50-54 54.24 +55-59 53.04 +60-64 54.70564677613135 +65-69 53.07723250201126 +70-74 52.834612105711855 +75-79 53.28499230938255 +80-84 54.29228998849251 +85-89 52.526657394529444 +90-94 53.509963768115945 +95-99 52.5820614676981 +100 52.1 +>>END_MODULE +>>Per sequence GC content fail +#GC Content Count +0 0.0 +1 0.0 +2 0.0 +3 0.0 +4 0.0 +5 0.0 +6 0.0 +7 0.0 +8 0.0 +9 0.0 +10 0.0 +11 0.0 +12 0.0 +13 0.0 +14 0.0 
+15 0.0 +16 0.0 +17 0.0 +18 0.0 +19 0.0 +20 0.0 +21 0.0 +22 0.0 +23 0.5 +24 0.5 +25 0.5 +26 1.0 +27 1.5 +28 2.0 +29 3.5 +30 5.5 +31 6.0 +32 6.5 +33 6.0 +34 4.5 +35 6.0 +36 11.0 +37 17.0 +38 21.0 +39 16.5 +40 15.0 +41 24.0 +42 28.5 +43 33.0 +44 35.5 +45 32.5 +46 32.0 +47 32.0 +48 29.5 +49 30.5 +50 30.0 +51 29.5 +52 30.0 +53 27.5 +54 26.5 +55 27.0 +56 29.5 +57 34.0 +58 36.0 +59 36.0 +60 37.0 +61 31.5 +62 24.0 +63 22.5 +64 27.0 +65 28.5 +66 20.5 +67 15.0 +68 17.0 +69 13.5 +70 8.0 +71 7.0 +72 9.0 +73 8.0 +74 5.5 +75 4.5 +76 2.0 +77 2.0 +78 3.0 +79 2.0 +80 1.5 +81 1.0 +82 0.0 +83 0.5 +84 1.0 +85 0.5 +86 0.0 +87 0.0 +88 0.0 +89 0.0 +90 0.0 +91 0.0 +92 0.0 +93 0.0 +94 0.0 +95 0.0 +96 0.0 +97 0.0 +98 0.0 +99 0.0 +100 0.0 +>>END_MODULE +>>Per base N content warn +#Base N-Count +1 0.3 +2 0.0 +3 0.0 +4 0.0 +5 0.0 +6 0.0 +7 0.0 +8 0.0 +9 0.0 +10-14 0.0 +15-19 0.0 +20-24 0.0 +25-29 0.0 +30-34 0.0 +35-39 0.0 +40-44 0.0 +45-49 0.0 +50-54 0.0 +55-59 0.0 +60-64 0.12 +65-69 0.5599999999999999 +70-74 6.16 +75-79 8.98 +80-84 13.100000000000001 +85-89 13.719999999999999 +90-94 11.68 +95-99 4.34 +100 0.0 +>>END_MODULE +>>Sequence Length Distribution pass +#Length Count +100 1000.0 +>>END_MODULE +>>Sequence Duplication Levels pass +#Total Duplicate Percentage 3.4 +#Duplication Level Relative count +1 100.0 +2 0.4140786749482402 +3 0.0 +4 0.0 +5 0.0 +6 0.0 +7 0.0 +8 0.0 +9 0.0 +10++ 0.2070393374741201 +>>END_MODULE +>>Overrepresented sequences fail +#Sequence Count Percentage Possible Source +AGATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTAT 14 1.4000000000000001 TruSeq Adapter, Index 1 (97% over 36bp) +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATG 12 1.2 TruSeq Adapter, Index 1 (97% over 36bp) +AGGGGGAATGATGGTTGTCTTTGGATATACTACAGCGATGGCTATTGAGG 2 0.2 No Hit +GGCTTGTTTTATTTTAATGGCTGATCTATGTAATCACAGAGGCCAGTATG 2 0.2 No Hit +GTGGGGTGGTGTTTGTGGGGGACTTCATCATCTCAGGCTTCCCAGGGTCC 2 0.2 No Hit +CGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATGCCG 2 0.2 TruSeq Adapter, Index 1 (96% over 33bp) 
+>>END_MODULE +>>Kmer Content fail +#Sequence Count Obs/Exp Overall Obs/Exp Max Max Obs/Exp Position +AAAAA 385 7.3597403 68.038994 65-69 +AGATC 435 5.4375157 23.135067 1 +GAAGA 375 5.258809 32.443344 6 +GGAAG 420 5.044668 33.345257 5 +TCCAG 475 4.8355613 14.131038 2 +AAGAG 320 4.487517 25.954676 7 +CCAGG 475 4.4180827 17.21471 3 +GAGCA 380 4.3399205 21.1377 9 +AGCAC 395 4.2895336 15.0741825 7 +CTCCA 415 4.0171337 12.105032 95-96 +AGAGC 340 3.883087 21.137697 8 +TTTTT 280 3.8749053 8.964593 10-14 +CTTCT 370 3.8646336 11.598914 55-59 +CTGAA 305 3.812511 13.130004 90-94 +CGGAA 320 3.65467 26.422123 5 +ACCAG 335 3.6379597 10.049457 7 +TCTGA 310 3.6325634 12.308498 90-94 +CACAC 340 3.5108058 14.806036 85-89 +ATCGG 325 3.4795394 24.768969 3 +TCGGA 320 3.426008 19.815174 3 +GATCG 320 3.426008 19.815174 1 +CGTCT 355 3.387832 11.578538 85-89 +CTGCT 355 3.387832 17.662533 3 +GCACA 310 3.3664696 15.0741825 8 +TCTTC 320 3.3423858 7.7326093 50-54 +CAGCA 305 3.3121717 10.049455 6 +GAACT 260 3.2500093 13.130004 90-94 +GTCTG 320 3.2116532 12.65067 90-94 +CAGGA 280 3.197836 15.8532715 3 +AACTC 265 3.1497202 23.781752 95-96 +TGAAC 250 3.125009 13.130004 90-94 +CCAGC 350 3.0954454 6.6359653 95-96 +AGTCA 240 3.0000086 10.41078 25-29 +CACCA 290 2.9945107 6.079907 70-74 +TGCTG 295 2.960743 9.2877 2 +CAGAT 230 2.875008 11.040063 70-74 +CTTCC 315 2.8583732 10.916445 30-34 +CACGT 280 2.8504362 12.351324 85-89 +CAGGG 290 2.8367646 22.630535 9 +ACACG 260 2.8234906 13.175687 85-89 +TTCCA 250 2.7855206 9.279795 30-34 +TTCTT 230 2.765239 6.6755276 50-54 +AGCAG 240 2.7410026 15.853272 2 +TTCTG 240 2.6363494 10.165324 55-59 +ACTCC 270 2.6135564 14.526036 95-96 +GCCAG 280 2.6043434 8.607355 1 +ACGTC 255 2.595933 10.105629 85-89 +GATCT 220 2.5779483 8.675031 40-44 +TCTGC 265 2.5289452 13.2469015 2 +AAGAT 160 2.4557784 12.783248 35-39 +ATCTC 220 2.4512577 9.279794 40-44 +CAGTC 240 2.4432309 8.554544 90-94 +TCCAA 205 2.4365761 10.999062 7 +CTTTT 200 2.4045558 16.688818 6 +TTCCT 230 2.40234 9.665762 
7 +CCAGT 235 2.3923304 9.4206915 25-29 +TTTCT 195 2.3444414 16.688818 8 +CTGGG 255 2.3383298 6.004135 80-84 +TGCTT 210 2.3068056 10.165323 4 +TCTTT 190 2.284328 5.5629396 15-19 +TTTTC 190 2.2843277 11.125878 7 +GGGGG 255 2.2468696 16.307867 2 +AGGAA 160 2.2437584 19.466007 5 +GTCAC 220 2.2396283 10.184532 95-96 +TCACT 200 2.2284167 8.360176 95-96 +CACTT 200 2.2284167 10.3108835 30-34 +GAAAA 135 2.2103586 10.606119 60-64 +ACTTC 195 2.172706 9.279794 30-34 +TTGAA 150 2.1582448 11.9834385 60-64 +CTCCT 235 2.1324375 16.794533 4 +TCCTC 235 2.1324372 8.397265 5 +ATCTT 165 2.11616 7.1210704 10-14 +GGGGA 205 2.1089406 14.2801 3 +ACACA 165 2.092039 11.7331705 8 +TGCAG 195 2.0877237 9.907587 5 +GACCA 190 2.0633202 10.049455 6 +AGGGG 200 2.057503 9.520067 1 +CCTCC 260 2.049668 14.590484 5 +AGGAG 170 2.0418897 5.557543 2 +TCCTT 195 2.0367663 14.498643 4 +GTCTT 185 2.032186 15.247986 7 +GCTGG 220 2.0173824 8.485845 1 +CCAGA 185 2.0090222 5.3284492 70-74 +CCTGG 230 2.0054333 8.068818 3 +GCAGG 205 2.005299 9.052214 3 +GGACC 215 1.9997637 8.607355 5 +TTCAT 155 1.987908 5.934226 2 +CCTTT 190 1.9845415 14.498643 5 +TTTCC 190 1.9845415 5.799457 15-19 +TGGCA 185 1.980661 14.861383 2 +TCTTG 180 1.977262 10.165323 5 +CCAAG 180 1.9547247 9.044511 35-39 +CTTCA 175 1.9498644 10.310883 6 +CAAGA 145 1.933477 12.339583 35-39 +CTGGA 180 1.9271295 9.907587 6 +GGCTG 210 1.9256833 16.97169 2 +AATGA 125 1.918577 7.677627 95-96 +TGAAA 125 1.918577 15.623971 60-64 +GCTTC 200 1.9086379 13.2469015 2 +GTCCA 185 1.8833237 14.131036 1 +AGAAA 115 1.882898 7.5757995 7 +TGGGG 195 1.8805519 13.386638 1 +TTCTC 180 1.880092 5.799457 25-29 +CTTGA 160 1.8748715 8.675031 60-64 +ACAAA 120 1.8682072 5.762797 40-44 +TCTCG 195 1.8609219 8.831266 5 +GGGAC 190 1.8585701 9.052216 5 +TGAGG 165 1.8578365 5.209824 2 +TGAAG 140 1.8404517 6.082693 2 +CATCT 165 1.8384434 5.155441 4 +CACTG 180 1.8324232 9.4206915 6 +CTGCA 180 1.8324231 5.3465896 90-94 +GCTGC 210 1.8310483 8.068819 1 +GCAGA 160 1.8273348 10.568848 3 +CCTTC 200 
1.8148402 8.397265 9 +AGGGA 150 1.8016673 6.0081544 95-96 +TTTCA 140 1.7955297 7.1210704 15-19 +CACAG 165 1.7918309 5.432139 95-96 +AAACA 115 1.7903653 7.6389136 70-74 +ATTTT 120 1.7715117 13.661307 6 +TTTTG 140 1.7701824 17.551357 7 +GGGGC 210 1.7594293 11.629828 3 +GATTT 130 1.7534488 12.481857 6 +CAAAT 120 1.7513192 6.7527947 50-54 +GAGGG 170 1.7488776 9.520067 1 +GAAGG 145 1.7416117 6.0081544 95-96 +CATTT 135 1.7314036 5.9342256 5 +ATTTC 135 1.7314036 5.9342256 7 +CCTCT 190 1.7240983 8.397266 1 +ATCCA 145 1.7234317 5.49953 4 +GCAGC 185 1.7207267 6.9789357 95-96 +TCCTG 180 1.717774 13.2469 2 +CTCTG 180 1.717774 13.2469 2 +AAAAC 110 1.7125233 7.6389136 70-74 +CTTGG 170 1.7061908 9.2877 2 +AAAAT 95 1.7024158 8.291661 9 +TCACC 175 1.693972 8.957724 8 +TCCAC 175 1.693972 8.957724 5 +GAGAA 120 1.6828189 6.488669 6 +TCTCC 185 1.6787271 5.038359 55-59 +GAGCC 180 1.6742208 8.607355 9 +TCATC 150 1.6713123 5.1554413 2 +AGACA 125 1.6667906 6.169792 2 +TGATG 135 1.6636823 11.404236 9 +GGGAG 160 1.6460025 9.520067 1 +AGCCA 150 1.6289369 6.029673 10-14 +ATGCC 160 1.6288207 8.478622 45-49 +CTCGT 170 1.6223421 8.831266 3 +GAGGA 135 1.6215005 11.115086 3 +TGTTG 140 1.6173534 10.690706 2 +CTCAT 145 1.6156021 5.1554418 2 +CAGGT 150 1.6059413 9.907587 4 +GCTTG 160 1.6058266 9.2877 60-64 +GGGTC 175 1.6047363 12.728768 2 +TCATT 125 1.6031516 5.934226 9 +GTTGA 130 1.6020645 5.702118 1 +ACAGA 120 1.6001189 10.005068 95-96 +GGAGG 155 1.5945649 9.520067 2 +GGGGT 165 1.5912362 13.386638 1 +TGGGA 140 1.5763463 10.419649 2 +GGATG 140 1.5763462 15.629472 6 +GCCTC 190 1.575248 7.672287 2 +CCTGC 190 1.5752479 11.508429 2 +GCTCC 190 1.5752479 11.508429 6 +TCTCT 150 1.5667434 5.224736 95-96 +GGGAA 130 1.561445 11.115086 4 +TCCAT 140 1.5598917 10.3108835 8 +GGCTT 155 1.5556445 13.93155 1 +TTGAT 115 1.5511277 6.240928 4 +CATCA 130 1.5451456 5.49953 2 +AGAGA 110 1.542584 6.488669 9 +AGGAC 135 1.541814 6.341309 55-59 +GTATG 125 1.5404466 9.123388 45-49 +AACAT 105 1.5324043 13.5055895 9 +AGCTC 150 
1.5270194 9.4206915 5 +TTTGT 120 1.5172992 17.551357 8 +GATGA 115 1.5117996 6.082693 5 +GAGAT 115 1.5117996 6.082693 4 +AGGAT 115 1.5117996 12.165386 4 +TGAGA 115 1.5117996 6.082693 5 +CTGGT 150 1.5054625 9.2877 4 +GCTGT 150 1.5054625 18.5754 3 +TTCAC 135 1.504181 10.310883 7 +CCCAG 170 1.5035021 12.276537 2 +CAGTG 140 1.4988785 9.907587 5 +CTCCC 190 1.4978343 7.295242 1 +CCCTG 180 1.4923402 11.5084305 2 +CAGAG 130 1.4847097 7.398194 20-24 +CTTTG 135 1.4829465 10.165323 2 +CAAAA 95 1.4789973 7.203496 9 +TCTCA 130 1.4484707 5.1554413 8 +GAATG 110 1.4460692 12.165386 7 +GGAAT 110 1.4460692 12.165386 5 +TTTGG 125 1.4440656 5.345353 7 +GGCCT 165 1.4386805 12.103227 1 +GCTCT 150 1.4314783 6.1818867 20-24 +TCTGT 130 1.4280226 15.247986 3 +CTGTT 130 1.4280226 15.247986 4 +AGGTT 115 1.4172109 11.404235 8 +TTGAG 115 1.4172107 5.702117 4 +TTTGA 105 1.416247 7.4891143 10-14 +ATCTG 120 1.4061534 5.4218936 2 +GGTCT 140 1.4050984 9.287701 6 +TTTTA 95 1.4024467 7.384491 95-96 +GGGTG 145 1.3983592 13.386638 2 +GGCAC 150 1.3951839 8.607355 4 +AAAGA 85 1.3917071 7.5757985 8 +AAGAA 85 1.3917071 5.254889 75-79 +TTGTT 110 1.3908576 5.850453 4 +GGAGA 115 1.3812783 5.557543 3 +ATGAC 110 1.3750039 6.252721 95-96 +TGTTC 125 1.3730987 10.165325 5 +GGGCA 140 1.3694727 9.052216 4 +ATGAT 95 1.3668885 6.6574664 6 +CCACT 140 1.3551775 5.3746343 30-34 +TGGCT 135 1.3549163 13.931552 3 +GATGG 120 1.3511539 10.419648 9 +TCGTA 115 1.3475639 5.421894 40-44 +TGTCA 115 1.3475639 5.421894 5 +GCTGA 125 1.3382844 9.907587 6 +CAGAA 100 1.3334324 5.6025352 90-94 +CCAAA 105 1.3312978 5.8665853 8 +GGGCT 145 1.3296387 12.728768 1 +TAGGA 100 1.3146083 12.165386 4 +GACAG 115 1.313397 5.2844243 1 +GGTCC 150 1.3078917 8.068819 6 +CCATC 135 1.3067783 8.957724 9 +AAATG 85 1.3046323 7.101804 6 +TTCAA 95 1.2997144 6.330293 9 +CGTAT 110 1.2889742 8.675031 45-49 +TGACT 110 1.2889742 5.421894 3 +TATGC 110 1.2889739 8.67503 45-49 +GCCCT 155 1.2850707 7.672287 3 +TGGGC 140 1.283789 8.485846 7 +ACTTT 100 1.2825212 5.9342256 
1 +ATGTT 95 1.2813665 6.2409286 1 +ATTTG 95 1.2813663 12.481856 9 +TGGTT 110 1.2707777 5.345353 5 +TGGTG 120 1.2666163 9.767722 7 +GTTTT 100 1.2644161 5.8504534 6 +GCCTG 145 1.2642952 12.103229 1 +TTGCT 115 1.2632507 6.0991945 50-54 +CCACC 150 1.2614243 7.7821474 5 +GGACA 110 1.2562928 15.853274 6 +GAAGC 110 1.2562928 10.568849 9 +TGACA 100 1.2500036 5.7837667 9 +GACAT 100 1.2500035 11.567533 7 +TGGAA 95 1.248878 6.082693 5 +ACAGC 115 1.2488517 10.049455 5 +AATCC 105 1.2480024 5.499531 7 +TGCCT 130 1.2406145 8.831266 3 +AGGTG 110 1.2385577 5.209824 4 +GTGGC 135 1.2379395 12.728768 1 +CATGT 105 1.2303842 5.4218936 1 +TAGAT 85 1.2230055 6.0453725 90-94 +CCCTC 155 1.2219174 7.295242 4 +GCCGT 140 1.2206988 8.068819 3 +AGTTT 90 1.2139261 6.2409286 7 +TTTAG 90 1.213926 6.240928 8 +TTGGG 115 1.2138406 9.767722 2 +ACCTC 125 1.20998 8.957724 1 +AGCAA 90 1.2000892 6.169792 9 +CAAAG 90 1.2000891 6.169791 5 +AAAGC 90 1.2000891 6.169791 6 +ACAGG 105 1.1991886 10.568849 8 +AGGCA 105 1.1991886 5.712891 95-96 +ATCAG 95 1.1875033 5.7837663 6 +ATGAG 90 1.1831475 6.082693 25-29 +CAGTT 100 1.1717947 5.1698627 85-89 +ATGCT 100 1.1717947 5.421894 8 +TCAAT 85 1.1629024 6.3302937 10-14 +TGTGT 100 1.1552525 10.690706 3 +GCCCA 130 1.1497369 12.276536 1 +TGATT 85 1.1464858 12.481857 5 +TGCTC 120 1.1451827 8.831267 4 +TGTCC 120 1.1451827 13.2469015 2 +TCCCC 145 1.143084 7.295242 2 +AAGGC 100 1.1420842 5.493164 65-69 +CAACA 90 1.1411123 5.8665853 8 +CACAA 90 1.1411123 11.7331705 9 +ACATC 95 1.129145 5.4995303 8 +AAGCT 90 1.1250031 6.2527194 95-96 +GAAAG 80 1.1218792 12.977338 7 +AAGGA 80 1.1218792 6.488669 3 +GCACT 110 1.1198142 9.4206915 5 +CCTGA 110 1.119814 9.420691 9 +ACCTT 100 1.1142083 5.1554418 7 +GTCAT 95 1.113205 5.421894 1 +TGATC 95 1.113205 10.843788 5 +TCATG 95 1.113205 5.421894 3 +TGGAT 90 1.1091216 5.702118 9 +GTGGG 115 1.1090435 8.924425 1 +CTGTG 110 1.1040058 9.2877 4 +GCTTT 100 1.0984789 5.4947696 95-96 +TGTCT 100 1.0984789 10.165323 5 +TTGGT 95 1.0974898 5.345353 4 +CTGTC 115 
1.0974668 17.662535 4 +CAGAC 100 1.0859579 5.0247273 5 +GGAAC 95 1.0849801 5.2844243 6 +CCTCG 130 1.0778012 7.672287 6 +GCGGC 135 1.075477 7.372196 1 +ATAAA 60 1.0752101 8.291662 7 +GGGAT 95 1.0696635 10.419649 3 +CATCC 110 1.0647823 8.957723 3 +ACAGT 85 1.062503 5.7837663 4 +ACTGA 85 1.062503 11.567533 7 +GTTGG 100 1.0555136 9.767722 1 +TGTGG 100 1.0555136 9.767722 5 +GGAAA 75 1.0517617 19.466007 6 +GTGAA 80 1.0516868 6.082693 1 +GAAGT 80 1.0516866 6.082693 5 +GTCTC 110 1.0497508 8.831267 1 +CGGCT 120 1.046313 8.068818 1 +TTTAT 70 1.0333818 5.4645233 10-14 +GACAC 95 1.0316601 10.049455 7 +GGCAA 90 1.0278759 10.56885 3 +TCATA 75 1.0260904 6.330293 5 +ATTCA 75 1.0260903 6.3302927 7 +TAACA 70 1.0216029 6.7527957 8 +GGTCA 95 1.0170963 9.907589 3 +ATGGC 95 1.0170962 9.907587 1 +TCAGG 95 1.0170962 9.907587 8 +GGTGA 90 1.0133655 15.629474 3 +TGTTT 80 1.0115329 5.8504534 5 +TGAAT 70 1.007181 6.6574664 5 +ATTGA 70 1.0071809 6.6574664 7 +AAGTT 70 1.0071809 6.6574664 6 +TTGCC 105 1.0020349 8.831267 2 +CTTGC 105 1.0020349 8.831267 6 +GCAAA 75 1.0000744 6.169792 4 +CATAG 80 1.0000029 6.2527204 95-96 +GACTT 85 0.99602544 5.421894 1 +CTGAT 85 0.99602544 5.421894 4 +CTTGT 90 0.988631 10.165323 3 +AATGG 75 0.98595625 6.082693 8 +AAGGT 75 0.9859562 6.0826926 4 +GATGT 80 0.98588586 5.7021174 7 +GGATT 80 0.98588586 11.404235 5 +GGCGG 115 0.96349704 7.753219 1 +AGAGG 80 0.9608892 5.557543 8 +GAGGT 85 0.95706743 5.2098246 3 +ATGGG 85 0.9570673 5.209824 1 +CCGTC 115 0.95343953 7.672287 4 +TAGCA 75 0.9375027 5.7837667 1 +ACATG 75 0.9375026 5.7837663 2 +TTGCA 80 0.93743575 5.421894 4 +GTTCA 80 0.93743575 5.421894 6 +ATGTC 80 0.93743575 5.421894 5 +TTCAG 80 0.93743575 5.421894 8 +TTGAC 80 0.9374356 5.4218936 2 +GTTCT 85 0.93370706 5.0826616 1 +TTGTC 85 0.93370706 5.0826616 9 +TTTGC 85 0.93370706 5.0826616 3 +ATGGT 75 0.924268 5.7021174 4 +ATGAA 60 0.920917 7.1018047 9 +AGATG 70 0.92022586 6.082693 5 +GCTCA 90 0.91621155 5.092265 95-96 +AGTGC 85 0.9100334 9.907587 2 +AGGGT 80 0.90076935 
10.419649 1 +GTAGG 80 0.90076923 10.419648 6 +AGTGG 80 0.90076923 5.209824 2 +TAAAA 50 0.89600843 8.291662 8 +CACAT 75 0.89143026 5.499531 6 +CCATT 80 0.89136666 10.3108835 9 +ATACT 65 0.8892783 6.330293 9 +ACATT 65 0.88927823 6.3302927 7 +GCGGG 105 0.87971467 7.753219 2 +ACACC 85 0.8777014 9.555587 9 +CATAA 60 0.8756596 6.7527947 6 +ACCCT 90 0.8711856 13.436585 1 +GAACA 65 0.8667311 6.169792 7 +ACTGC 85 0.8653109 5.092265 95-96 +GGTAT 70 0.86265016 17.106354 6 +AGTTG 70 0.86265016 5.702118 7 +GAGAC 75 0.85656327 5.2844243 1 +GTGTC 85 0.8530954 13.93155 1 +GTTGC 85 0.8530954 9.2877 1 +ATAGA 55 0.84417385 7.1018047 8 +GAAAT 55 0.84417385 7.1018047 5 +CATTC 75 0.83565605 5.155441 6 +TCACA 70 0.83200157 5.499531 3 +TGCGG 90 0.8252928 8.485845 3 +GCATT 70 0.8202563 5.421894 4 +GAACC 75 0.8144686 5.0247283 6 +CTCGA 80 0.81441027 9.420691 6 +GAATC 65 0.8125023 5.7837667 6 +TACAG 65 0.81250226 11.567533 7 +TGGTA 65 0.80103225 11.404236 5 +AAGAC 60 0.80005944 6.169791 8 +CAAGG 70 0.7994591 5.2844243 2 +ATGTA 55 0.7913565 6.6574664 4 +AATGT 55 0.7913565 6.6574664 3 +CGGCA 85 0.7906042 8.607354 2 +GAGAG 65 0.7807225 5.557543 8 +ACCAT 65 0.7725729 5.499531 8 +TTCTA 60 0.7695128 5.934226 9 +TAGAA 50 0.7674308 7.1018047 9 +GCATC 75 0.7635097 9.4206915 1 +GTTCC 80 0.76345515 8.831267 6 +AGCTT 65 0.76166654 5.421894 1 +TTAGC 65 0.76166654 5.421894 9 +CTGTA 65 0.76166654 5.421894 2 +ACTTG 65 0.7616664 5.4218936 2 +GTGCT 75 0.7527313 9.287701 3 +ATCAT 55 0.7524662 6.3302927 3 +GTTTG 65 0.7509141 5.345353 9 +GTGTT 65 0.7509141 10.690706 1 +GTCAA 60 0.75000215 11.5675335 6 +AATGC 60 0.75000215 6.252721 95-96 +CAAGT 60 0.7500021 5.7837663 9 +GCAAT 60 0.7500021 5.7837663 4 +GCAAG 65 0.74235487 5.2844243 1 +AGTGT 60 0.7394144 5.7021174 1 +TTAGG 60 0.7394144 5.702118 7 +AGCGG 75 0.73364604 9.052214 1 +ATCCT 65 0.72423524 5.155441 4 +ACTCT 65 0.72423524 5.155441 9 +AGTGA 55 0.7230346 6.082693 6 +AATAA 40 0.71680677 8.291662 6 +AACCT 60 0.71314424 5.4995303 1 +ATTCT 55 0.70538664 5.9342256 
7 +AGTCT 60 0.7030768 5.421894 3 +GTGCA 65 0.69590795 9.907589 6 +AAAGT 45 0.69068766 7.101804 8 +AACTG 55 0.6875019 5.7837663 1 +CGAAG 60 0.68525064 5.2844243 4 +GATTG 55 0.67779654 5.702118 6 +GTGAT 55 0.67779654 11.404236 4 +TGTTA 50 0.67440337 12.481857 5 +TTGTA 50 0.6744033 6.240928 9 +TATTG 50 0.6744033 6.240928 7 +CTCTA 60 0.6685249 5.1554413 7 +TACCT 60 0.66852486 10.310882 8 +ATGGA 50 0.65730417 6.082693 8 +ATACA 45 0.6567447 6.7527957 6 +ATCAA 45 0.65674466 6.7527947 9 +TGTAA 45 0.6474735 6.6574664 7 +GCGGT 70 0.6418945 8.485846 4 +GGCCG 80 0.63731974 7.372196 2 +GGTTT 55 0.63538885 10.690706 9 +TTGTG 55 0.63538885 5.345353 1 +TATAT 40 0.62991583 7.2865515 8 +CCTGT 65 0.62030727 8.831266 3 +GTGAG 55 0.6192789 5.2098246 1 +TAGGG 55 0.61927885 5.209824 8 +GAGTT 50 0.6161787 5.7021174 6 +ATGTG 50 0.6161787 5.702118 2 +GAATA 40 0.61394465 7.1018047 6 +CTGCG 70 0.6103493 8.068818 2 +CGGTG 65 0.59604484 8.485845 2 +TAAGG 45 0.5915738 6.082693 9 +AAGTG 45 0.5915737 6.0826926 1 +TATTT 40 0.5905039 6.8306537 8 +GGCAT 55 0.5888452 14.861383 3 +GTATC 50 0.5858973 5.421894 4 +ATAAC 40 0.5837731 13.505591 7 +TTACT 45 0.57713455 5.934226 9 +GTATA 40 0.575532 13.314933 7 +GAGTG 50 0.5629808 5.209824 1 +GTACA 45 0.5625016 5.7837667 6 +ATAGC 45 0.5625016 5.7837667 9 +TCTAC 50 0.5571041 5.1554413 8 +GCGAG 55 0.53800714 9.052216 1 +ACGGG 55 0.5380071 9.052214 1 +GATAA 35 0.5372016 7.1018047 6 +AATAG 35 0.5372016 7.101805 7 +CAACT 45 0.53485817 5.4995303 6 +CATAC 45 0.53485817 5.4995303 5 +GATTC 45 0.52730757 5.421894 6 +AGGTA 40 0.5258433 12.165386 5 +CGGTC 60 0.52315664 8.068819 5 +ACGAG 45 0.51393795 5.2844243 7 +TATTC 40 0.5130085 5.9342256 7 +CTAAA 35 0.51080143 6.7527957 9 +TACAA 35 0.51080143 5.402236 35-39 +CCTTA 45 0.5013937 5.1554413 6 +CAGTA 40 0.50000143 5.7837667 4 +GTGTA 40 0.49294293 5.702118 4 +TAACT 35 0.47884214 6.330293 8 +CTTAA 35 0.47884214 6.330293 7 +CTATA 35 0.47884214 6.330293 4 +TTAAC 35 0.47884214 6.330293 8 +TATCA 35 0.4788421 6.3302927 5 +TCAAC 
40 0.47542948 5.499531 7 +ACTCA 40 0.47542942 5.49953 8 +TTAGT 35 0.47208238 10.120425 95-96 +TGTAT 35 0.47208238 6.2409286 3 +ATTGT 35 0.47208235 6.240928 8 +GTTAC 40 0.46871787 5.421894 6 +TGTAC 40 0.46871787 10.843788 7 +AGAGT 35 0.46011293 6.082693 5 +AGTAG 35 0.46011293 6.082693 5 +CTCCG 55 0.45599285 7.672287 6 +GGTAG 40 0.45038468 5.2098246 2 +TTTAC 35 0.44888243 5.9342256 8 +CTACT 40 0.44568333 5.1554418 4 +AACTA 30 0.4378298 6.7527947 9 +TATAG 30 0.43164897 6.6574664 5 +ATATA 25 0.4199739 7.7728767 9 +CTCAA 35 0.41600078 5.499531 9 +TATAC 30 0.4104361 6.3302927 5 +ACTAT 30 0.4104361 6.3302927 6 +TACTA 30 0.4104361 6.3302927 5 +TCGAT 35 0.41012815 10.843788 7 +ACGTT 35 0.41012815 5.421894 4 +CGAAA 30 0.40002972 6.169792 9 +GTAAG 30 0.3943825 6.082693 8 +ATAGG 30 0.3943825 6.082693 3 +TCCTA 35 0.38997287 5.1554413 5 +TTACC 35 0.38997287 5.1554413 7 +ACCGA 35 0.3800853 5.0247273 7 +GCATA 30 0.37500107 5.7837667 1 +TCGAA 30 0.37500107 5.7837667 4 +GCTAA 30 0.37500107 5.7837667 8 +TAGGT 30 0.3697072 5.7021174 7 +GTTAG 30 0.3697072 5.702118 6 +CAATA 25 0.36485815 6.7527947 5 +ATACC 30 0.35657212 5.499531 6 +GACGA 30 0.3426253 5.284424 6 +AAGCG 30 0.3426253 10.568848 7 +GTTTA 25 0.33720168 6.2409286 7 +GTATT 25 0.33720168 12.481857 6 +AGATA 20 0.30697232 7.1018047 5 +CGTCA 30 0.30540386 9.420691 5 +CCTAA 25 0.29714343 5.499531 7 +TACCA 25 0.2971434 5.49953 9 +TGCTA 25 0.29294866 5.421894 7 +TACGT 25 0.29294863 5.4218936 9 +AGACG 25 0.2855211 5.284425 9 +CCTAT 25 0.2785521 5.1554418 3 +TAAGC 20 0.25000072 5.7837667 9 +CTAAG 20 0.25000072 5.7837667 8 +CGATT 20 0.23435894 5.421894 9 +GGGTA 20 0.22519234 5.2098246 2 +ACGCA 20 0.21719159 5.0247273 5 +GCGAA 15 0.17131266 5.284425 3 +CGAAC 15 0.16289368 5.0247273 5 +>>END_MODULE diff --git a/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala b/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala new file mode 100644 index 
0000000000000000000000000000000000000000..0951bea84834b611c323c8e0b1b77ae55f0461b1 --- /dev/null +++ b/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala @@ -0,0 +1,80 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.pipelines.flexiprep + +import java.io.File +import java.nio.file.Paths + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +class FastqcV0101Test extends TestNGSuite with Matchers { + + /** Returns the absolute path to test resource directory as a File object */ + private val resourceDir: File = new File(Paths.get(getClass.getResource("/").toURI).toString) + + /** Given a resource file name, returns the absolute path to it as a File object */ + private def resourceFile(p: String): File = new File(resourceDir, p) + + /** Mock output file of a FastQC v0.10.1 run */ + // the file doesn't actually exist, we just need it so the outputDir value can be computed correctly + private val outputv0101: File = resourceFile("v0101.fq_fastqc.zip") + + @Test def testOutputDir() = { + val fqc = new Fastqc(null) + fqc.output = outputv0101 + fqc.outputDir shouldBe new File(resourceDir, "v0101.fq_fastqc") + } + + @Test def testQcModules() = { + val fqc =
new Fastqc(null) + fqc.output = outputv0101 + // 11 QC modules + fqc.qcModules.size shouldBe 11 + // first module + fqc.qcModules.keySet should contain("Basic Statistics") + // mid (6th module) + fqc.qcModules.keySet should contain("Per sequence GC content") + // last module + fqc.qcModules.keySet should contain("Kmer Content") + } + + @Test def testSingleQcModule() = { + val fqc = new Fastqc(null) + fqc.output = outputv0101 + fqc.qcModules("Basic Statistics").name should ===("Basic Statistics") + fqc.qcModules("Basic Statistics").status should ===("pass") + fqc.qcModules("Basic Statistics").lines.size shouldBe 8 + } + + @Test def testEncoding() = { + val fqc = new Fastqc(null) + fqc.output = outputv0101 + fqc.encoding shouldBe "Sanger / Illumina 1.9" + } + + @Test def testFoundAdapter() = { + val fqc = new Fastqc(null) + fqc.output = outputv0101 + fqc.contaminants = Option(resourceFile("fqc_contaminants_v0101.txt")) + val adapters = fqc.foundAdapters + adapters.size shouldBe 1 + adapters.head.name should ===("TruSeq Adapter, Index 1") + // from fqc_contaminants_v0101.txt + adapters.head.seq should ===("GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG") + } +} \ No newline at end of file diff --git a/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala b/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala index 5360b4ac78824c2e26649a12e1cc3c35262a8b50..94b3bcbe619ee5b01bd29bea6aa9b9e707934a6d 100644 --- a/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala +++ b/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala @@ -36,7 +36,7 @@ class Kopisu(val root: Configurable) extends QScript with MultiSampleQScript { def makeSample(id: String) = new Sample(id) class Sample(sampleId: String) extends AbstractSample(sampleId) { def makeLibrary(id: String) = new Library(id) - class Library(libraryId: String) extends AbstractLibrary(libraryId) { + class 
Library(libId: String) extends AbstractLibrary(libId) { def addJobs(): Unit = { } diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 74779fc20935b6163dceb4f1c3ab57cda0c34c80..9094d38cddbe70d8c7b6787200d2afe94e19d486 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -66,8 +66,15 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { /** Readgroup ID */ protected var readgroupId: String = _ + // TODO: hide sampleId and libId from the command line so they do not interfere with our config values + /** Readgroup Library */ - var libraryId: String = _ + @Argument(doc = "Library ID", shortName = "library", required = true) + var libId: String = _ + + /**Readgroup sample */ + @Argument(doc = "Sample ID", shortName = "sample", required = true) + var sampleId: String = _ /** Readgroup Platform */ protected var platform: String = config("platform", default = "illumina") @@ -75,9 +82,6 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { /** Readgroup platform unit */ protected var platformUnit: String = config("platform_unit", default = "na") - /**Readgroup sample */ - var sampleId: String = _ - /** Readgroup sequencing center */ protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center") @@ -95,14 +99,14 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { def finalBamFile: File = outputDir + outputName + ".final.bam" def init() { - if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module") - else if (!outputDir.endsWith("/")) outputDir += "/" - if (input_R1 == null) throw new IllegalStateException("Missing FastQ R1 on mapping module") + require(outputDir != 
null, "Missing output directory on mapping module") + require(input_R1 != null, "Missing FastQ R1 on mapping module") + require(sampleId != null, "Missing sample ID on mapping module") + require(libId != null, "Missing library ID on mapping module") + paired = input_R2.isDefined - if (libraryId == null) libraryId = config("library_id") - if (sampleId == null) sampleId = config("sample_id") - if (readgroupId == null && sampleId != null && libraryId != null) readgroupId = sampleId + "-" + libraryId + if (readgroupId == null && sampleId != null && libId != null) readgroupId = sampleId + "-" + libId else if (readgroupId == null) readgroupId = config("readgroup_id") if (outputName == null) outputName = readgroupId @@ -127,7 +131,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { flexiprep.input_R1 = input_R1 flexiprep.input_R2 = input_R2 flexiprep.sampleId = this.sampleId - flexiprep.libraryId = this.libraryId + flexiprep.libId = this.libId flexiprep.init flexiprep.runInitialJobs } @@ -281,7 +285,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { var RG: String = "ID:" + readgroupId + "," RG += "SM:" + sampleId + "," - RG += "LB:" + libraryId + "," + RG += "LB:" + libId + "," if (readgroupDescription != null) RG += "DS" + readgroupDescription + "," RG += "PU:" + platformUnit + "," if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + "," @@ -332,7 +336,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { addOrReplaceReadGroups.createIndex = true addOrReplaceReadGroups.RGID = readgroupId - addOrReplaceReadGroups.RGLB = libraryId + addOrReplaceReadGroups.RGLB = libId addOrReplaceReadGroups.RGPL = platform addOrReplaceReadGroups.RGPU = platformUnit addOrReplaceReadGroups.RGSM = sampleId @@ -346,7 +350,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { def getReadGroup(): String = { var RG: String = "@RG\\t" + "ID:" +
readgroupId + "\\t" - RG += "LB:" + libraryId + "\\t" + RG += "LB:" + libId + "\\t" RG += "PL:" + platform + "\\t" RG += "PU:" + platformUnit + "\\t" RG += "SM:" + sampleId + "\\t" diff --git a/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala b/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala index e51be1d7164bd1234f831418e7c1eedaca2f7235..77dba4b1469eba2d75bedafc9f53be7c23acb557 100644 --- a/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala +++ b/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala @@ -59,16 +59,16 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { def makeSample(id: String) = new Sample(id) class Sample(sampleId: String) extends AbstractSample(sampleId) { def makeLibrary(id: String) = new Library(id) - class Library(libraryId: String) extends AbstractLibrary(libraryId) { - val inputFastq: File = config("R1", required = true) + class Library(libId: String) extends AbstractLibrary(libId) { + val inputFastq: File = config("R1") val prefixFastq: File = createFile(".prefix.fastq") val flexiprep = new Flexiprep(qscript) flexiprep.sampleId = sampleId - flexiprep.libraryId = libraryId + flexiprep.libId = libId val mapping = new Mapping(qscript) - mapping.libraryId = libraryId + mapping.libId = libId mapping.sampleId = sampleId protected def addJobs(): Unit = { @@ -93,8 +93,8 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { qscript.addAll(mapping.functions) if (config("library_counts", default = false).asBoolean) { - addBedtoolsCounts(mapping.finalBamFile, sampleId + "-" + libraryId, libDir) - addTablibCounts(pf.outputFastq, sampleId + "-" + libraryId, libDir) + addBedtoolsCounts(mapping.finalBamFile, sampleId + "-" + libId, libDir) + addTablibCounts(pf.outputFastq, sampleId + "-" + libId, libDir) } } } diff --git a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala 
b/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala index 8e3ef0af511f28b19dda8403258b024ab8936b42..6e7257c7d94967eac350fc212e7c7b37ebf5dc58 100644 --- a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala +++ b/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala @@ -35,7 +35,7 @@ import org.broadinstitute.gatk.queue.engine.JobRunInfo class Yamsvp(val root: Configurable) extends QScript with BiopetQScript { //with MultiSampleQScript { def this() = this(null) - var reference: File = config("reference", required = true) + var reference: File = config("reference") var finalBamFiles: List[File] = Nil /* class LibraryOutput extends AbstractLibraryOutput { @@ -124,11 +124,11 @@ class Yamsvp(val root: Configurable) extends QScript with BiopetQScript { //with // Called for each run from a sample - def runSingleLibraryJobs(libraryId: String, sampleID: String): LibraryOutput = { + def runSingleLibraryJobs(libId: String, sampleID: String): LibraryOutput = { val libraryOutput = new LibraryOutput val alignmentDir: String = outputDir + sampleID + "/alignment/" - val runDir: String = alignmentDir + "run_" + libraryId + "/" + val runDir: String = alignmentDir + "run_" + libId + "/" if (config.contains("R1")) { val mapping = new Mapping(this) @@ -140,7 +140,7 @@ class Yamsvp(val root: Configurable) extends QScript with BiopetQScript { //with mapping.input_R1 = config("R1") mapping.input_R2 = config("R2") mapping.paired = (mapping.input_R2 != null) - mapping.RGLB = libraryId + mapping.RGLB = libId mapping.RGSM = sampleID mapping.RGPL = config("PL") mapping.RGPU = config("PU") @@ -154,7 +154,7 @@ class Yamsvp(val root: Configurable) extends QScript with BiopetQScript { //with // start sambamba dedup libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile") - } else this.logger.error("Sample: " + sampleID + ": No R1 found for library: " + libraryId) + } else this.logger.error("Sample: " + 
sampleID + ": No R1 found for library: " + libId) return libraryOutput // logger.debug(outputFiles) // return outputFiles