Commit 4e936deb authored by Peter van 't Hof
Browse files

format fix

parent c906dc08
......@@ -2,13 +2,13 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Created by pjvanthof on 20/06/16.
*/
* Created by pjvanthof on 20/06/16.
*/
class GffRead(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "gffread", freeVar = false)
......@@ -22,7 +22,7 @@ class GffRead(val root: Configurable) extends BiopetCommandLineFunction {
var T: Boolean = config("T", default = false, freeVar = false)
def cmdLine = executable +
(if(inputAsStdin) "" else required(input)) +
(if (inputAsStdin) "" else required(input)) +
(if (outputAsStsout) "" else required("-o", output)) +
conditional(T, "-T")
}
......@@ -2,15 +2,15 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.util.matching.Regex
/**
* Created by pjvanthof on 18/05/16.
*/
* Created by pjvanthof on 18/05/16.
*/
class Sed(val root: Configurable) extends BiopetCommandLineFunction with Version {
executable = config("exe", default = "sed", freeVar = false)
......
......@@ -18,7 +18,7 @@ import java.io.File
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* @deprecated Use picard.util.BedToIntervalList instead
......
package nl.lumc.sasc.biopet.tools
import java.io.{File, PrintWriter}
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.io.Source
/**
* Created by pjvan_thof on 21-9-16.
*/
* Created by pjvan_thof on 21-9-16.
*/
object DownloadNcbiAssembly extends ToolCommand {
case class Args(assemblyId: String = null,
......@@ -40,11 +40,11 @@ object DownloadNcbiAssembly extends ToolCommand {
}
/**
* @param args the command line arguments
*/
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
val cmdargs: Args = argsParser.parse (args, Args () ) getOrElse (throw new IllegalArgumentException)
val cmdargs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
logger.info(s"Reading ${cmdargs.assemblyId} from NCBI")
val reader = Source.fromURL(s"ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/All/${cmdargs.assemblyId}.assembly.txt")
......@@ -80,14 +80,14 @@ object DownloadNcbiAssembly extends ToolCommand {
filterLength.foreach(l => logger.info(s"Filtered length: ${l}"))
filterContigs.foreach { values =>
val id = if (values(6) == "na") values(4) else values(6)
logger.info(s"Start download ${id}")
val fastaReader = Source.fromURL(s"${baseUrlEutils}/efetch.fcgi?db=nuccore&id=${id}&retmode=text&rettype=fasta")
fastaReader.getLines()
.map(x => nameId.map(y => x.replace(">", s">${values(y)} ")).getOrElse(x))
.foreach(fastaWriter.println)
fastaReader.close()
}
val id = if (values(6) == "na") values(4) else values(6)
logger.info(s"Start download ${id}")
val fastaReader = Source.fromURL(s"${baseUrlEutils}/efetch.fcgi?db=nuccore&id=${id}&retmode=text&rettype=fasta")
fastaReader.getLines()
.map(x => nameId.map(y => x.replace(">", s">${values(y)} ")).getOrElse(x))
.foreach(fastaWriter.println)
fastaReader.close()
}
logger.info("Downloading complete")
......
......@@ -17,8 +17,8 @@ package nl.lumc.sasc.biopet.pipelines.generateindexes
import java.util
import nl.lumc.sasc.biopet.core.extensions.Md5sum
import nl.lumc.sasc.biopet.core.{BiopetQScript, PipelineCommand}
import nl.lumc.sasc.biopet.extensions.{Cat, Curl, Zcat}
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.extensions.{ Cat, Curl, Zcat }
import nl.lumc.sasc.biopet.extensions.tools.DownloadNcbiAssembly
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
......@@ -74,10 +74,10 @@ class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript
case _ => {
val fastaUris = genomeConfig.getOrElse("fasta_uri",
throw new IllegalArgumentException(s"No fasta_uri found for $speciesName - $genomeName")) match {
case a: Traversable[_] => a.map(_.toString).toArray
case a: util.ArrayList[_] => a.map(_.toString).toArray
case a => Array(a.toString)
}
case a: Traversable[_] => a.map(_.toString).toArray
case a: util.ArrayList[_] => a.map(_.toString).toArray
case a => Array(a.toString)
}
val fastaFiles = for (fastaUri <- fastaUris) yield {
val curl = new Curl(this)
......@@ -119,7 +119,6 @@ class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript
}
}
val generateIndexes = new GenerateIndexes(this)
generateIndexes.fastaFile = fastaFile
generateIndexes.speciesName = speciesName
......@@ -128,115 +127,115 @@ class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript
//TODO: add gtf file
add(generateIndexes)
// val annotationDir = new File(genomeDir, "annotation")
//
// genomeConfig.get("vep_cache_uri").foreach { vepCacheUri =>
// val vepDir = new File(annotationDir, "vep")
// val curl = new Curl(this)
// curl.url = vepCacheUri.toString
// curl.output = new File(vepDir, new File(curl.url).getName)
// curl.isIntermediate = true
// add(curl)
//
// val tar = new TarExtract(this)
// tar.inputTar = curl.output
// tar.outputDir = vepDir
// add(tar)
//
// val regex = """.*\/(.*)_vep_(\d*)_(.*)\.tar\.gz""".r
// vepCacheUri.toString match {
// case regex(species, version, assembly) if version.forall(_.isDigit) =>
// outputConfig ++= Map("varianteffectpredictor" -> Map(
// "species" -> species,
// "assembly" -> assembly,
// "cache_version" -> version.toInt,
// "cache" -> vepDir,
// "fasta" -> createLinks(vepDir)))
// case _ => throw new IllegalArgumentException("Cache found but no version was found")
// }
// }
//
// genomeConfig.get("dbsnp_vcf_uri").foreach { dbsnpUri =>
// val contigMap = genomeConfig.get("dbsnp_contig_map").map(_.asInstanceOf[Map[String, Any]])
// val contigSed = contigMap.map { map =>
// val sed = new Sed(this)
// sed.expressions = map.map(x => s"""s/^${x._1}\t/${x._2}\t/""").toList
// sed
// }
//
// val cv = new CombineVariants(this)
// cv.reference_sequence = fastaFile
// def addDownload(uri: String): Unit = {
// val isZipped = uri.endsWith(".gz")
// val output = new File(annotationDir, new File(uri).getName + (if (isZipped) "" else ".gz"))
// val curl = new Curl(this)
// curl.url = uri
//
// val downloadCmd = (isZipped, contigSed) match {
// case (true, Some(sed)) => curl | Zcat(this) | sed | new Bgzip(this) > output
// case (false, Some(sed)) => curl | sed | new Bgzip(this) > output
// case (true, None) => curl > output
// case (false, None) => curl | new Bgzip(this) > output
// }
// downloadCmd.isIntermediate = true
// add(downloadCmd)
//
// val tabix = new Tabix(this)
// tabix.input = output
// tabix.p = Some("vcf")
// tabix.isIntermediate = true
// add(tabix)
//
// cv.variant :+= output
// }
//
// dbsnpUri match {
// case l: Traversable[_] => l.foreach(x => addDownload(x.toString))
// case l: util.ArrayList[_] => l.foreach(x => addDownload(x.toString))
// case _ => addDownload(dbsnpUri.toString)
// }
//
// cv.out = new File(annotationDir, "dbsnp.vcf.gz")
// add(cv)
// outputConfig += "dbsnp" -> cv.out
// }
//
// val gffFile: Option[File] = genomeConfig.get("gff_uri").map { gtfUri =>
// val outputFile = new File(annotationDir, new File(gtfUri.toString).getName.stripSuffix(".gz"))
// val curl = new Curl(this)
// curl.url = gtfUri.toString
// if (gtfUri.toString.endsWith(".gz")) add(curl | Zcat(this) > outputFile)
// else add(curl > outputFile)
// outputConfig += "annotation_gff" -> outputFile
// outputFile
// }
//
// val gtfFile: Option[File] = if (gffFile.isDefined) gffFile.map { gff =>
// val gffRead = new GffRead(this)
// gffRead.input = gff
// gffRead.output = swapExt(annotationDir, gff, ".gff", ".gtf")
// add(gffRead)
// gffRead.output
// } else genomeConfig.get("gtf_uri").map { gtfUri =>
// val outputFile = new File(annotationDir, new File(gtfUri.toString).getName.stripSuffix(".gz"))
// val curl = new Curl(this)
// curl.url = gtfUri.toString
// if (gtfUri.toString.endsWith(".gz")) add(curl | Zcat(this) > outputFile)
// else add(curl > outputFile)
// outputConfig += "annotation_gtf" -> outputFile
// outputFile
// }
//
// val refFlatFile: Option[File] = gtfFile.map { gtf =>
// val refFlat = new File(gtf + ".refFlat")
// val gtfToGenePred = new GtfToGenePred(this)
// gtfToGenePred.inputGtfs :+= gtf
//
// add(gtfToGenePred | Awk(this, """{ print $12"\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10 }""") > refFlat)
//
// outputConfig += "annotation_refflat" -> refFlat
// refFlat
// }
// val annotationDir = new File(genomeDir, "annotation")
//
// genomeConfig.get("vep_cache_uri").foreach { vepCacheUri =>
// val vepDir = new File(annotationDir, "vep")
// val curl = new Curl(this)
// curl.url = vepCacheUri.toString
// curl.output = new File(vepDir, new File(curl.url).getName)
// curl.isIntermediate = true
// add(curl)
//
// val tar = new TarExtract(this)
// tar.inputTar = curl.output
// tar.outputDir = vepDir
// add(tar)
//
// val regex = """.*\/(.*)_vep_(\d*)_(.*)\.tar\.gz""".r
// vepCacheUri.toString match {
// case regex(species, version, assembly) if version.forall(_.isDigit) =>
// outputConfig ++= Map("varianteffectpredictor" -> Map(
// "species" -> species,
// "assembly" -> assembly,
// "cache_version" -> version.toInt,
// "cache" -> vepDir,
// "fasta" -> createLinks(vepDir)))
// case _ => throw new IllegalArgumentException("Cache found but no version was found")
// }
// }
//
// genomeConfig.get("dbsnp_vcf_uri").foreach { dbsnpUri =>
// val contigMap = genomeConfig.get("dbsnp_contig_map").map(_.asInstanceOf[Map[String, Any]])
// val contigSed = contigMap.map { map =>
// val sed = new Sed(this)
// sed.expressions = map.map(x => s"""s/^${x._1}\t/${x._2}\t/""").toList
// sed
// }
//
// val cv = new CombineVariants(this)
// cv.reference_sequence = fastaFile
// def addDownload(uri: String): Unit = {
// val isZipped = uri.endsWith(".gz")
// val output = new File(annotationDir, new File(uri).getName + (if (isZipped) "" else ".gz"))
// val curl = new Curl(this)
// curl.url = uri
//
// val downloadCmd = (isZipped, contigSed) match {
// case (true, Some(sed)) => curl | Zcat(this) | sed | new Bgzip(this) > output
// case (false, Some(sed)) => curl | sed | new Bgzip(this) > output
// case (true, None) => curl > output
// case (false, None) => curl | new Bgzip(this) > output
// }
// downloadCmd.isIntermediate = true
// add(downloadCmd)
//
// val tabix = new Tabix(this)
// tabix.input = output
// tabix.p = Some("vcf")
// tabix.isIntermediate = true
// add(tabix)
//
// cv.variant :+= output
// }
//
// dbsnpUri match {
// case l: Traversable[_] => l.foreach(x => addDownload(x.toString))
// case l: util.ArrayList[_] => l.foreach(x => addDownload(x.toString))
// case _ => addDownload(dbsnpUri.toString)
// }
//
// cv.out = new File(annotationDir, "dbsnp.vcf.gz")
// add(cv)
// outputConfig += "dbsnp" -> cv.out
// }
//
// val gffFile: Option[File] = genomeConfig.get("gff_uri").map { gtfUri =>
// val outputFile = new File(annotationDir, new File(gtfUri.toString).getName.stripSuffix(".gz"))
// val curl = new Curl(this)
// curl.url = gtfUri.toString
// if (gtfUri.toString.endsWith(".gz")) add(curl | Zcat(this) > outputFile)
// else add(curl > outputFile)
// outputConfig += "annotation_gff" -> outputFile
// outputFile
// }
//
// val gtfFile: Option[File] = if (gffFile.isDefined) gffFile.map { gff =>
// val gffRead = new GffRead(this)
// gffRead.input = gff
// gffRead.output = swapExt(annotationDir, gff, ".gff", ".gtf")
// add(gffRead)
// gffRead.output
// } else genomeConfig.get("gtf_uri").map { gtfUri =>
// val outputFile = new File(annotationDir, new File(gtfUri.toString).getName.stripSuffix(".gz"))
// val curl = new Curl(this)
// curl.url = gtfUri.toString
// if (gtfUri.toString.endsWith(".gz")) add(curl | Zcat(this) > outputFile)
// else add(curl > outputFile)
// outputConfig += "annotation_gtf" -> outputFile
// outputFile
// }
//
// val refFlatFile: Option[File] = gtfFile.map { gtf =>
// val refFlat = new File(gtf + ".refFlat")
// val gtfToGenePred = new GtfToGenePred(this)
// gtfToGenePred.inputGtfs :+= gtf
//
// add(gtfToGenePred | Awk(this, """{ print $12"\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10 }""") > refFlat)
//
// outputConfig += "annotation_refflat" -> refFlat
// refFlat
// }
}
}
}
......
package nl.lumc.sasc.biopet.pipelines.generateindexes
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.extensions.bowtie.{Bowtie2Build, BowtieBuild}
import nl.lumc.sasc.biopet.extensions.{Ln, Star}
import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie2Build, BowtieBuild }
import nl.lumc.sasc.biopet.extensions.{ Ln, Star }
import nl.lumc.sasc.biopet.extensions.bwa.BwaIndex
import nl.lumc.sasc.biopet.extensions.gmap.GmapBuild
import nl.lumc.sasc.biopet.extensions.hisat.Hisat2Build
......@@ -14,8 +14,8 @@ import org.broadinstitute.gatk.queue.QScript
import scala.collection.mutable.ListBuffer
/**
* Created by pjvan_thof on 21-9-16.
*/
* Created by pjvan_thof on 21-9-16.
*/
class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript {
def this() = this(null)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment