Commit 20eefdf6 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'feature-gentrap_additions' into 'develop'

Feature gentrap additions

This is the latest updates and fixes on the Gentrap pipeline to be compatible with the most recent framework updates.

In particular:

* Issue #156 is fixed ~ this was due to class initialization problems
* Issue #157 is fixed
* TopHat alignment results are now merged with its unmapped alignment file, producing the correct statistics
* The pipeline is updated with the new reference module.
* Redundant CollectRnaSeqMetrics is removed (now using the one from BamMetrics instead).

See merge request !181
parents 3ec61eee 84b4fbf2
{
"samples" : {
"sampleA" : {
"libraries" : {
"lib_1" : {
"R1" : "/path/to/inputA_R1.fq.gz",
"R2" : "/path/to/inputA_R2.fq.gz"
}
}
},
"sampleB" : {
"libraries" : {
"lib_1" : {
"R1" : "/path/to/inputB_1_R1.fq.gz",
"R2" : "/path/to/inputB_1_R2.fq.gz"
},
"lib_2": {
"R1" : "/path/to/inputB_2_R1.fq.gz",
"R2" : "/path/to/inputB_2_R2.fq.gz"
}
}
}
},
"gentrap": {
"output_dir": "/path/to/output_dir",
"expression_measures": ["fragments_per_gene", "bases_per_gene", "bases_per_exon"],
"strand_protocol": "non_specific",
"aligner": "gsnap",
"reference": "/share/isilon/system/local/Genomes-new-27-10-2011/H.Sapiens/hg19_nohap/gsnap/reference.fa",
"annotation_gtf": "/path/to/data/annotation/ucsc_refseq.gtf",
"annotation_bed": "/path/to/data/annotation/ucsc_refseq.bed",
"annotation_refflat": "/path/to/data/annotation/ucsc_refseq.refFlat",
"gsnap": {
"dir": "/share/isilon/system/local/Genomes-new-27-10-2011/H.Sapiens/hg19_nohap/gsnap",
"db": "hg19_nohap",
"quiet_if_excessive": true,
"npaths": 1
},
"cutadapt": {
"minimum_length": 20
},
"mapping": {
"flexiprep": {
"fastqc": {
"threads": 6,
"nogroup": true
}
}
},
"rawbasecounter": {
"core_memory": "20G"
}
}
}
......@@ -110,18 +110,23 @@ Thus, an example settings configuration is as follows:
}
~~~
#### Example configurations
In most cases, it's practical to combine the samples and settings configuration into one file. Here is an [example config file](/examples/gentrap_example.json) where both samples and settings are stored into one file. Note also that there are additional tool configurations in the config file.
## Running Gentrap
As with other pipelines in the Biopet suite, Gentrap can be run by specifying the pipeline after the `pipeline` subcommand:
~~~
java -jar </path/to/biopet.jar> pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run
$ java -jar </path/to/biopet.jar> pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run
~~~
If you already have the `biopet` environment module loaded, you can also simply call `biopet`:
You can also use the `biopet` environment module (recommended) when you are running the pipeline in SHARK:
~~~
biopet pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run
$ module load biopet/v0.3.1
$ biopet pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run
~~~
It is also a good idea to specify retries (we recommend `-retry 3` up to `-retry 5`) so that cluster glitches do not interfere with your pipeline runs.
......
......@@ -38,7 +38,9 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
/** Bed of amplicon that is used */
var ampliconBedFile: Option[File] = config("amplicon_bed")
var rnaMetrics: Boolean = config("rna_metrcis", default = false)
/** Settings for CollectRnaSeqMetrics */
var rnaMetricsSettings: Map[String, String] = Map()
var transcriptRefFlatFile: Option[File] = config("transcript_refflat")
/** return location of summary file */
def summaryFile = (sampleId, libId) match {
......@@ -78,17 +80,22 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
add(gcBiasMetrics)
addSummarizable(gcBiasMetrics, "gc_bias")
val wgsMetrics = new CollectWgsMetrics(this)
wgsMetrics.input = inputBam
wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
add(wgsMetrics)
addSummarizable(wgsMetrics, "wgs")
if (transcriptRefFlatFile.isEmpty) {
val wgsMetrics = new CollectWgsMetrics(this)
wgsMetrics.input = inputBam
wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
add(wgsMetrics)
addSummarizable(wgsMetrics, "wgs")
}
if (rnaMetrics) {
if (transcriptRefFlatFile.isDefined) {
val rnaMetrics = new CollectRnaSeqMetrics(this)
rnaMetrics.input = inputBam
rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
rnaMetrics.refFlat = transcriptRefFlatFile.get
rnaMetrics.ribosomalIntervals = rnaMetricsSettings.get("ribosomal_intervals").collect { case n => new File(n) }
rnaMetrics.strandSpecificity = rnaMetricsSettings.get("strand_specificity")
add(rnaMetrics)
addSummarizable(rnaMetrics, "rna")
}
......
......@@ -45,9 +45,10 @@ class BamMetricsTest extends TestNGSuite with Matchers {
@Test(dataProvider = "bammetricsOptions")
def testFlexiprep(rois: Int, amplicon: Boolean, rna: Boolean) = {
val map = ConfigUtils.mergeMaps(Map("output_dir" -> BamMetricsTest.outputDir, "rna_metrcis" -> rna
), Map(BamMetricsTest.executables.toSeq: _*)) ++
val map = ConfigUtils.mergeMaps(Map("output_dir" -> BamMetricsTest.outputDir),
Map(BamMetricsTest.executables.toSeq: _*)) ++
(if (amplicon) Map("amplicon_bed" -> "amplicon.bed") else Map()) ++
(if (rna) Map("transcript_refflat" -> "transcripts.refFlat") else Map()) ++
Map("regions_of_interest" -> (1 to rois).map("roi_" + _ + ".bed").toList)
val bammetrics: BamMetrics = initPipeline(map)
......@@ -59,7 +60,7 @@ class BamMetricsTest extends TestNGSuite with Matchers {
var regions: Int = rois + (if (amplicon) 1 else 0)
bammetrics.functions.count(_.isInstanceOf[CollectRnaSeqMetrics]) shouldBe (if (rna) 1 else 0)
bammetrics.functions.count(_.isInstanceOf[CollectWgsMetrics]) shouldBe 1
bammetrics.functions.count(_.isInstanceOf[CollectWgsMetrics]) shouldBe (if (rna) 0 else 1)
bammetrics.functions.count(_.isInstanceOf[CollectMultipleMetrics]) shouldBe 1
bammetrics.functions.count(_.isInstanceOf[CalculateHsMetrics]) shouldBe (if (amplicon) 1 else 0)
bammetrics.functions.count(_.isInstanceOf[CollectTargetedPcrMetrics]) shouldBe (if (amplicon) 1 else 0)
......
......@@ -15,6 +15,8 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
javaMainClass = new picard.analysis.CollectMultipleMetrics().getClass.getName
override val defaultCoreMemory = 6.0
@Input(doc = "The input SAM or BAM files to analyze", required = true)
var input: File = null
......@@ -41,7 +43,7 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
}
case _ if p == Programs.CollectInsertSizeMetrics.toString => {
outputFiles :+= new File(outputName + ".insert_size_metrics")
outputFiles :+= new File(outputName + ".insert_size_Histogram.pdf")
outputFiles :+= new File(outputName + ".insert_size_histogram.pdf")
}
case _ if p == Programs.QualityScoreDistribution.toString => {
outputFiles :+= new File(outputName + ".quality_distribution_metrics")
......@@ -85,17 +87,25 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
case _ => None
}
val sum = new Summarizable {
override def summaryFiles: Map[String, File] = Map()
override def summaryStats = stats
override def summaryFiles: Map[String, File] = Map()
}
qscript.addSummarizable(sum, p)
})
}
def summaryFiles = Map()
def summaryStats = Map()
def summaryFiles = {
program.map {
case p if p == Programs.CollectInsertSizeMetrics.toString =>
Map(
"insert_size_histogram" -> new File(outputName + ".insert_size_histogram.pdf"),
"insert_size_metrics" -> new File(outputName + ".insert_size_metrics"))
case otherwise => Map()
}.foldLeft(Map.empty[String, File]) { case (acc, m) => (acc ++ m) }
}
}
object CollectMultipleMetrics {
......
......@@ -32,7 +32,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
var input: File = null
@Input(doc = "Gene annotations in refFlat form", required = true)
var refFlat: File = config("refFlat")
var refFlat: File = null
@Input(doc = "Location of rRNA sequences in interval list format", required = false)
var ribosomalIntervals: Option[File] = config("ribosomal_intervals")
......@@ -68,6 +68,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
var stopAfter: Option[Long] = config("stop_after")
override def beforeGraph: Unit = {
if (refFlat == null) refFlat = config("refFlat")
val validFlags = StrandSpecificity.values.map(_.toString).toSet
strandSpecificity match {
case Some(s) => require(validFlags.contains(s),
......@@ -84,7 +85,9 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
"output_chart" -> chartOutput
).collect { case (key, Some(value)) => key -> value }
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Map(
"metrics" -> Picard.getMetrics(output).getOrElse(Map()),
"histogram" -> Picard.getHistogram(output).getOrElse(Map()))
override def commandLine = super.commandLine +
required("INPUT=", input, spaceSeparated = false) +
......
......@@ -82,25 +82,29 @@ abstract class Picard extends BiopetJavaCommandLineFunction {
object Picard extends Logging {
lazy val getBiopetPicardVersion: Option[String] = {
val reader = Source.fromInputStream(getClass.getResourceAsStream("/dependency_list.txt"))
val dependencies = reader.getLines().map(_.trim.split(":")).filter(_.size == 5).map(line => Map(
"groupId" -> line(0),
"artifactId" -> line(1),
"type" -> line(2),
"version" -> line(3),
"scope" -> line(4)
)).toList
logger.debug("dependencies: " + dependencies)
val htsjdk = dependencies.find(dep => dep("groupId") == "samtools" && dep("artifactId") == "htsjdk").collect {
case dep =>
"samtools htsjdk " + dep("version")
}
Option(getClass.getResourceAsStream("/dependency_list.txt")) match {
case Some(src) =>
val dependencies = Source.fromInputStream(src)
.getLines().map(_.trim.split(":")).filter(_.size == 5).map(line => Map(
"groupId" -> line(0),
"artifactId" -> line(1),
"type" -> line(2),
"version" -> line(3),
"scope" -> line(4)
)).toList
logger.debug("dependencies: " + dependencies)
val htsjdk = dependencies.find(dep => dep("groupId") == "samtools" && dep("artifactId") == "htsjdk").collect {
case dep =>
"samtools htsjdk " + dep("version")
}
dependencies.find(dep => dep("groupId") == "picard" && dep("artifactId") == "picard").collect {
case dep =>
"Picard " + dep("version") + " using " + htsjdk.getOrElse("unknown htsjdk")
dependencies.find(dep => dep("groupId") == "picard" && dep("artifactId") == "picard").collect {
case dep =>
"Picard " + dep("version") + " using " + htsjdk.getOrElse("unknown htsjdk")
}
case otherwise => None
}
}
......
......@@ -36,7 +36,7 @@ class BiopetFlagstat(val root: Configurable) extends ToolCommandFuntion with Sum
@Output(doc = "summary output file", shortName = "output", required = false)
var summaryFile: File = _
override val defaultCoreMemory = 2.0
override val defaultCoreMemory = 6.0
override def commandLine = super.commandLine + required("-I", input) + required("-s", summaryFile) + " > " + required(output)
......
......@@ -32,7 +32,7 @@ class MergeTables(val root: Configurable) extends ToolCommandFuntion {
javaMainClass = getClass.getName
override val defaultCoreMemory = 2.0
override val defaultCoreMemory = 6.0
/** List of input tabular files */
@Input(doc = "Input table files", required = true)
......@@ -71,7 +71,7 @@ class MergeTables(val root: Configurable) extends ToolCommandFuntion {
required("-a", valueColumnIndex) +
optional("-n", idColumnName) +
optional("-e", fileExtension) +
optional("-h", numHeaderLines) +
optional("-m", numHeaderLines) +
optional("-f", fallbackString) +
optional("-d", delimiter) +
required("-o", output) +
......@@ -164,7 +164,7 @@ object MergeTables extends ToolCommand {
idColumnIndices: Seq[Int] = Seq.empty[Int],
valueColumnIndex: Int = -1,
fileExtension: String = "",
numHeaderLines: Int = 1,
numHeaderLines: Int = 0,
fallbackString: String = "-",
delimiter: Char = '\t',
out: File = new File("-")) extends AbstractArgs
......@@ -206,9 +206,9 @@ object MergeTables extends ToolCommand {
c.copy(fileExtension = x)
} text "Common extension of all input tables to strip (default: empty string)"
opt[Int]('h', "num_header_lines") optional () action { (x, c) =>
opt[Int]('m', "num_header_lines") optional () action { (x, c) =>
c.copy(numHeaderLines = x)
} text "The number of header lines present in all input files (default: 1; 1-line header)"
} text "The number of header lines present in all input files (default: 0; no header)"
opt[String]('f', "fallback") optional () action { (x, c) =>
c.copy(fallbackString = x)
......
......@@ -143,7 +143,7 @@ class MergeTablesTest extends TestNGSuite with MockitoSugar with Matchers {
// default arguments
parsed.fallbackString shouldBe "-"
parsed.fileExtension shouldBe ""
parsed.numHeaderLines shouldBe 1
parsed.numHeaderLines shouldBe 0
parsed.delimiter shouldBe '\t'
}
}
......@@ -53,7 +53,7 @@ class FastQCModule(object):
def __repr__(self):
return '%s(%s)' % (self.__class__.__name__,
'[%r, ...]' % self.raw_lines[0])
'[%r, ...]' % self.raw_lines[0])
def __str__(self):
return ''.join(self.raw_lines)
......@@ -88,7 +88,7 @@ class FastQCModule(object):
self._name = name
status = tokens[-1]
assert status in ('pass', 'fail', 'warn'), "Unknown module status: %r" \
% status
% status
self._status = status
# and column names from second line
columns = self.raw_lines[1][1:].strip().split('\t')
......@@ -123,7 +123,7 @@ class FastQC(object):
'>>Sequence Duplication Levels': 'sequence_duplication_levels',
'>>Overrepresented sequences': 'overrepresented_sequences',
'>>Kmer content': 'kmer_content',
}
}
def __init__(self, fname):
"""
......@@ -299,12 +299,12 @@ class LongTable(object):
"\\hline \\hline",
"\\endhead",
"\\hline \\multicolumn{%i}{c}{\\textit{Continued on next page}}\\\\" % \
colnum,
colnum,
"\\hline",
"\\endfoot",
"\\hline",
"\\endlastfoot",
]
]
def __str__(self):
return "\n".join(self.lines)
......@@ -314,7 +314,7 @@ class LongTable(object):
def end(self):
self.lines.extend(["\\end{longtable}", "\\end{center}",
"\\addtocounter{table}{-1}"])
"\\addtocounter{table}{-1}"])
# filter functions for the jinja environment
......@@ -348,7 +348,7 @@ def float2nice_pct(num, default="None"):
# and some handy functions
def natural_sort(inlist):
key = lambda x: [int(a) if a.isdigit() else a.lower() for a in
re.split("([0-9]+)", x)]
re.split("([0-9]+)", x)]
inlist.sort(key=key)
return inlist
......@@ -383,7 +383,7 @@ def write_template(run, template_file, logo_file):
run.logo = logo_file
render_vars = {
"run": run,
}
}
rendered = jinja_template.render(**render_vars)
print(rendered, file=sys.stdout)
......@@ -417,36 +417,43 @@ class GentrapLib(object):
self.fastqc_r2_qc_files = self.flexiprep["files"]["fastqc_R2_qc"]
self.fastqc_r2_qc = FastQC(self.fastqc_r2_qc_files["fastqc_data"]["path"])
# mapping metrics settings
self.aln_metrics = summary.get("bammetrics", {}).get("stats", {}).get("alignment_metrics", {})
self.aln_metrics = summary.get("bammetrics", {}).get("stats", {}).get("CollectAlignmentSummaryMetrics", {})
for k, v in self.aln_metrics.items():
self.aln_metrics[k] = {a.lower(): b for a, b in v.items()}
# insert size metrics files
self.inserts_metrics_files = summary.get("bammetrics", {}).get("files", {}).get("insert_size_metrics", {})
self.inserts_metrics_files = \
summary.get("bammetrics", {}).get("files", {}).get("multi_metrics", {})
# rna metrics files and stats
self.rna_metrics_files = summary.get("gentrap", {}).get("files", {}).get("rna_metrics", {})
_rmetrics = summary.get("gentrap", {}).get("stats", {}).get("rna_metrics", {})
self.rna_metrics_files = summary.get("bammetrics", {}).get("files", {}).get("rna", {})
_rmetrics = summary.get("bammetrics", {}).get("stats", {}).get("rna", {})
if _rmetrics:
self.rna_metrics = {k: v for k, v in _rmetrics.items() }
if "metrics" in _rmetrics:
_rmetrics = _rmetrics["metrics"]
if _rmetrics:
_rmetrics = {k.lower(): v for k, v in _rmetrics.items() }
self.rna_metrics = _rmetrics
pf_bases = float(_rmetrics["pf_bases"])
exonic_bases = int(_rmetrics.get("coding_bases", 0)) + int(_rmetrics.get("utr_bases", 0))
# picard uses pct_ but it's actually ratio ~ we follow their convention
pct_exonic_bases_all = exonic_bases / float(_rmetrics["pf_bases"])
pct_exonic_bases = exonic_bases / float(_rmetrics.get("pf_aligned_bases", 0))
self.rna_metrics.update({
"exonic_bases": exonic_bases,
"pct_exonic_bases_all": pct_exonic_bases_all,
"pct_exonic_bases": pct_exonic_bases,
"pct_aligned_bases": 1.0,
"pct_aligned_bases_all": float(_rmetrics.get("pf_aligned_bases", 0.0)) / pf_bases,
"pct_coding_bases_all": float(_rmetrics.get("coding_bases", 0.0)) / pf_bases,
"pct_utr_bases_all": float(_rmetrics.get("utr_bases", 0.0)) / pf_bases,
"pct_intronic_bases_all": float(_rmetrics.get("intronic_bases", 0.0)) / pf_bases,
"pct_intergenic_bases_all": float(_rmetrics.get("intergenic_bases", 0.0)) / pf_bases,
})
"exonic_bases": exonic_bases,
"pct_exonic_bases_all": pct_exonic_bases_all,
"pct_exonic_bases": pct_exonic_bases,
"pct_aligned_bases": 1.0,
"pct_aligned_bases_all": float(_rmetrics.get("pf_aligned_bases", 0.0)) / pf_bases,
"pct_coding_bases_all": float(_rmetrics.get("coding_bases", 0.0)) / pf_bases,
"pct_utr_bases_all": float(_rmetrics.get("utr_bases", 0.0)) / pf_bases,
"pct_intronic_bases_all": float(_rmetrics.get("intronic_bases", 0.0)) / pf_bases,
"pct_intergenic_bases_all": float(_rmetrics.get("intergenic_bases", 0.0)) / pf_bases,
})
if _rmetrics.get("ribosomal_bases", "") != "":
self.rna_metrics["pct_ribosomal_bases_all"] = float(_rmetrics.get("pf_ribosomal_bases", 0.0)) / pf_bases
def __repr__(self):
return "{0}(sample=\"{1}\", lib=\"{2}\")".format(
self.__class__.__name__, self.sample.name, self.name)
self.__class__.__name__, self.sample.name, self.name)
class GentrapSample(object):
......@@ -458,32 +465,36 @@ class GentrapSample(object):
self._raw = summary
self.is_paired_end = summary.get("gentrap", {}).get("stats", {}).get("pipeline", {})["all_paired"]
# mapping metrics settings
self.aln_metrics = summary.get("bammetrics", {}).get("stats", {}).get("alignment_metrics", {})
self.aln_metrics = summary.get("bammetrics", {}).get("stats", {}).get("CollectAlignmentSummaryMetrics", {})
for k, v in self.aln_metrics.items():
self.aln_metrics[k] = {a.lower(): b for a, b in v.items()}
# insert size metrics files
self.inserts_metrics_files = summary.get("bammetrics", {}).get("files", {}).get("insert_size_metrics", {})
self.inserts_metrics_files = \
summary.get("bammetrics", {}).get("files", {}).get("multi_metrics", {})
# rna metrics files and stats
self.rna_metrics_files = summary.get("gentrap", {}).get("files", {}).get("rna_metrics", {})
_rmetrics = summary.get("gentrap", {}).get("stats", {}).get("rna_metrics", {})
self.rna_metrics_files = summary.get("bammetrics", {}).get("files", {}).get("rna", {})
_rmetrics = summary.get("bammetrics", {}).get("stats", {}).get("rna", {})
if _rmetrics:
self.rna_metrics = {k: v for k, v in _rmetrics.items() }
if "metrics" in _rmetrics:
_rmetrics = _rmetrics["metrics"]
if _rmetrics:
_rmetrics = {k.lower(): v for k, v in _rmetrics.items() }
self.rna_metrics = _rmetrics
pf_bases = float(_rmetrics["pf_bases"])
exonic_bases = int(_rmetrics.get("coding_bases", 0)) + int(_rmetrics.get("utr_bases", 0))
# picard uses pct_ but it's actually ratio ~ we follow their convention
pct_exonic_bases_all = exonic_bases / float(_rmetrics["pf_bases"])
pct_exonic_bases = exonic_bases / float(_rmetrics.get("pf_aligned_bases", 0))
self.rna_metrics.update({
"exonic_bases": exonic_bases,
"pct_exonic_bases_all": pct_exonic_bases_all,
"pct_exonic_bases": pct_exonic_bases,
"pct_aligned_bases": 1.0,
"pct_aligned_bases_all": float(_rmetrics.get("pf_aligned_bases", 0.0)) / pf_bases,
"pct_coding_bases_all": float(_rmetrics.get("coding_bases", 0.0)) / pf_bases,
"pct_utr_bases_all": float(_rmetrics.get("utr_bases", 0.0)) / pf_bases,
"pct_intronic_bases_all": float(_rmetrics.get("intronic_bases", 0.0)) / pf_bases,
"pct_intergenic_bases_all": float(_rmetrics.get("intergenic_bases", 0.0)) / pf_bases,
})
if self.run.settings["strand_protocol"] != "non_specific":
self.rna_metrics.update({
"exonic_bases": exonic_bases,
"pct_exonic_bases_all": pct_exonic_bases_all,
"pct_exonic_bases": pct_exonic_bases,
"pct_aligned_bases": 1.0,
"pct_aligned_bases_all": float(_rmetrics.get("pf_aligned_bases", 0.0)) / pf_bases,
"pct_coding_bases_all": float(_rmetrics.get("coding_bases", 0.0)) / pf_bases,
"pct_utr_bases_all": float(_rmetrics.get("utr_bases", 0.0)) / pf_bases,
"pct_intronic_bases_all": float(_rmetrics.get("intronic_bases", 0.0)) / pf_bases,
"pct_intergenic_bases_all": float(_rmetrics.get("intergenic_bases", 0.0)) / pf_bases,
})
if _rmetrics.get("ribosomal_bases", "") != "":
self.rna_metrics["pct_ribosomal_bases_all"] = float(_rmetrics.get("pf_ribosomal_bases", 0.0)) / pf_bases
......@@ -491,7 +502,7 @@ class GentrapSample(object):
self.lib_names = sorted(summary["libraries"].keys())
self.libs = \
{l: GentrapLib(self.run, self, l, summary["libraries"][l]) \
for l in self.lib_names}
for l in self.lib_names}
def __repr__(self):
return "{0}(\"{1}\")".format(self.__class__.__name__, self.name)
......@@ -521,7 +532,7 @@ class GentrapRun(object):
("tophat", "alignment"),
("star", "alignment"),
("htseqcount", "fragment counting"),
]
]
self.executables = {}
for k, desc in executables:
in_summary = self.all_executables.get(k)
......@@ -543,7 +554,7 @@ class GentrapRun(object):
self.sample_names = sorted(summary["samples"].keys())
self.samples = \
{s: GentrapSample(self, s, summary["samples"][s]) \
for s in self.sample_names}
for s in self.sample_names}
self.libs = []
for sample in self.samples.values():
self.libs.extend(sample.libs.values())
......@@ -556,19 +567,20 @@ class GentrapRun(object):
def __repr__(self):
return "{0}(\"{1}\")".format(self.__class__.__name__,
self.summary_file)
self.summary_file)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("summary_file", type=str,
help="Path to Gentrap summary file")
help="Path to Gentrap summary file")
parser.add_argument("template_file", type=str,
help="Path to main template file")