Commit 20eefdf6 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'feature-gentrap_additions' into 'develop'

Feature gentrap additions

This is the latest updates and fixes on the Gentrap pipeline to be compatible with the most recent framework updates.

In particular:

* Issue #156 is fixed ~ this was due to class initialization problems
* Issue #157 is fixed
* TopHat alignment results are now merged with its unmapped alignment file, producing the correct statistics
* The pipeline is updated with the new reference module.
* Redundant CollectRnaSeqMetrics is removed (now using the one from BamMetrics instead).

See merge request !181
parents 3ec61eee 84b4fbf2
{
"samples" : {
"sampleA" : {
"libraries" : {
"lib_1" : {
"R1" : "/path/to/inputA_R1.fq.gz",
"R2" : "/path/to/inputA_R2.fq.gz"
}
}
},
"sampleB" : {
"libraries" : {
"lib_1" : {
"R1" : "/path/to/inputB_1_R1.fq.gz",
"R2" : "/path/to/inputB_1_R2.fq.gz"
},
"lib_2": {
"R1" : "/path/to/inputB_2_R1.fq.gz",
"R2" : "/path/to/inputB_2_R2.fq.gz"
}
}
}
},
"gentrap": {
"output_dir": "/path/to/output_dir",
"expression_measures": ["fragments_per_gene", "bases_per_gene", "bases_per_exon"],
"strand_protocol": "non_specific",
"aligner": "gsnap",
"reference": "/share/isilon/system/local/Genomes-new-27-10-2011/H.Sapiens/hg19_nohap/gsnap/reference.fa",
"annotation_gtf": "/path/to/data/annotation/ucsc_refseq.gtf",
"annotation_bed": "/path/to/data/annotation/ucsc_refseq.bed",
"annotation_refflat": "/path/to/data/annotation/ucsc_refseq.refFlat",
"gsnap": {
"dir": "/share/isilon/system/local/Genomes-new-27-10-2011/H.Sapiens/hg19_nohap/gsnap",
"db": "hg19_nohap",
"quiet_if_excessive": true,
"npaths": 1
},
"cutadapt": {
"minimum_length": 20
},
"mapping": {
"flexiprep": {
"fastqc": {
"threads": 6,
"nogroup": true
}
}
},
"rawbasecounter": {
"core_memory": "20G"
}
}
}
......@@ -110,18 +110,23 @@ Thus, an example settings configuration is as follows:
}
~~~
#### Example configurations
In most cases, it's practical to combine the samples and settings configuration into one file. Here is an [example config file](/examples/gentrap_example.json) where both samples and settings are stored into one file. Note also that there are additional tool configurations in the config file.
## Running Gentrap
As with other pipelines in the Biopet suite, Gentrap can be run by specifying the pipeline after the `pipeline` subcommand:
~~~
java -jar </path/to/biopet.jar> pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run
$ java -jar </path/to/biopet.jar> pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run
~~~
If you already have the `biopet` environment module loaded, you can also simply call `biopet`:
You can also use the `biopet` environment module (recommended) when you are running the pipeline in SHARK:
~~~
biopet pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run
$ module load biopet/v0.3.1
$ biopet pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run
~~~
It is also a good idea to specify retries (we recomend `-retry 3` up to `-retry 5`) so that cluster glitches do not interfere with your pipeline runs.
......
......@@ -38,7 +38,9 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
/** Bed of amplicon that is used */
var ampliconBedFile: Option[File] = config("amplicon_bed")
var rnaMetrics: Boolean = config("rna_metrcis", default = false)
/** Settings for CollectRnaSeqMetrics */
var rnaMetricsSettings: Map[String, String] = Map()
var transcriptRefFlatFile: Option[File] = config("transcript_refflat")
/** return location of summary file */
def summaryFile = (sampleId, libId) match {
......@@ -78,17 +80,22 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
add(gcBiasMetrics)
addSummarizable(gcBiasMetrics, "gc_bias")
if (transcriptRefFlatFile.isEmpty) {
val wgsMetrics = new CollectWgsMetrics(this)
wgsMetrics.input = inputBam
wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
add(wgsMetrics)
addSummarizable(wgsMetrics, "wgs")
}
if (rnaMetrics) {
if (transcriptRefFlatFile.isDefined) {
val rnaMetrics = new CollectRnaSeqMetrics(this)
rnaMetrics.input = inputBam
rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
rnaMetrics.refFlat = transcriptRefFlatFile.get
rnaMetrics.ribosomalIntervals = rnaMetricsSettings.get("ribosomal_intervals").collect { case n => new File(n) }
rnaMetrics.strandSpecificity = rnaMetricsSettings.get("strand_specificity")
add(rnaMetrics)
addSummarizable(rnaMetrics, "rna")
}
......
......@@ -45,9 +45,10 @@ class BamMetricsTest extends TestNGSuite with Matchers {
@Test(dataProvider = "bammetricsOptions")
def testFlexiprep(rois: Int, amplicon: Boolean, rna: Boolean) = {
val map = ConfigUtils.mergeMaps(Map("output_dir" -> BamMetricsTest.outputDir, "rna_metrcis" -> rna
), Map(BamMetricsTest.executables.toSeq: _*)) ++
val map = ConfigUtils.mergeMaps(Map("output_dir" -> BamMetricsTest.outputDir),
Map(BamMetricsTest.executables.toSeq: _*)) ++
(if (amplicon) Map("amplicon_bed" -> "amplicon.bed") else Map()) ++
(if (rna) Map("transcript_refflat" -> "transcripts.refFlat") else Map()) ++
Map("regions_of_interest" -> (1 to rois).map("roi_" + _ + ".bed").toList)
val bammetrics: BamMetrics = initPipeline(map)
......@@ -59,7 +60,7 @@ class BamMetricsTest extends TestNGSuite with Matchers {
var regions: Int = rois + (if (amplicon) 1 else 0)
bammetrics.functions.count(_.isInstanceOf[CollectRnaSeqMetrics]) shouldBe (if (rna) 1 else 0)
bammetrics.functions.count(_.isInstanceOf[CollectWgsMetrics]) shouldBe 1
bammetrics.functions.count(_.isInstanceOf[CollectWgsMetrics]) shouldBe (if (rna) 0 else 1)
bammetrics.functions.count(_.isInstanceOf[CollectMultipleMetrics]) shouldBe 1
bammetrics.functions.count(_.isInstanceOf[CalculateHsMetrics]) shouldBe (if (amplicon) 1 else 0)
bammetrics.functions.count(_.isInstanceOf[CollectTargetedPcrMetrics]) shouldBe (if (amplicon) 1 else 0)
......
......@@ -15,6 +15,8 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
javaMainClass = new picard.analysis.CollectMultipleMetrics().getClass.getName
override val defaultCoreMemory = 6.0
@Input(doc = "The input SAM or BAM files to analyze", required = true)
var input: File = null
......@@ -41,7 +43,7 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
}
case _ if p == Programs.CollectInsertSizeMetrics.toString => {
outputFiles :+= new File(outputName + ".insert_size_metrics")
outputFiles :+= new File(outputName + ".insert_size_Histogram.pdf")
outputFiles :+= new File(outputName + ".insert_size_histogram.pdf")
}
case _ if p == Programs.QualityScoreDistribution.toString => {
outputFiles :+= new File(outputName + ".quality_distribution_metrics")
......@@ -85,17 +87,25 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
case _ => None
}
val sum = new Summarizable {
override def summaryFiles: Map[String, File] = Map()
override def summaryStats = stats
override def summaryFiles: Map[String, File] = Map()
}
qscript.addSummarizable(sum, p)
})
}
def summaryFiles = Map()
def summaryStats = Map()
def summaryFiles = {
program.map {
case p if p == Programs.CollectInsertSizeMetrics.toString =>
Map(
"insert_size_histogram" -> new File(outputName + ".insert_size_histogram.pdf"),
"insert_size_metrics" -> new File(outputName + ".insert_size_metrics"))
case otherwise => Map()
}.foldLeft(Map.empty[String, File]) { case (acc, m) => (acc ++ m) }
}
}
object CollectMultipleMetrics {
......
......@@ -32,7 +32,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
var input: File = null
@Input(doc = "Gene annotations in refFlat form", required = true)
var refFlat: File = config("refFlat")
var refFlat: File = null
@Input(doc = "Location of rRNA sequences in interval list format", required = false)
var ribosomalIntervals: Option[File] = config("ribosomal_intervals")
......@@ -68,6 +68,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
var stopAfter: Option[Long] = config("stop_after")
override def beforeGraph: Unit = {
if (refFlat == null) refFlat = config("refFlat")
val validFlags = StrandSpecificity.values.map(_.toString).toSet
strandSpecificity match {
case Some(s) => require(validFlags.contains(s),
......@@ -84,7 +85,9 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
"output_chart" -> chartOutput
).collect { case (key, Some(value)) => key -> value }
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Map(
"metrics" -> Picard.getMetrics(output).getOrElse(Map()),
"histogram" -> Picard.getHistogram(output).getOrElse(Map()))
override def commandLine = super.commandLine +
required("INPUT=", input, spaceSeparated = false) +
......
......@@ -82,8 +82,10 @@ abstract class Picard extends BiopetJavaCommandLineFunction {
object Picard extends Logging {
lazy val getBiopetPicardVersion: Option[String] = {
val reader = Source.fromInputStream(getClass.getResourceAsStream("/dependency_list.txt"))
val dependencies = reader.getLines().map(_.trim.split(":")).filter(_.size == 5).map(line => Map(
Option(getClass.getResourceAsStream("/dependency_list.txt")) match {
case Some(src) =>
val dependencies = Source.fromInputStream(src)
.getLines().map(_.trim.split(":")).filter(_.size == 5).map(line => Map(
"groupId" -> line(0),
"artifactId" -> line(1),
"type" -> line(2),
......@@ -102,6 +104,8 @@ object Picard extends Logging {
case dep =>
"Picard " + dep("version") + " using " + htsjdk.getOrElse("unknown htsjdk")
}
case otherwise => None
}
}
def getMetrics(file: File, tag: String = "METRICS CLASS",
......
......@@ -36,7 +36,7 @@ class BiopetFlagstat(val root: Configurable) extends ToolCommandFuntion with Sum
@Output(doc = "summary output file", shortName = "output", required = false)
var summaryFile: File = _
override val defaultCoreMemory = 2.0
override val defaultCoreMemory = 6.0
override def commandLine = super.commandLine + required("-I", input) + required("-s", summaryFile) + " > " + required(output)
......
......@@ -32,7 +32,7 @@ class MergeTables(val root: Configurable) extends ToolCommandFuntion {
javaMainClass = getClass.getName
override val defaultCoreMemory = 2.0
override val defaultCoreMemory = 6.0
/** List of input tabular files */
@Input(doc = "Input table files", required = true)
......@@ -71,7 +71,7 @@ class MergeTables(val root: Configurable) extends ToolCommandFuntion {
required("-a", valueColumnIndex) +
optional("-n", idColumnName) +
optional("-e", fileExtension) +
optional("-h", numHeaderLines) +
optional("-m", numHeaderLines) +
optional("-f", fallbackString) +
optional("-d", delimiter) +
required("-o", output) +
......@@ -164,7 +164,7 @@ object MergeTables extends ToolCommand {
idColumnIndices: Seq[Int] = Seq.empty[Int],
valueColumnIndex: Int = -1,
fileExtension: String = "",
numHeaderLines: Int = 1,
numHeaderLines: Int = 0,
fallbackString: String = "-",
delimiter: Char = '\t',
out: File = new File("-")) extends AbstractArgs
......@@ -206,9 +206,9 @@ object MergeTables extends ToolCommand {
c.copy(fileExtension = x)
} text "Common extension of all input tables to strip (default: empty string)"
opt[Int]('h', "num_header_lines") optional () action { (x, c) =>
opt[Int]('m', "num_header_lines") optional () action { (x, c) =>
c.copy(numHeaderLines = x)
} text "The number of header lines present in all input files (default: 1; 1-line header)"
} text "The number of header lines present in all input files (default: 0; no header)"
opt[String]('f', "fallback") optional () action { (x, c) =>
c.copy(fallbackString = x)
......
......@@ -143,7 +143,7 @@ class MergeTablesTest extends TestNGSuite with MockitoSugar with Matchers {
// default arguments
parsed.fallbackString shouldBe "-"
parsed.fileExtension shouldBe ""
parsed.numHeaderLines shouldBe 1
parsed.numHeaderLines shouldBe 0
parsed.delimiter shouldBe '\t'
}
}
......@@ -417,14 +417,21 @@ class GentrapLib(object):
self.fastqc_r2_qc_files = self.flexiprep["files"]["fastqc_R2_qc"]
self.fastqc_r2_qc = FastQC(self.fastqc_r2_qc_files["fastqc_data"]["path"])
# mapping metrics settings
self.aln_metrics = summary.get("bammetrics", {}).get("stats", {}).get("alignment_metrics", {})
self.aln_metrics = summary.get("bammetrics", {}).get("stats", {}).get("CollectAlignmentSummaryMetrics", {})
for k, v in self.aln_metrics.items():
self.aln_metrics[k] = {a.lower(): b for a, b in v.items()}
# insert size metrics files
self.inserts_metrics_files = summary.get("bammetrics", {}).get("files", {}).get("insert_size_metrics", {})
self.inserts_metrics_files = \
summary.get("bammetrics", {}).get("files", {}).get("multi_metrics", {})
# rna metrics files and stats
self.rna_metrics_files = summary.get("gentrap", {}).get("files", {}).get("rna_metrics", {})
_rmetrics = summary.get("gentrap", {}).get("stats", {}).get("rna_metrics", {})
self.rna_metrics_files = summary.get("bammetrics", {}).get("files", {}).get("rna", {})
_rmetrics = summary.get("bammetrics", {}).get("stats", {}).get("rna", {})
if _rmetrics:
self.rna_metrics = {k: v for k, v in _rmetrics.items() }
if "metrics" in _rmetrics:
_rmetrics = _rmetrics["metrics"]
if _rmetrics:
_rmetrics = {k.lower(): v for k, v in _rmetrics.items() }
self.rna_metrics = _rmetrics
pf_bases = float(_rmetrics["pf_bases"])
exonic_bases = int(_rmetrics.get("coding_bases", 0)) + int(_rmetrics.get("utr_bases", 0))
# picard uses pct_ but it's actually ratio ~ we follow their convention
......@@ -458,14 +465,21 @@ class GentrapSample(object):
self._raw = summary
self.is_paired_end = summary.get("gentrap", {}).get("stats", {}).get("pipeline", {})["all_paired"]
# mapping metrics settings
self.aln_metrics = summary.get("bammetrics", {}).get("stats", {}).get("alignment_metrics", {})
self.aln_metrics = summary.get("bammetrics", {}).get("stats", {}).get("CollectAlignmentSummaryMetrics", {})
for k, v in self.aln_metrics.items():
self.aln_metrics[k] = {a.lower(): b for a, b in v.items()}
# insert size metrics files
self.inserts_metrics_files = summary.get("bammetrics", {}).get("files", {}).get("insert_size_metrics", {})
self.inserts_metrics_files = \
summary.get("bammetrics", {}).get("files", {}).get("multi_metrics", {})
# rna metrics files and stats
self.rna_metrics_files = summary.get("gentrap", {}).get("files", {}).get("rna_metrics", {})
_rmetrics = summary.get("gentrap", {}).get("stats", {}).get("rna_metrics", {})
self.rna_metrics_files = summary.get("bammetrics", {}).get("files", {}).get("rna", {})
_rmetrics = summary.get("bammetrics", {}).get("stats", {}).get("rna", {})
if _rmetrics:
if "metrics" in _rmetrics:
_rmetrics = _rmetrics["metrics"]
if _rmetrics:
self.rna_metrics = {k: v for k, v in _rmetrics.items() }
_rmetrics = {k.lower(): v for k, v in _rmetrics.items() }
self.rna_metrics = _rmetrics
pf_bases = float(_rmetrics["pf_bases"])
exonic_bases = int(_rmetrics.get("coding_bases", 0)) + int(_rmetrics.get("utr_bases", 0))
# picard uses pct_ but it's actually ratio ~ we follow their convention
......@@ -482,9 +496,6 @@ class GentrapSample(object):
"pct_intronic_bases_all": float(_rmetrics.get("intronic_bases", 0.0)) / pf_bases,
"pct_intergenic_bases_all": float(_rmetrics.get("intergenic_bases", 0.0)) / pf_bases,
})
if self.run.settings["strand_protocol"] != "non_specific":
self.rna_metrics.update({
})
if _rmetrics.get("ribosomal_bases", "") != "":
self.rna_metrics["pct_ribosomal_bases_all"] = float(_rmetrics.get("pf_ribosomal_bases", 0.0)) / pf_bases
......@@ -572,3 +583,4 @@ if __name__ == "__main__":
run = GentrapRun(args.summary_file)
write_template(run, args.template_file, args.logo_file)
......@@ -47,11 +47,11 @@
% inferred insert size distribution
\subsubsection{Insert size distribution}
\IfFileExists{((( lib.inserts_metrics_files.output_histogram.path )))}
\IfFileExists{((( lib.inserts_metrics_files.insert_size_histogram.path )))}
{
\begin{figure}[h!]
\centering
\includegraphics[width=0.7\textwidth]{((( lib.inserts_metrics_files.output_histogram.path )))}
\includegraphics[width=0.7\textwidth]{((( lib.inserts_metrics_files.insert_size_histogram.path )))}
\caption{Distribution of insert size length of paired-end reads mapped to opposite strands.}
\end{figure}
}
......@@ -108,14 +108,5 @@
Ribosomal bases & ((( lib.rna_metrics.ribosomal_bases|nice_int ))) & ((( lib.rna_metrics.pct_ribosomal_bases_all|float2nice_pct )))\% & ((( lib.rna_metrics.pct_ribosomal_bases|float2nice_pct )))\% \\
((* endif *))
\hline
Median 5' bias & ((( lib.rna_metrics.median_5prime_bias ))) & - & - \\
Median 3' bias & ((( lib.rna_metrics.median_3prime_bias ))) & - & - \\
Median 5' to 3' bias & ((( lib.rna_metrics.median_5prime_to_3prime_bias ))) & - & - \\
\hline
((* if lib.run.settings.strand_protocol != "non_specific" *))
Correct strand reads & ((( lib.rna_metrics.correct_strand_reads|nice_int ))) & - & - \\
Incorrect strand reads & ((( lib.rna_metrics.incorrect_strand_reads|nice_int ))) & - & - \\
((* endif *))
\hline
\end{tabular}
\end{center}
......@@ -47,11 +47,11 @@
% inferred insert size distribution
\subsubsection{Insert size distribution}
\IfFileExists{((( sample.inserts_metrics_files.output_histogram.path )))}
\IfFileExists{((( sample.inserts_metrics_files.insert_size_histogram.path )))}
{
\begin{figure}[h!]
\centering
\includegraphics[width=0.7\textwidth]{((( sample.inserts_metrics_files.output_histogram.path )))}
\includegraphics[width=0.7\textwidth]{((( sample.inserts_metrics_files.insert_size_histogram.path )))}
\caption{Distribution of insert size length of paired-end reads mapped to opposite strands.}
\end{figure}
}
......
......@@ -87,6 +87,17 @@ class Gentrap(val root: Configurable) extends QScript
/** Whether to do simple variant calling on RNA or not */
var callVariants: Boolean = config("call_variants", default = false)
/** Settings for all Picard CollectRnaSeqMetrics runs */
private def collectRnaSeqMetricsSettings: Map[String, String] = Map(
"strand_specificity" -> (strandProtocol match {
case NonSpecific => StrandSpecificity.NONE.toString
case Dutp => StrandSpecificity.SECOND_READ_TRANSCRIPTION_STRAND.toString
case otherwise => throw new IllegalStateException(otherwise.toString)
})) ++ (ribosomalRefFlat match {
case Some(rbs) => Map("ribosomal_intervals" -> rbs.toString)
case None => Map()
})
/** Default pipeline config */
override def defaults = ConfigUtils.mergeMaps(
Map(
......@@ -101,13 +112,16 @@ class Gentrap(val root: Configurable) extends QScript
"programrecordid" -> "null"
),
// disable markduplicates since it may not play well with all aligners (this can still be overriden via config)
"mapping" -> Map("skip_markduplicates" -> true)
"mapping" -> Map(
"skip_markduplicates" -> true,
"skip_metrics" -> true
)
), super.defaults)
/** Adds output merge jobs for the given expression mode */
// TODO: can we combine the enum with the file extension (to reduce duplication and potential errors)
private def makeMergeTableJob(inFunc: (Sample => Option[File]), ext: String, idCols: List[Int], valCol: Int,
numHeaderLines: Int = 1, outBaseName: String = "all_samples",
numHeaderLines: Int = 0, outBaseName: String = "all_samples",
fallback: String = "-"): Option[MergeTables] = {
val tables = samples.values.map { inFunc }.toList.flatten
tables.nonEmpty
......@@ -167,7 +181,8 @@ class Gentrap(val root: Configurable) extends QScript
/** Merged gene base count table */
private lazy val geneBasesCountJob =
makeMergeTableJob((s: Sample) => s.geneBasesCount, ".bases_per_gene", List(1), 2, fallback = "0")
makeMergeTableJob((s: Sample) => s.geneBasesCount, ".bases_per_gene", List(1), 2, numHeaderLines = 1,
fallback = "0")
/** Heatmap job for gene base count */
private lazy val geneBasesCountHeatmapJob =
......@@ -175,7 +190,8 @@ class Gentrap(val root: Configurable) extends QScript
/** Merged exon base count table */
private lazy val exonBasesCountJob =
makeMergeTableJob((s: Sample) => s.exonBasesCount, ".bases_per_exon", List(1), 2, fallback = "0")
makeMergeTableJob((s: Sample) => s.exonBasesCount, ".bases_per_exon", List(1), 2, numHeaderLines = 1,
fallback = "0")
/** Heatmap job for exon base count */
private lazy val exonBasesCountHeatmapJob =
......@@ -183,7 +199,8 @@ class Gentrap(val root: Configurable) extends QScript
/** Merged gene FPKM table for Cufflinks, strict mode */
private lazy val geneFpkmCufflinksStrictJob =
makeMergeTableJob((s: Sample) => s.geneFpkmCufflinksStrict, ".genes_fpkm_cufflinks_strict", List(1, 7), 10, fallback = "0.0")
makeMergeTableJob((s: Sample) => s.geneFpkmCufflinksStrict, ".genes_fpkm_cufflinks_strict", List(1, 7), 10,
numHeaderLines = 1, fallback = "0.0")
/** Heatmap job for gene FPKM Cufflinks, strict mode */
private lazy val geneFpkmCufflinksStrictHeatmapJob =
......@@ -191,7 +208,8 @@ class Gentrap(val root: Configurable) extends QScript
/** Merged exon FPKM table for Cufflinks, strict mode */
private lazy val isoFpkmCufflinksStrictJob =
makeMergeTableJob((s: Sample) => s.isoformFpkmCufflinksStrict, ".isoforms_fpkm_cufflinks_strict", List(1, 7), 10, fallback = "0.0")
makeMergeTableJob((s: Sample) => s.isoformFpkmCufflinksStrict, ".isoforms_fpkm_cufflinks_strict", List(1, 7), 10,
numHeaderLines = 1, fallback = "0.0")
/** Heatmap job for isoform FPKM Cufflinks, strict mode */
private lazy val isoFpkmCufflinksStrictHeatmapJob =
......@@ -199,7 +217,8 @@ class Gentrap(val root: Configurable) extends QScript
/** Merged gene FPKM table for Cufflinks, guided mode */
private lazy val geneFpkmCufflinksGuidedJob =
makeMergeTableJob((s: Sample) => s.geneFpkmCufflinksGuided, ".genes_fpkm_cufflinks_guided", List(1, 7), 10, fallback = "0.0")
makeMergeTableJob((s: Sample) => s.geneFpkmCufflinksGuided, ".genes_fpkm_cufflinks_guided", List(1, 7), 10,
numHeaderLines = 1, fallback = "0.0")
/** Heatmap job for gene FPKM Cufflinks, guided mode */
private lazy val geneFpkmCufflinksGuidedHeatmapJob =
......@@ -207,7 +226,8 @@ class Gentrap(val root: Configurable) extends QScript
/** Merged isoforms FPKM table for Cufflinks, guided mode */
private lazy val isoFpkmCufflinksGuidedJob =
makeMergeTableJob((s: Sample) => s.isoformFpkmCufflinksGuided, ".isoforms_fpkm_cufflinks_guided", List(1, 7), 10, fallback = "0.0")
makeMergeTableJob((s: Sample) => s.isoformFpkmCufflinksGuided, ".isoforms_fpkm_cufflinks_guided", List(1, 7), 10,
numHeaderLines = 1, fallback = "0.0")
/** Heatmap job for isoform FPKM Cufflinks, guided mode */
private lazy val isoFpkmCufflinksGuidedHeatmapJob =
......@@ -215,7 +235,8 @@ class Gentrap(val root: Configurable) extends QScript
/** Merged gene FPKM table for Cufflinks, blind mode */
private lazy val geneFpkmCufflinksBlindJob =
makeMergeTableJob((s: Sample) => s.geneFpkmCufflinksBlind, ".genes_fpkm_cufflinks_blind", List(1, 7), 10, fallback = "0.0")
makeMergeTableJob((s: Sample) => s.geneFpkmCufflinksBlind, ".genes_fpkm_cufflinks_blind", List(1, 7), 10,
numHeaderLines = 1, fallback = "0.0")
/** Heatmap job for gene FPKM Cufflinks, blind mode */
private lazy val geneFpkmCufflinksBlindHeatmapJob =
......@@ -223,7 +244,8 @@ class Gentrap(val root: Configurable) extends QScript
/** Merged isoforms FPKM table for Cufflinks, blind mode */
private lazy val isoFpkmCufflinksBlindJob =
makeMergeTableJob((s: Sample) => s.isoformFpkmCufflinksBlind, ".isoforms_fpkm_cufflinks_blind", List(1, 7), 10, fallback = "0.0")
makeMergeTableJob((s: Sample) => s.isoformFpkmCufflinksBlind, ".isoforms_fpkm_cufflinks_blind", List(1, 7), 10,
numHeaderLines = 1, fallback = "0.0")
/** Heatmap job for isoform FPKM Cufflinks, blind mode */
private lazy val isoFpkmCufflinksBlindHeatmapJob =
......@@ -262,6 +284,7 @@ class Gentrap(val root: Configurable) extends QScript
/** Files that will be listed in the summary file */
def summaryFiles: Map[String, File] = Map(
"reference_fasta" -> referenceFasta(),
"annotation_refflat" -> annotationRefFlat
) ++ Map(
"annotation_gtf" -> annotationGtf,
......@@ -281,6 +304,7 @@ class Gentrap(val root: Configurable) extends QScript
"strand_protocol" -> strandProtocol.toString,
"call_variants" -> callVariants,
"remove_ribosomal_reads" -> removeRibosomalReads,
"reference" -> referenceSummary,
"version" -> FullVersion
)
......@@ -301,24 +325,6 @@ class Gentrap(val root: Configurable) extends QScript
job
}
/** General function to create CollectRnaSeqMetrics job, for per-sample and per-library runs */
protected def makeCollectRnaSeqMetricsJob(alnFile: File, outMetrics: File,
outChart: Option[File] = None): CollectRnaSeqMetrics = {
val job = new CollectRnaSeqMetrics(qscript)
job.input = alnFile
job.output = outMetrics
job.refFlat = annotationRefFlat
job.chartOutput = outChart
job.assumeSorted = true
job.strandSpecificity = strandProtocol match {
case NonSpecific => Option(StrandSpecificity.NONE.toString)
case Dutp => Option(StrandSpecificity.SECOND_READ_TRANSCRIPTION_STRAND.toString)
case _ => throw new IllegalStateException
}
job.ribosomalIntervals = ribosomalRefFlat
job