Commit e28ecd68 authored by Peter van 't Hof

Merge remote-tracking branch 'remotes/origin/develop' into fix-BIOPET-546

parents 659e18de 6abbb191
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.tools.vcfstats
/**
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.tools.vcfstats
import java.io.{ File, PrintWriter }
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.tools
import java.io.File
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.tools
import java.io.File
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.tools
import java.nio.file.Paths
......
......@@ -21,7 +21,7 @@
<parent>
<artifactId>Biopet</artifactId>
<groupId>nl.lumc.sasc</groupId>
<version>0.8.0-SNAPSHOT</version>
<version>0.9.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.utils
import java.io.File
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.utils
/**
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.utils
import org.scalatest.Matchers
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.utils
import htsjdk.variant.variantcontext.{ Allele, GenotypeBuilder }
......
......@@ -24,7 +24,7 @@
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.8.0-SNAPSHOT</version>
<version>0.9.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
......
......@@ -60,7 +60,7 @@ samples:
bam: MySecond.bam
```
Note that there is a tool called [SamplesTsvToJson](../tools/SamplesTsvToJson.md) this enables a user to get the sample config without any chance of creating a wrongly formatted JSON file.
Note that there is a tool called [SamplesTsvToConfig](../tools/SamplesTsvToConfig.md) which lets the user generate the sample config without the risk of creating a wrongly formatted file.
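A minimal sketch of what a tab-separated input for this converter might look like is shown below; the column names `sample`, `library` and `bam` are assumptions for illustration, so check the linked tool page for the exact expected columns.
```
sample	library	bam
Sample_ID_1	lib_1	MyFirst.bam
Sample_ID_2	lib_1	MySecond.bam
```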
#### Tags
......@@ -139,3 +139,37 @@ During execution, biopet framework will resolve the value for each ConfigNamespa
To check whether the created JSON file is correct, there are several possibilities: the simplest is to use [this](http://jsonformatter.curiousconcept.com/)
website. It is also possible to validate JSON files with Python, Scala or any other programming language, but this requires some more knowledge.
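As a quick local check, any JSON parser will do; for example (the file name `samples.json` is only a placeholder):
``` bash
# Pretty-prints the file if it is valid JSON, otherwise reports the parse error
python -m json.tool samples.json
```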
# Creating config files with Biopet
For the pipelines Gentrap, MultiSampleMapping and Shiva it is possible to use Biopet itself to create the config files. When Biopet is called with the keyword *template*, the user is prompted to enter the values for the parameters needed by the pipeline, and Biopet generates a config file that can be used as input when running the pipeline. The purpose is to ease the step of creating config files. This is especially useful when Biopet has been pre-configured with a list of reference genomes: the user then only needs to specify which reference genome to use, and the locations of the reference genome files are derived from Biopet's global configuration.
<br/>
<b> Example </b>
For viewing the pipelines for which this functionality is supported:
``` bash
biopet template
```
For getting help about using it for a specific pipeline:
``` bash
biopet template Gentrap -h
```
For running the tool:
``` bash
biopet template Gentrap -o gentrap_config.yml -s gentrap_run.sh
```
<br/>
<b> Description of the parameters </b>
| Flag (short)| Flag (long) | Type | Function |
| ------------ | ----------- | ---- | -------- |
| -o | --outputConfig | Path (**required**) | Name of the config file that gets generated.|
| -s | --outputScript | Path (optional) | Biopet can also output a script that can be used directly to run the pipeline; the pipeline call is generated with the config file as input. This parameter sets the name of the script file.|
| -t | --template | Path (optional) | A template file with two *%s* placeholders is required for generating the script. The first placeholder is replaced with the name of the pipeline, the second with the paths to the sample and settings config files (a minimal sketch of such a template is shown after this table). When Biopet has been pre-configured with a default template file, setting this parameter is optional. |
| | --expert | | This flag enables the user to configure a more extensive list of parameters for the pipeline. |
\ No newline at end of file
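A minimal sketch of what such a template file could look like is given below. The exact form of the generated pipeline call is an assumption here; the only thing taken from the table above is that the first `%s` is replaced by the pipeline name and the second by the config file paths.
``` bash
#!/bin/bash
# Hypothetical template: the first %s becomes the pipeline name,
# the second %s becomes the generated config file path(s)
biopet pipeline %s -config %s -run
```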
......@@ -13,7 +13,7 @@ Biopet (Bio Pipeline Execution Toolkit) is the main pipeline development framewo
Biopet is available as a JAR package in SHARK. The easiest way to start using it is to activate the `biopet` environment module, which sets useful aliases and environment variables:
~~~
$ module load biopet/v0.6.0
$ module load biopet/v0.8.0
~~~
With each Biopet release, an accompanying environment module is also released. The latest release is version 0.8.0, thus `biopet/v0.8.0` is the module you would want to load.
......@@ -77,28 +77,6 @@ Biopet is based on the Queue framework developed by the Broad Institute as part
We welcome any kind of contribution, be it merge requests on the code base, documentation updates, or any other kind of fix! The main language we use is Scala, though the repository also contains a small bit of Python and R. Our main code repository and issue tracker are located at [https://github.com/biopet/biopet](https://github.com/biopet/biopet).
## Local development setup
To develop Biopet, Java 7, Maven 3.2.2, and GATK Queue 3.4 are required. Please consult the Java and Maven homepages for their respective installation instructions. After you have both Java and Maven installed, you will need to install GATK Queue. However, as the GATK Queue package is not yet available as an artifact in Maven Central, you will need to download, compile, and install it first.
~~~
$ git clone https://github.com/broadgsa/gatk-protected
$ cd gatk-protected
$ git checkout 3.4 # the current release is based on GATK 3.4
$ mvn clean install
~~~
This will install all the required dependencies into your local Maven repository. After this is done, you can clone our repository and test whether everything builds fine:
~~~
$ git clone https://github.com/biopet/biopet.git
$ cd biopet
$ mvn clean install
~~~
If everything builds fine, you're good to go! Otherwise, don't hesitate to contact us or file an issue at our issue tracker.
## About
Go to the [about page](general/about.md)
......
# Release notes Biopet version 0.8.0
### Highlights
* Biopet is now fully hosted on GitHub.
* Several major improvements when using Centrifuge for metagenomics data analysis
* Added XHMM as a tool for copy number analysis
* Added support for SLURM
* Added tool to create a graphical representation of pipeline progress.
* Added soft clipping metrics to HTML report
## Full change list
### Task
* [BIOPET-508] - Move Biopet completely to GitHub
### Bug
* [BIOPET-337] - Rscript summary from Queue doesn't work
* [BIOPET-353] - Log outputs don't contain classname (.out issue)
* [BIOPET-369] - cn.mops fails on small chromosomes
* [BIOPET-379] - Biopet error when JSON file not correctly formatted
* [BIOPET-383] - Sample json not checked for invalid libraries
* [BIOPET-384] - Select proper values in VcfWithVcf when number=A
* [BIOPET-385] - Cannot combine doNotRemove in VepNormalizer with chunked Toucan
* [BIOPET-386] - VcfFilter's mustHaveVariant option ignores certain genotypes
* [BIOPET-394] - Flanking on pysvtools is required
* [BIOPET-403] - Report centrifuge does not display separate library plots when enabled
* [BIOPET-405] - Toucan with custom fields fails
* [BIOPET-409] - CatVariants failing after Delly
* [BIOPET-410] - Krona plots in GearsSingle are not correct for centrifuge
* [BIOPET-415] - SortVcf extensions does not see .tbi file as output
* [BIOPET-418] - Nullpointer when config file is empty
* [BIOPET-450] - Base counts tests are failing
* [BIOPET-458] - Pipeline help status no longer given
* [BIOPET-473] - Dustbin pipeline does not show centrifuge report
* [BIOPET-481] - Chunksize can't go higher than 2G because of the limitations of an Int
* [BIOPET-485] - Files from FIFOs from paired-end Flexiprep are in the graph
* [BIOPET-486] - BreakdancerCaller does not depend on bam file in graph
* [BIOPET-488] - BamStats does not depend on index of bam file
* [BIOPET-491] - Summary of Flexiprep does not depend on qc_cmd
* [BIOPET-493] - FastqSplitter is disconnected from the graph
* [BIOPET-495] - Fix XHMM
* [BIOPET-496] - Alignment plot does not show stats
* [BIOPET-504] - Validate vcf step does not have an output file
* [BIOPET-507] - Too many file handles for .out files
* [BIOPET-509] - VcfFilter MustHaveVariant does not check if sample exists
* [BIOPET-516] - Config value is not correct for skip_trim and skip_clip
* [BIOPET-526] - bammetrics summary fails with empty histogram array
* [BIOPET-544] - Link to assembly report is broken
### Improvement
* [BIOPET-309] - Collapse output files of vcfstats into 1 file
* [BIOPET-317] - Remove unnecessary intermediate bams to free up more space when pipeline finishes successfully
* [BIOPET-359] - Enable htseq to count multiple-alignments
* [BIOPET-374] - Add clipping stats to html report
* [BIOPET-387] - Convenience methods for semantic versions in Version
* [BIOPET-389] - Shiva is overzealous with sorting amplicon bed files
* [BIOPET-395] - Lazy dict cache in reference module
* [BIOPET-396] - Add support for multiple versions of annotations in config file
* [BIOPET-398] - Change default in Gears to centrifuge
* [BIOPET-406] - Add a better error / exception when output dir is not writable
* [BIOPET-411] - Option to send full fastq file to gears instead of only unmapped reads
* [BIOPET-412] - Make files intermediate in Gears
* [BIOPET-413] - Implementing piping for centrifuge
* [BIOPET-414] - Add all arguments to centrifuge
* [BIOPET-416] - Add stats output of centrifuge
* [BIOPET-426] - Make deps.json run in normal mode
* [BIOPET-463] - Test and update docs GEARS for Centrifuge
* [BIOPET-464] - Implement skip_flexiprep in gears
* [BIOPET-469] - Adding functional testing on XHMM
* [BIOPET-471] - Add aggregated stats to BamStats
* [BIOPET-472] - Documentation for XHMM feature
* [BIOPET-475] - Reorganize .log dir
* [BIOPET-477] - Adding refcalls to MpileupToVcf
* [BIOPET-480] - Remove duplicate jobs in bam2wig
* [BIOPET-483] - Add fa.gz / samtools faidx on fa.gz to IndexReference
* [BIOPET-484] - Update documentation for Biopet developers
* [BIOPET-489] - Fix compile warnings
* [BIOPET-498] - Add testing for PipelineStatus
* [BIOPET-503] - Collapse output files of bamstats into 1 file
* [BIOPET-506] - Add check if R1 and R2 are the same
* [BIOPET-525] - Adding unassigned reads to Krona plot
* [BIOPET-532] - Add jenkins setup to documentation and README
### New Feature
* [BIOPET-399] - Add walltime to core
* [BIOPET-402] - Tool for filtering fastq files based on read names
* [BIOPET-417] - Config template tools
* [BIOPET-419] - Create Tsv to Samples.yml converter for sample sheet
* [BIOPET-425] - Add main jobs to deps.json
* [BIOPET-427] - Compress deps.json with only main jobs
* [BIOPET-428] - Generate a dot file with only main jobs
* [BIOPET-460] - Vcf/dbsnp validate step
* [BIOPET-466] - Write XHMM wrappers
* [BIOPET-467] - Write XHMMMethod in Shiva
* [BIOPET-468] - XCNV to BED conversion tool
* [BIOPET-470] - Implement DepthOfCoverage wrapper
* [BIOPET-476] - Tool to check for status of pipeline
* [BIOPET-497] - Add status to compressed plot
* [BIOPET-500] - Add support for Slurm
* [BIOPET-421] - Implement XHMM
......@@ -56,19 +56,19 @@
<artifactId>BiopetCore</artifactId>
<!--TODO: replace version of pipeline to a fixed version -->
<version>0.8.0-SNAPSHOT</version>
<version>0.9.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetExtensions</artifactId>
<version>0.8.0-SNAPSHOT</version>
<version>0.9.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Shiva</artifactId>
<!--TODO: replace version of pipeline to a fixed version -->
<version>0.8.0-SNAPSHOT</version>
<version>0.9.0-SNAPSHOT</version>
</dependency>
</dependencies>
......
......@@ -24,7 +24,7 @@
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.8.0-SNAPSHOT</version>
<version>0.9.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
......
......@@ -21,7 +21,7 @@
<parent>
<artifactId>Biopet</artifactId>
<groupId>nl.lumc.sasc</groupId>
<version>0.8.0-SNAPSHOT</version>
<version>0.9.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.gears
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, SampleLibraryTag }
......
......@@ -26,7 +26,7 @@
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.8.0-SNAPSHOT</version>
<version>0.9.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
......
......@@ -64,10 +64,10 @@ class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript
val fastaFile = new File(genomeDir, "reference.fa")
val downloadFastaFile = new File(genomeDir, "download.reference.fa")
genomeConfig.get("ncbi_assembly_id") match {
case Some(assemblyID: String) =>
genomeConfig.get("ncbi_assembly_report") match {
case Some(assemblyReport: String) =>
val downloadAssembly = new DownloadNcbiAssembly(this)
downloadAssembly.assemblyId = assemblyID
downloadAssembly.assemblyReport = new File(assemblyReport)
downloadAssembly.output = downloadFastaFile
downloadAssembly.outputReport = new File(genomeDir, s"$speciesName-$genomeName.assembly.report")
downloadAssembly.nameHeader = genomeConfig.get("ncbi_assembly_header_name").map(_.toString)
......
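Based on the keys this snippet reads (`ncbi_assembly_id`, `ncbi_assembly_report` and `ncbi_assembly_header_name`), a genome entry in the download config could look roughly like the sketch below; the surrounding nesting and the example values are assumptions for illustration, not taken from this commit.
``` yaml
references:
  homo_sapiens:
    GRCh38:
      # accession handed to DownloadNcbiAssembly (example value)
      ncbi_assembly_id: GCF_000001405.26
      # local copy of the matching NCBI assembly report
      ncbi_assembly_report: /path/to/GRCh38.assembly_report.txt
      # which report column to use for FASTA header names (optional)
      ncbi_assembly_header_name: Sequence-Name
```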