Skip to content
Snippets Groups Projects
Commit 8b557e11 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge origin/master into peter

parents e6f07f76 8a830d49
No related branches found
No related tags found
No related merge requests found
/**
* Copyright (c) 2014 Leiden University Medical Center
*
* @author Wibowo Arindrarto
*/
package nl.lumc.sasc.biopet.function
import java.io.File
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Common base class shared by the wrappers around individual seqtk subcommands.
 *
 * It only fixes how the executable is looked up and how version output is parsed;
 * the actual command line is supplied by each subclass.
 */
abstract class Seqtk extends BiopetCommandLineFunction {

  // Looked up under key "exe" in the "seqtk" config submodule, with "seqtk" given as the default.
  executable = config("exe", submodule = "seqtk", default = "seqtk")

  /** The version command is the bare executable, without any arguments. */
  override def versionCommand = executable

  /** Regex whose single capture group holds whatever follows the literal "Version: ". */
  override val versionRegex = """Version: (.*)""".r
}
/**
 * Command-line wrapper around the `seqtk seq` subcommand.
 *
 * Written against seqtk version 1.0-r63-dirty. Apart from the input and output files,
 * every field is initialised from the configuration via `config(...)`, keyed by the
 * name of the seqtk flag it maps to. Bracketed values in the field docs are the
 * defaults quoted from seqtk's own help text.
 */
class SeqtkSeq(val root: Configurable) extends Seqtk {

  /** Input file, passed to seqtk as its positional argument. */
  @Input(doc = "Input file (FASTQ or FASTA)")
  var input: File = _

  /** Output file; seqtk's standard output is redirected into it. */
  @Output(doc = "Output file")
  var output: File = _

  /** -q: mask bases with quality lower than INT [0] */
  var q: Option[Int] = config("q")

  /** -n: masked bases converted to CHAR; 0 for lowercase [0] */
  var n: String = config("n")

  /** -l: number of residues per line; 0 for 2^32-1 [0] */
  var l: Option[Int] = config("l")

  /** -Q: quality shift: ASCII-INT gives base quality [33] */
  var Q: Option[Int] = config("Q")

  /** -s: random seed (effective with -f) [11] */
  var s: Option[Int] = config("s")

  /**
   * -f: sample FLOAT fraction of sequences [1]
   *
   * NOTE(review): seqtk describes this value as a FLOAT fraction, yet the field is an
   * `Option[Int]` -- confirm whether a fractional type was intended.
   */
  var f: Option[Int] = config("f")

  /** -M: mask regions in BED or name list FILE [null] */
  var M: File = config("M")

  /** -L: drop sequences with length shorter than INT [0] */
  var L: Option[Int] = config("L")

  /** -c: mask complement region (effective with -M) */
  var c: Boolean = config("c")

  /** -r: reverse complement */
  var r: Boolean = config("r")

  /** -A: force FASTA output (discard quality) */
  var A: Boolean = config("A")

  /** -C: drop comments at the header lines */
  var C: Boolean = config("C")

  /** -N: drop sequences containing ambiguous bases */
  var N: Boolean = config("N")

  /** -1: output the 2n-1 reads only (config key "1") */
  var flag1: Boolean = config("1")

  /** -2: output the 2n reads only (config key "2") */
  var flag2: Boolean = config("2")

  /** -V: shift quality by '(-Q) - 33' */
  var V: Boolean = config("V")

  /**
   * Assembles the shell command: executable, the `seq` subcommand, value-carrying
   * flags, boolean switches, the input file, and a redirect of stdout to `output`.
   */
  def cmdLine = {
    val subcommand = required(executable) + " seq "

    // Flags that take a value, each routed through `optional`.
    val valuedFlags =
      optional("-q", q) +
        optional("-n", n) +
        optional("-l", l) +
        optional("-Q", Q) +
        optional("-s", s) +
        optional("-f", f) +
        optional("-M", M) +
        optional("-L", L)

    // Boolean switches, each routed through `conditional`.
    val switches =
      conditional(c, "-c") +
        conditional(r, "-r") +
        conditional(A, "-A") +
        conditional(C, "-C") +
        conditional(N, "-N") +
        conditional(flag1, "-1") +
        conditional(flag2, "-2") +
        conditional(V, "-V")

    val redirect = required(input) + " > " + required(output)

    subcommand + valuedFlags + switches + redirect
  }

  /**
   * Computes the value to pass as -Q when converting between quality encodings with -V.
   *
   * With -V set seqtk shifts qualities by '(-Q) - 33', so the -Q value has to be
   * derived from both the input and the output offsets rather than given directly.
   *
   * @param inQualOffset ASCII offset of the input file encoding
   * @param outQualOffset ASCII offset of the output file encoding
   * @return the value to be used with the -Q flag with -V set
   */
  def calcQForV(inQualOffset: Int, outQualOffset: Int): Int =
    // Same value as inQualOffset - (outQualOffset - 33).
    inQualOffset - outQualOffset + 33
}
#!/usr/bin/env python
# Adapted from: http://tech.yipit.com/2011/11/16/183772396/
# Changes by Wibowo Arindrarto
# Changes:
# - Allow code modification by linters to be committed
# - Updated CHECKS
# - Python 3 calls + code style updates
#
# Usage: save this file into your .git/hooks directory as `pre-commit`
# and set it to executable
import os
import re
import subprocess
import sys
# Matches a `git status --porcelain` line whose status starts with "M" or "A";
# everything after the following whitespace is captured as the group "name".
modified = re.compile(r"^[MA]\s+(?P<name>.*)$")

# One entry per tool the hook runs. Keys as used by check_files() and main():
#   exe            - executable that must be found by `which` before anything runs
#   output         - banner printed before the check starts
#   command        - shell command template; %s is replaced by the file name
#   match_files    - regexes; only files matching one of them are checked
#   print_filename - whether reported lines are prefixed with the file name
#   commit_changes - whether a file is `git add`-ed after a silent run
CHECKS = [
    dict(
        exe="scalariform",
        output="Formatting code with scalariform ...",
        command="scalariform -s=2.11.1 -p=scalariformStyle.properties --quiet %s",
        match_files=[".*scala$"],
        print_filename=False,
        commit_changes=True,
    ),
]
def matches_file(file_name, match_files):
    """Tell whether ``file_name`` matches at least one pattern in ``match_files``.

    Each entry of ``match_files`` is a regular expression applied with
    ``match`` semantics, i.e. anchored at the start of ``file_name``.
    Returns ``False`` when ``match_files`` is empty.
    """
    for pattern in match_files:
        if re.match(pattern, file_name):
            return True
    return False
def check_files(files, check):
    """Run one check's command on every applicable file and report any output.

    A file is processed when it matches ``check["match_files"]`` (if that key is
    present) and does not match ``check["ignore_files"]`` (if that key is
    present). The file name is shell-quoted before being substituted for the
    ``%s`` in ``check["command"]``, so the template should contain a bare
    ``%s`` without quotes of its own.

    Any stdout or stderr produced by the command counts as a failure: the
    stdout lines are printed tab-indented (prefixed with the file name when
    ``print_filename`` is set), stderr is printed as-is, and the result becomes
    1. A silent run is treated as success and, when ``commit_changes`` is set,
    the file is staged with ``git add``.

    :param files: iterable of file names to consider
    :param check: dict describing the check; ``output`` and ``command`` are
        required, ``match_files``, ``ignore_files``, ``print_filename`` and
        ``commit_changes`` are optional
    :returns: 0 when every processed file ran silently, 1 otherwise
    """
    # Imported locally so the block stays self-contained; pipes.quote is the
    # Python 2 spelling of shlex.quote.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    result = 0
    print(check["output"])
    for file_name in files:
        if "match_files" in check and \
                not matches_file(file_name, check["match_files"]):
            continue
        if "ignore_files" in check and \
                matches_file(file_name, check["ignore_files"]):
            continue

        # Quote the name so spaces or shell metacharacters in a path cannot
        # split the argument or inject extra commands. Text-mode pipes make
        # out/err str on Python 3 instead of bytes (which printed as b'...').
        process = subprocess.Popen(check["command"] % quote(file_name),
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   shell=True, universal_newlines=True)
        out, err = process.communicate()

        if out or err:
            if check.get("print_filename"):
                prefix = "\t%s:" % file_name
            else:
                prefix = "\t"
            print("\n".join("%s%s" % (prefix, line)
                            for line in out.splitlines()))
            if err:
                print(err)
            result = 1
        elif check.get("commit_changes"):
            # The tool ran silently; stage whatever it may have rewritten.
            p = subprocess.Popen(["git", "add", file_name],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            p.communicate()
    return result
def main(all_files):
    """Run every entry of CHECKS as a pre-commit step and exit with the result.

    Steps, in order:
      1. Exit with status 1 if `which` cannot find a check's executable.
      2. Stash (including untracked files, keeping the index) so that only the
         content about to be committed is in the working tree.
      3. Collect the files to check: everything below "." when ``all_files``
         is true, otherwise the names from `git status --porcelain` lines
         matched by ``modified``.
      4. Run each check over those files.
      5. Restore the stash (see the strategy comment below) and exit with 1 if
         any check reported output, 0 otherwise.

    :param all_files: check every file under the current directory instead of
        only the files reported by git
    """
    # Check that the required linters and code checkers are all present
    for check in CHECKS:
        p = subprocess.Popen(["which", check["exe"]], stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        out, _ = p.communicate()
        if not out:
            print("Required commit hook executable '%s' not found." % check["exe"])
            sys.exit(1)

    # Stash any changes to the working tree that are not going to be committed
    subprocess.call(["git", "stash", "-u", "--keep-index"], stdout=subprocess.PIPE)

    files = []
    if all_files:
        for root, _, file_names in os.walk("."):
            for file_name in file_names:
                files.append(os.path.join(root, file_name))
    else:
        # Text mode: on Python 3 the default bytes output cannot be matched
        # against the str pattern in `modified` (TypeError).
        p = subprocess.Popen(["git", "status", "--porcelain"],
                             stdout=subprocess.PIPE, universal_newlines=True)
        out, _ = p.communicate()
        for line in out.splitlines():
            match = modified.match(line)
            if match:
                files.append(match.group("name"))

    result = 0
    for check in CHECKS:
        result = check_files(files, check) or result

    # Strategy:
    #   - Check if the linters made any changes
    #   - If there are no changes, pop the stash and commit
    #   - Otherwise:
    #       - Stash the change
    #       - Pop stash@{1}
    #       - Checkout stash@{0}
    #       - Drop stash@{0} (cannot pop directly since stash may conflict)
    #       - Commit
    # This is because the initial stash will conflict with any possible
    # changes made by the linters
    p = subprocess.Popen(["git", "status", "--porcelain"],
                         stdout=subprocess.PIPE)
    out, _ = p.communicate()
    if not out.strip():
        subprocess.call(["git", "stash", "pop"],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    else:
        subprocess.call(["git", "stash"],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        subprocess.call(["git", "stash", "pop", "--quiet", "--index", "stash@{1}"],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        subprocess.call(["git", "checkout", "stash", "--", "."],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        subprocess.call(["git", "stash", "drop"],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    sys.exit(result)
if __name__ == "__main__":
    # "--all-files" as the first argument makes main() walk every file below
    # "." instead of using the files reported by `git status`.
    all_files = len(sys.argv) > 1 and sys.argv[1] == "--all-files"
    main(all_files)
......@@ -19,6 +19,9 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction {
@Output(doc = "Output fastq file")
var fastq_output: File = _
@Output(doc = "Output statistics file")
var stats_output: File = _
executable = config("exe", default = "cutadapt")
override def versionCommand = executable + " --version"
override val versionRegex = """(.*)""".r
......@@ -49,7 +52,8 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction {
optional("-M", opt_maximum_length) +
// input / output
required(fastq_input) +
" > " + required(fastq_output)
required("--output", fastq_output) +
" > " + required(stats_output)
} else {
analysisName = getClass.getSimpleName + "-ln"
"ln -sf " +
......
......@@ -11,25 +11,25 @@ import nl.lumc.sasc.biopet.core.config._
class Sickle(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "R1 input")
var input_R1: File = null
var input_R1: File = _
@Input(doc = "R2 input", required = false)
var input_R2: File = null
var input_R2: File = _
@Input(doc = "qualityType file", required = false)
var qualityTypeFile: File = _
@Output(doc = "R1 output")
var output_R1: File = null
var output_R1: File = _
@Output(doc = "R2 output", required = false)
var output_R2: File = null
var output_R2: File = _
@Output(doc = "singles output", required = false)
var output_singles: File = null
var output_singles: File = _
@Output(doc = "stats output")
var output_stats: File = null
var output_stats: File = _
executable = config("exe", default = "sickle")
var qualityType: String = config("qualitytype")
......
......@@ -128,13 +128,19 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
var R2: File = new File(R2_in)
if (!skipClip) { // Adapter clipping
val cutadapt_R1 = new Cutadapt(this)
if (!skipTrim || paired) cutadapt_R1.isIntermediate = true
cutadapt_R1.fastq_input = R1
cutadapt_R1.fastq_output = swapExt(outDir, R1, R1_ext, ".clip" + R1_ext)
cutadapt_R1.stats_output = swapExt(outDir, R1, R1_ext, ".clip.stats")
if (outputFiles.contains("contams_R1")) cutadapt_R1.contams_file = outputFiles("contams_R1")
add(cutadapt_R1)
R1 = cutadapt_R1.fastq_output
if (paired) {
val cutadapt_R2 = new Cutadapt(this)
if (!skipTrim || paired) cutadapt_R2.isIntermediate = true
......
......@@ -9,6 +9,7 @@ formatXml=true
indentLocalDefs=false
indentPackageBlocks=true
indentSpaces=2
placeScaladocAsterisksBeneathSecondAsterisk=false
preserveDanglingCloseParenthesis=false
preserveSpaceBeforeArguments=false
rewriteArrowSymbols=false
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment