Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
1ddd45ad
Commit
1ddd45ad
authored
Aug 25, 2014
by
Peter van 't Hof
Browse files
Added Sage pipeline
parent
6b69a6d5
Changes
9
Hide whitespace changes
Inline
Side-by-side
biopet-framework/examples/sage.json
0 → 100644
View file @
1ddd45ad
{
  "bedtools": {
    "exe": "test"
  },
  "samtools": {
    "exe": "test"
  },
  "reference": "/blabla/blabla.fa",
  "fastqc": {
    "exe": "/home/pjvan_thof/Downloads/FastQC/fastqc"
  },
  "bwa": {
    "exe": "test"
  },
  "flexiprep": {
    "fastqc": {
      "exe": "/home/pjvan_thof/Downloads/FastQC/fastqc"
    },
    "seqtk": {
      "exe": "/data/DIV5/SASC/common/programs/seqtk/seqtk/seqtk"
    },
    "cutadapt": {
      "exe": "/home/pjvan_thof/.local/bin/cutadapt"
    },
    "sickle": {
      "exe": "/data/DIV5/SASC/pjvan_thof/bin/sickle"
    }
  }
}
biopet-framework/pom.xml
View file @
1ddd45ad
...
...
@@ -91,6 +91,8 @@
<filter>
<artifact>
org.broadinstitute.gatk:gatk-queue-package-distribution
</artifact>
<includes>
<include>
com/sun/**
</include>
<include>
org/ggf/**
</include>
<include>
picard/**
</include>
<include>
htsjdk/**
</include>
<include>
javassist/**
</include>
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala
View file @
1ddd45ad
...
...
@@ -9,6 +9,7 @@ import nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantcalling
import
nl.lumc.sasc.biopet.pipelines.gatk.GatkVcfSampleCompare
import
nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap
import
nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import
nl.lumc.sasc.biopet.pipelines.sage.Sage
object
BiopetExecutable
{
...
...
@@ -21,7 +22,8 @@ object BiopetExecutable {
"gatk-genotyping"
->
GatkGenotyping
,
"gatk-variantcalling"
->
GatkVariantcalling
,
"gatk-pipeline"
->
GatkPipeline
,
"gatk-vcf-sample-compare"
->
GatkVcfSampleCompare
"gatk-vcf-sample-compare"
->
GatkVcfSampleCompare
,
"sage"
->
Sage
)
/**
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
View file @
1ddd45ad
...
...
@@ -73,9 +73,9 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
def
biopetScript
()
{
runInitialJobs
()
if
(
paired
)
runTrimClip
(
outputFiles
(
"fastq_input_R1"
),
outputFiles
(
"fastq_input_R2"
),
outputDir
)
else
runTrimClip
(
outputFiles
(
"fastq_input_R1"
),
outputDir
)
val
out
=
if
(
paired
)
runTrimClip
(
outputFiles
(
"fastq_input_R1"
),
outputFiles
(
"fastq_input_R2"
),
outputDir
)
else
runTrimClip
(
outputFiles
(
"fastq_input_R1"
),
outputDir
)
runFinalize
(
List
(
outputFiles
(
"output_R1"
)),
if
(
outputFiles
.
contains
(
"output_R2"
))
List
(
outputFiles
(
"output_R2"
))
else
List
())
}
...
...
@@ -115,13 +115,13 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
return
fastqcToContams
.
out
}
def
runTrimClip
(
R1_in
:
File
,
outDir
:
String
,
chunk
:
String
)
{
def
runTrimClip
(
R1_in
:
File
,
outDir
:
String
,
chunk
:
String
)
:
(
File
,
File
,
List
[
File
])
=
{
runTrimClip
(
R1_in
,
new
File
(
""
),
outDir
,
chunk
)
}
def
runTrimClip
(
R1_in
:
File
,
outDir
:
String
)
{
def
runTrimClip
(
R1_in
:
File
,
outDir
:
String
)
:
(
File
,
File
,
List
[
File
])
=
{
runTrimClip
(
R1_in
,
new
File
(
""
),
outDir
,
""
)
}
def
runTrimClip
(
R1_in
:
File
,
R2_in
:
File
,
outDir
:
String
)
{
def
runTrimClip
(
R1_in
:
File
,
R2_in
:
File
,
outDir
:
String
)
:
(
File
,
File
,
List
[
File
])
=
{
runTrimClip
(
R1_in
,
R2_in
,
outDir
,
""
)
}
def
runTrimClip
(
R1_in
:
File
,
R2_in
:
File
,
outDir
:
String
,
chunkarg
:
String
)
:
(
File
,
File
,
List
[
File
])
=
{
...
...
@@ -227,8 +227,8 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
add
(
Gzip
(
this
,
fastq_R1
,
R1
))
if
(
paired
)
add
(
Gzip
(
this
,
fastq_R2
,
R2
))
outputFiles
+=
(
"output_R1"
->
R1
)
if
(
paired
)
outputFiles
+=
(
"output_R2"
->
R2
)
outputFiles
+=
(
"output_R1
_gzip
"
->
R1
)
if
(
paired
)
outputFiles
+=
(
"output_R2
_gzip
"
->
R2
)
if
(!
skipTrim
||
!
skipClip
)
{
val
md5sum_R1
=
Md5sum
(
this
,
R1
,
outputDir
)
...
...
@@ -239,15 +239,13 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
add
(
md5sum_R2
)
summary
.
addMd5sum
(
md5sum_R2
,
R2
=
true
,
after
=
true
)
}
fastqc_R1_after
=
Fastqc
(
this
,
outputFiles
(
"output_R1"
)
,
outputDir
+
"/"
+
R1_name
+
".qc.fastqc/"
)
fastqc_R1_after
=
Fastqc
(
this
,
R1
,
outputDir
+
"/"
+
R1_name
+
".qc.fastqc/"
)
add
(
fastqc_R1_after
)
summary
.
addFastqc
(
fastqc_R1_after
,
after
=
true
)
outputFiles
+=
(
"fastqc_R1_final"
->
fastqc_R1_after
.
output
)
if
(
paired
)
{
fastqc_R2_after
=
Fastqc
(
this
,
outputFiles
(
"output_R2"
)
,
outputDir
+
"/"
+
R2_name
+
".qc.fastqc/"
)
fastqc_R2_after
=
Fastqc
(
this
,
R2
,
outputDir
+
"/"
+
R2_name
+
".qc.fastqc/"
)
add
(
fastqc_R2_after
)
summary
.
addFastqc
(
fastqc_R2_after
,
R2
=
true
,
after
=
true
)
outputFiles
+=
(
"fastqc_R2_final"
->
fastqc_R2_after
.
output
)
}
}
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala
0 → 100644
View file @
1ddd45ad
package
nl.lumc.sasc.biopet.pipelines.sage
import
nl.lumc.sasc.biopet.core.
{
MultiSampleQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.Cat
import
nl.lumc.sasc.biopet.extensions.bedtools.BedtoolsCoverage
import
nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles
import
nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
import
nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import
nl.lumc.sasc.biopet.scripts.PrefixFastq
import
nl.lumc.sasc.biopet.scripts.SquishBed
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.function._
class
Sage
(
val
root
:
Configurable
)
extends
QScript
with
MultiSampleQScript
{
def
this
()
=
this
(
null
)
@Input
(
doc
=
"countBed"
,
required
=
false
)
var
countBed
:
File
=
_
@Input
(
doc
=
"squishedCountBed, by suppling this file the auto squish job will be skipped"
,
required
=
false
)
var
squishedCountBed
:
File
=
_
def
init
()
{
for
(
file
<-
configfiles
)
globalConfig
.
loadConfigFile
(
file
)
if
(
countBed
==
null
&&
squishedCountBed
==
null
)
throw
new
IllegalStateException
(
"No bedfile supplied, please add a countBed or squishedCountBed"
)
}
def
biopetScript
()
{
if
(
squishedCountBed
==
null
)
{
val
squishBed
=
SquishBed
.
apply
(
this
,
countBed
,
outputDir
)
add
(
squishBed
)
squishedCountBed
=
squishBed
.
output
}
// Tag library creation
runSamplesJobs
}
// Called for each sample
def
runSingleSampleJobs
(
sampleConfig
:
Map
[
String
,
Any
])
:
Map
[
String
,
List
[
File
]]
=
{
var
outputFiles
:
Map
[
String
,
List
[
File
]]
=
Map
()
var
libraryBamfiles
:
List
[
File
]
=
List
()
var
libraryFastqFiles
:
List
[
File
]
=
List
()
val
sampleID
:
String
=
sampleConfig
(
"ID"
).
toString
val
sampleDir
:
String
=
outputDir
+
sampleID
+
"/"
for
((
library
,
libraryFiles
)
<-
runLibraryJobs
(
sampleConfig
))
{
libraryFastqFiles
+:=
libraryFiles
(
"prefix_fastq"
)
libraryBamfiles
+:=
libraryFiles
(
"FinalBam"
)
}
val
bamFile
:
File
=
if
(
libraryBamfiles
.
size
==
1
)
libraryBamfiles
.
head
else
if
(
libraryBamfiles
.
size
>
1
)
{
val
mergeSamFiles
=
MergeSamFiles
(
this
,
libraryBamfiles
,
sampleDir
)
add
(
mergeSamFiles
)
mergeSamFiles
.
output
}
else
null
val
fastqFile
:
File
=
if
(
libraryBamfiles
.
size
==
1
)
libraryBamfiles
.
head
else
if
(
libraryBamfiles
.
size
>
1
)
{
val
cat
=
Cat
.
apply
(
this
,
libraryBamfiles
,
sampleDir
+
sampleID
+
".fastq"
)
add
(
cat
)
cat
.
output
}
else
null
this
.
addBedtoolsCounts
(
bamFile
,
sampleID
,
sampleDir
)
return
outputFiles
}
// Called for each run from a sample
def
runSingleLibraryJobs
(
runConfig
:
Map
[
String
,
Any
],
sampleConfig
:
Map
[
String
,
Any
])
:
Map
[
String
,
File
]
=
{
var
outputFiles
:
Map
[
String
,
File
]
=
Map
()
val
runID
:
String
=
runConfig
(
"ID"
).
toString
val
sampleID
:
String
=
sampleConfig
(
"ID"
).
toString
val
runDir
:
String
=
outputDir
+
sampleID
+
"/run_"
+
runID
+
"/"
if
(
runConfig
.
contains
(
"R1"
))
{
val
flexiprep
=
new
Flexiprep
(
this
)
flexiprep
.
outputDir
=
runDir
+
"flexiprep/"
flexiprep
.
input_R1
=
new
File
(
runConfig
(
"R1"
).
toString
)
flexiprep
.
skipClip
=
true
flexiprep
.
skipTrim
=
true
flexiprep
.
sampleName
=
sampleID
flexiprep
.
libraryName
=
runID
flexiprep
.
init
flexiprep
.
biopetScript
addAll
(
flexiprep
.
functions
)
val
flexiprepOutput
=
for
((
key
,
file
)
<-
flexiprep
.
outputFiles
if
key
.
endsWith
(
"output_R1"
))
yield
file
val
prefixFastq
=
PrefixFastq
.
apply
(
this
,
flexiprepOutput
.
head
,
runDir
)
prefixFastq
.
prefix
=
config
(
"sage_tag"
,
default
=
"CATG"
)
add
(
prefixFastq
)
outputFiles
+=
(
"prefix_fastq"
->
prefixFastq
.
output
)
val
mapping
=
new
Mapping
(
this
)
mapping
.
skipFlexiprep
=
true
mapping
.
input_R1
=
prefixFastq
.
output
mapping
.
RGLB
=
runConfig
(
"ID"
).
toString
mapping
.
RGSM
=
sampleConfig
(
"ID"
).
toString
if
(
runConfig
.
contains
(
"PL"
))
mapping
.
RGPL
=
runConfig
(
"PL"
).
toString
if
(
runConfig
.
contains
(
"PU"
))
mapping
.
RGPU
=
runConfig
(
"PU"
).
toString
if
(
runConfig
.
contains
(
"CN"
))
mapping
.
RGCN
=
runConfig
(
"CN"
).
toString
mapping
.
outputDir
=
runDir
mapping
.
init
mapping
.
biopetScript
addAll
(
mapping
.
functions
)
if
(
config
(
"library_counts"
,
default
=
false
).
getBoolean
)
this
.
addBedtoolsCounts
(
mapping
.
outputFiles
(
"finalBamFile"
),
sampleID
+
"-"
+
runID
,
runDir
)
outputFiles
+=
(
"FinalBam"
->
mapping
.
outputFiles
(
"finalBamFile"
))
}
else
this
.
logger
.
error
(
"Sample: "
+
sampleID
+
": No R1 found for run: "
+
runConfig
)
return
outputFiles
}
def
addBedtoolsCounts
(
bamFile
:
File
,
outputPrefix
:
String
,
outputDir
:
String
)
{
add
(
BedtoolsCoverage
(
this
,
bamFile
,
squishedCountBed
,
outputDir
+
outputPrefix
+
".sense.count"
,
depth
=
false
,
sameStrand
=
true
,
diffStrand
=
false
))
add
(
BedtoolsCoverage
(
this
,
bamFile
,
squishedCountBed
,
outputDir
+
outputPrefix
+
".antisense.count"
,
depth
=
false
,
sameStrand
=
false
,
diffStrand
=
true
))
add
(
BedtoolsCoverage
(
this
,
bamFile
,
squishedCountBed
,
outputDir
+
outputPrefix
+
".count"
,
depth
=
false
,
sameStrand
=
false
,
diffStrand
=
false
))
}
}
/** Command-line entry point for the Sage pipeline. */
object Sage extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/sage/Sage.class"
}
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala
0 → 100644
View file @
1ddd45ad
package
nl.lumc.sasc.biopet.scripts
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
java.io.File
/**
 * Wrapper around the bed_squish.py script, which removes overlapping
 * regions from a BED file.
 */
class SquishBed(val root: Configurable) extends PythonCommandLineFunction {
  setPythonScript("bed_squish.py")

  @Input(doc = "Input file")
  var input: File = _

  @Output(doc = "output File")
  var output: File = _

  /** bed_squish.py takes the input and output paths as positional arguments. */
  def cmdLine = getPythonCommand +
    required(input) +
    required(output)
}
object SquishBed {
  /**
   * Creates a configured SquishBed job.
   *
   * The output is written next to the other outputs in `outputDir`,
   * named after the input with ".bed" replaced by ".squish.bed".
   *
   * @param root      parent configurable (pipeline)
   * @param input     BED file to squish
   * @param outputDir directory for the squished output
   * @return configured SquishBed function
   */
  def apply(root: Configurable, input: File, outputDir: String): SquishBed = {
    val squishBed = new SquishBed(root)
    squishBed.input = input
    squishBed.output = new File(outputDir, input.getName.stripSuffix(".bed") + ".squish.bed")
    // Idiom fix: the last expression is the result; no explicit `return`.
    squishBed
  }
}
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/prefixFastq.scala
0 → 100644
View file @
1ddd45ad
package
nl.lumc.sasc.biopet.scripts
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
,
Argument
}
import
java.io.File
/**
 * Wrapper around the prefixFastq.py script, which prepends a fixed
 * sequence (the SAGE tag) to every read in a FASTQ file.
 */
class PrefixFastq(val root: Configurable) extends PythonCommandLineFunction {
  setPythonScript("prefixFastq.py")

  @Input(doc = "Input file")
  var input: File = _

  @Output(doc = "output File")
  var output: File = _

  @Argument(doc = "Prefix sequence")
  var prefix: String = "CATG"

  @Argument(doc = "Input file is gziped", required = false)
  var gzip: Boolean = _

  override def beforeCmd {
    // Auto-detect gzipped input from the file name.
    // NOTE(review): the second check is `endsWith("gz")` without a dot, so a
    // name like "reads_gz" also matches — presumably only ".gz" was intended;
    // behavior kept as-is, confirm before tightening.
    val name = input.getName
    if (name.endsWith(".gzip") || name.endsWith("gz")) gzip = true
  }

  def cmdLine = getPythonCommand +
    required("-o", output) +
    required("--prefix", prefix) +
    required(input)
}
object PrefixFastq {
  /**
   * Creates a configured PrefixFastq job.
   *
   * The output is written in `outputDir`, named after the input with
   * ".prefix.fastq" appended.
   *
   * @param root      parent configurable (pipeline)
   * @param input     FASTQ file to prefix
   * @param outputDir directory for the prefixed output
   * @return configured PrefixFastq function
   */
  def apply(root: Configurable, input: File, outputDir: String): PrefixFastq = {
    val prefixFastq = new PrefixFastq(root)
    prefixFastq.input = input
    prefixFastq.output = new File(outputDir, input.getName + ".prefix.fastq")
    // Idiom fix: the last expression is the result; no explicit `return`.
    prefixFastq
  }
}
biopet-framework/src/main/scripts/nl/lumc/sasc/biopet/scripts/bed_squish.py
0 → 100755
View file @
1ddd45ad
#!/usr/bin/env python2
"""
Overlapping regions removal in a single BED file.
The script will adjust feature coordinates so that no overlaps are present. If
a feature is enveloped entirely by another feature, the smaller feature will be
removed and the enveloping feature split into two.
Input BED files must be position-sorted and only contain the first six fields.
Strands are taken into account when removing regions.
Requirements:
* Python == 2.7.x
* track >= 1.1.0 <http://xapple.github.io/track/>
Copyright (c) 2013 Wibowo Arindrarto <w.arindrarto@lumc.nl>
Copyright (c) 2013 LUMC Sequencing Analysis Support Core <sasc@lumc.nl>
MIT License <http://opensource.org/licenses/MIT>
"""
import
argparse
import
os
import
track
class BEDCoord(object):
    """Class representing a BED feature coordinate.

    Instances order primarily by ``point``; ties are broken by the owning
    feature's ``start`` (for ``<`` and ``>`` only — ``<=``/``>=`` compare
    ``point`` alone, mirroring the original semantics).
    """

    __slots__ = ('feature', 'kind', 'point', 'start', 'strand')

    def __init__(self, feature, kind, point, start, strand):
        """
        :param feature: name of BED feature
        :type feature: str
        :param kind: type of coordinate, 'start' or 'end'
        :type kind: str
        :param point: coordinate point
        :type point: int
        :param start: start coordinate of the coordinate with this feature
        :type start: int
        :param strand: strand of the feature, 1 or -1
        :type strand: int
        """
        assert kind in ('start', 'end')
        self.feature = feature
        self.kind = kind
        self.point = point
        self.start = start
        self.strand = strand

    def __repr__(self):
        # e.g. "100S" for a start coordinate at point 100, "250E" for an end.
        kind_letter = self.kind[0].upper()
        return '{0}{1}'.format(self.point, kind_letter)

    def __gt__(self, other):
        # Tuple comparison: same point falls through to the feature start.
        return (self.point, self.start) > (other.point, other.start)

    def __lt__(self, other):
        return (self.point, self.start) < (other.point, other.start)

    def __ge__(self, other):
        # Deliberately point-only (no start tiebreak), as in the original.
        return self.point >= other.point

    def __le__(self, other):
        return self.point <= other.point
def squish_track_records(chrom_recs):
    """Given an iterator for `track` records, yield squished `track` features.

    :param chrom_recs: iterator returning `track` records for one chromosome
    :type chrom_recs: iterator
    :returns: (generator) single `track` records
    :rtype: list (start, end, feature name, score, strand)
    """
    # Algorithm:
    # 1. Flatten all coordinate points into a single list
    # 2. Sort by point, resolve same point by comparing feature starts
    #    (already defined in BEDCoord's `__lt__` and `__gt__`)
    # 3. Walk through the sorted points while keeping track of overlaps using
    #    a level counter for each strand
    # 4. Start coordinates increase level counters, end coordinates decrease
    #    them
    # 5. A squished feature starts when a level counter reaches 1 (either by
    #    entering a feature, or by dropping back out of an overlap)
    # 6. A squished feature ends when a level counter reaches 2 (entering an
    #    overlap) or drops to 0 (leaving the last feature)
    # 7. As additional checks, make sure that:
    #    * when yielding a record, its start coordinate <= its end coordinate
    #    * the level counter never falls below 0 (this doesn't make sense)
    #    * after all iterations are finished, the level counter == 0
    # Assumes:
    # 1. Input BED file is position-sorted
    # 2. Coordinate points all denote closed intervals (this is handled by
    #    `track` for BED files already)
    flat_coords = []
    for rec in chrom_recs:
        # rec layout: [start, end, name, score, strand]
        flat_coords.append(BEDCoord(rec[2], 'start', rec[0], rec[0], rec[4]))
        flat_coords.append(BEDCoord(rec[2], 'end', rec[1], rec[0], rec[4]))
    flat_coords.sort()

    plus_level, minus_level = 0, 0
    plus_row = [0, 0, "", 0, 1]
    minus_row = [0, 0, "", 0, -1]

    # BUGFIX: the rows are reused as scratch buffers, but were previously
    # yielded directly — every yielded record was the SAME list object, so any
    # consumer that materializes the generator (e.g. list()) saw N references
    # to the final row. Yield a snapshot copy instead; eager consumers see
    # identical output.
    for coord in flat_coords:
        if coord.strand == 1:
            if coord.kind == 'start':
                plus_level += 1
                if plus_level == 1:
                    plus_row[0] = coord.point
                    plus_row[2] = coord.feature
                elif plus_level == 2:
                    plus_row[1] = coord.point
                    # track uses closed coordinates already
                    assert plus_row[0] <= plus_row[1]
                    yield plus_row[:]
            else:
                plus_level -= 1
                if plus_level == 0:
                    plus_row[1] = coord.point
                    plus_row[2] = coord.feature
                    assert plus_row[0] <= plus_row[1]
                    yield plus_row[:]
                elif plus_level == 1:
                    plus_row[0] = coord.point
            assert plus_level >= 0, \
                'Unexpected feature level: {0}'.format(plus_level)
        elif coord.strand == -1:
            if coord.kind == 'start':
                minus_level += 1
                if minus_level == 1:
                    minus_row[0] = coord.point
                    minus_row[2] = coord.feature
                elif minus_level == 2:
                    minus_row[1] = coord.point
                    assert minus_row[0] <= minus_row[1]
                    yield minus_row[:]
            else:
                minus_level -= 1
                if minus_level == 0:
                    minus_row[1] = coord.point
                    minus_row[2] = coord.feature
                    assert minus_row[0] <= minus_row[1]
                    yield minus_row[:]
                elif minus_level == 1:
                    minus_row[0] = coord.point
            assert minus_level >= 0, \
                'Unexpected feature level: {0}'.format(minus_level)

    assert plus_level == 0, \
        'Unexpected end plus feature level: {0}'.format(plus_level)
    assert minus_level == 0, \
        'Unexpected end minus feature level: {0}'.format(minus_level)
def squish_bed(in_file, out_file):
    """Removes all overlapping regions in the input BED file, writing to the
    output BED file.

    :param in_file: path to input BED file
    :type in_file: str
    :param out_file: path to output BED file
    :type out_file: str
    """
    # Input must exist; a stale output file is removed so `track.new` can
    # recreate it from scratch.
    assert os.path.exists(in_file), \
        'Required input file {0} does not exist'.format(in_file)
    if os.path.exists(out_file):
        os.unlink(out_file)

    with track.load(in_file, readonly=True) as in_track, \
            track.new(out_file, format='bed') as out_track:
        # Squish chromosome by chromosome; records within a chromosome are
        # streamed through squish_track_records.
        for chrom in in_track.chromosomes:
            records = in_track.read(chrom)
            out_track.write(chrom, squish_track_records(records))
if __name__ == '__main__':
    # First docstring paragraph is the description, second the epilog.
    usage = __doc__.split('\n\n\n')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=usage[0],
        epilog=usage[1],
    )
    parser.add_argument('input', type=str,
                        help='Path to input BED file')
    parser.add_argument('output', type=str,
                        help='Path to output BED file')

    args = parser.parse_args()
    squish_bed(args.input, args.output)
biopet-framework/src/main/scripts/nl/lumc/sasc/biopet/scripts/prefixFastq.py
0 → 100755
View file @
1ddd45ad
#!/usr/bin/env python
# prep_deepsage.py
#
# Script for preprocessing deepSAGE sequencing samples.
#
# Adapted from: http://galaxy.nbic.nl/u/pacthoen/w/deepsagelumc
import
argparse
from
Bio
import
SeqIO
PREFIX
=
'CATG'
def prep_deepsage(input_fastq, output_fastq, is_gzip, seq):
    """Prepends ``seq`` to every read in a FASTQ file.

    :param input_fastq: path to the input FASTQ file (optionally gzipped)
    :param output_fastq: path to the (plain-text) output FASTQ file
    :param is_gzip: whether the input file is gzipped
    :param seq: sequence to prepend to each read
    """
    # Pick an opener matching the input compression.
    if is_gzip:
        import gzip
        opener = gzip.GzipFile
    else:
        opener = open

    with opener(input_fastq, 'r') as in_handle, open(output_fastq, 'w') as \
            out_handle:
        # adapted from fastools.fastools.add
        # not importing since we also need to cut away parts of the sequence
        for rec in SeqIO.parse(in_handle, 'fastq'):
            qual = rec.letter_annotations['phred_quality']
            # Temporarily drop the per-letter annotations so the sequence may
            # be replaced, then restore qualities with a fixed score of 40 for
            # the prepended bases.
            rec.letter_annotations = {}
            rec.seq = seq + rec.seq
            rec.letter_annotations = {'phred_quality': [40] * len(seq) + qual}
            SeqIO.write(rec, out_handle, "fastq")
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('input_fastq', type=str,
                        help="Path to input FASTQ file")
    parser.add_argument('-o', '--output', type=str, dest='output_fastq',
                        default="prep_deepsage_output.fq",
                        help="Path to output FASTQ file")
    parser.add_argument('--gzip', dest='gzip', action='store_true',
                        help="Whether input FASTQ file is gzipped or not.")
    # BUGFIX: --prefix used action='store_true' (copy-pasted from --gzip), so
    # "--prefix CATG" was rejected and the bare flag set prefix to True,
    # breaking the PrefixFastq.scala wrapper which always passes a value.
    # It must take a string argument; help text fixed accordingly.
    parser.add_argument('--prefix', dest='prefix', type=str,
                        default=PREFIX,
                        help="Sequence to prefix to each read.")

    args = parser.parse_args()

    prep_deepsage(args.input_fastq, args.output_fastq, args.gzip,
                  args.prefix)