Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
f697f3e0
Commit
f697f3e0
authored
May 30, 2017
by
Peter van 't Hof
Browse files
Adding a tool to generate a contig map
parent
8452cc90
Changes
7
Hide whitespace changes
Inline
Side-by-side
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cp.scala
0 → 100644
View file @
f697f3e0
package
nl.lumc.sasc.biopet.extensions
import
java.io.File
import
nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
/**
* Created by pjvanthof on 30/05/2017.
*/
class
Cp
(
val
parent
:
Configurable
)
extends
BiopetCommandLineFunction
{
@Input
(
doc
=
"Source file"
,
required
=
true
)
var
source
:
File
=
_
@Output
(
doc
=
"Target file"
,
required
=
true
)
var
target
:
File
=
_
executable
=
config
(
"exe"
,
default
=
"cp"
)
/** Returns command to execute */
def
cmdLine
=
required
(
executable
)
+
required
(
source
)
+
required
(
target
)
}
object
Cp
{
def
apply
(
parent
:
Configurable
,
source
:
File
,
target
:
File
)
:
Cp
=
{
val
cp
=
new
Cp
(
parent
)
cp
.
source
=
source
cp
.
target
=
target
cp
}
}
\ No newline at end of file
biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/DownloadNcbiAssembly.scala
View file @
f697f3e0
...
...
@@ -20,9 +20,6 @@ import nl.lumc.sasc.biopet.core.ToolCommandFunction
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
/**
* @deprecated Use picard.util.BedToIntervalList instead
*/
class
DownloadNcbiAssembly
(
val
parent
:
Configurable
)
extends
ToolCommandFunction
{
def
toolObject
=
nl
.
lumc
.
sasc
.
biopet
.
tools
.
DownloadNcbiAssembly
...
...
biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/NcbiReportToContigMap.scala
0 → 100644
View file @
f697f3e0
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.extensions.tools
import
java.io.File
import
nl.lumc.sasc.biopet.core.ToolCommandFunction
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
class
NcbiReportToContigMap
(
val
parent
:
Configurable
)
extends
ToolCommandFunction
{
def
toolObject
=
nl
.
lumc
.
sasc
.
biopet
.
tools
.
NcbiReportToContigMap
@Output
(
doc
=
"Output fasta file"
,
required
=
true
)
var
contigMap
:
File
=
_
var
outputReport
:
File
=
_
@Input
(
required
=
true
)
var
assemblyReport
:
File
=
_
var
nameHeader
:
String
=
_
override
def
defaultCoreMemory
=
4.0
override
def
cmdLine
=
super
.
cmdLine
+
required
(
"-a"
,
assemblyReport
)
+
required
(
"--report"
,
outputReport
)
+
required
(
"-o"
,
contigMap
)
+
required
(
"--nameHeader"
,
nameHeader
)
}
biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala
View file @
f697f3e0
...
...
@@ -56,6 +56,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl
.
lumc
.
sasc
.
biopet
.
tools
.
VcfWithVcf
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
VepNormalizer
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
WipeReads
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
NcbiReportToContigMap
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
DownloadNcbiAssembly
)
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/NcbiReportToContigMap.scala
0 → 100644
View file @
f697f3e0
package
nl.lumc.sasc.biopet.tools
import
java.io.
{
File
,
PrintWriter
}
import
nl.lumc.sasc.biopet.utils.ToolCommand
import
scala.io.Source
/**
* Created by pjvanthof on 30/05/2017.
*/
object
NcbiReportToContigMap
extends
ToolCommand
{
case
class
Args
(
assemblyReport
:
File
=
null
,
outputFile
:
File
=
null
,
reportFile
:
Option
[
File
]
=
None
,
contigNameHeader
:
String
=
null
,
names
:
List
[
String
]
=
List
(
"Sequence-Name"
,
"UCSC-style-name"
,
"GenBank-Accn"
,
"RefSeq-Accn"
))
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'a'
,
"assembly_report"
)
required
()
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
assemblyReport
=
x
)
}
text
"refseq ID from NCBI"
opt
[
File
](
'o'
,
"output"
)
required
()
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputFile
=
x
)
}
text
"output Fasta file"
opt
[
String
](
"nameHeader"
)
required
()
unbounded
()
valueName
"<string>"
action
{
(
x
,
c
)
=>
c
.
copy
(
contigNameHeader
=
x
)
}
text
"""
| What column to use from the NCBI report for the name of the contigs.
| All columns in the report can be used but this are the most common field to choose from:
| - 'Sequence-Name': Name of the contig within the assembly
| - 'UCSC-style-name': Name of the contig used by UCSC ( like hg19 )
| - 'RefSeq-Accn': Unique name of the contig at RefSeq (default for NCBI)"""
.
stripMargin
opt
[
String
](
"names"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
names
=
x
.
split
(
","
).
toList
)
}
text
"output Fasta file"
}
/**
* @param args the command line arguments
*/
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
val
argsParser
=
new
OptParser
val
cmdargs
:
Args
=
argsParser
.
parse
(
args
,
Args
())
getOrElse
(
throw
new
IllegalArgumentException
)
logger
.
info
(
s
"Reading ${cmdargs.assemblyReport}"
)
val
reader
=
Source
.
fromFile
(
cmdargs
.
assemblyReport
)
val
assamblyReport
=
reader
.
getLines
().
toList
reader
.
close
()
cmdargs
.
reportFile
.
foreach
{
file
=>
val
writer
=
new
PrintWriter
(
file
)
assamblyReport
.
foreach
(
writer
.
println
)
writer
.
close
()
}
val
headers
=
assamblyReport
.
filter
(
_
.
startsWith
(
"#"
))
.
last
.
stripPrefix
(
"# "
)
.
split
(
"\t"
)
.
zipWithIndex
.
toMap
val
allContigs
=
assamblyReport
.
filter
(!
_
.
startsWith
(
"#"
))
.
map
(
_
.
split
(
"\t"
))
val
altNameIds
=
cmdargs
.
names
.
filter
(
_
!=
cmdargs
.
contigNameHeader
).
map
(
headers
)
val
nameId
=
headers
(
cmdargs
.
contigNameHeader
)
val
writer
=
new
PrintWriter
(
cmdargs
.
outputFile
)
for
(
line
<-
assamblyReport
.
filter
(!
_
.
startsWith
(
"#"
)))
{
val
values
=
line
.
split
(
"\t"
)
val
altNames
=
altNameIds
.
map
(
i
=>
values
(
i
)).
filter
(
_
!=
"na"
).
distinct
writer
.
println
(
values
(
nameId
)
+
"\t"
+
altNames
.
mkString
(
";"
))
}
writer
.
close
()
}
}
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/NcbiReportToContigMapTest.scala
0 → 100644
View file @
f697f3e0
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
java.nio.file.Paths
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
/**
* Created by pjvanthof on 30/05/2017.
*/
class
NcbiReportToContigMapTest
extends
TestNGSuite
with
Matchers
{
private
def
resourcePath
(
p
:
String
)
:
String
=
{
Paths
.
get
(
getClass
.
getResource
(
p
).
toURI
).
toString
}
@Test
def
test
:
Unit
=
{
val
report
=
new
File
(
resourcePath
(
"/GCF_000844745.1.report"
))
val
output
=
File
.
createTempFile
(
"test."
,
".tsv"
)
output
.
deleteOnExit
()
NcbiReportToContigMap
.
main
(
Array
(
"-a"
,
report
.
getAbsolutePath
,
"-o"
,
output
.
getAbsolutePath
,
"--nameHeader"
,
"Sequence-Name"
))
}
}
generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/DownloadGenomes.scala
View file @
f697f3e0
...
...
@@ -22,7 +22,7 @@ import nl.lumc.sasc.biopet.core.{BiopetQScript, PipelineCommand}
import
nl.lumc.sasc.biopet.extensions._
import
nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
import
nl.lumc.sasc.biopet.extensions.picard.NormalizeFasta
import
nl.lumc.sasc.biopet.extensions.tools.DownloadNcbiAssembly
import
nl.lumc.sasc.biopet.extensions.tools.
{
DownloadNcbiAssembly
,
NcbiReportToContigMap
}
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.queue.QScript
...
...
@@ -79,14 +79,29 @@ class DownloadGenomes(val parent: Configurable) extends QScript with BiopetQScri
genomeConfig
.
get
(
"ncbi_assembly_report"
)
match
{
case
Some
(
assemblyReport
:
String
)
=>
val
ncbiAssemblyReport
=
new
File
(
genomeDir
,
assemblyReport
.
getName
)
add
(
Cp
(
this
,
new
File
(
assemblyReport
),
ncbiAssemblyReport
))
renderReadme
.
ncbiAssemblyReport
=
Some
(
ncbiAssemblyReport
)
val
ncbiReportToContigMap
=
new
NcbiReportToContigMap
(
this
)
ncbiReportToContigMap
.
assemblyReport
=
ncbiAssemblyReport
ncbiReportToContigMap
.
contigMap
=
new
File
(
genomeDir
,
"contig.map.tsv"
)
ncbiReportToContigMap
.
nameHeader
=
genomeConfig
.
get
(
"ncbi_assembly_header_name"
)
.
map
(
_
.
toString
)
.
getOrElse
(
"RefSeq-Accn"
)
add
(
ncbiReportToContigMap
)
val
downloadAssembly
=
new
DownloadNcbiAssembly
(
this
)
downloadAssembly
.
assemblyReport
=
new
File
(
assemblyReport
)
renderReadme
.
ncbiAssemblyReport
=
Some
(
downloadAssembly
.
assemblyReport
)
downloadAssembly
.
assemblyReport
=
ncbiAssemblyReport
downloadAssembly
.
output
=
downloadFastaFile
downloadAssembly
.
outputReport
=
new
File
(
genomeDir
,
s
"$speciesName-$genomeName.assembly.report"
)
downloadAssembly
.
nameHeader
=
genomeConfig
.
get
(
"ncbi_assembly_header_name"
).
map
(
_
.
toString
)
downloadAssembly
.
nameHeader
=
Some
(
genomeConfig
.
get
(
"ncbi_assembly_header_name"
)
.
map
(
_
.
toString
)
.
getOrElse
(
"RefSeq-Accn"
))
downloadAssembly
.
mustHaveOne
=
genomeConfig
.
get
(
"ncbi_assembly_must_have_one"
)
.
map
(
_
.
asInstanceOf
[
util.ArrayList
[
util.LinkedHashMap
[
String
,
String
]]])
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment