Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
dee3612b
Commit
dee3612b
authored
May 27, 2016
by
Peter van 't Hof
Browse files
Extract impute to vcf as separated pipeline
parent
ece09a76
Changes
3
Hide whitespace changes
Inline
Side-by-side
biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala
View file @
dee3612b
...
...
@@ -34,6 +34,7 @@ object BiopetExecutableMain extends BiopetExecutable {
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
gears
.
GearsSingle
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
gears
.
Gears
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
gwastest
.
GwasTest
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
gwastest
.
impute
.
Impute2Vcf
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
shiva
.
ShivaVariantcalling
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
basty
.
Basty
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
shiva
.
Shiva
,
...
...
gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala
View file @
dee3612b
...
...
@@ -15,97 +15,35 @@
package
nl.lumc.sasc.biopet.pipelines.gwastest
import
java.io.File
import
java.util
import
htsjdk.samtools.reference.FastaSequenceFile
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
,
Reference
}
import
nl.lumc.sasc.biopet.extensions.Snptest
import
nl.lumc.sasc.biopet.extensions.gatk.
{
CatVariants
,
SelectVariants
}
import
nl.lumc.sasc.biopet.extensions.tools.
{
GensToVcf
,
SnptestToVcf
}
import
nl.lumc.sasc.biopet.pipelines.gwastest.impute.ImputeOutput
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.extensions.tools.SnptestToVcf
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import
org.broadinstitute.gatk.queue.QScript
import
scala.collection.JavaConversions._
/**
* Created by pjvanthof on 16/03/16.
*/
class
GwasTest
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
with
Reference
{
def
this
()
=
this
(
null
)
import
GwasTest._
val
inputVcf
:
Option
[
File
]
=
config
(
"input_vcf"
)
val
inputVcf
:
File
=
config
(
"input_vcf"
)
val
phenotypeFile
:
File
=
config
(
"phenotype_file"
)
val
specsFile
:
Option
[
File
]
=
config
(
"imute_specs_file"
)
val
inputGens
:
List
[
GensInput
]
=
if
(
inputVcf
.
isDefined
)
List
[
GensInput
]()
else
config
(
"input_gens"
,
default
=
Nil
).
asList
.
map
{
case
value
:
Map
[
String
,
Any
]
=>
GensInput
(
new
File
(
value
(
"genotypes"
).
toString
),
value
.
get
(
"info"
).
map
(
x
=>
new
File
(
x
.
toString
)),
value
(
"contig"
).
toString
)
case
value
:
util.LinkedHashMap
[
String
,
_
]
=>
GensInput
(
new
File
(
value
.
get
(
"genotypes"
).
toString
),
value
.
toMap
.
get
(
"info"
).
map
(
x
=>
new
File
(
x
.
toString
)),
value
.
get
(
"contig"
).
toString
)
case
_
=>
throw
new
IllegalArgumentException
}
++
(
specsFile
match
{
case
Some
(
file
)
=>
imputeSpecsToGensInput
(
file
,
config
(
"validate_specs"
,
default
=
true
))
case
_
=>
Nil
})
override
def
dictRequired
=
true
override
def
defaults
=
Map
(
"snptest"
->
Map
(
"genotype_field"
->
"GP"
))
/** Init for pipeline */
def
init
()
:
Unit
=
{
inputGens
.
foreach
{
g
=>
val
referenceDict
=
new
FastaSequenceFile
(
referenceFasta
(),
true
).
getSequenceDictionary
if
(
referenceDict
.
getSequenceIndex
(
g
.
contig
)
==
-
1
)
Logging
.
addError
(
s
"Contig '${g.contig}' does not exist on reference: ${referenceFasta()}"
)
}
}
/** Pipeline itself */
def
biopetScript
()
:
Unit
=
{
val
(
vcfFile
,
chrVcfFiles
)
:
(
File
,
Map
[
String
,
File
])
=
inputVcf
.
map
((
_
,
Map
[
String
,
File
]())).
getOrElse
{
require
(
inputGens
.
nonEmpty
,
"No vcf file or gens files defined in config"
)
val
outputDirGens
=
new
File
(
outputDir
,
"gens_to_vcf"
)
val
cvTotal
=
new
CatVariants
(
this
)
cvTotal
.
assumeSorted
=
true
cvTotal
.
outputFile
=
new
File
(
outputDirGens
,
"merge.gens.vcf.gz"
)
val
chrGens
=
inputGens
.
groupBy
(
_
.
contig
).
map
{
case
(
contig
,
gens
)
=>
val
cvChr
=
new
CatVariants
(
this
)
cvChr
.
assumeSorted
=
true
//cvChr.isIntermediate = true
cvChr
.
outputFile
=
new
File
(
outputDirGens
,
s
"$contig.merge.gens.vcf.gz"
)
gens
.
zipWithIndex
.
foreach
{
gen
=>
val
gensToVcf
=
new
GensToVcf
(
this
)
gensToVcf
.
inputGens
=
gen
.
_1
.
genotypes
gensToVcf
.
inputInfo
=
gen
.
_1
.
info
gensToVcf
.
contig
=
gen
.
_1
.
contig
gensToVcf
.
samplesFile
=
phenotypeFile
gensToVcf
.
outputVcf
=
new
File
(
outputDirGens
,
gen
.
_1
.
genotypes
.
getName
+
s
".${gen._2}.vcf.gz"
)
gensToVcf
.
isIntermediate
=
true
add
(
gensToVcf
)
cvChr
.
variant
:+=
gensToVcf
.
outputVcf
}
add
(
cvChr
)
cvTotal
.
variant
:+=
cvChr
.
outputFile
contig
->
cvChr
.
outputFile
}
add
(
cvTotal
)
(
cvTotal
.
outputFile
,
chrGens
)
}
val
snpTests
=
BedRecordList
.
fromReference
(
referenceFasta
())
.
scatter
(
config
(
"bin_size"
,
default
=
1000000
))
.
allRecords
.
map
{
region
=>
...
...
@@ -119,7 +57,7 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
bedFile
.
deleteOnExit
()
val
sv
=
new
SelectVariants
(
this
)
sv
.
variant
=
chrVcfFiles
.
getOrElse
(
region
.
chr
,
vcfFile
)
sv
.
variant
=
inputVcf
sv
.
out
=
new
File
(
regionDir
,
s
"$name.vcf.gz"
)
sv
.
intervals
:+=
bedFile
sv
.
isIntermediate
=
true
...
...
@@ -147,11 +85,4 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
}
}
object
GwasTest
extends
PipelineCommand
{
case
class
GensInput
(
genotypes
:
File
,
info
:
Option
[
File
],
contig
:
String
)
def
imputeSpecsToGensInput
(
specsFile
:
File
,
validate
:
Boolean
=
true
)
:
List
[
GensInput
]
=
{
ImputeOutput
.
readSpecsFile
(
specsFile
,
validate
)
.
map
(
x
=>
GensInput
(
x
.
gens
,
Some
(
x
.
gensInfo
),
x
.
chromosome
))
}
}
\ No newline at end of file
object
GwasTest
extends
PipelineCommand
\ No newline at end of file
gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/Impute2Vcf.scala
0 → 100644
View file @
dee3612b
package
nl.lumc.sasc.biopet.pipelines.gwastest.impute
import
java.io.File
import
java.util
import
htsjdk.samtools.reference.FastaSequenceFile
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
,
Reference
}
import
nl.lumc.sasc.biopet.extensions.gatk.CatVariants
import
nl.lumc.sasc.biopet.extensions.tools.GensToVcf
import
nl.lumc.sasc.biopet.pipelines.gwastest.impute.Impute2Vcf.GensInput
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.queue.QScript
import
scala.collection.JavaConversions._
/**
* Created by pjvan_thof on 27-5-16.
*/
class
Impute2Vcf
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
with
Reference
{
def
this
()
=
this
(
null
)
val
specsFile
:
Option
[
File
]
=
config
(
"imute_specs_file"
)
val
phenotypeFile
:
File
=
config
(
"phenotype_file"
)
val
inputGens
:
List
[
GensInput
]
=
config
(
"input_gens"
,
default
=
Nil
).
asList
.
map
{
case
value
:
Map
[
String
,
Any
]
=>
GensInput
(
new
File
(
value
(
"genotypes"
).
toString
),
value
.
get
(
"info"
).
map
(
x
=>
new
File
(
x
.
toString
)),
value
(
"contig"
).
toString
)
case
value
:
util.LinkedHashMap
[
String
,
_
]
=>
GensInput
(
new
File
(
value
.
get
(
"genotypes"
).
toString
),
value
.
toMap
.
get
(
"info"
).
map
(
x
=>
new
File
(
x
.
toString
)),
value
.
get
(
"contig"
).
toString
)
case
_
=>
throw
new
IllegalArgumentException
}
++
(
specsFile
match
{
case
Some
(
file
)
=>
Impute2Vcf
.
imputeSpecsToGensInput
(
file
,
config
(
"validate_specs"
,
default
=
true
))
case
_
=>
Nil
})
/** Init for pipeline */
def
init
()
:
Unit
=
{
inputGens
.
foreach
{
g
=>
val
referenceDict
=
new
FastaSequenceFile
(
referenceFasta
(),
true
).
getSequenceDictionary
if
(
referenceDict
.
getSequenceIndex
(
g
.
contig
)
==
-
1
)
Logging
.
addError
(
s
"Contig '${g.contig}' does not exist on reference: ${referenceFasta()}"
)
}
}
/** Pipeline itself */
def
biopetScript
()
:
Unit
=
{
require
(
inputGens
.
nonEmpty
,
"No vcf file or gens files defined in config"
)
val
cvTotal
=
new
CatVariants
(
this
)
cvTotal
.
assumeSorted
=
true
cvTotal
.
outputFile
=
new
File
(
outputDir
,
"merge.gens.vcf.gz"
)
val
chrGens
=
inputGens
.
groupBy
(
_
.
contig
).
map
{
case
(
contig
,
gens
)
=>
val
cvChr
=
new
CatVariants
(
this
)
cvChr
.
assumeSorted
=
true
//cvChr.isIntermediate = true
cvChr
.
outputFile
=
new
File
(
outputDir
,
s
"$contig.merge.gens.vcf.gz"
)
gens
.
zipWithIndex
.
foreach
{
gen
=>
val
gensToVcf
=
new
GensToVcf
(
this
)
gensToVcf
.
inputGens
=
gen
.
_1
.
genotypes
gensToVcf
.
inputInfo
=
gen
.
_1
.
info
gensToVcf
.
contig
=
gen
.
_1
.
contig
gensToVcf
.
samplesFile
=
phenotypeFile
gensToVcf
.
outputVcf
=
new
File
(
outputDir
,
gen
.
_1
.
genotypes
.
getName
+
s
".${gen._2}.vcf.gz"
)
gensToVcf
.
isIntermediate
=
true
add
(
gensToVcf
)
cvChr
.
variant
:+=
gensToVcf
.
outputVcf
}
add
(
cvChr
)
cvTotal
.
variant
:+=
cvChr
.
outputFile
contig
->
cvChr
.
outputFile
}
add
(
cvTotal
)
}
}
object
Impute2Vcf
extends
PipelineCommand
{
case
class
GensInput
(
genotypes
:
File
,
info
:
Option
[
File
],
contig
:
String
)
def
imputeSpecsToGensInput
(
specsFile
:
File
,
validate
:
Boolean
=
true
)
:
List
[
GensInput
]
=
{
ImputeOutput
.
readSpecsFile
(
specsFile
,
validate
)
.
map
(
x
=>
GensInput
(
x
.
gens
,
Some
(
x
.
gensInfo
),
x
.
chromosome
))
}
}
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment