Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
75834095
Commit
75834095
authored
Jun 02, 2016
by
bow
Browse files
Merge branch 'feature-imptute_to_vcf' into 'develop'
Extract impute to vcf as separated pipeline See merge request !413
parents
4c3c746b
0e2beeb1
Changes
7
Hide whitespace changes
Inline
Side-by-side
biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala
View file @
75834095
...
...
@@ -34,6 +34,7 @@ object BiopetExecutableMain extends BiopetExecutable {
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
gears
.
GearsSingle
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
gears
.
Gears
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
gwastest
.
GwasTest
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
gwastest
.
impute
.
Impute2Vcf
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
shiva
.
ShivaVariantcalling
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
basty
.
Basty
,
nl
.
lumc
.
sasc
.
biopet
.
pipelines
.
shiva
.
Shiva
,
...
...
gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala
View file @
75834095
...
...
@@ -15,97 +15,35 @@
package
nl.lumc.sasc.biopet.pipelines.gwastest
import
java.io.File
import
java.util
import
htsjdk.samtools.reference.FastaSequenceFile
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
,
Reference
}
import
nl.lumc.sasc.biopet.extensions.Snptest
import
nl.lumc.sasc.biopet.extensions.gatk.
{
CatVariants
,
SelectVariants
}
import
nl.lumc.sasc.biopet.extensions.tools.
{
GensToVcf
,
SnptestToVcf
}
import
nl.lumc.sasc.biopet.pipelines.gwastest.impute.ImputeOutput
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.extensions.tools.SnptestToVcf
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import
org.broadinstitute.gatk.queue.QScript
import
scala.collection.JavaConversions._
/**
* Created by pjvanthof on 16/03/16.
*/
class
GwasTest
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
with
Reference
{
def
this
()
=
this
(
null
)
import
GwasTest._
val
inputVcf
:
Option
[
File
]
=
config
(
"input_vcf"
)
val
inputVcf
:
File
=
config
(
"input_vcf"
)
val
phenotypeFile
:
File
=
config
(
"phenotype_file"
)
val
specsFile
:
Option
[
File
]
=
config
(
"imute_specs_file"
)
val
inputGens
:
List
[
GensInput
]
=
if
(
inputVcf
.
isDefined
)
List
[
GensInput
]()
else
config
(
"input_gens"
,
default
=
Nil
).
asList
.
map
{
case
value
:
Map
[
String
,
Any
]
=>
GensInput
(
new
File
(
value
(
"genotypes"
).
toString
),
value
.
get
(
"info"
).
map
(
x
=>
new
File
(
x
.
toString
)),
value
(
"contig"
).
toString
)
case
value
:
util.LinkedHashMap
[
String
,
_
]
=>
GensInput
(
new
File
(
value
.
get
(
"genotypes"
).
toString
),
value
.
toMap
.
get
(
"info"
).
map
(
x
=>
new
File
(
x
.
toString
)),
value
.
get
(
"contig"
).
toString
)
case
_
=>
throw
new
IllegalArgumentException
}
++
(
specsFile
match
{
case
Some
(
file
)
=>
imputeSpecsToGensInput
(
file
,
config
(
"validate_specs"
,
default
=
true
))
case
_
=>
Nil
})
override
def
dictRequired
=
true
override
def
defaults
=
Map
(
"snptest"
->
Map
(
"genotype_field"
->
"GP"
))
/** Init for pipeline */
def
init
()
:
Unit
=
{
inputGens
.
foreach
{
g
=>
val
referenceDict
=
new
FastaSequenceFile
(
referenceFasta
(),
true
).
getSequenceDictionary
if
(
referenceDict
.
getSequenceIndex
(
g
.
contig
)
==
-
1
)
Logging
.
addError
(
s
"Contig '${g.contig}' does not exist on reference: ${referenceFasta()}"
)
}
}
/** Pipeline itself */
def
biopetScript
()
:
Unit
=
{
val
(
vcfFile
,
chrVcfFiles
)
:
(
File
,
Map
[
String
,
File
])
=
inputVcf
.
map
((
_
,
Map
[
String
,
File
]())).
getOrElse
{
require
(
inputGens
.
nonEmpty
,
"No vcf file or gens files defined in config"
)
val
outputDirGens
=
new
File
(
outputDir
,
"gens_to_vcf"
)
val
cvTotal
=
new
CatVariants
(
this
)
cvTotal
.
assumeSorted
=
true
cvTotal
.
outputFile
=
new
File
(
outputDirGens
,
"merge.gens.vcf.gz"
)
val
chrGens
=
inputGens
.
groupBy
(
_
.
contig
).
map
{
case
(
contig
,
gens
)
=>
val
cvChr
=
new
CatVariants
(
this
)
cvChr
.
assumeSorted
=
true
//cvChr.isIntermediate = true
cvChr
.
outputFile
=
new
File
(
outputDirGens
,
s
"$contig.merge.gens.vcf.gz"
)
gens
.
zipWithIndex
.
foreach
{
gen
=>
val
gensToVcf
=
new
GensToVcf
(
this
)
gensToVcf
.
inputGens
=
gen
.
_1
.
genotypes
gensToVcf
.
inputInfo
=
gen
.
_1
.
info
gensToVcf
.
contig
=
gen
.
_1
.
contig
gensToVcf
.
samplesFile
=
phenotypeFile
gensToVcf
.
outputVcf
=
new
File
(
outputDirGens
,
gen
.
_1
.
genotypes
.
getName
+
s
".${gen._2}.vcf.gz"
)
gensToVcf
.
isIntermediate
=
true
add
(
gensToVcf
)
cvChr
.
variant
:+=
gensToVcf
.
outputVcf
}
add
(
cvChr
)
cvTotal
.
variant
:+=
cvChr
.
outputFile
contig
->
cvChr
.
outputFile
}
add
(
cvTotal
)
(
cvTotal
.
outputFile
,
chrGens
)
}
val
snpTests
=
BedRecordList
.
fromReference
(
referenceFasta
())
.
scatter
(
config
(
"bin_size"
,
default
=
1000000
))
.
allRecords
.
map
{
region
=>
...
...
@@ -119,7 +57,7 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
bedFile
.
deleteOnExit
()
val
sv
=
new
SelectVariants
(
this
)
sv
.
variant
=
chrVcfFiles
.
getOrElse
(
region
.
chr
,
vcfFile
)
sv
.
variant
=
inputVcf
sv
.
out
=
new
File
(
regionDir
,
s
"$name.vcf.gz"
)
sv
.
intervals
:+=
bedFile
sv
.
isIntermediate
=
true
...
...
@@ -147,11 +85,4 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
}
}
object
GwasTest
extends
PipelineCommand
{
case
class
GensInput
(
genotypes
:
File
,
info
:
Option
[
File
],
contig
:
String
)
def
imputeSpecsToGensInput
(
specsFile
:
File
,
validate
:
Boolean
=
true
)
:
List
[
GensInput
]
=
{
ImputeOutput
.
readSpecsFile
(
specsFile
,
validate
)
.
map
(
x
=>
GensInput
(
x
.
gens
,
Some
(
x
.
gensInfo
),
x
.
chromosome
))
}
}
\ No newline at end of file
object
GwasTest
extends
PipelineCommand
\ No newline at end of file
gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/Impute2Vcf.scala
0 → 100644
View file @
75834095
package
nl.lumc.sasc.biopet.pipelines.gwastest.impute
import
java.io.File
import
java.util
import
htsjdk.samtools.reference.FastaSequenceFile
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
,
Reference
}
import
nl.lumc.sasc.biopet.extensions.gatk.CatVariants
import
nl.lumc.sasc.biopet.extensions.tools.GensToVcf
import
nl.lumc.sasc.biopet.pipelines.gwastest.impute.Impute2Vcf.GensInput
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.queue.QScript
import
scala.collection.JavaConversions._
/**
* Created by pjvan_thof on 27-5-16.
*/
class
Impute2Vcf
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
with
Reference
{
def
this
()
=
this
(
null
)
val
specsFile
:
Option
[
File
]
=
config
(
"imute_specs_file"
)
val
phenotypeFile
:
File
=
config
(
"phenotype_file"
)
val
inputGens
:
List
[
GensInput
]
=
config
(
"input_gens"
,
default
=
Nil
).
asList
.
map
{
case
value
:
Map
[
String
,
Any
]
=>
GensInput
(
new
File
(
value
(
"genotypes"
).
toString
),
value
.
get
(
"info"
).
map
(
x
=>
new
File
(
x
.
toString
)),
value
(
"contig"
).
toString
)
case
value
:
util.LinkedHashMap
[
String
,
_
]
=>
GensInput
(
new
File
(
value
.
get
(
"genotypes"
).
toString
),
value
.
toMap
.
get
(
"info"
).
map
(
x
=>
new
File
(
x
.
toString
)),
value
.
get
(
"contig"
).
toString
)
case
_
=>
throw
new
IllegalArgumentException
}
++
(
specsFile
match
{
case
Some
(
file
)
=>
Impute2Vcf
.
imputeSpecsToGensInput
(
file
,
config
(
"validate_specs"
,
default
=
true
))
case
_
=>
Nil
})
/** Init for pipeline */
def
init
()
:
Unit
=
{
inputGens
.
foreach
{
g
=>
val
referenceDict
=
new
FastaSequenceFile
(
referenceFasta
(),
true
).
getSequenceDictionary
if
(
referenceDict
.
getSequenceIndex
(
g
.
contig
)
==
-
1
)
Logging
.
addError
(
s
"Contig '${g.contig}' does not exist on reference: ${referenceFasta()}"
)
}
}
/** Pipeline itself */
def
biopetScript
()
:
Unit
=
{
require
(
inputGens
.
nonEmpty
,
"No vcf file or gens files defined in config"
)
val
cvTotal
=
new
CatVariants
(
this
)
cvTotal
.
assumeSorted
=
true
cvTotal
.
outputFile
=
new
File
(
outputDir
,
"merge.gens.vcf.gz"
)
val
chrGens
=
inputGens
.
groupBy
(
_
.
contig
).
map
{
case
(
contig
,
gens
)
=>
val
cvChr
=
new
CatVariants
(
this
)
cvChr
.
assumeSorted
=
true
//cvChr.isIntermediate = true
cvChr
.
outputFile
=
new
File
(
outputDir
,
s
"$contig.merge.gens.vcf.gz"
)
gens
.
zipWithIndex
.
foreach
{
gen
=>
val
gensToVcf
=
new
GensToVcf
(
this
)
gensToVcf
.
inputGens
=
gen
.
_1
.
genotypes
gensToVcf
.
inputInfo
=
gen
.
_1
.
info
gensToVcf
.
contig
=
gen
.
_1
.
contig
gensToVcf
.
samplesFile
=
phenotypeFile
gensToVcf
.
outputVcf
=
new
File
(
outputDir
,
gen
.
_1
.
genotypes
.
getName
+
s
".${gen._2}.vcf.gz"
)
gensToVcf
.
isIntermediate
=
true
add
(
gensToVcf
)
cvChr
.
variant
:+=
gensToVcf
.
outputVcf
}
add
(
cvChr
)
cvTotal
.
variant
:+=
cvChr
.
outputFile
contig
->
cvChr
.
outputFile
}
add
(
cvTotal
)
}
}
object
Impute2Vcf
extends
PipelineCommand
{
case
class
GensInput
(
genotypes
:
File
,
info
:
Option
[
File
],
contig
:
String
)
def
imputeSpecsToGensInput
(
specsFile
:
File
,
validate
:
Boolean
=
true
)
:
List
[
GensInput
]
=
{
ImputeOutput
.
readSpecsFile
(
specsFile
,
validate
)
.
map
(
x
=>
GensInput
(
x
.
gens
,
Some
(
x
.
gensInfo
),
x
.
chromosome
))
}
}
\ No newline at end of file
gwas-test/src/test/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTestTest.scala
View file @
75834095
...
...
@@ -18,6 +18,7 @@ import java.io.File
import
java.nio.file.Paths
import
com.google.common.io.Files
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.utils.config.Config
import
org.broadinstitute.gatk.queue.QSettings
import
org.scalatest.Matchers
...
...
@@ -39,6 +40,7 @@ class GwasTestTest extends TestNGSuite with Matchers {
@Test
def
testFromVcf
:
Unit
=
{
Logging
.
errors
.
clear
()
val
pipeline
=
initPipeline
(
GwasTestTest
.
config
++
Map
(
"input_vcf"
->
GwasTestTest
.
vcfFile
.
toString
)
...
...
@@ -46,34 +48,6 @@ class GwasTestTest extends TestNGSuite with Matchers {
pipeline
.
script
()
}
@Test
def
testFromGens
:
Unit
=
{
val
pipeline
=
initPipeline
(
GwasTestTest
.
config
++
Map
(
"input_gens"
->
List
(
Map
(
"genotypes"
->
GwasTestTest
.
vcfFile
,
"contig"
->
"chrQ"
))
)
)
pipeline
.
script
()
}
@Test
def
testWrongContig
:
Unit
=
{
val
pipeline
=
initPipeline
(
GwasTestTest
.
config
++
Map
(
"input_gens"
->
List
(
Map
(
"genotypes"
->
GwasTestTest
.
vcfFile
,
"contig"
->
"chrBla"
))
)
)
intercept
[
IllegalStateException
]
{
pipeline
.
script
()
}
}
@Test
def
testFromSpecs
:
Unit
=
{
val
pipeline
=
initPipeline
(
GwasTestTest
.
config
++
Map
(
"imute_specs_file"
->
GwasTestTest
.
resourcePath
(
"/specs/files.specs"
))
)
pipeline
.
script
()
}
@Test
def
testEmpty
:
Unit
=
{
val
pipeline
=
initPipeline
(
GwasTestTest
.
config
)
...
...
@@ -101,8 +75,8 @@ object GwasTestTest {
}
val
config
=
Map
(
"reference_fasta"
->
GwasTestTest
.
reference
.
toString
,
"phenotype_file"
->
GwasTestTest
.
phenotypeFile
.
toString
,
"reference_fasta"
->
reference
.
toString
,
"phenotype_file"
->
phenotypeFile
.
toString
,
"output_dir"
->
outputDir
,
"snptest"
->
Map
(
"exe"
->
"test"
),
"md5sum"
->
Map
(
"exe"
->
"test"
),
...
...
gwas-test/src/test/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/Impute2VcfTest.scala
0 → 100644
View file @
75834095
package
nl.lumc.sasc.biopet.pipelines.gwastest.impute
import
java.io.File
import
java.nio.file.Paths
import
com.google.common.io.Files
import
nl.lumc.sasc.biopet.utils.config.Config
import
org.broadinstitute.gatk.queue.QSettings
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
/**
* Created by pjvan_thof on 31-5-16.
*/
class
Impute2VcfTest
extends
TestNGSuite
with
Matchers
{
def
initPipeline
(
map
:
Map
[
String
,
Any
])
:
Impute2Vcf
=
{
new
Impute2Vcf
{
override
def
configNamespace
=
"impute2vcf"
override
def
globalConfig
=
new
Config
(
map
)
qSettings
=
new
QSettings
qSettings
.
runName
=
"test"
}
}
@Test
def
testFromGens
:
Unit
=
{
val
pipeline
=
initPipeline
(
Impute2VcfTest
.
config
++
Map
(
"input_gens"
->
List
(
Map
(
"genotypes"
->
Impute2VcfTest
.
vcfFile
,
"contig"
->
"chrQ"
))
)
)
pipeline
.
script
()
}
@Test
def
testWrongContig
:
Unit
=
{
val
pipeline
=
initPipeline
(
Impute2VcfTest
.
config
++
Map
(
"input_gens"
->
List
(
Map
(
"genotypes"
->
Impute2VcfTest
.
vcfFile
,
"contig"
->
"chrBla"
))
)
)
intercept
[
IllegalStateException
]
{
pipeline
.
script
()
}
}
@Test
def
testFromSpecs
:
Unit
=
{
val
pipeline
=
initPipeline
(
Impute2VcfTest
.
config
++
Map
(
"imute_specs_file"
->
Impute2VcfTest
.
resourcePath
(
"/specs/files.specs"
))
)
pipeline
.
script
()
}
@Test
def
testEmpty
:
Unit
=
{
val
pipeline
=
initPipeline
(
Impute2VcfTest
.
config
)
intercept
[
IllegalArgumentException
]
{
pipeline
.
script
()
}
}
}
object
Impute2VcfTest
{
val
vcfFile
=
File
.
createTempFile
(
"gwas."
,
".vcf"
)
Files
.
touch
(
vcfFile
)
vcfFile
.
deleteOnExit
()
val
phenotypeFile
=
File
.
createTempFile
(
"gwas."
,
".txt"
)
phenotypeFile
.
deleteOnExit
()
val
outputDir
=
Files
.
createTempDir
()
outputDir
.
deleteOnExit
()
val
reference
=
new
File
(
resourcePath
(
"/fake_chrQ.fa"
))
private
def
resourcePath
(
p
:
String
)
:
String
=
{
Paths
.
get
(
getClass
.
getResource
(
p
).
toURI
).
toString
}
val
config
=
Map
(
"reference_fasta"
->
reference
.
toString
,
"phenotype_file"
->
phenotypeFile
.
toString
,
"output_dir"
->
outputDir
,
"md5sum"
->
Map
(
"exe"
->
"test"
),
"gatk_jar"
->
"test"
)
}
shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala
View file @
75834095
...
...
@@ -20,6 +20,7 @@
package
nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
import
nl.lumc.sasc.biopet.extensions.gatk
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.utils.config.Configurable
/** Allele mode for Haplotypecaller */
...
...
@@ -27,9 +28,11 @@ class HaplotypeCallerAllele(val root: Configurable) extends Variantcaller {
val
name
=
"haplotypecaller_allele"
protected
def
defaultPrio
=
5
val
alleles
:
File
=
config
(
"input_alleles"
)
def
biopetScript
()
{
val
hc
=
gatk
.
HaplotypeCaller
(
this
,
inputBams
.
values
.
toList
,
outputFile
)
hc
.
alleles
=
config
(
"input_
alleles
"
)
hc
.
alleles
=
Some
(
alleles
)
hc
.
genotyping_mode
=
Some
(
"GENOTYPE_GIVEN_ALLELES"
)
add
(
hc
)
}
...
...
shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala
View file @
75834095
...
...
@@ -27,9 +27,11 @@ class UnifiedGenotyperAllele(val root: Configurable) extends Variantcaller {
val
name
=
"unifiedgenotyper_allele"
protected
def
defaultPrio
=
9
val
alleles
:
File
=
config
(
"input_alleles"
)
def
biopetScript
()
{
val
ug
=
gatk
.
UnifiedGenotyper
(
this
,
inputBams
.
values
.
toList
,
outputFile
)
ug
.
alleles
=
config
(
"input_
alleles
"
)
ug
.
alleles
=
Some
(
alleles
)
ug
.
genotyping_mode
=
Some
(
"GENOTYPE_GIVEN_ALLELES"
)
add
(
ug
)
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment