biopet.biopet · Commits

Commit 63547f9b
Authored Jul 13, 2017 by Peter van 't Hof

Code warning found biopet tools

parent 9ca759c7
Changes: 76
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala
@@ -22,6 +22,7 @@ import nl.lumc.sasc.biopet.utils.intervals.{BedRecord, BedRecordList}
 import picard.annotation.{Gene, GeneAnnotationReader}
 
 import scala.collection.JavaConversions._
+import scala.collection.immutable
 
 /**
   * This tool will generate Base count based on a bam file and a refflat file
@@ -277,7 +278,7 @@ object BaseCounter extends ToolCommand {
       val intronSense = geneCount.intronCounts.map(_.counts.senseBases).sum
       val intronAntiSense = geneCount.intronCounts.map(_.counts.antiSenseBases).sum
       geneIntronSenseCounts += intronSense
-      geneExonicAntiSenseCounts += intronAntiSense
+      geneIntronAntiSenseCounts += intronAntiSense
       geneIntronicWriter.println(geneCount.gene.getName + "\t" + (intronSense + intronAntiSense))
       geneIntronicSenseWriter.println(geneCount.gene.getName + "\t" + intronSense)
       geneIntronicAntiSenseWriter.println(geneCount.gene.getName + "\t" + intronAntiSense)
@@ -451,7 +452,7 @@ object BaseCounter extends ToolCommand {
     bamReader.close()
 
     counter += 1
-    if (counter % 1000 == 0) logger.info(s"${counter} chunks done")
+    if (counter % 1000 == 0) logger.info(s"$counter chunks done")
     ThreadOutput(counts.values.toList, metaExons, plusMetaExons ::: minMetaExons)
   }
@@ -512,7 +513,7 @@ object BaseCounter extends ToolCommand {
     overlap
   }
 
-  def groupGenesOnOverlap(genes: Iterable[Gene]) = {
+  def groupGenesOnOverlap(genes: Iterable[Gene]): Map[String, List[List[Gene]]] = {
     genes
       .groupBy(_.getContig)
       .map {
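
This hunk and several below follow one pattern: public members get explicit result types instead of inferred ones, which is the code warning being fixed. A small illustrative sketch (hypothetical method, not from the tool):

    object ReturnTypes {
      // Inferred result type: compiles, but the API type is invisible at the declaration
      // and can change silently when the body changes.
      def groupByFirstChar(words: Iterable[String]) = words.groupBy(_.head)

      // Explicit result type: self-documenting and warning-free.
      def groupByFirstCharTyped(words: Iterable[String]): Map[Char, Iterable[String]] =
        words.groupBy(_.head)

      def main(args: Array[String]): Unit =
        println(groupByFirstCharTyped(List("gene", "exon", "intron")))
    }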
@@ -531,13 +532,13 @@ object BaseCounter extends ToolCommand {
   class Counts {
     var senseBases = 0L
     var antiSenseBases = 0L
-    def totalBases = senseBases + antiSenseBases
+    def totalBases: Long = senseBases + antiSenseBases
     var senseReads = 0L
     var antiSenseReads = 0L
-    def totalReads = senseReads + antiSenseReads
+    def totalReads: Long = senseReads + antiSenseReads
   }
 
-  def generateMergedExonRegions(gene: Gene) =
+  def generateMergedExonRegions(gene: Gene): BedRecordList =
     BedRecordList
       .fromList(gene
@@ -548,17 +549,18 @@ object BaseCounter extends ToolCommand {
   class GeneCount(val gene: Gene) {
     val counts = new Counts
-    val transcripts = gene.iterator().map(new TranscriptCount(_)).toList
-    def intronRegions =
+    val transcripts: List[TranscriptCount] = gene.iterator().map(new TranscriptCount(_)).toList
+    def intronRegions: BedRecordList =
       BedRecordList
         .fromList(BedRecord(gene.getContig, gene.getStart - 1, gene.getEnd) :: generateMergedExonRegions(gene).allRecords.toList)
         .squishBed(strandSensitive = false, nameSensitive = false)
-    val exonCounts =
+    val exonCounts: immutable.Iterable[RegionCount] =
       generateMergedExonRegions(gene).allRecords.map(e => new RegionCount(e.start + 1, e.end))
-    val intronCounts = intronRegions.allRecords.map(e => new RegionCount(e.start + 1, e.end))
+    val intronCounts: immutable.Iterable[RegionCount] =
+      intronRegions.allRecords.map(e => new RegionCount(e.start + 1, e.end))
 
     def addRecord(samRecord: SAMRecord, sense: Boolean): Unit = {
       bamRecordBasesOverlap(samRecord, gene.getStart, gene.getEnd, counts, sense)
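
The `immutable.Iterable[RegionCount]` annotations above are what the added `import scala.collection.immutable` at the top of this file supports: the mapped `allRecords` collections are immutable Iterables, and the explicit prefix makes that visible at the declaration. A minimal sketch of the same pattern, with made-up values rather than the tool's types:

    import scala.collection.immutable

    object AnnotatedMembers {
      // Mapping over an immutable Map yields scala.collection.immutable.Iterable;
      // the annotation only spells out what was already inferred.
      val lengths: immutable.Iterable[Int] = Map("chr1" -> 10, "chr2" -> 20).map(_._2)

      def main(args: Array[String]): Unit = println(lengths.sum)
    }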
@@ -570,7 +572,7 @@ object BaseCounter extends ToolCommand {
   class TranscriptCount(val transcript: Gene#Transcript) {
     val counts = new Counts
-    def intronRegions =
+    def intronRegions: BedRecordList =
       BedRecordList
         .fromList(BedRecord(transcript.getGene.getContig, transcript.start() - 1, transcript.end()) ::
@@ -579,9 +581,9 @@ object BaseCounter extends ToolCommand {
           .toList)
         .squishBed(strandSensitive = false, nameSensitive = false)
-    val exonCounts = transcript.exons.map(new RegionCount(_))
-    val intronCounts = if (transcript.exons.size > 1)
+    val exonCounts: Array[RegionCount] = transcript.exons.map(new RegionCount(_))
+    val intronCounts: List[RegionCount] = if (transcript.exons.length > 1)
       intronRegions.allRecords.map(e => new RegionCount(e.start + 1, e.end)).toList
     else Nil
 
     def addRecord(samRecord: SAMRecord, sense: Boolean): Unit = {
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala
@@ -61,7 +61,7 @@ object BastyGenerateFasta extends ToolCommand {
       (x, c) => c.copy(outputConsensusVariants = x)
     } text "Consensus fasta from bam with variants from vcf file, always reference bases else 'N'"
-    opt[Unit]("snpsOnly") unbounded () action { (x, c) =>
+    opt[Unit]("snpsOnly") unbounded () action { (_, c) =>
       c.copy(snpsOnly = true)
     } text "Only use snps from vcf file"
     opt[String]("sampleName") unbounded () action { (x, c) =>
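
Most of the scopt `action { (x, c) => ... }` edits in this commit are this same fix: when the parsed value is not used (typically `opt[Unit]` flags that only set a boolean), naming the first parameter triggers an unused-parameter warning, so it becomes `_`. A minimal sketch with a hypothetical config, not the tool's real options:

    import scopt.OptionParser

    object FlagExample {
      case class Config(verbose: Boolean = false, threads: Int = 1)

      val parser = new OptionParser[Config]("example") {
        // the Unit value of a flag is never read, so the first lambda parameter is a placeholder
        opt[Unit]("verbose") action { (_, c) => c.copy(verbose = true) }
        // here the parsed value is actually used, so it keeps a name
        opt[Int]('t', "threads") action { (x, c) => c.copy(threads = x) }
      }

      def main(args: Array[String]): Unit =
        parser.parse(args, Config()).foreach(println)
    }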
@@ -87,7 +87,7 @@ object BastyGenerateFasta extends ToolCommand {
     val err: ListBuffer[String] = ListBuffer()
     if (c.outputConsensus != null || c.outputConsensusVariants != null) {
-      if (c.reference == null) err.add("No reference suplied")
+      if (c.reference == null) err.add("No reference supplied")
       else {
         val index = new File(c.reference.getAbsolutePath + ".fai")
         if (!index.exists) err.add("Reference does not have index")
@@ -178,10 +178,10 @@ object BastyGenerateFasta extends ToolCommand {
       if (variant.isDefined) {
         logger.info(variant.get._2)
         val stripPrefix = if (variant.get._1._1 < begin) begin - variant.get._1._1 else 0
-        val stripSufix = if (variant.get._1._2 > end) variant.get._1._2 - end else 0
+        val stripSuffix = if (variant.get._1._2 > end) variant.get._1._2 - end else 0
         val allele = getMaxAllele(variant.get._2)
         consensusPos += variant.get._2.getReference.getBases.length
-        buffer.append(allele.substring(stripPrefix, allele.length - stripSufix))
+        buffer.append(allele.substring(stripPrefix, allele.length - stripSuffix))
       } else {
         buffer.append(consensus(consensusPos))
         consensusPos += 1
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala
@@ -64,7 +64,7 @@ object CheckAllelesVcfInBam extends ToolCommand {
     opt[File]('b', "bam") unbounded () minOccurs 1 action { (x, c) =>
       c.copy(bamFiles = x :: c.bamFiles)
     } text "bam file, from which the variants (VCF files) were called"
-    opt[Int]('m', "min_mapping_quality") maxOccurs 1 action { (x, c) =>
+    opt[Int]('m', "min_mapping_quality") maxOccurs 1 action { (_, c) =>
      c.copy(minMapQual = c.minMapQual)
     } text "minimum mapping quality score for a read to be taken into account"
   }
@@ -120,28 +120,6 @@ object CheckAllelesVcfInBam extends ToolCommand {
                                vcfRecord.getStart + refAllele.length - 1)
       val bamIter = bamReader.query(Array(queryInterval), false)
 
-      def filterRead(samRecord: SAMRecord): Boolean = {
-        if (samRecord.getDuplicateReadFlag) {
-          countReports(sample).duplicateReads += 1
-          return true
-        }
-        if (samRecord.getSupplementaryAlignmentFlag) return true
-        if (samRecord.getNotPrimaryAlignmentFlag) return true
-        if (samRecord.getMappingQuality < commandArgs.minMapQual) {
-          countReports(sample).lowMapQualReads += 1
-          return true
-        }
-        false
-      }
-
-      val counts = for (samRecord <- bamIter if !filterRead(samRecord)) {
-        checkAlleles(samRecord, vcfRecord) match {
-          case Some(a) =>
-            if (countReports(sample).aCounts.contains(a)) countReports(sample).aCounts(a) += 1
-            else countReports(sample).aCounts += (a -> 1)
-          case _ => countReports(sample).notFound += 1
-        }
-      }
       bamIter.close()
     }
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/DownloadNcbiAssembly.scala
@@ -68,14 +68,14 @@ object DownloadNcbiAssembly extends ToolCommand {
     */
   def main(args: Array[String]): Unit = {
     val argsParser = new OptParser
-    val cmdargs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
+    val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
 
-    logger.info(s"Reading ${cmdargs.assemblyReport}")
-    val reader = Source.fromFile(cmdargs.assemblyReport)
+    logger.info(s"Reading ${cmdArgs.assemblyReport}")
+    val reader = Source.fromFile(cmdArgs.assemblyReport)
     val assamblyReport = reader.getLines().toList
     reader.close()
-    cmdargs.reportFile.foreach { file =>
+    cmdArgs.reportFile.foreach { file =>
       val writer = new PrintWriter(file)
       assamblyReport.foreach(writer.println)
       writer.close()
@@ -88,12 +88,12 @@ object DownloadNcbiAssembly extends ToolCommand {
       .split("\t")
       .zipWithIndex
       .toMap
-    val nameId = cmdargs.contigNameHeader.map(x => headers(x))
+    val nameId = cmdArgs.contigNameHeader.map(x => headers(x))
     val lengthId = headers.get("Sequence-Length")
 
     val baseUrlEutils = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
 
-    val fastaWriter = new PrintWriter(cmdargs.outputFile)
+    val fastaWriter = new PrintWriter(cmdArgs.outputFile)
 
     val allContigs = assamblyReport.filter(!_.startsWith("#"))
@@ -101,23 +101,23 @@ object DownloadNcbiAssembly extends ToolCommand {
     val totalLength = lengthId.map(id => allContigs.map(_.apply(id).toLong).sum)
 
     logger.info(s"${allContigs.size} contigs found")
-    totalLength.foreach(l => logger.info(s"Total length: ${l}"))
+    totalLength.foreach(l => logger.info(s"Total length: $l"))
 
     val filterContigs = allContigs
-      .filter(values => cmdargs.mustNotHave.forall(x => values(headers(x._1)) != x._2))
+      .filter(values => cmdArgs.mustNotHave.forall(x => values(headers(x._1)) != x._2))
       .filter(values =>
-        cmdargs.mustHaveOne.exists(x => values(headers(x._1)) == x._2) || cmdargs.mustHaveOne.isEmpty)
+        cmdArgs.mustHaveOne.exists(x => values(headers(x._1)) == x._2) || cmdArgs.mustHaveOne.isEmpty)
 
     val filterLength = lengthId.map(id => filterContigs.map(_.apply(id).toLong).sum)
 
     logger.info(s"${filterContigs.size} contigs left after filtering")
-    filterLength.foreach(l => logger.info(s"Filtered length: ${l}"))
+    filterLength.foreach(l => logger.info(s"Filtered length: $l"))
 
     filterContigs.foreach { values =>
       val id = if (values(6) == "na") values(4) else values(6)
-      logger.info(s"Start download ${id}")
-      val fastaReader = Source.fromURL(
-        s"${baseUrlEutils}/efetch.fcgi?db=nuccore&id=${id}&retmode=text&rettype=fasta")
+      logger.info(s"Start download $id")
+      val fastaReader = Source.fromURL(
+        s"$baseUrlEutils/efetch.fcgi?db=nuccore&id=$id&retmode=text&rettype=fasta")
       fastaReader
         .getLines()
         .map(x => nameId.map(y => x.replace(">", s">${values(y)} ")).getOrElse(x))
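
Besides the `cmdargs` → `cmdArgs` rename to camelCase, the interpolation edits in this file drop braces that are only needed around expressions, not around simple identifiers. Roughly:

    object InterpolationExample extends App {
      val id = "NC_000913.3" // hypothetical accession, for illustration only
      val contigs = List("chr1", "chr2")

      println(s"Start download $id")            // simple identifier: no braces needed
      println(s"${contigs.size} contigs found") // expression: braces required
    }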
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala
@@ -30,7 +30,7 @@ object ExtractAlignedFastq extends ToolCommand {
   type FastqInput = (FastqRecord, Option[FastqRecord])
 
   /** Get the FastqRecord ID */
-  def fastqId(rec: FastqRecord) = rec.getReadHeader.split(" ")(0)
+  def fastqId(rec: FastqRecord): String = rec.getReadHeader.split(" ")(0)
 
   /**
     * Function to create iterator over Interval given input interval string
@@ -97,7 +97,7 @@ object ExtractAlignedFastq extends ToolCommand {
       inAlnReader.getFileHeader.getSequenceIndex(name) match {
         case x if x >= 0 => x
-        case otherwise =>
+        case _ =>
           throw new IllegalArgumentException(
             "Chromosome " + name + " is not found in the alignment file")
       }
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractTagsFromGtf.scala
@@ -8,7 +8,7 @@ import nl.lumc.sasc.biopet.utils.annotation.Feature
 import scala.io.Source
 
 /**
-  * Created by pjvan_thof on 8-6-17.
+  * Created by pjvanthof on 8-6-17.
   */
 object ExtractTagsFromGtf extends ToolCommand {
   case class Args(outputFile: File = null,
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqFilter.scala
@@ -60,9 +60,8 @@ object FastqFilter extends ToolCommand {
     var total = 0
     var kept = 0
     for (record <- reader.iterator()) {
-      if (cmdArgs.idRegex
-            .map(_.findFirstIn(record.getReadHeader.takeWhile(_ != ' ')).isDefined)
-            .getOrElse(true)) {
+      if (cmdArgs.idRegex.forall(
+            _.findFirstIn(record.getReadHeader.takeWhile(_ != ' ')).isDefined)) {
         writer.write(record)
         kept += 1
       }
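
The FastqFilter change (and the matching one in FindOverlapMatch.scala further down) replaces `option.map(pred).getOrElse(true)` with the equivalent `option.forall(pred)`: both are true when the Option is empty and otherwise apply the predicate. A small sketch with a hypothetical header filter:

    import scala.util.matching.Regex

    object ForallExample extends App {
      def keep(idRegex: Option[Regex], header: String): Boolean =
        idRegex.forall(_.findFirstIn(header).isDefined) // None means: keep everything

      println(keep(None, "read1"))               // true, no filter configured
      println(keep(Some("^read".r), "read1"))    // true, header matches
      println(keep(Some("^read".r), "control1")) // false, header does not match
    }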
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala
@@ -58,7 +58,7 @@ object FastqSplitter extends ToolCommand {
     var counter: Long = 0
     while (reader.hasNext) {
       for (writer <- output) {
-        for (t <- 1 to groupSize if reader.hasNext) {
+        for (_ <- 1 to groupSize if reader.hasNext) {
           writer.write(reader.next())
           counter += 1
           if (counter % 1000000 == 0) logger.info(counter + " reads processed")
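
FastqSplitter.scala (and later MpileupToVcf.scala and MergeAlleles.scala) apply the same idea to for-comprehension generators: when the bound value only drives the iteration, `_` marks it as intentionally unused. For instance:

    object RepeatExample extends App {
      val groupSize = 3
      var written = 0

      for (_ <- 1 to groupSize) { // the index value itself is never read
        written += 1
      }
      println(written) // 3
    }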
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala
@@ -29,7 +29,7 @@ object FastqSync extends ToolCommand {
   /** Implicit class to allow for lazy retrieval of FastqRecord ID without any read pair mark */
   private implicit class FastqPair(fq: FastqRecord) {
-    lazy val fragId = idRegex.split(fq.getReadHeader.split(" ")(0))(0)
+    lazy val fragId: String = idRegex.split(fq.getReadHeader.split(" ")(0))(0)
   }
 
   /**
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindOverlapMatch.scala
@@ -47,7 +47,7 @@ object FindOverlapMatch extends ToolCommand {
     opt[Double]('c', "cutoff") required () unbounded () valueName "<value>" action { (x, c) =>
       c.copy(cutoff = x)
     } text "minimum value to report it as pair"
-    opt[Unit]("use_same_names") unbounded () valueName "<value>" action { (x, c) =>
+    opt[Unit]("use_same_names") unbounded () valueName "<value>" action { (_, c) =>
       c.copy(filterSameNames = false)
     } text "Do not compare samples with the same name"
     opt[String]("rowSampleRegex") unbounded () valueName "<regex>" action { (x, c) =>
@@ -82,12 +82,11 @@ object FindOverlapMatch extends ToolCommand {
       case _ => sys.process.stdout
     }
 
-    for (columnSample <- samplesColumnHeader
-         if cmdArgs.columnSampleRegex.map(_.findFirstIn(columnSample._1).isDefined).getOrElse(true)) {
+    for (columnSample <- samplesColumnHeader
+         if cmdArgs.columnSampleRegex.forall(_.findFirstIn(columnSample._1).isDefined)) {
       val buffer = ListBuffer[(String, Double)]()
       for (rowSample <- samplesRowHeader
-           if cmdArgs.rowSampleRegex
-             .map(_.findFirstIn(rowSample._1).isDefined)
-             .getOrElse(true)) {
+           if cmdArgs.rowSampleRegex.forall(_.findFirstIn(rowSample._1).isDefined)) {
         val value = data(columnSample._2)(rowSample._2).toDouble
         if (value >= cmdArgs.cutoff && (!cmdArgs.filterSameNames || columnSample._2 != rowSample._2)) {
           buffer.+=((rowSample._1, value))
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala
@@ -100,7 +100,7 @@ object FindRepeatsPacBio extends ToolCommand {
     }
     bamIter.close()
     commandArgs.outputFile match {
-      case Some(file) => {
+      case Some(file) =>
         val writer = new PrintWriter(file)
         writer.println(header.mkString("\t"))
         writer.println(
@@ -119,8 +119,7 @@ object FindRepeatsPacBio extends ToolCommand {
             notSpan).mkString("\t"))
         writer.close()
-      }
-      case _ => {
+      case _ =>
         println(header.mkString("\t"))
         println(List(
@@ -137,7 +136,6 @@ object FindRepeatsPacBio extends ToolCommand {
             deletions.mkString("/"),
             notSpan).mkString("\t"))
-      }
     }
   }
 }
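
The three FindRepeatsPacBio.scala hunks drop redundant braces around `case` bodies: between one `case` arrow and the next, the statements already form a block, so the extra `{ ... }` only adds a nesting level and an extra closing brace later. Schematically:

    object CaseBraces extends App {
      val outputFile: Option[String] = None

      outputFile match {
        case Some(file) => // braces after the arrow would be legal but redundant
          println(s"writing to $file")
          println("done")
        case _ =>
          println("writing to stdout")
      }
    }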
@@ -152,7 +150,7 @@ object FindRepeatsPacBio extends ToolCommand {
     var ins: List[Ins] = Nil
     var samRecord: SAMRecord = _
 
-    override def toString = {
+    override def toString: String = {
       "id: " + samRecord.getReadName + "  beginDel: " + beginDel + "  endDel: " + endDel + "  dels: " + dels + "  ins: " + ins
     }
   }
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala
@@ -62,7 +62,7 @@ object GensToVcf extends ToolCommand {
     opt[String]('c', "contig") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
       c.copy(contig = x)
     } text "contig of impute file"
-    opt[Unit]("sortInput") maxOccurs 1 action { (x, c) =>
+    opt[Unit]("sortInput") maxOccurs 1 action { (_, c) =>
       c.copy(sortInput = true)
     } text "In memory sorting"
   }
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GtfToRefflat.scala
@@ -174,7 +174,7 @@ object GtfToRefflat extends ToolCommand {
     val writer = new PrintWriter(refflatFile)
 
     for {
-      (contig, genes) <- genesBuffer.values.toList.groupBy(_.contig).toList.sortBy(_._1)
+      (_, genes) <- genesBuffer.values.toList.groupBy(_.contig).toList.sortBy(_._1)
       gene <- genes.sortBy(_.start)
       transcript <- gene.transcripts
     } {
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala
@@ -25,6 +25,7 @@ import nl.lumc.sasc.biopet.utils.ToolCommand
 import scala.collection.mutable
 import scala.collection.mutable.ListBuffer
 import scala.io.Source
+import scala.util.matching.Regex
 
 object KrakenReportToJson extends ToolCommand {
@@ -53,7 +54,7 @@ object KrakenReportToJson extends ToolCommand {
   }
 
   var cladeIDs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill(32)(0)
-  val spacePattern = "^( +)".r
+  val spacePattern: Regex = "^( +)".r
   private var lines: Map[Long, KrakenHit] = Map.empty
 
   case class Args(krakenreport: File = null,
@@ -118,7 +119,7 @@ object KrakenReportToJson extends ToolCommand {
       cladeIDs(cladeLevel + 1) = values(4).toLong
-      Map(values(4).toLong -> new KrakenHit(
+      Map(values(4).toLong -> KrakenHit(
         taxonomyID = values(4).toLong,
         taxonomyName = if (skipNames) "" else scientificName.trim,
         cladeCount = values(2).toLong,
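
Dropping `new` in front of `KrakenHit(...)` relies on `KrakenHit` being a case class (or otherwise providing a companion `apply`); calling the generated `apply` is the conventional form. A minimal sketch with simplified fields:

    object CaseClassApply extends App {
      case class KrakenHit(taxonomyID: Long, taxonomyName: String, cladeCount: Long)

      val withNew = new KrakenHit(2L, "Bacteria", 100L) // still valid
      val viaApply = KrakenHit(2L, "Bacteria", 100L)    // idiomatic companion apply

      println(withNew == viaApply) // true: case classes compare structurally
    }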
@@ -168,7 +169,7 @@ object KrakenReportToJson extends ToolCommand {
       }
     })
 
-    val result = Map("unclassified" -> lines(0).toJSON(withChildren = false),
+    val result = Map("unclassified" -> lines(0).toJSON(),
                      "classified" -> lines(1).toJSON(withChildren = true))
 
     mapToJson(result).spaces2
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala
@@ -86,7 +86,7 @@ object MergeAlleles extends ToolCommand {
       else output += variant.getStart -> List(variant)
     }
 
-    for ((k, v) <- SortedMap(output.toSeq: _*)) {
+    for ((_, v) <- SortedMap(output.toSeq: _*)) {
       writer.add(mergeAlleles(v))
     }
   }
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala
@@ -101,12 +101,11 @@ object MergeTables extends ToolCommand {
       .sorted
     output.write((featureName +: samples).mkString("\t") + "\n")
-    features.foreach {
-      case feature =>
-        // get feature values for each sample (order == order of samples in header)
-        val line = feature +: samples.map(results(_).getOrElse(feature, fallback))
-        output.write(line.mkString("\t") + "\n")
+    features.foreach { feature =>
+      // get feature values for each sample (order == order of samples in header)
+      val line = feature +: samples.map(results(_).getOrElse(feature, fallback))
+      output.write(line.mkString("\t") + "\n")
     }
     output.flush()
   }
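
The MergeTables.scala change removes a needless pattern-match lambda: `foreach { case feature => ... }` wraps each element in a one-case partial function, while `foreach { feature => ... }` binds it directly, which is what the warning points at. For example:

    object ForeachLambda extends App {
      val features = List("geneA", "geneB")

      features.foreach { case feature => println(feature) } // compiles, but flagged by linters
      features.foreach { feature => println(feature) }      // plain function literal
    }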
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala
@@ -62,7 +62,7 @@ object MpileupToVcf extends ToolCommand {
     opt[Double]("seqError") action { (x, c) =>
       c.copy(seqError = x)
     }
-    opt[Unit]("refCalls") action { (x, c) =>
+    opt[Unit]("refCalls") action { (_, c) =>
       c.copy(refCalls = true)
     }
   }
@@ -204,9 +204,9 @@ object MpileupToVcf extends ToolCommand {
         for (a <- ad.indices if ad(a) > (if (max >= 0) ad(max) else -1) && !gt.contains(a))
           max = a
         val f = ad(max).toDouble / left
-        for (a <- 0 to floor(f).toInt if gt.size < commandArgs.ploidy) gt.append(max)
+        for (_ <- 0 to floor(f).toInt if gt.size < commandArgs.ploidy) gt.append(max)
         if (f - floor(f) >= commandArgs.homoFraction) {
-          for (b <- p to commandArgs.ploidy if gt.size < commandArgs.ploidy) gt.append(max)
+          for (_ <- p to commandArgs.ploidy if gt.size < commandArgs.ploidy) gt.append(max)
         }
         left -= ad(max)
       }
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MultiCoverage.scala
@@ -33,7 +33,7 @@ object MultiCoverage extends ToolCommand {
       (x, c) => c.copy(outputFile = x)
     } text "output file"
-    opt[Unit]("mean") unbounded () valueName "<file>" action { (x, c) =>
+    opt[Unit]("mean") unbounded () valueName "<file>" action { (_, c) =>
       c.copy(mean = true)
     } text "By default total bases is outputed, enable this option make the output relative to region length"
   }
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/NcbiReportToContigMap.scala
@@ -67,10 +67,6 @@ object NcbiReportToContigMap extends ToolCommand {
       .zipWithIndex
       .toMap
-    val allContigs = assamblyReport
-      .filter(!_.startsWith("#"))
-      .map(_.split("\t"))
-
     val altNameIds = cmdargs.names.filter(_ != cmdargs.contigNameHeader).map(headers)
     val nameId = headers(cmdargs.contigNameHeader)
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala
@@ -43,7 +43,7 @@ object RegionAfCount extends ToolCommand {
       (x, c) => c.copy(outputPrefix = x)
     }
-    opt[Unit]('s', "scatterPlot") unbounded () action { (x, c) =>
+    opt[Unit]('s', "scatterPlot") unbounded () action { (_, c) =>
       c.copy(scatterpPlot = true)
     }
     opt[File]('V', "vcfFile") unbounded () minOccurs 1 action { (x, c) =>
@@ -116,7 +116,7 @@ object RegionAfCount extends ToolCommand {
         afCounts.toMap
       }).toMap
 
-    logger.info(s"Done reading, ${c} regions")
+    logger.info(s"Done reading, $c regions")
 
     logger.info("Writing output files")