Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
33c611e0
Commit
33c611e0
authored
May 26, 2016
by
Sander Bollen
Browse files
Merge branch 'develop' into fix-bedtools_coverage_sorted
Conflicts: bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
parents
c708df45
160ed2d3
Changes
124
Hide whitespace changes
Inline
Side-by-side
bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
View file @
33c611e0
...
...
@@ -17,15 +17,19 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics
import
java.io.File
import
nl.lumc.sasc.biopet.core.annotations.
{
AnnotationRefFlat
,
RibosomalRefFlat
}
import
nl.lumc.sasc.biopet.core.annotations.
{
AnnotationRefFlat
,
RibosomalRefFlat
}
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.core.
{
BiopetFifoPipe
,
PipelineCommand
,
Reference
,
SampleLibraryTag
}
import
nl.lumc.sasc.biopet.extensions.bedtools.
{
BedtoolsCoverage
,
BedtoolsIntersect
}
import
nl.lumc.sasc.biopet.core.
{
BiopetFifoPipe
,
PipelineCommand
,
Reference
,
SampleLibraryTag
}
import
nl.lumc.sasc.biopet.extensions.bedtools.
{
BedtoolsCoverage
,
BedtoolsIntersect
,
BedtoolsSort
}
import
nl.lumc.sasc.biopet.extensions.picard._
import
nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
import
nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import
nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
import
nl.lumc.sasc.biopet.utils.intervals.BedCheck
import
org.broadinstitute.gatk.queue.QScript
class
BamMetrics
(
val
root
:
Configurable
)
extends
QScript
...
...
@@ -74,6 +78,8 @@ class BamMetrics(val root: Configurable) extends QScript
/** executed before script */
def
init
()
:
Unit
=
{
inputFiles
:+=
new
InputFile
(
inputBam
)
ampliconBedFile
.
foreach
(
BedCheck
.
checkBedFileToReference
(
_
,
referenceFasta
(),
biopetError
=
true
))
roiBedFiles
.
foreach
(
BedCheck
.
checkBedFileToReference
(
_
,
referenceFasta
(),
biopetError
=
true
))
}
/** Script to add jobs */
...
...
bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala
View file @
33c611e0
...
...
@@ -61,9 +61,9 @@ class BamMetricsTest extends TestNGSuite with Matchers {
def
testBamMetrics
(
rois
:
Int
,
amplicon
:
Boolean
,
rna
:
Boolean
,
wgs
:
Boolean
)
=
{
val
map
=
ConfigUtils
.
mergeMaps
(
Map
(
"output_dir"
->
BamMetricsTest
.
outputDir
,
"rna_metrics"
->
rna
,
"wgs_metrics"
->
wgs
),
Map
(
BamMetricsTest
.
executables
.
toSeq
:
_
*
))
++
(
if
(
amplicon
)
Map
(
"amplicon_bed"
->
"amplicon.bed"
)
else
Map
())
++
(
if
(
amplicon
)
Map
(
"amplicon_bed"
->
BamMetricsTest
.
ampliconBed
.
getAbsolutePath
)
else
Map
())
++
(
if
(
rna
)
Map
(
"annotation_refflat"
->
"transcripts.refFlat"
)
else
Map
())
++
Map
(
"regions_of_interest"
->
(
1
to
rois
).
map
(
"roi_"
+
_
+
".bed"
).
toList
)
Map
(
"regions_of_interest"
->
(
1
to
rois
).
map
(
BamMetricsTest
.
roi
(
_
).
getAbsolutePath
).
toList
)
val
bammetrics
:
BamMetrics
=
initPipeline
(
map
)
bammetrics
.
inputBam
=
BamMetricsTest
.
bam
...
...
@@ -94,6 +94,14 @@ object BamMetricsTest {
val
bam
=
new
File
(
outputDir
,
"input"
+
File
.
separator
+
"bla.bam"
)
Files
.
touch
(
bam
)
val
ampliconBed
=
new
File
(
outputDir
,
"input"
+
File
.
separator
+
"amplicon_bed.bed"
)
Files
.
touch
(
ampliconBed
)
def
roi
(
i
:
Int
)
:
File
=
{
val
roi
=
new
File
(
outputDir
,
"input"
+
File
.
separator
+
s
"roi${i}.bed"
)
Files
.
touch
(
roi
)
roi
}
private
def
copyFile
(
name
:
String
)
:
Unit
=
{
val
is
=
getClass
.
getResourceAsStream
(
"/"
+
name
)
...
...
basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala
View file @
33c611e0
...
...
@@ -26,7 +26,6 @@ import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
import
nl.lumc.sasc.biopet.extensions.
{
Cat
,
Raxml
,
RunGubbins
}
import
nl.lumc.sasc.biopet.pipelines.shiva.Shiva
import
nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.queue.QScript
...
...
biopet-aggregate/pom.xml
View file @
33c611e0
...
...
@@ -17,6 +17,7 @@
<modules>
<module>
../biopet-core
</module>
<module>
../generate-indexes
</module>
<module>
../biopet-package
</module>
<module>
../bammetrics
</module>
<module>
../flexiprep
</module>
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala
View file @
33c611e0
...
...
@@ -73,6 +73,8 @@ class BiopetFifoPipe(val root: Configurable,
_pipesJobs
:::=
commands
_pipesJobs
=
_pipesJobs
.
distinct
analysisName
=
commands
.
map
(
_
.
analysisName
).
mkString
(
"_"
)
}
override
def
beforeCmd
()
:
Unit
=
{
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala
View file @
33c611e0
...
...
@@ -27,7 +27,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
*/
class
BiopetPipe
(
val
commands
:
List
[
BiopetCommandLineFunction
])
extends
BiopetCommandLineFunction
{
@Input
@Input
(
required
=
false
)
lazy
val
input
:
List
[
File
]
=
try
{
commands
.
flatMap
(
_
.
inputs
)
}
catch
{
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala
View file @
33c611e0
...
...
@@ -132,7 +132,7 @@ trait ReportBuilder extends ToolCommand {
logger
.
info
(
"Start"
)
val
argsParser
=
new
OptParser
val
cmdArgs
:
Args
=
argsParser
.
parse
(
args
,
Args
())
getOrElse
sys
.
exit
(
1
)
val
cmdArgs
:
Args
=
argsParser
.
parse
(
args
,
Args
())
getOrElse
(
throw
new
IllegalArgumentException
)
require
(
cmdArgs
.
outputDir
.
exists
(),
"Output dir does not exist"
)
require
(
cmdArgs
.
outputDir
.
isDirectory
,
"Output dir is not a directory"
)
...
...
@@ -205,7 +205,7 @@ trait ReportBuilder extends ToolCommand {
val
pageOutputDir
=
new
File
(
outputDir
,
path
.
mkString
(
File
.
separator
))
pageOutputDir
.
mkdirs
()
val
rootPath
=
"./"
+
Array
.
fill
(
path
.
size
)(
"
src/main
"
).
mkString
(
""
)
val
rootPath
=
"./"
+
Array
.
fill
(
path
.
size
)(
"
../
"
).
mkString
val
pageArgs
=
args
++
page
.
args
++
Map
(
"page"
->
page
,
"path"
->
path
,
...
...
biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/breakdancer/breakdancer2vcf.py
View file @
33c611e0
...
...
@@ -30,19 +30,21 @@ import csv
import
datetime
def
main
(
tsvfile
,
vcffile
):
def
main
(
tsvfile
,
vcffile
,
samplename
):
'''
:param tsvfile: filename of input file.tsv
:type tsvfile: string
:param vcffile: filename of output file.vcf
:type vcffile: string
:param samplename: Name of the sample
:type samplename: string
'''
with
open
(
tsvfile
)
as
reader
:
# Parse file
dictreader
=
_parse_tsvfile
(
reader
)
# Write out file
_format_vcffile
(
dictreader
,
vcffile
)
_format_vcffile
(
dictreader
,
vcffile
,
samplename
)
def
_parse_tsvfile
(
readable
):
'''
...
...
@@ -92,11 +94,11 @@ _tsv_fields = ('Chr1', 'Pos1', 'Orientation1',
_vcf_fields
=
(
'CHROM'
,
'POS'
,
'ID'
,
'REF'
,
'ALT'
,
'QUAL'
,
'FILTER'
,
'INFO'
,
'FORMAT'
,
'default'
)
_vcf_fields
=
[
'CHROM'
,
'POS'
,
'ID'
,
'REF'
,
'ALT'
,
'QUAL'
,
'FILTER'
,
'INFO'
,
'FORMAT'
]
TS_NOW
=
datetime
.
datetime
.
now
()
VCF_HEADER
=
"""##fileformat=VCFv4.
1
VCF_HEADER
=
"""##fileformat=VCFv4.
2
##fileDate={filedate}
##source=breakdancer-max
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
...
...
@@ -106,6 +108,7 @@ VCF_HEADER = """##fileformat=VCFv4.1
##INFO=<ID=NOVEL,Number=0,Type=Flag,Description="Indicates a novel structural variation">
##INFO=<ID=SVEND,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=SVMETHOD,Number=0,Type=String,Description="Program called with">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
...
...
@@ -138,7 +141,7 @@ VCF_HEADER = """##fileformat=VCFv4.1
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"""
.
format
(
filedate
=
TS_NOW
.
strftime
(
"%Y%m%d"
)
)
def
_format_vcffile
(
dictreader
,
vcffile
):
def
_format_vcffile
(
dictreader
,
vcffile
,
samplename
):
'''
Create a pseudo .vcf file based on values read from DictReader instance.
:param dictreader: DictReader instance to read data from
...
...
@@ -148,22 +151,22 @@ def _format_vcffile(dictreader, vcffile):
'''
FORMAT
=
"GT:DP"
with
open
(
vcffile
,
mode
=
'w'
)
as
writer
:
writer
.
write
(
'{header}
\n
#{columns}
\n
'
.
format
(
header
=
VCF_HEADER
,
columns
=
'
\t
'
.
join
(
_vcf_fields
)))
writer
.
write
(
'{header}
\n
#{columns}
\n
'
.
format
(
header
=
VCF_HEADER
,
columns
=
'
\t
'
.
join
(
_vcf_fields
+
[
samplename
]
)))
output_vcf
=
[]
for
line
in
dictreader
:
CHROM
=
line
[
'Chr1'
]
# TODO Figure out whether we have zero or one based positioning
POS
=
int
(
line
[
'Pos1'
])
ALT
=
'
.'
ALT
=
'
<{}>'
.
format
(
line
[
'Type'
])
SVEND
=
int
(
line
[
'Pos2'
])
INFO
=
'
PROGRAM
=breakdancer;SVTYPE={}'
.
format
(
line
[
'Type'
])
INFO
=
'
SVMETHOD
=breakdancer;SVTYPE={}'
.
format
(
line
[
'Type'
])
if
line
[
'Type'
]
not
in
[
'CTX'
]:
INFO
+=
';SVLEN={}'
.
format
(
int
(
line
[
'Size'
]))
INFO
+=
";SVEND={}"
.
format
(
SVEND
)
INFO
+=
";END={}"
.
format
(
SVEND
)
# write alternate ALT field for Intrachromosomal translocations
if
line
[
'Type'
]
in
[
'CTX'
]:
ALT
=
"N[{}:{}["
.
format
(
line
[
'Chr2'
],
line
[
'Pos2'
])
...
...
@@ -172,7 +175,7 @@ def _format_vcffile(dictreader, vcffile):
SAMPLEINFO
=
"{}:{}"
.
format
(
'1/.'
,
line
[
'num_Reads'
]
)
# Create record
output_vcf
.
append
([
CHROM
,
POS
,
'.'
,
'
.
'
,
ALT
,
'.'
,
'PASS'
,
INFO
,
FORMAT
,
SAMPLEINFO
])
output_vcf
.
append
([
CHROM
,
POS
,
'.'
,
'
N
'
,
ALT
,
'.'
,
'PASS'
,
INFO
,
FORMAT
,
SAMPLEINFO
])
# Sort all results
output_vcf
.
sort
()
...
...
@@ -184,9 +187,11 @@ def _format_vcffile(dictreader, vcffile):
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'-i'
,
'--breakdancertsv'
,
dest
=
'breakdancertsv'
,
type
=
str
,
help
=
'Breakdancer TSV outputfile'
)
help
=
'Breakdancer TSV outputfile'
)
parser
.
add_argument
(
'-o'
,
'--outputvcf'
,
dest
=
'outputvcf'
,
type
=
str
,
help
=
'Output vcf to'
)
help
=
'Output vcf to'
)
parser
.
add_argument
(
'-s'
,
'--sample'
,
dest
=
'sample'
,
type
=
str
,
help
=
'sample name'
)
args
=
parser
.
parse_args
()
main
(
args
.
breakdancertsv
,
args
.
outputvcf
)
main
(
args
.
breakdancertsv
,
args
.
outputvcf
,
args
.
sample
)
biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/cnmops.R
0 → 100644
View file @
33c611e0
#!/usr/bin/env Rscript
# Runs cn.mops on the BAM file(s) given as positional arguments, restricted to
# the reference sequence passed with --chr.
#
# Outputs:
#   --rawoutput  table of the read counts returned by getReadCountsFromBAM
#   --cnv        table of cnvs(res)
#   --cnr        table of cnvr(res)
#   <chr>/<chr>-segplot-<i>.png  one segmentation plot per BAM file
#   <chr>/<chr>-cnv-<i>.png      one plot per row of cnvr(res)
#
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))

option_list <- list(
    make_option(c("--rawoutput"), dest = "rawoutput"),
    make_option(c("--cnv"), dest = "cnv"),
    make_option(c("--cnr"), dest = "cnr"),
    make_option(c("--chr"), dest = "chr"),
    make_option(c("--threads"), dest = "threads", default = 8, type = "integer")
)

parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
arguments <- parse_args(parser, positional_arguments = TRUE)
opt <- arguments$options
args <- arguments$args

chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr

# Every positional argument is treated as a BAM file.
BAMFiles <- c(args)

bamDataRanges <- getReadCountsFromBAM(BAMFiles, mode = "paired", refSeqName = chromosome,
                                      WL = 1000, parallel = opt$threads)
write.table(as.data.frame(bamDataRanges), file = opt$rawoutput, quote = FALSE, row.names = FALSE)

res <- cn.mops(bamDataRanges)
res <- calcIntegerCopyNumbers(res)

cnvRegions <- as.data.frame(cnvr(res))
write.table(as.data.frame(cnvs(res)), file = CNVoutput, quote = FALSE, row.names = FALSE)
write.table(cnvRegions, file = CNRoutput, quote = FALSE, row.names = FALSE)

ppi <- 300
plot_margins <- c(3, 4, 1, 2) + 0.1
label_positions <- c(2, 0.5, 0)

# Plots are written into a directory named after the chromosome.
dir.create(chromosome, showWarnings = FALSE, recursive = TRUE, mode = "0744")

# Opens a 16x10 inch PNG device at `ppi` resolution and applies the shared
# single-panel layout used by every plot of this script.
open_plot_device <- function(path) {
    png(file = path, width = 16 * ppi, height = 10 * ppi, res = ppi, bg = "white")
    par(mfrow = c(1, 1))
    par(mar = plot_margins)
    par(mgp = label_positions)
}

# Plot chromosome per sample.
# seq_along() instead of 1:length(): yields no iterations for an empty vector.
for (i in seq_along(BAMFiles)) {
    open_plot_device(file.path(chromosome, paste0(chromosome, "-segplot-", i, ".png")))
    segplot(res, sampleIdx = i)
    dev.off()
}

# Plot cnvr regions.
# seq_len() instead of 1:nrow(): when no regions were detected, 1:0 would
# iterate over c(1, 0) and try to plot regions that do not exist.
for (i in seq_len(nrow(cnvRegions))) {
    open_plot_device(file.path(chromosome, paste0(chromosome, "-cnv-", i, ".png")))
    plot(res, which = i, toFile = TRUE)
    dev.off()
}
biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/cnmops_wes.R
0 → 100644
View file @
33c611e0
#!/usr/bin/env Rscript
# Exome (WES) variant of the cn.mops script: read counts are collected per
# target interval from --targetBed (restricted to the reference sequence given
# with --chr) and analysed with exomecn.mops.
#
# Outputs:
#   --rawoutput  table of the read counts returned by getSegmentReadCountsFromBAM
#   --cnv        table of cnvs(res)
#   --cnr        table of cnvr(res)
#   <chr>/<chr>-segplot-<i>.png  one segmentation plot per BAM file
#   <chr>/<chr>-cnv-<i>.png      one plot per row of cnvr(res)
#
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))

option_list <- list(
    make_option(c("--rawoutput"), dest = "rawoutput"),
    make_option(c("--cnv"), dest = "cnv"),
    make_option(c("--cnr"), dest = "cnr"),
    make_option(c("--chr"), dest = "chr"),
    make_option(c("--targetBed"), dest = "targetBed"),
    make_option(c("--threads"), dest = "threads", default = 8, type = "integer")
)

parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
arguments <- parse_args(parser, positional_arguments = TRUE)
opt <- arguments$options
args <- arguments$args

chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr

# Every positional argument is treated as a BAM file.
BAMFiles <- c(args)

### WES Specific code
segments <- read.table(opt$targetBed, sep = "\t", as.is = TRUE)
# filter the segments by the requested chromosome
segments <- segments[segments[, 1] == chromosome, ]
# NOTE(review): column 2 is used as the range start unchanged; if the BED file
# is 0-based this is one base off from 1-based IRanges coordinates - confirm.
gr <- GRanges(segments[, 1], IRanges(segments[, 2], segments[, 3]))
### END WES Specific code

bamDataRanges <- getSegmentReadCountsFromBAM(BAMFiles, GR = gr, mode = "paired",
                                             parallel = opt$threads)
write.table(as.data.frame(bamDataRanges), file = opt$rawoutput, quote = FALSE, row.names = FALSE)

res <- exomecn.mops(bamDataRanges)
res <- calcIntegerCopyNumbers(res)

cnvRegions <- as.data.frame(cnvr(res))
write.table(as.data.frame(cnvs(res)), file = CNVoutput, quote = FALSE, row.names = FALSE)
write.table(cnvRegions, file = CNRoutput, quote = FALSE, row.names = FALSE)

ppi <- 300
plot_margins <- c(3, 4, 1, 2) + 0.1
label_positions <- c(2, 0.5, 0)

# Plots are written into a directory named after the chromosome.
dir.create(chromosome, showWarnings = FALSE, recursive = TRUE, mode = "0744")

# Opens a 16x10 inch PNG device at `ppi` resolution and applies the shared
# single-panel layout used by every plot of this script.
open_plot_device <- function(path) {
    png(file = path, width = 16 * ppi, height = 10 * ppi, res = ppi, bg = "white")
    par(mfrow = c(1, 1))
    par(mar = plot_margins)
    par(mgp = label_positions)
}

# Plot chromosome per sample.
# seq_along() instead of 1:length(): yields no iterations for an empty vector.
for (i in seq_along(BAMFiles)) {
    open_plot_device(file.path(chromosome, paste0(chromosome, "-segplot-", i, ".png")))
    segplot(res, sampleIdx = i)
    dev.off()
}

# Plot cnvr regions.
# seq_len() instead of 1:nrow(): when no regions were detected, 1:0 would
# iterate over c(1, 0) and try to plot regions that do not exist.
for (i in seq_len(nrow(cnvRegions))) {
    open_plot_device(file.path(chromosome, paste0(chromosome, "-cnv-", i, ".png")))
    plot(res, which = i, toFile = TRUE)
    dev.off()
}
biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_BAFPlot.R
0 → 100644
View file @
33c611e0
# Draws the B-allele-frequency profile of every chromosome found in the input
# table into a single PNG (6 x 4 panel grid).
#
# The input table (--input, with header) is read for the columns Chromosome,
# Position, BAF, A, B, FittedA and FittedB; the image goes to --output.
#
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
library('optparse')

option_list <- list(
    make_option(c("-i", "--input"), dest = "input"),
    make_option(c("-o", "--output"), dest = "output")
)

parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
opt <- parse_args(parser)

#
# Load Data
#

dataTable <- read.table(opt$input, header = TRUE)
BAF <- data.frame(dataTable)
# levels() alone returns NULL when the column was not read as a factor
# (numeric-only chromosome names, or stringsAsFactors = FALSE, the default
# since R 4.0), which silently produced an empty image. Going through
# factor() gives the same levels for factor input and works for the rest.
chromosomes <- levels(factor(dataTable$Chromosome))

ppi <- 300
plot_margins <- c(3, 4, 1, 2) + 0.1
label_positions <- c(2, 0.5, 0)

# Indices j in 2..(length(values) - pres - 1) whose value equals the value
# `pres` positions further on, preceded by index 1 (always included, as in
# the original tutorial script). Comparisons yielding NA are skipped.
repeated_value_indices <- function(values, pres = 1) {
    last <- length(values) - pres - 1
    if (last < 2) {
        return(1)
    }
    candidates <- 2:last
    c(1, candidates[which(values[candidates] == values[candidates + pres])])
}

png(filename = opt$output, width = 16 * ppi, height = 10 * ppi, res = ppi, bg = "white")
par(mfrow = c(6, 4))
par(mar = plot_margins)
par(mgp = label_positions)

for (i in chromosomes) {
    rows <- which(BAF$Chromosome == i)
    if (length(rows) > 0) {
        lBAF <- BAF[rows, ]

        # All positions of this chromosome in the base colour.
        plot(lBAF$Position,
             lBAF$BAF,
             ylim = c(-0.1, 1.1),
             xlab = paste("position, chr", i),
             ylab = "BAF",
             pch = ".",
             col = colors()[1])

        # Re-colour positions with A == 0.5 ...
        sel <- which(lBAF$A == 0.5)
        points(lBAF$Position[sel], lBAF$BAF[sel], pch = ".", col = colors()[92])
        # ... and positions with any other non-negative A.
        sel <- which(lBAF$A != 0.5 & lBAF$A >= 0)
        points(lBAF$Position[sel], lBAF$BAF[sel], pch = ".", col = colors()[62])

        # Overlay the A/B values where consecutive rows agree.
        if (length(lBAF$A) > 4) {
            sel <- repeated_value_indices(lBAF$A)
            points(lBAF$Position[sel], lBAF$A[sel], pch = ".", col = colors()[24], cex = 4)
            points(lBAF$Position[sel], lBAF$B[sel], pch = ".", col = colors()[24], cex = 4)
        }

        # Same overlay for the fitted values.
        if (length(lBAF$FittedA) > 4) {
            sel <- repeated_value_indices(lBAF$FittedA)
            points(lBAF$Position[sel], lBAF$FittedA[sel], pch = ".", col = colors()[463], cex = 4)
            points(lBAF$Position[sel], lBAF$FittedB[sel], pch = ".", col = colors()[463], cex = 4)
        }
    }
}
dev.off()
biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_CNVPlot.R
0 → 100644
View file @
33c611e0
# Plots the normalized copy-number profile of each chromosome found in the
# input ratio table to its own PNG: <output>.<chromosome>.png
#
# Options:
#   -m/--mappability  headerless 4-column table (chrom, start, end, score)
#   -p/--ploidy       integer, default 2
#   -i/--input        ratio table with header; the columns Chromosome, Ratio
#                     and CopyNumber are read here, and the merge relies on the
#                     columns it shares with the mappability track
#   -o/--output       prefix of the generated PNG files
#
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
library('optparse')
library('naturalsort')

option_list <- list(
    make_option(c("-m", "--mappability"), dest = "mappability"),
    make_option(c("-p", "--ploidy"), default = 2, type = "integer", dest = "ploidy"),
    make_option(c("-i", "--input"), dest = "input"),
    make_option(c("-o", "--output"), dest = "output")
)

parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
opt <- parse_args(parser)

#
# Load mappability track
#

mappabilityFile <- opt$mappability
mappabilityTrack <- read.table(mappabilityFile, header = FALSE,
                               col.names = c("chrom", "start", "end", "score"))

# Derive the columns used to join with the ratio table: Start is start + 1 and
# Chromosome is the name with "chr" removed.
mappabilityTrack$Start <- mappabilityTrack$start + 1
mappabilityTrack$Chromosome <- gsub("chr", "", mappabilityTrack$chrom)

#
# Load Data
#

dataTable <- read.table(opt$input, header = TRUE)
input_ratio <- data.frame(dataTable)

# levels() alone returns NULL when the column was not read as a factor
# (numeric-only chromosome names, or stringsAsFactors = FALSE, the default
# since R 4.0), which left no chromosomes to plot. Going through factor()
# gives the same levels for factor input and works for the rest.
chromosomes <- naturalsort(levels(factor(input_ratio$Chromosome)))
input_ratio$Chromosome <- factor(input_ratio$Chromosome, levels = chromosomes, ordered = T)

sorted_ratio <- input_ratio[order(input_ratio$Chromosome), ]
ratio <- merge(sorted_ratio, mappabilityTrack, sort = TRUE)
ratio <- ratio[order(ratio$Chromosome, ratio$Start), ]

ploidy <- opt$ploidy
ppi <- 300
plot_margins <- c(3, 4, 1, 2) + 0.1
label_positions <- c(2, 0.5, 0)

# Cap ratios at maxLevelToPlot so outliers stay inside the y-axis range.
# Vectorized: also copes with zero rows and NA ratios, on which the previous
# element-wise loop over 1:length(...) failed.
maxLevelToPlot <- 3
aboveCap <- which(ratio$Ratio > maxLevelToPlot)
ratio$Ratio[aboveCap] <- maxLevelToPlot

#
# Plot the graphs per chromosome
#

for (i in chromosomes) {
    png(filename = paste(opt$output, ".", i, ".png", sep = ""),
        width = 4 * ppi, height = 2.5 * ppi, res = ppi, bg = "white")
    par(mfrow = c(1, 1))
    par(mar = plot_margins)
    par(mgp = label_positions)

    onChromosome <- ratio$Chromosome == i
    rows <- which(onChromosome)
    if (length(rows) > 0) {
        # All windows of this chromosome in the base colour.
        plot(ratio$Start[rows],
             ratio$Ratio[rows] * ploidy,
             ylim = c(0, maxLevelToPlot * ploidy),
             xlab = paste("position, chr", i),
             ylab = "normalized CN",
             pch = ".",
             col = colors()[88])
        title(outer = TRUE)

        # Windows with CopyNumber above the ploidy.
        rows <- which(onChromosome & ratio$CopyNumber > ploidy)
        points(ratio$Start[rows], ratio$Ratio[rows] * ploidy, pch = ".", col = colors()[136])

        # Of those, the ones that hit the cap are drawn larger.
        rows <- which(onChromosome & ratio$Ratio == maxLevelToPlot & ratio$CopyNumber > ploidy)
        points(ratio$Start[rows], ratio$Ratio[rows] * ploidy, pch = ".", col = colors()[136], cex = 4)

        # Windows with CopyNumber below the ploidy; -1 is excluded.
        rows <- which(onChromosome & ratio$CopyNumber < ploidy & ratio$CopyNumber != -1)
        points(ratio$Start[rows], ratio$Ratio[rows] * ploidy, pch = ".", col = colors()[461])

        #UNCOMMENT HERE TO SEE THE PREDICTED COPY NUMBER LEVEL:
        #tt <- which(ratio$Chromosome==i)
        #points(ratio$Start[tt],ratio$CopyNumber[tt], pch = ".", col = colors()[24],cex=4)
    }

    #tt <- which(ratio$Chromosome==i)
    #UNCOMMENT HERE TO SEE THE EVALUATED MEDIAN LEVEL PER SEGMENT:
    #points(ratio$Start[tt],ratio$MedianRatio[tt]*ploidy, pch = ".", col = colors()[463],cex=4)
    dev.off()
}