Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
d452d397
Commit
d452d397
authored
Jul 24, 2017
by
Peter van 't Hof
Committed by
GitHub
Jul 24, 2017
Browse files
Merge pull request #148 from biopet/fix-BIOPET-732
Fix fastq sync prefixes
parents
9b83c7aa
1c4febed
Changes
4
Hide whitespace changes
Inline
Side-by-side
biopet-core/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSync.scala
View file @
d452d397
...
...
@@ -16,7 +16,7 @@ package nl.lumc.sasc.biopet.extensions.tools
import
java.io.File
import
nl.lumc.sasc.biopet.core.
{
BiopetCommandLineFunction
,
ToolCommandFunction
}
import
nl.lumc.sasc.biopet.core.ToolCommandFunction
import
nl.lumc.sasc.biopet.core.summary.Summarizable
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
...
...
@@ -35,7 +35,11 @@ class FastqSync(val parent: Configurable) extends ToolCommandFunction with Summa
/** Original FASTQ file (read 1 or 2) */
@Input
(
required
=
true
)
var
refFastq
:
File
=
_
var
refFastq1
:
File
=
_
/** Original FASTQ file (read 2) */
@Input
(
required
=
true
)
var
refFastq2
:
File
=
_
/** "Input read 1 FASTQ file" */
@Input
(
required
=
true
)
...
...
@@ -61,7 +65,8 @@ class FastqSync(val parent: Configurable) extends ToolCommandFunction with Summa
override
def
cmdLine
:
String
=
super
.
cmdLine
+
required
(
"-r"
,
refFastq
)
+
required
(
"-r"
,
refFastq1
)
+
required
(
"--ref2"
,
refFastq2
)
+
required
(
"-i"
,
inputFastq1
)
+
required
(
"-j"
,
inputFastq2
)
+
required
(
"-o"
,
outputFastq1
)
+
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala
View file @
d452d397
...
...
@@ -24,12 +24,13 @@ import scala.collection.JavaConverters._
object
FastqSync
extends
ToolCommand
{
/** Regex for capturing read ID ~ taking into account its read pair mark (if present) */
private
val
idRegex
=
"""[_/][12]$"""
.
r
/** Implicit class to allow for lazy retrieval of FastqRecord ID without any read pair mark */
private
implicit
class
FastqPair
(
fq
:
FastqRecord
)
{
lazy
val
fragId
:
String
=
idRegex
.
split
(
fq
.
getReadHeader
.
split
(
" "
)(
0
))(
0
)
lazy
val
fragId
:
String
=
fq
.
getReadHeader
.
split
(
" "
).
head
match
{
case
x
if
x
.
endsWith
(
idSufixes
.
_1
)
=>
x
.
stripSuffix
(
idSufixes
.
_1
)
case
x
if
x
.
endsWith
(
idSufixes
.
_2
)
=>
x
.
stripSuffix
(
idSufixes
.
_2
)
case
x
=>
x
}
}
/**
...
...
@@ -106,11 +107,12 @@ object FastqSync extends ToolCommand {
(
numDiscA
,
numDiscB
,
numKept
)
}
case
class
Args
(
refFastq
:
File
=
new
File
(
""
),
inputFastq1
:
File
=
new
File
(
""
),
inputFastq2
:
File
=
new
File
(
""
),
outputFastq1
:
File
=
new
File
(
""
),
outputFastq2
:
File
=
new
File
(
""
))
case
class
Args
(
refFastq1
:
File
=
null
,
refFastq2
:
File
=
null
,
inputFastq1
:
File
=
null
,
inputFastq2
:
File
=
null
,
outputFastq1
:
File
=
null
,
outputFastq2
:
File
=
null
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
...
...
@@ -122,29 +124,35 @@ object FastqSync extends ToolCommand {
|file will be gzipped when the input is also gzipped.
"""
.
stripMargin
)
opt
[
File
](
'r'
,
"ref
"
)
required
()
valueName
"<fastq>"
action
{
(
x
,
c
)
=>
c
.
copy
(
refFastq
=
x
)
opt
[
File
](
'r'
,
"ref
1"
)
unbounded
(
)
required
()
valueName
"<fastq>"
action
{
(
x
,
c
)
=>
c
.
copy
(
refFastq
1
=
x
)
}
validate
{
x
=>
if
(
x
.
exists
)
success
else
failure
(
"Reference FASTQ file not found"
)
}
text
"Reference FASTQ file"
}
text
"Reference
R1
FASTQ file"
opt
[
File
](
'i'
,
"in1"
)
required
()
valueName
"<fastq>"
action
{
(
x
,
c
)
=>
opt
[
File
](
"ref2"
)
unbounded
()
required
()
valueName
"<fastq>"
action
{
(
x
,
c
)
=>
c
.
copy
(
refFastq2
=
x
)
}
validate
{
x
=>
if
(
x
.
exists
)
success
else
failure
(
"Reference FASTQ file not found"
)
}
text
"Reference R2 FASTQ file"
opt
[
File
](
'i'
,
"in1"
)
unbounded
()
required
()
valueName
"<fastq>"
action
{
(
x
,
c
)
=>
c
.
copy
(
inputFastq1
=
x
)
}
validate
{
x
=>
if
(
x
.
exists
)
success
else
failure
(
"Input FASTQ file 1 not found"
)
}
text
"Input FASTQ file 1"
opt
[
File
](
'j'
,
"in2"
)
required
()
valueName
"<fastq[.gz]>"
action
{
(
x
,
c
)
=>
opt
[
File
](
'j'
,
"in2"
)
unbounded
()
required
()
valueName
"<fastq[.gz]>"
action
{
(
x
,
c
)
=>
c
.
copy
(
inputFastq2
=
x
)
}
validate
{
x
=>
if
(
x
.
exists
)
success
else
failure
(
"Input FASTQ file 2 not found"
)
}
text
"Input FASTQ file 2"
opt
[
File
](
'o'
,
"out1"
)
required
()
valueName
"<fastq[.gz]>"
action
{
(
x
,
c
)
=>
opt
[
File
](
'o'
,
"out1"
)
unbounded
()
required
()
valueName
"<fastq[.gz]>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputFastq1
=
x
)
}
text
"Output FASTQ file 1"
opt
[
File
](
'p'
,
"out2"
)
required
()
valueName
"<fastq>"
action
{
(
x
,
c
)
=>
opt
[
File
](
'p'
,
"out2"
)
unbounded
()
required
()
valueName
"<fastq>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputFastq2
=
x
)
}
text
"Output FASTQ file 2"
}
...
...
@@ -164,7 +172,9 @@ object FastqSync extends ToolCommand {
val
commandArgs
:
Args
=
parseArgs
(
args
)
val
refReader
=
new
FastqReader
(
commandArgs
.
refFastq
)
idSufixes
=
findR1R2Suffixes
(
commandArgs
.
refFastq1
,
commandArgs
.
refFastq2
)
val
refReader
=
new
FastqReader
(
commandArgs
.
refFastq1
)
val
AReader
=
new
FastqReader
(
commandArgs
.
inputFastq1
)
val
BReader
=
new
FastqReader
(
commandArgs
.
inputFastq2
)
val
AWriter
=
new
AsyncFastqWriter
(
new
BasicFastqWriter
(
commandArgs
.
outputFastq1
),
3000
)
...
...
@@ -183,4 +193,28 @@ object FastqSync extends ToolCommand {
BWriter
.
close
()
}
}
/**
  * Looks up the read-pair suffixes that distinguish R1 read IDs from R2 read IDs.
  *
  * Only the first record of each file is inspected. The read ID is the header up to
  * the first space. The longest common prefix of the two IDs is dropped and whatever
  * remains of each ID is returned, e.g. IDs "r/1" and "r/2" give ("1", "2").
  * Identical IDs give ("", "").
  *
  * Each reader is closed even when reading fails, so no file handle is leaked when a
  * file cannot be read or contains no records.
  *
  * @param fastqR1 input R1 file
  * @param fastqR2 Input R2 file
  * @return suffix for (R1, R2)
  */
def findR1R2Suffixes(fastqR1: File, fastqR2: File): (String, String) = {

  /** Returns the ID of the first record in the given file, always releasing the reader. */
  def firstReadName(fastq: File): String = {
    val reader = new FastqReader(fastq)
    try reader.next().getReadHeader.split(" ").head
    finally reader.close()
  }

  val r1Name = firstReadName(fastqR1)
  val r2Name = firstReadName(fastqR2)

  // Longest leading run of characters shared by both read IDs.
  val genericName =
    r1Name.zip(r2Name).takeWhile { case (c1, c2) => c1 == c2 }.map(_._1).mkString

  (r1Name.stripPrefix(genericName), r2Name.stripPrefix(genericName))
}
/** Read ID suffixes for (R1, R2), e.g. ("/1", "/2"); set from findR1R2Suffixes and stripped from read IDs by fragId */
private
[
tools
]
var
idSufixes
:
(
String
,
String
)
=
_
}
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala
View file @
d452d397
...
...
@@ -59,6 +59,7 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
bMock
:
FastqReader
,
aOutMock
:
AsyncFastqWriter
,
bOutMock
:
AsyncFastqWriter
)
:
Unit
=
{
FastqSync
.
idSufixes
=
(
"/1"
,
"/2"
)
when
(
refMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
)
when
(
aMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
)
...
...
@@ -105,6 +106,7 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
bMock
:
FastqReader
,
aOutMock
:
AsyncFastqWriter
,
bOutMock
:
AsyncFastqWriter
)
:
Unit
=
{
FastqSync
.
idSufixes
=
(
"/1"
,
"/2"
)
when
(
refMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
)
when
(
aMock
.
iterator
)
thenReturn
recordsOver
()
...
...
@@ -123,6 +125,7 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
bMock
:
FastqReader
,
aOutMock
:
AsyncFastqWriter
,
bOutMock
:
AsyncFastqWriter
)
:
Unit
=
{
FastqSync
.
idSufixes
=
(
"/1"
,
"/2"
)
when
(
refMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
)
when
(
aMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
)
...
...
@@ -141,6 +144,7 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
bMock
:
FastqReader
,
aOutMock
:
AsyncFastqWriter
,
bOutMock
:
AsyncFastqWriter
)
:
Unit
=
{
FastqSync
.
idSufixes
=
(
"/1"
,
"/2"
)
when
(
refMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
)
when
(
aMock
.
iterator
)
thenReturn
recordsOver
(
"2"
,
"3"
)
...
...
@@ -168,6 +172,7 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
bMock
:
FastqReader
,
aOutMock
:
AsyncFastqWriter
,
bOutMock
:
AsyncFastqWriter
)
:
Unit
=
{
FastqSync
.
idSufixes
=
(
"/1"
,
"/2"
)
when
(
refMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
)
when
(
aMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
)
...
...
@@ -195,6 +200,7 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
bMock
:
FastqReader
,
aOutMock
:
AsyncFastqWriter
,
bOutMock
:
AsyncFastqWriter
)
:
Unit
=
{
FastqSync
.
idSufixes
=
(
"/1"
,
"/2"
)
when
(
refMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
)
when
(
aMock
.
iterator
)
thenReturn
recordsOver
(
"2"
,
"3"
)
...
...
@@ -245,6 +251,7 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
bMock
:
FastqReader
,
aOutMock
:
AsyncFastqWriter
,
bOutMock
:
AsyncFastqWriter
)
:
Unit
=
{
FastqSync
.
idSufixes
=
(
"/1"
,
"/2"
)
when
(
refMock
.
iterator
)
thenReturn
recordsOver
(
"1/1"
,
"2/1"
,
"3/1"
)
when
(
aMock
.
iterator
)
thenReturn
recordsOver
(
"2/1"
,
"3/1"
)
...
...
@@ -267,6 +274,7 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
bMock
:
FastqReader
,
aOutMock
:
AsyncFastqWriter
,
bOutMock
:
AsyncFastqWriter
)
:
Unit
=
{
FastqSync
.
idSufixes
=
(
"_1"
,
"_2"
)
when
(
refMock
.
iterator
)
thenReturn
recordsOver
(
"1_1"
,
"2_1"
,
"3_1"
)
when
(
aMock
.
iterator
)
thenReturn
recordsOver
(
"2_1"
,
"3_1"
)
...
...
@@ -289,6 +297,7 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
bMock
:
FastqReader
,
aOutMock
:
AsyncFastqWriter
,
bOutMock
:
AsyncFastqWriter
)
:
Unit
=
{
FastqSync
.
idSufixes
=
(
"/1"
,
"/2"
)
when
(
refMock
.
iterator
)
thenReturn
recordsOver
(
"1 desc1b"
,
"2 desc2b"
,
"3 desc3b"
)
when
(
aMock
.
iterator
)
thenReturn
recordsOver
(
"2 desc2a"
,
"3 desc3a"
)
...
...
@@ -311,6 +320,7 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
bMock
:
FastqReader
,
aOutMock
:
AsyncFastqWriter
,
bOutMock
:
AsyncFastqWriter
)
:
Unit
=
{
FastqSync
.
idSufixes
=
(
"/1"
,
"/2"
)
when
(
refMock
.
iterator
)
thenReturn
recordsOver
(
"1/2 yep"
,
"2/2 yep"
,
...
...
@@ -338,6 +348,8 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
val
args
=
Array
(
"-r"
,
resourcePath
(
"/paired01a.fq"
),
"--ref2"
,
resourcePath
(
"/paired01b.fq"
),
"-i"
,
resourcePath
(
"/paired01a.fq"
),
"-j"
,
...
...
@@ -348,10 +360,34 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
"/tmp/mockout2.fq"
)
val
parsed
=
parseArgs
(
args
)
parsed
.
refFastq
shouldBe
resourceFile
(
"/paired01a.fq"
)
parsed
.
refFastq
1
shouldBe
resourceFile
(
"/paired01a.fq"
)
parsed
.
inputFastq1
shouldBe
resourceFile
(
"/paired01a.fq"
)
parsed
.
inputFastq2
shouldBe
resourceFile
(
"/paired01b.fq"
)
parsed
.
outputFastq1
shouldBe
new
File
(
"/tmp/mockout1.fq"
)
parsed
.
outputFastq2
shouldBe
new
File
(
"/tmp/mockout2.fq"
)
}
@Test
def testMain: Unit = {
  // Throw-away output files; both are removed again when the JVM exits.
  val outR1 = File.createTempFile("temp.", ".fq")
  outR1.deleteOnExit()
  val outR2 = File.createTempFile("temp.", ".fq")
  outR2.deleteOnExit()

  // Command line as (flag, value) pairs, flattened into the argument array.
  val cliArgs: Array[String] = Seq(
    "-r" -> resourcePath("/paired01a.fq"),
    "--ref2" -> resourcePath("/paired01b.fq"),
    "-i" -> resourcePath("/paired01a.fq"),
    "-j" -> resourcePath("/paired01b.fq"),
    "-o" -> outR1.getAbsolutePath,
    "-p" -> outR2.getAbsolutePath
  ).flatMap { case (flag, value) => Seq(flag, value) }.toArray

  // Smoke test: the tool only has to run to completion on the paired fixture files.
  FastqSync.main(cliArgs)
}
}
flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
View file @
d452d397
...
...
@@ -208,7 +208,8 @@ class Flexiprep(val parent: Configurable)
qcCmdR2
.
compress
=
false
val
fqSync
=
new
FastqSync
(
this
)
fqSync
.
refFastq
=
R1_in
fqSync
.
refFastq1
=
R1_in
fqSync
.
refFastq2
=
R2_in
.
get
fqSync
.
inputFastq1
=
qcCmdR1
.
output
fqSync
.
inputFastq2
=
qcCmdR2
.
output
fqSync
.
outputFastq1
=
new
File
(
outDir
,
fastqR1Qc
.
getName
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment