Merge branch 'update-issue371' into 'develop'

Add updates to address issue #371 See #371 for reference Fixes #371 See merge request !438

Merge branch 'update-issue371' into 'develop'
7bb6eb02 · Peter van 't Hof · e1fe4046 · 00ba1cf6 · 7bb6eb02 · 7bb6eb02
Commit 7bb6eb02 authored 8 years ago by Peter van 't Hof
--- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala
+++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala
@@ -192,7 +192,7 @@ object ExtractAlignedFastq extends ToolCommand {
    opt[String]('r', "interval") required () unbounded () valueName "<interval>" action { (x, c) =>
      // yes, we are appending and yes it's O(n) ~ preserving order is more important than speed here
      c.copy(intervals = c.intervals :+ x)
-    } text "Interval strings"
+    } text "Interval strings (e.g. chr1:1-100)"
    opt[File]('i', "in1") required () valueName "<fastq>" action { (x, c) =>
      c.copy(inputFastq1 = x)
@@ -220,7 +220,11 @@ object ExtractAlignedFastq extends ToolCommand {
    opt[Int]('s', "read_suffix_length") optional () action { (x, c) =>
      c.copy(commonSuffixLength = x)
-    } text "Length of common suffix from each read pair (default: 0)"
+    } text
+      """Length of suffix mark from each read pair (default: 0). This is used for distinguishing read pairs with
+         different suffices. For example, if your FASTQ records end with `/1` for the first pair and `/2` for the
+         second pair, the value of `read_suffix_length` should be 2."
+      """.stripMargin
    note(
      """

--- a/docs/tools/ExtractAlignedFastq.md
+++ b/docs/tools/ExtractAlignedFastq.md
@@ -23,7 +23,7 @@ Usage: ExtractAlignedFastq [options]
  -I <bam> | --input_file <bam>
        Input BAM file
  -r <interval> | --interval <interval>
-        Interval strings
+        Interval strings (e.g. chr1:1-100)
  -i <fastq> | --in1 <fastq>
        Input FASTQ file 1
  -j <fastq> | --in2 <fastq>
@@ -35,15 +35,20 @@ Usage: ExtractAlignedFastq [options]
  -Q <value> | --min_mapq <value>
        Minimum MAPQ of reads in target region to remove (default: 0)
  -s <value> | --read_suffix_length <value>
-        Length of common suffix from each read pair (default: 0)
+        Length of suffix mark from each read pair (default: 0). This is used for distinguishing read pairs with
+        different suffixes. For example, if your FASTQ records end with `/1` for the first pair and `/2` for the
-This tool creates FASTQ file(s) containing reads mapped to the given alignment intervals.
+        second pair, the value of `read_suffix_length` should be 2.
+This tool creates FASTQ file(s) containing reads mapped to the given alignment intervals. A set of FASTQ files that was
+used in creating the BAM file is also required since this is used for retrieving full sequences of FASTQ records which
+map to the given region. This is useful since some of the records may have undergone modifications such as quality
+trimming before alignment. In this case, retrieving the aligned SAM records will only give the modified sequence.
 ~~~
 To run the tool:
 ~~~
 biopet tool ExtractAlignedFastq \
--input_file myBam.bam --in1 myFastq_R1.fastq --out1 myOutFastq_R1.fastq --interval myTarget.bed
+--input_file myBam.bam --in1 myFastq_R1.fastq --out1 myOutFastq_R1.fastq --interval chr5:100-200
 ~~~
 * Note that this tool works for single end and paired end data. The above example can be easily extended for paired end data.
 The only thing one should add is: `--in2 myFastq_R2.fastq --out2 myOutFastq_R2.fastq`