diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala index 2cbf52f44fbaf85495ea1af0bc58f28d17c1e0b3..13622ef41b3691b0959f43211ac232668d32f760 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala @@ -192,7 +192,7 @@ object ExtractAlignedFastq extends ToolCommand { opt[String]('r', "interval") required () unbounded () valueName "<interval>" action { (x, c) => // yes, we are appending and yes it's O(n) ~ preserving order is more important than speed here c.copy(intervals = c.intervals :+ x) - } text "Interval strings" + } text "Interval strings (e.g. chr1:1-100)" opt[File]('i', "in1") required () valueName "<fastq>" action { (x, c) => c.copy(inputFastq1 = x) @@ -220,7 +220,11 @@ object ExtractAlignedFastq extends ToolCommand { opt[Int]('s', "read_suffix_length") optional () action { (x, c) => c.copy(commonSuffixLength = x) - } text "Length of common suffix from each read pair (default: 0)" + } text + """Length of suffix mark from each read pair (default: 0). This is used for distinguishing read pairs with + different suffices. For example, if your FASTQ records end with `/1` for the first pair and `/2` for the + second pair, the value of `read_suffix_length` should be 2." + """.stripMargin note( """ diff --git a/docs/tools/ExtractAlignedFastq.md b/docs/tools/ExtractAlignedFastq.md index c1f069e6bab80ae5a1d3cfe760121e778289e3c4..412fda6a65df71fb3a1a6358815890eb453bba47 100644 --- a/docs/tools/ExtractAlignedFastq.md +++ b/docs/tools/ExtractAlignedFastq.md @@ -23,7 +23,7 @@ Usage: ExtractAlignedFastq [options] -I <bam> | --input_file <bam> Input BAM file -r <interval> | --interval <interval> - Interval strings + Interval strings (e.g. 
chr1:1-100) -i <fastq> | --in1 <fastq> Input FASTQ file 1 -j <fastq> | --in2 <fastq> @@ -35,15 +35,20 @@ Usage: ExtractAlignedFastq [options] -Q <value> | --min_mapq <value> Minimum MAPQ of reads in target region to remove (default: 0) -s <value> | --read_suffix_length <value> - Length of common suffix from each read pair (default: 0) - -This tool creates FASTQ file(s) containing reads mapped to the given alignment intervals. + Length of suffix mark from each read pair (default: 0). This is used for distinguishing read pairs with + different suffices. For example, if your FASTQ records end with `/1` for the first pair and `/2` for the + second pair, the value of `read_suffix_length` should be 2. + +This tool creates FASTQ file(s) containing reads mapped to the given alignment intervals. The FASTQ files that were +used to create the BAM file are also required, since they are used to retrieve the full sequences of the FASTQ records that +map to the given region. This is useful because some of the records may have undergone modifications such as quality +trimming before alignment. In that case, retrieving the aligned SAM records would only give the modified sequences. ~~~ To run the tool: ~~~ biopet tool ExtractAlignedFastq \ ---input_file myBam.bam --in1 myFastq_R1.fastq --out1 myOutFastq_R1.fastq --interval myTarget.bed +--input_file myBam.bam --in1 myFastq_R1.fastq --out1 myOutFastq_R1.fastq --interval chr5:100-200 ~~~ * Note that this tool works for single end and paired end data. The above example can be easily extended for paired end data. The only thing one should add is: `--in2 myFastq_R2.fastq --out2 myOutFastq_R2.fastq`