Commit 487c2c6c authored by rhpvorderman's avatar rhpvorderman

Created documentation and updated mkdocs.yml

parent 3237ec9c
#!/bin/bash
# Script to document not-yet-documented tools in biopet.
# For every tool source under TOOLS_DIR, capture its --help output and render
# a Markdown page into DOCS_DIR via templateparser.py. Existing pages and
# tools absent from the compiled jar are skipped.
set -euo pipefail

# Make sure the BIOPET_JAR environment variable is defined!
: "${BIOPET_JAR:?BIOPET_JAR must point to the compiled biopet jar}"

BIOPET_DIR=~/biopet
TOOLS_DIR="$BIOPET_DIR/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools"
DOCS_DIR="$BIOPET_DIR/docs/tools"
TEMPLATE="$BIOPET_DIR/biopet-autodoc/tools_doc_template.j2"
# Command as an array: no eval needed, arguments stay correctly quoted.
BIOPET=(java -jar "$BIOPET_JAR" tool)

# Private temp file for the captured help text, removed on any exit path.
help_file=$(mktemp)
cleanup() { rm -f -- "$help_file"; }
trap cleanup EXIT

for file in "$TOOLS_DIR"/*.scala; do  # every scala file in the tools section
  file_no_path=${file##*/}
  tool_name=${file_no_path%.scala}
  destfile="$DOCS_DIR/$tool_name.md"
  # Checks if (tool_name).md is already present. Existing files are skipped.
  if [[ ! -f "$destfile" ]]; then
    # java exits non-zero for unknown tools; keep looping and inspect stderr.
    "${BIOPET[@]}" "$tool_name" -h 2> "$help_file" || true
    if grep -Fxq "ERROR: command '$tool_name' does not exist in module 'tool'" "$help_file"; then
      echo "The tool $tool_name does not exist in the latest compiled version of biopet" >&2
    else
      # BUG FIX: '-o' previously received the literal string "destfile"
      # instead of the expanded "$destfile" path.
      ./templateparser.py -t "$TEMPLATE" -o "$destfile" -N "$tool_name" -L "$help_file"
      echo "$destfile created"
    fi
  fi
done
#! /usr/bin/env python2
# Script to document new tools using a jinja2 template.
from jinja2 import Environment, FileSystemLoader
import os
import argparse
import ast
import yaml


def _parse_args(argv=None):
    """Build the CLI parser and parse argv (None means sys.argv[1:])."""
    parser = argparse.ArgumentParser(description="")
    # required=True gives a clean usage error instead of the former
    # 'NoneType' object is not subscriptable crash when a flag was omitted.
    parser.add_argument("-t", type=str, nargs=1, required=True,
                        help="Templatefile to be used")
    parser.add_argument("-o", type=str, nargs=1, required=True,
                        help="output file")
    parser.add_argument("-N", "--tool-name", type=str, nargs=1, required=True,
                        help="name of the tool")
    parser.add_argument("-O", "--tool-output", type=str, nargs='?', default='')
    parser.add_argument("-R", "--tool-run", type=str, nargs='?', default='')
    parser.add_argument("-L", "--tool-option-list", type=str, nargs='?',
                        default='',
                        help="Text file containing the --help output of the tool")
    return parser.parse_args(argv)


def main(argv=None):
    """Render the jinja2 template with the tool's metadata and write it out."""
    arguments = _parse_args(argv)
    template = arguments.t[0]
    output_file = arguments.o[0]
    tool = {
        'name': arguments.tool_name[0],
        'output': arguments.tool_output,
        'run': arguments.tool_run,
        'option_list': '',
    }
    if arguments.tool_option_list:
        # Read the option list file to a raw string; 'with' closes the handle
        # (the original leaked it) and the guard avoids open('') when -L is
        # omitted, which previously raised IOError.
        with open(arguments.tool_option_list, 'r') as help_file:
            tool['option_list'] = help_file.read()
    # Loader is rooted at "/", so -t must be an absolute template path.
    j2_env = Environment(loader=FileSystemLoader("/"))
    rendered = j2_env.get_template(template).render(tool=tool)
    # Write the rendered page; 'with' flushes/closes and no longer shadows
    # the 'file' builtin. The old seek(0) on a fresh "w" handle was a no-op.
    with open(output_file, "w") as out_handle:
        out_handle.write(rendered)


if __name__ == "__main__":
    main()
{# Jinja2 template for one biopet tool documentation page (Markdown).
   Expected context: a 'tool' dict with keys name, introduction,
   option_list, run and output; every key except 'name' falls back to
   the empty string via the |default filter. The '-' in '-#}' strips
   this comment's own trailing newline, so rendered output is
   byte-identical to the template without this comment. -#}
# {{tool.name}}
## Introduction
{{tool.introduction|default('')}}
## Example
The help menu:
~~~
{{tool.option_list|default('')}}
~~~
To run the tool use:
~~~
biopet tool {{tool.name}} {{tool.run|default('')}}
~~~
## Output
{{tool.output|default('')}}
# DownloadNcbiAssembly
## Introduction
## Example
The help menu:
~~~
Usage: DownloadNcbiAssembly [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-a <file> | --assembly_report <file>
refseq ID from NCBI
-o <file> | --output <file>
output Fasta file
--report <file>
where to write report from ncbi
--nameHeader <string>
What column to use from the NCBI report for the name of the contigs.
All columns in the report can be used but this are the most common field to choose from:
- 'Sequence-Name': Name of the contig within the assembly
- 'UCSC-style-name': Name of the contig used by UCSC ( like hg19 )
- 'RefSeq-Accn': Unique name of the contig at RefSeq (default for NCBI)
--mustHaveOne:<key>=<column_name=regex>
This can be used to filter based on the NCBI report, multiple conditions can be given, at least 1 should be true
--mustNotHave:<key>=<column_name=regex>
This can be used to filter based on the NCBI report, multiple conditions can be given, all should be false
~~~
To run the tool use:
~~~
biopet tool DownloadNcbiAssembly
~~~
## Output
# FastqFilter
## Introduction
## Example
The help menu:
~~~
Usage: FastqFilter [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-I <file> | --inputFile <file>
Path to input file
-o <file> | --output <file>
Path to output file
--idRegex <file>
Regex to match ID
~~~
To run the tool use:
~~~
biopet tool FastqFilter
~~~
## Output
# FastqSync
## Introduction
## Example
The help menu:
~~~
FastqSync - Sync paired-end FASTQ files.
This tool works with gzipped or non-gzipped FASTQ files. The output
file will be gzipped when the input is also gzipped.
Usage: FastqSync [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-r <fastq> | --ref <fastq>
Reference FASTQ file
-i <fastq> | --in1 <fastq>
Input FASTQ file 1
-j <fastq[.gz]> | --in2 <fastq[.gz]>
Input FASTQ file 2
-o <fastq[.gz]> | --out1 <fastq[.gz]>
Output FASTQ file 1
-p <fastq> | --out2 <fastq>
Output FASTQ file 2
~~~
To run the tool use:
~~~
biopet tool FastqSync
~~~
## Output
# FindOverlapMatch
## Introduction
## Example
The help menu:
~~~
Usage: FindOverlapMatch [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-i <file> | --input <file>
Input should be a table where the first row and column have the ID's, those can be different
-o <file> | --output <file>
default to stdout
-c <value> | --cutoff <value>
minimum value to report it as pair
--use_same_names
Do not compare samples with the same name
--rowSampleRegex <regex>
Samples in the row should match this regex
--columnSampleRegex <regex>
Samples in the column should match this regex
~~~
To run the tool use:
~~~
biopet tool FindOverlapMatch
~~~
## Output
# GvcfToBed
## Introduction
## Example
The help menu:
~~~
Usage: GvcfToBed [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-I <file> | --inputVcf <file>
Input vcf file
-O <file> | --outputBed <file>
Output bed file
--invertedOutputBed <file>
Output bed file
-S <sample> | --sample <sample>
Sample to consider. Will take first sample on alphabetical order by default
--minGenomeQuality <int>
Minimum genome quality to consider
~~~
To run the tool use:
~~~
biopet tool GvcfToBed
~~~
## Output
# MergeTables
## Introduction
## Example
The help menu:
~~~
MergeTables - Tabular file merging based on feature ID equality.
Usage: MergeTables [options] [<input_tables> ...]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-i <idx1>,<idx2>, ... | --id_column_index <idx1>,<idx2>, ...
Index of feature ID column from each input file (1-based)
-a <idx> | --value_column_index <idx>
Index of column from each input file containing the value to merge (1-based)
-o <path> | --output <path>
Path to output file (default: '-' <stdout>)
-n <name> | --id_column_name <name>
Name of feature ID column in the output merged file (default: feature)
-N <name> | --column_names <name>
Name of feature ID column in the output merged file (default: feature)
-e <ext> | --strip_extension <ext>
Common extension of all input tables to strip (default: empty string)
-m <value> | --num_header_lines <value>
The number of header lines present in all input files (default: 0; no header)
-f <value> | --fallback <value>
The string to use when a value for a feature is missing in one or more sample(s) (default: '-')
-d <value> | --delimiter <value>
The character used for separating columns in the input files (default: '\t')
<input_tables> ...
Input tables to merge
This tool merges multiple tab-delimited files and outputs a single
tab delimited file whose columns are the feature IDs and a single
column from each input files.
Note that in each input file there must not be any duplicate features.
If there are, the tool will only keep one and discard the rest.
~~~
To run the tool use:
~~~
biopet tool MergeTables
~~~
## Output
# PrefixFastq
## Introduction
## Example
The help menu:
~~~
INFO [2017-03-22 10:07:55,583] [PrefixFastq$] - Start
Usage: PrefixFastq [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-i <file> | --input <file>
-o <file> | --output <file>
-s <prefix seq> | --seq <prefix seq>
~~~
To run the tool use:
~~~
biopet tool PrefixFastq
~~~
## Output
# SageCountFastq
## Introduction
## Example
The help menu:
~~~
Usage: SageCountFastq [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-I <file> | --input <file>
-o <file> | --output <file>
~~~
To run the tool use:
~~~
biopet tool SageCountFastq
~~~
## Output
# SeqStat
## Introduction
## Example
The help menu:
~~~
SeqStat - Summarize FastQ
Usage: SeqStat [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-i <fastq> | --fastq <fastq>
FastQ file to generate stats from
-o <json> | --output <json>
File to write output to, if not supplied output go to stdout
~~~
To run the tool use:
~~~
biopet tool SeqStat
~~~
## Output
# SquishBed
## Introduction
## Example
The help menu:
~~~
Usage: SquishBed [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-I <file> | --input <file>
-o <file> | --output <file>
-s | --strandSensitive
~~~
To run the tool use:
~~~
biopet tool SquishBed
~~~
## Output
# SummaryToTsv
## Introduction
## Example
The help menu:
~~~
Usage: SummaryToTsv [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-s <file> | --summary <file>
-o <file> | --outputFile <file>
-p <string> | --path <string>
String that determines the values extracted from the summary. Should be of the format:
<header_name>=<namespace>:<lower_namespace>:<even_lower_namespace>...
-m <root|sample|lib> | --mode <root|sample|lib>
Determines on what level to aggregate data.
root: at the root level
sample: at the sample level
lib: at the library level
~~~
To run the tool use:
~~~
biopet tool SummaryToTsv
~~~
## Output
# ValidateFastq
## Introduction
## Example
The help menu:
~~~
INFO [2017-03-22 10:08:01,355] [ValidateFastq$] - Start
Usage: ValidateFastq [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-i <file> | --fastq1 <file>
-j <file> | --fastq2 <file>
~~~
To run the tool use:
~~~
biopet tool ValidateFastq
~~~
## Output
# ValidateVcf
## Introduction
## Example
The help menu:
~~~
INFO [2017-03-22 10:08:02,092] [ValidateVcf$] - Start
Usage: ValidateVcf [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-i <file> | --inputVcf <file>
Vcf file to check
-R <file> | --reference <file>
Reference fasta to check vcf file against
--disableFail
Do not fail on error. The tool will still exit when encountering an error, but will do so with exit code 0
~~~
To run the tool use:
~~~
biopet tool ValidateVcf
~~~
## Output
# VcfWithVcf
## Introduction
## Example
The help menu:
~~~
INFO [2017-03-22 10:08:02,817] [VcfWithVcf$] - Init phase
Usage: VcfWithVcf [options]
-l <value> | --log_level <value>
Level of log information printed. Possible levels: 'debug', 'info', 'warn', 'error'
-h | --help
Print usage
-v | --version
Print version
-I <file> | --inputFile <file>
-o <file> | --outputFile <file>
-s <file> | --secondaryVcf <file>
-R <file> | --reference <file>
-f <field> or <input_field:output_field> or <input_field:output_field:method> | --field <field> or <input_field:output_field> or <input_field:output_field:method>
If only <field> is given, the field's identifier in the output VCF will be identical to <field>.
By default we will return all values found for a given field.
For INFO fields with type R or A we will take the respective alleles present in the input file.
If a <method> is supplied, a method will be applied over the contents of the field.
In this case, all values will be considered.
The following methods are available:
- max : takes maximum of found value, only works for numeric (integer/float) fields
- min : takes minimum of found value, only works for numeric (integer/float) fields
- unique: takes only unique values
--match <Boolean>
Match alternative alleles; default true
~~~
To run the tool use:
~~~
biopet tool VcfWithVcf
~~~
## Output
......@@ -22,21 +22,37 @@ pages:
- Toucan (Annotation): 'pipelines/toucan.md'
- Tools:
- AnnotateVcfWithBed: 'tools/AnnotateVcfWithBed.md'
- SamplesTsvToConfig: 'tools/SamplesTsvToConfig.md'
- BedToInterval: 'tools/bedtointerval.md'
- BaseCounter: 'tools/BaseCounter.md'
- BastyGenerateFasta: 'tools/BastyGenerateFasta.md'
- BedToInterval: 'tools/bedtointerval.md'
- BedtoolsCoverageToCounts: 'tools/bedtoolscoveragetocounts.md'
- BedToInterval: 'tools/BedToInterval.md'
- BedtoolsCoverageToCounts: 'tools/BedtoolsCoverageToCounts.md'
- BiopetFlagstat: 'tools/BiopetFlagstat.md'
- CheckAllelesVcfInBam: 'tools/CheckAllelesVcfInBam.md'
- DownloadNcbiAssembly: 'tools/DownloadNcbiAssembly.md'
- ExtractAlignedFastq: 'tools/ExtractAlignedFastq.md'
- FastqFilter: 'tools/FastqFilter.md'
- FastqSplitter: 'tools/FastqSplitter.md'
- FastqSync: 'tools/FastqSync.md'
- FindOverlapMatch: 'tools/FindOverlapMatch.md'
- FindRepeatsPacBio: 'tools/FindRepeatsPacBio.md'
- GvcfToBed: 'tools/GvcfToBed.md'
- MergeAlleles: 'tools/MergeAlleles.md'
- MergeTables: 'tools/MergeTables.md'
- MpileupToVcf: 'tools/MpileupToVcf.md'
- Sagetools: 'tools/sagetools.md'
- WipeReads: 'tools/WipeReads.md'
- PrefixFastq: 'tools/PrefixFastq.md'
- SageCountFastq: 'tools/SageCountFastq.md'
- sagetools: 'tools/sagetools.md'
- SamplesTsvToConfig: 'tools/SamplesTsvToConfig.md'
- SeqStat: 'tools/SeqStat.md'
- SquishBed: 'tools/SquishBed.md'
- SummaryToTsv: 'tools/SummaryToTsv.md'
- ValidateFastq: 'tools/ValidateFastq.md'
- ValidateVcf: 'tools/ValidateVcf.md'
- VcfFilter: 'tools/VcfFilter.md'
- VcfToTsv: 'tools/VcfToTsv.md'
- VcfWithVcf: 'tools/VcfWithVcf.md'
- VepNormalizer: 'tools/VepNormalizer.md'
- WipeReads: 'tools/WipeReads.md'
- Release notes:
- 0.7.0: 'releasenotes/release_notes_0.7.0.md'
- 0.6.0: 'releasenotes/release_notes_0.6.0.md'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment