From ed71984b423615458671754c7f731a1b9bbe3256 Mon Sep 17 00:00:00 2001 From: Sander Bollen <a.h.b.bollen@lumc.nl> Date: Mon, 26 Feb 2018 16:03:34 +0100 Subject: [PATCH] some docs on split_genome --- Snakefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Snakefile b/Snakefile index f6b91ae..d5b3431 100644 --- a/Snakefile +++ b/Snakefile @@ -45,6 +45,14 @@ BASE_REFFLATS = [basename(x) for x in BEDS] def split_genome(ref, approx_n_chunks=100): + """ + Split genome in chunks. + + Chunks are strings in the format: `<ctg>:<start>-<end>` + These follow the region string format as used by htslib, + which uses _1_-based indexing. + See: http://www.htslib.org/doc/tabix.html + """ fa = Fasta(ref) tot_size = sum([len(x) for x in fa.records.values()]) chunk_size = tot_size//approx_n_chunks -- GitLab