From ed71984b423615458671754c7f731a1b9bbe3256 Mon Sep 17 00:00:00 2001
From: Sander Bollen <a.h.b.bollen@lumc.nl>
Date: Mon, 26 Feb 2018 16:03:34 +0100
Subject: [PATCH] some docs on split_genome

---
 Snakefile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Snakefile b/Snakefile
index f6b91ae..d5b3431 100644
--- a/Snakefile
+++ b/Snakefile
@@ -45,6 +45,14 @@ BASE_REFFLATS = [basename(x) for x in BEDS]
 
 
 def split_genome(ref, approx_n_chunks=100):
+    """
+    Split genome into chunks.
+
+    Chunks are strings in the format: `<ctg>:<start>-<end>`
+    These follow the region string format as used by htslib,
+    which uses _1_-based indexing.
+    See: http://www.htslib.org/doc/tabix.html
+    """
     fa = Fasta(ref)
     tot_size = sum([len(x) for x in fa.records.values()])
     chunk_size = tot_size//approx_n_chunks
-- 
GitLab