From 5d6917a827891b58f2e30c2ec16ca8c24a7dacd0 Mon Sep 17 00:00:00 2001
From: samnooij <s.nooij@lumc.nl>
Date: Tue, 17 May 2022 15:20:50 +0200
Subject: [PATCH] Add samples helper scripts

---
 README.md                          | 13 ++++++++++++
 bin/generate_sample_yaml.sh        | 10 ++++++++++
 bin/paste_samples_in_parameters.sh | 32 ++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+)
 create mode 100644 bin/generate_sample_yaml.sh
 create mode 100644 bin/paste_samples_in_parameters.sh

diff --git a/README.md b/README.md
index 4c4b454..aaa008c 100644
--- a/README.md
+++ b/README.md
@@ -118,6 +118,19 @@ do
 done
 ```
 
+**Note: there are two helper scripts to automatically put sample**
+**names in the parameters.yaml file.**  
+These scripts assume that:  
+1. Your data files are stored in `data/raw/`  
+2. Your data files have names like `{sample}-trimmed_R1.fastq.gz`
+
+If those are both true for your data, use:
+
+```bash
+bash bin/generate_sample_yaml.sh
+bash bin/paste_samples_in_parameters.sh -f
+```
+
 Next is the location of your input files
 (assembled scaffolds as `.fasta`) and
 optional filename suffix:
diff --git a/bin/generate_sample_yaml.sh b/bin/generate_sample_yaml.sh
new file mode 100644
index 0000000..a86097b
--- /dev/null
+++ b/bin/generate_sample_yaml.sh
@@ -0,0 +1,10 @@
+#! /usr/bin/env bash
+
+# Write config/samples.yaml: a 'samples:' list with one entry per data/raw/*_R1.fastq.gz file
+echo "samples:" > config/samples.yaml
+
+for sample in data/raw/*_R1.fastq.gz; do
+    [[ -e "${sample}" ]] || continue # glob matched nothing: skip the literal pattern
+    name="$(basename -s "-trimmed_R1.fastq.gz" "${sample}")" # strip directory and suffix
+    printf '  - %s\n' "${name}" # fixed format string, so a '%' or '\' in the name stays literal
+done >> config/samples.yaml
diff --git a/bin/paste_samples_in_parameters.sh b/bin/paste_samples_in_parameters.sh
new file mode 100644
index 0000000..91abca4
--- /dev/null
+++ b/bin/paste_samples_in_parameters.sh
@@ -0,0 +1,32 @@
+#! /usr/bin/env bash
+
+# Paste config/samples.yaml into config/parameters.yaml: below its first 3 lines,
+# replacing everything up to the 'input_directory:' line. Without -f this is only
+# done if parameters.yaml has no 'samples:' line and samples.yaml is non-empty.
+
+start_line=$(grep -n -m 1 "input_directory:" config/parameters.yaml | cut -f 1 -d ':') # first match only
+
+function paste_samples_in_parameters() {
+    # Rebuild config/parameters.yaml (old version kept as parameters-backup.yaml); check inputs first
+    [[ -s config/samples.yaml && "${start_line}" =~ ^[0-9]+$ ]] || { echo "Cannot paste: config/samples.yaml is missing/empty or no unique 'input_directory:' line was found in config/parameters.yaml" >&2; return 1; }
+    mv config/parameters.yaml config/parameters-backup.yaml || return 1
+    { head -n 3 config/parameters-backup.yaml; cat config/samples.yaml; echo; # first 3 lines, samples, empty line
+      tail -n "+${start_line}" config/parameters-backup.yaml; } > config/parameters.yaml # then 'input_directory:' onwards
+}
+
+# Main: paste the samples; exits with status 1 if a paste was attempted but failed
+if [[ "${1:-}" == "-f" ]]; then # if the -f option is provided ('force')
+# Just do the pasting, but claim success only when it really succeeded
+    paste_samples_in_parameters || { echo "Failed to paste samples in config/parameters.yaml" >&2; exit 1; }
+    echo "Force pasted samples in config/parameters.yaml" >&2
+else
+# Paste only if grep finds no 'samples:' line (exit status 1 exactly; 2 means
+# grep itself failed) and config/samples.yaml exists and is size > 0
+    grep -xq "samples:" config/parameters.yaml; grep_status=$?
+    if [[ ${grep_status} == 1 && -s config/samples.yaml ]]; then
+        paste_samples_in_parameters || { echo "Failed to paste samples in config/parameters.yaml" >&2; exit 1; }
+        echo "Pasted samples in config/parameters.yaml" >&2
+    else
+        echo "Either 'samples:' exists in config/parameters.yaml or there is no config/samples.yaml file" >&2
+    fi
+fi
-- 
GitLab