seqtk.py 1.35 KB
Newer Older
Sander Bollen's avatar
Sander Bollen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
"""
Little script for running seqtk with conda

Conda directives can't be used with a run directive,
so they must be combined with a script directive instead.

This script assumes the following:
 - a `snakemake` object exists,
 - this object has the following attributes:
    - input: a list of two items:
        1. output of fastq-count as path to json file
        2. a fastq file to be sub-sampled
    - output: a list of one item containing path to output file
    - params: a list of one item containing the max number of bases
 - a `shell` function exists

This will _not_ work outside of a snakemake context.
"""
import json
Sander Bollen's avatar
Sander Bollen committed
20
from snakemake import shell
Sander Bollen's avatar
Sander Bollen committed
21
22
23
24
25


def subsample(json_path, fastq_path, opath, max_bases):
    """Sub-sample a fastq file so it holds at most `max_bases` bases.

    The total number of bases in the input is read from the ``bases`` key
    of the JSON file at `json_path`. When no limit is given, or the input
    does not exceed the limit, `opath` is created as a symlink to
    `fastq_path`. Otherwise ``seqtk sample`` (fixed seed 100) is run with
    the fraction ``max_bases / bases`` and its output is gzipped to `opath`.

    The command is printed and then executed through the module-level
    `shell` function.

    :param json_path: path to a JSON file containing a ``bases`` entry
    :param fastq_path: path to the fastq file to be sub-sampled
    :param opath: path of the output file
    :param max_bases: maximum number of bases to keep; ``""`` or ``None``
        means no limit
    :raises KeyError: if the JSON file has no ``bases`` entry
    :raises ValueError: if `max_bases` or the ``bases`` value is not numeric
    """
    # Imported here so the function does not depend on edits elsewhere
    # in the file.
    import os
    import shlex

    with open(json_path) as handle:
        bases = json.load(handle)['bases']

    # `frac` stays None when everything should be kept.
    frac = None
    if not (max_bases == "" or max_bases is None):
        total = float(bases)
        # An input with zero bases has nothing to sample; skipping the
        # division avoids a ZeroDivisionError and falls through to linking.
        if total > 0:
            frac = int(max_bases) / total

    # Quote the paths so whitespace or shell metacharacters cannot break
    # (or inject into) the command line.
    quoted_out = shlex.quote(str(opath))
    if frac is None or frac >= 1:
        # A symlink target is resolved relative to the directory of the
        # link itself, so a relative source path would dangle whenever the
        # output lives in another directory. Link to the absolute path.
        source = shlex.quote(os.path.abspath(str(fastq_path)))
        cmd = "ln -s {0} {1}".format(source, quoted_out)
    else:
        source = shlex.quote(str(fastq_path))
        cmd = "seqtk sample -s100 {0} {1} | gzip -c > {2}".format(
            source, frac, quoted_out
        )

    print("executing")
    print(cmd)
    shell(cmd)
Sander Bollen's avatar
Sander Bollen committed
40
41
42
43
44
45
46


# Script entry point: the `snakemake` object is expected to be provided by
# snakemake's script directive (see the module docstring for its layout).
subsample(
    json_path=snakemake.input[0],
    fastq_path=snakemake.input[1],
    opath=snakemake.output[0],
    max_bases=snakemake.params[0],
)