Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Klinische Genetica
capture-lumc
hutspot
Commits
a191f581
Commit
a191f581
authored
Feb 26, 2018
by
Sander Bollen
Browse files
write seqtk as bash script
parent
9256dcbf
Changes
4
Hide whitespace changes
Inline
Side-by-side
Snakefile
View file @
a191f581
...
...
@@ -26,6 +26,7 @@ def fsrc_dir(*args):
covpy = fsrc_dir("src", "covstats.py")
colpy = fsrc_dir("src", "collect_stats.py")
mpy = fsrc_dir("src", "merge_stats.py")
seq = fsrc_dir("src", "seqtk.sh")
if FASTQ_COUNT is None:
fqc = "python {0}".format(fsrc_dir("src", "fastq-count.py"))
...
...
@@ -144,26 +145,28 @@ rule seqtk_r1:
"""Either subsample or link forward fastq file"""
input:
stats=out_path("{sample}/pre_process/{sample}.preqc_count.json"),
fastq=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz")
fastq=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"),
seqtk=seq
params:
max_bases=MAX_BASES
output:
fastq=temp(out_path("{sample}/pre_process/{sample}.sampled_R1.fastq.gz"))
conda: "envs/seqtk.yml"
s
cript: "src/seqtk.py
"
s
hell: "bash {input.seqtk} {input.stats} {input.fastq} {output.fastq} {params.max_bases}
"
rule seqtk_r2:
"""Either subsample or link reverse fastq file"""
input:
stats = out_path("{sample}/pre_process/{sample}.preqc_count.json"),
fastq = out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
fastq = out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz"),
seqtk=seq
params:
max_bases = MAX_BASES
output:
fastq = temp(out_path("{sample}/pre_process/{sample}.sampled_R2.fastq.gz"))
conda: "envs/seqtk.yml"
s
cript: "src/seqtk.py
"
s
hell: "bash {input.seqtk} {input.stats} {input.fastq} {output.fastq} {params.max_bases}
"
# contains original merged fastq files as input to prevent them from being prematurely deleted
...
...
envs/seqtk.yml
View file @
a191f581
...
...
@@ -6,4 +6,6 @@ channels:
-
r
dependencies
:
-
seqtk=1.2=0
-
zlib=1.2.11=0
-
bc=1.06=0
-
sed=4.4=1
-
zlib=1.2.11=0
\ No newline at end of file
src/seqtk.py
deleted
100644 → 0
View file @
9256dcbf
"""
Little script from running seqtk with conda
Conda directives can't be used with a run directive,
so must be combined with script directive in stead.
This script assumes the following:
- a `snakemake` object exists,
- this object has the following attributes:
- input: a list of two items:
1. output of fastq-count as path to json file
2. a fastq file to be sub-sampled
- output: a list of one item containing path to output file
- params: a list of one item containing the max number of bases
- a `shell` function exists
This will _not_ work outside of a snakemake context.
"""
import
json
from
snakemake
import
shell
def
subsample
(
json_path
,
fastq_path
,
opath
,
max_bases
):
with
open
(
json_path
)
as
handle
:
bases
=
json
.
load
(
handle
)[
'bases'
]
if
max_bases
==
""
or
max_bases
is
None
:
frac
=
100
else
:
frac
=
int
(
max_bases
)
/
float
(
bases
)
if
frac
>=
1
:
cmd
=
"ln -s {0} {1}"
.
format
(
fastq_path
,
opath
)
else
:
cmd
=
"seqtk sample -s100 {0} {1} | gzip -c > {2}"
.
format
(
fastq_path
,
frac
,
opath
)
print
(
"executing"
)
print
(
cmd
)
shell
(
cmd
)
subsample
(
snakemake
.
input
[
0
],
snakemake
.
input
[
1
],
snakemake
.
output
[
0
],
snakemake
.
params
[
0
])
src/seqtk.sh
0 → 100644
View file @
a191f581
#!/usr/bin/env bash
count_json
=
${
1
}
input_fastq
=
${
2
}
output_fastq
=
${
3
}
max_bases
=
${
4
}
bases
=
$(
jq
'.bases'
$count_json
)
frac
=
$(
jq
-n
"
$max_bases
/
$bases
"
|
sed
-e
"s:e:E:g"
)
echo
$frac
if
((
$(
echo
"
$frac
> 1"
| bc
-l
)
))
;
then
ln
-s
$input_fastq
$output_fastq
else
seqtk sample
-s100
$frac
$input_fastq
|
gzip
-c
>
$output_fastq
fi
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment