Mirrors / biopet.biopet / Commits

Commit 72985a2d
Authored Aug 11, 2014 by Peter van 't Hof

    Removed unused scripts

Parent: b0cd998e
Changes: 4 files
flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToQualtype.scala
deleted (file mode 100644 → 0)
package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts

import java.io.File

import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction

class FastqcToQualtype(val root: Configurable) extends PythonCommandLineFunction {
  setPythonScript("__init__.py", "pyfastqc/")
  setPythonScript("qual_type_sickle.py")

  @Input(doc = "Fastqc output", shortName = "fastqc", required = true)
  var fastqc_output: File = _

  @Output(doc = "Output file", shortName = "out", required = true)
  var out: File = _

  def cmdLine = {
    getPythonCommand +
      required(fastqc_output.getParent()) +
      " > " +
      required(out)
  }
}
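The wrapper above runs qual_type_sickle.py on the parent directory of the FastQC output and redirects the script's stdout into `out`. A minimal sketch, with hypothetical contents, of the single line a downstream step would find in that file:

    # Illustrative only: the wrapper redirects the "<encoding>\t<offset>" line
    # printed by qual_type_sickle.py (deleted below) into `out`.
    line = 'sanger\t33'                      # hypothetical contents of `out`
    enc, offset = line.strip().split('\t')
    print enc, int(offset)                   # -> sanger 33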
flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Summarize.scala
deleted (file mode 100644 → 0)
package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts

import java.io.File

import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction

class Summarize(val root: Configurable) extends PythonCommandLineFunction {
  setPythonScript("__init__.py", "pyfastqc/")
  setPythonScript("summarize_flexiprep.py")

  @Output(doc = "Output file", shortName = "out", required = true)
  var out: File = _

  var samplea: String = _
  var sampleb: String = _
  var runDir: String = _
  var samplename: String = _
  var trim: Boolean = true
  var clip: Boolean = true

  def cmdLine = {
    var mode: String = ""
    if (clip) mode += "clip"
    if (trim) mode += "trim"
    if (mode.isEmpty) mode = "none"

    getPythonCommand +
      optional("--run-dir", runDir) +
      optional("--sampleb", sampleb) +
      required(samplename) +
      required(mode) +
      required(samplea) +
      required(out)
  }
}
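The clip and trim flags above collapse into the single positional mode argument ('clip', 'trim', 'cliptrim' or 'none') that summarize_flexiprep.py expects as its qc_mode. A minimal sketch of that mapping, written in Python purely for illustration:

    # Illustrative only: mirrors the mode-building logic in Summarize.cmdLine.
    def qc_mode(clip, trim):
        mode = ''
        if clip:
            mode += 'clip'
        if trim:
            mode += 'trim'
        return mode if mode else 'none'

    assert qc_mode(clip=True, trim=True) == 'cliptrim'
    assert qc_mode(clip=True, trim=False) == 'clip'
    assert qc_mode(clip=False, trim=False) == 'none'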
flexiprep/src/main/scripts/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/qual_type_sickle.py
deleted (file mode 100755 → 0)
#!/usr/bin/env python

import argparse

from pyfastqc import load_from_dir


## CONSTANTS ##
# fastqc-sickle encoding name map
# from FastQC source
# (uk.ac.babraham.FastQC.Sequence.QualityEncoding.PhredEncoding.java)
# and sickle source (src/sickle.h)
FQ_SICKLE_ENC_MAP = {
    'Sanger / Illumina 1.9': ('sanger', 33),
    'Illumina <1.3': ('solexa', 59),
    'Illumina 1.3': ('illumina', 64),
    'Illumina 1.5': ('illumina', 64),
}


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('results_dir', type=str,
                        help='Path to FastQC result directory file')
    parser.add_argument('--force', type=str,
                        help='Force quality to use the given encoding')
    args = parser.parse_args()

    fastqc = load_from_dir(args.results_dir)

    if args.force is None:
        enc, offset = FQ_SICKLE_ENC_MAP.get(
            fastqc.basic_statistics.data['Encoding'])
    else:
        enc = args.force
        offset = [x[1] for x in FQ_SICKLE_ENC_MAP.values()
                  if x[0] == enc][0]

    print "{0}\t{1}".format(enc, offset)
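With no --force flag the script takes the 'Encoding' value from the FastQC Basic Statistics module and maps it to a sickle quality type plus Phred offset through FQ_SICKLE_ENC_MAP. A minimal sketch of that lookup, assuming a hypothetical report encoded as 'Sanger / Illumina 1.9':

    # Illustrative lookup only; the real script reads the encoding from a
    # FastQC result directory via pyfastqc.load_from_dir().
    FQ_SICKLE_ENC_MAP = {
        'Sanger / Illumina 1.9': ('sanger', 33),
        'Illumina <1.3': ('solexa', 59),
        'Illumina 1.3': ('illumina', 64),
        'Illumina 1.5': ('illumina', 64),
    }
    reported = 'Sanger / Illumina 1.9'      # hypothetical FastQC value
    enc, offset = FQ_SICKLE_ENC_MAP[reported]
    print "{0}\t{1}".format(enc, offset)    # prints: sanger<TAB>33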
flexiprep/src/main/scripts/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/summarize_flexiprep.py
deleted (file mode 100644 → 0)
#!/usr/bin/env python

"""
Summarize results from a Flexiprep run into a gzipped JSON file.

Requirements:
    * Python == 2.7.x


Copyright (c) 2013 Wibowo Arindrarto <w.arindrarto@lumc.nl>
MIT License <http://opensource.org/licenses/MIT>
"""

RELEASE = False
__version_info__ = ('0', '2', )
__version__ = '.'.join(__version_info__)
__version__ += '-dev' if not RELEASE else ''

import argparse
import contextlib
import json
import os
import re
from datetime import datetime


def fastqc2dict(run_dir, sample, name, data_fname='fastqc_data.txt',
                image_dname='Images'):
    """Summarize FastQC results into a single dictionary object."""
    dirname = os.path.join(run_dir, sample)
    fqc_core_dir = os.path.join(dirname, os.walk(dirname).next()[1][0])

    fqc_dataf, fqc_imaged = None, None
    for p1, p2, p3 in os.walk(fqc_core_dir):
        if image_dname in p2:
            fqc_imaged = os.path.join(p1, image_dname)
        if data_fname in p3:
            fqc_dataf = os.path.join(p1, data_fname)
        if fqc_dataf is not None and fqc_imaged is not None:
            break

    if fqc_imaged is None or fqc_dataf is None:
        raise ValueError("Could not find data directory "
                         "and/or image directory in FastQC result.")

    d = {
        'files': {
            'txt_fastqc_' + name: {
                'checksum_sha1': None,
                'path': fqc_dataf
            },
        },
        'stats': {},
    }

    for img in [os.path.join(fqc_imaged, f) for f in os.listdir(fqc_imaged)]:
        key = os.path.splitext(os.path.basename(img))[0]
        d['files']['plot_' + key + '_' + name] = {
            'path': os.path.abspath(img),
            'checksum_sha1': None,
        }

    return d


def clip2dict(sample, samplea, sampleb, lib_type, run_dir):

    re_aff = re.compile(r'\s*Trimmed reads:\s*(\d+)')
    re_short = re.compile(r'\s*Too short reads:\s*(\d+)')
    re_long = re.compile(r'\s*Too long reads:\s*(\d+)')
    re_disc_1 = re.compile(r'Filtered (\d+) .+ first')
    re_disc_2 = re.compile(r'Filtered (\d+) reads from second')
    re_kept = re.compile(r'Synced read .+ (\d+) reads.')

    @contextlib.contextmanager
    def open_result(*toks):
        with open(os.path.join(run_dir, *toks)) as src:
            yield src

    def get_re_adp(seq):
        return re.compile("Adapter.+'{0}'.+trimmed (\d+) times.".format(seq))

    def get_adapters(read_pair):
        adapters = {}
        with open_result(read_pair + '.contams.txt') as src:
            for line in src:
                name, seq = line.strip().split('\t')
                # None is placeholder for trimming count
                adapters[name] = [seq, None]
        return adapters

    def get_cutadapt_stats(read_pair, adapter_dict):
        stats = {}
        stat_file = read_pair + '.clip.stats'
        try:
            with open_result(stat_file) as src:
                cutadapt_txt = src.read()
        except IOError:
            # file does not exist, no adapter contamination found
            stats['num_reads_discarded'] = 0
            stats['num_reads_affected'] = 0
            return stats

        short_count = int(re.search(re_short, cutadapt_txt).group(1))
        long_count = int(re.search(re_long, cutadapt_txt).group(1))
        stats['num_reads_discarded'] = short_count + long_count
        stats['num_reads_affected'] = int(
            re.search(re_aff, cutadapt_txt).group(1))

        for key in adapter_dict:
            re_adp = get_re_adp(adapter_dict[key][0])
            adapter_dict[key][1] = int(
                re.search(re_adp, cutadapt_txt).group(1))

        return stats

    def get_sync_stats(sample):
        stats = {}
        with open_result(sample + '.clip.sync.stats') as src:
            sync_txt = src.read()
        stats['num_reads_discarded_1'] = int(
            re.search(re_disc_1, sync_txt).group(1))
        stats['num_reads_discarded_2'] = int(
            re.search(re_disc_2, sync_txt).group(1))
        stats['num_reads_kept'] = int(re.search(re_kept, sync_txt).group(1))
        stats['num_reads_discarded_total'] = stats['num_reads_discarded_1'] + \
            stats['num_reads_discarded_2']
        return stats

    clip_stats = {'fastq_1': {}}
    clip_stats['fastq_1']['adapters'] = get_adapters(samplea)
    clip_stats['fastq_1'].update(
        get_cutadapt_stats(samplea, clip_stats['fastq_1']['adapters']))

    if lib_type == 'paired':
        clip_stats['fastq_2'] = {}
        clip_stats['fastq_2']['adapters'] = get_adapters(sampleb)
        clip_stats['fastq_2'].update(
            get_cutadapt_stats(sampleb, clip_stats['fastq_2']['adapters']))
        clip_stats['sync'] = get_sync_stats(sample)
    else:
        clip_stats['sync'] = {}

    return clip_stats


def sickle2dict(run_name, qc_mode, lib_type, run_dir):

    trim_stats = {}

    if qc_mode == 'trim':
        stat_mark = '.trim.stats'
    else:
        if lib_type == 'paired':
            stat_mark = '.clip.sync.trim.stats'
        else:
            stat_mark = '.clip.trim.stats'

    if lib_type == 'paired':
        re_paired_kept = re.compile(
            r'paired records kept: \d+ \((\d+) pairs\)')
        re_disc = re.compile(
            r'single records discarded: \d+ \(from PE1: (\d+), from PE2: (\d+)\)')
        re_disc_paired = re.compile(
            r'paired records discarded: \d+ \((\d+) pairs\)')

        with open(os.path.join(run_dir, run_name + stat_mark)) as src:
            sickle_txt = src.read()

        discarda = int(re.search(re_disc, sickle_txt).group(1))
        discardb = int(re.search(re_disc, sickle_txt).group(2))
        discard_both = int(re.search(re_disc_paired, sickle_txt).group(1))

        trim_stats['num_reads_kept'] = int(
            re.search(re_paired_kept, sickle_txt).group(1))
        trim_stats['num_reads_discarded_1'] = discarda
        trim_stats['num_reads_discarded_2'] = discardb
        trim_stats['num_reads_discarded_both'] = discard_both
        trim_stats['num_reads_discarded_total'] = discarda + discardb + \
            discard_both
    else:
        re_kept = re.compile(r'records kept: (\d+)')
        re_disc = re.compile(r'records discarded: (\d+)')

        with open(os.path.join(run_dir, run_name + stat_mark)) as src:
            sickle_txt = src.read()

        trim_stats['num_reads_kept'] = int(
            re.search(re_kept, sickle_txt).group(1))
        trim_stats['num_reads_discarded_total'] = int(
            re.search(re_disc, sickle_txt).group(1))

    return trim_stats


def dict2json(d_input, f_out):
    """Dump the given dictionary as a JSON file."""
    with open(f_out, 'w') as target:
        json.dump(d_input, target, sort_keys=True, indent=4,
                  separators=(',', ': '))


def summarize_flexiprep(run_name, qc_mode, samplea, sampleb, outf, run_dir):

    def load_json(fname):
        with open(os.path.join(run_dir, fname), 'r') as target:
            return json.load(target)

    def load_chksum(fname):
        with open(os.path.join(run_dir, fname), 'r') as src:
            return src.readline().strip().split()

    def gc_from_fastqc(fname):
        # other quick statistics
        with open(fname, 'r') as src:
            return int([x for x in src
                        if x.startswith('%GC')].pop().split('\t')[1])

    sumd = {
        '_meta': {
            'module': 'flexiprep',
            'run_name': run_name,
            'run_time': datetime.utcnow().isoformat(),
        },
        'stats': {
            'qc_mode': qc_mode,
        },
        'dirs': {
            'run': run_dir,
        },
        'files': {
        },
    }

    if sampleb is None:
        lib_type = 'single'
    else:
        lib_type = 'paired'
    sumd['stats']['lib_type'] = lib_type

    # gather checksum files
    chksums = [c for c in os.listdir(run_dir) if c.endswith('.sha1')]
    # gather fastqc directories
    fastqcs = [d for d in os.listdir(run_dir) if d.endswith('.fastqc')]
    # gather seqstat files
    sstats = [s for s in os.listdir(run_dir) if s.endswith('.seqstats.json')]

    stat_mark = ""
    if lib_type == 'paired':
        if qc_mode == 'clip':
            stat_mark = '.clip.sync'
        elif qc_mode == 'trim':
            stat_mark = '.trim'
        elif qc_mode == 'cliptrim':
            stat_mark = '.clip.sync.trim'
        else:
            assert qc_mode == 'none'
    else:
        if qc_mode == 'clip':
            stat_mark = '.clip'
        elif qc_mode == 'trim':
            stat_mark = '.trim'
        elif qc_mode == 'cliptrim':
            stat_mark = '.clip.trim'
        else:
            assert qc_mode == 'none'

    if lib_type == 'single':
        if qc_mode == 'none':
            assert len(chksums) == len(fastqcs) == len(sstats) == 1
            chksum_r1 = load_chksum(chksums[0])
            sumd['files']['fastq_raw_1'] = {
                'checksum_sha1': chksum_r1[0],
                'path': os.path.join(args.run_dir,
                                     os.path.basename(chksum_r1[1]))}
            fqd_r1 = fastqc2dict(args.run_dir, fastqcs[0], 'raw_1')
            seqstat_r1 = load_json(sstats[0])['stats']
            seqstat_r1['mean_gc'] = gc_from_fastqc(
                fqd_r1['files']['txt_fastqc_raw_1']['path'])
            sumd['stats']['fastq_raw_1'] = seqstat_r1
            for k in ('files', 'stats'):
                sumd[k].update(fqd_r1[k])
        else:
            assert len(chksums) == len(fastqcs) == len(sstats) == 2
            fqp1_name = [x for x in fastqcs if x.endswith('.qc.fastqc')][0]

            chksum_p1 = load_chksum(
                [x for x in chksums if x.endswith('.qc.sha1')][0])
            sumd['files']['fastq_proc_1'] = {
                'checksum_sha1': chksum_p1[0],
                'path': os.path.join(args.run_dir,
                                     os.path.basename(chksum_p1[1]))}
            fqd_p1 = fastqc2dict(
                args.run_dir,
                [x for x in fastqcs if x.endswith('.qc.fastqc')][0],
                'proc_1')
            seqstat_p1 = load_json(
                [x for x in sstats
                 if x.endswith(stat_mark + '.seqstats.json')][0])['stats']
            seqstat_p1['mean_gc'] = gc_from_fastqc(
                fqd_p1['files']['txt_fastqc_proc_1']['path'])
            sumd['stats']['fastq_proc_1'] = seqstat_p1

            chksum_r1 = load_chksum(
                [x for x in chksums if not x.endswith('.qc.sha1')][0])
            sumd['files']['fastq_raw_1'] = {
                'checksum_sha1': chksum_r1[0],
                'path': os.path.join(args.run_dir,
                                     os.path.basename(chksum_r1[1]))}
            fqd_r1 = fastqc2dict(
                args.run_dir,
                [x for x in fastqcs if x != fqp1_name][0],
                'raw_1')
            seqstat_r1 = load_json(
                [x for x in sstats
                 if not x.endswith(stat_mark + '.seqstats.json')][0])['stats']
            seqstat_r1['mean_gc'] = gc_from_fastqc(
                fqd_r1['files']['txt_fastqc_raw_1']['path'])
            sumd['stats']['fastq_raw_1'] = seqstat_r1

            for k in ('files', 'stats'):
                sumd[k].update(fqd_p1[k])
                sumd[k].update(fqd_r1[k])
    else:
        if qc_mode == 'none':
            assert len(chksums) == len(fastqcs) == len(sstats) == 2

            chksum_r1 = load_chksum(
                [x for x in chksums if x.startswith(samplea)][0])
            sumd['files']['fastq_raw_1'] = {
                'checksum_sha1': chksum_r1[0],
                'path': os.path.join(args.run_dir,
                                     os.path.basename(chksum_r1[1]))}
            fqd_r1 = fastqc2dict(
                args.run_dir,
                [x for x in fastqcs if x.startswith(samplea)][0],
                'raw_1')
            seqstat_r1 = load_json(
                [x for x in sstats if x.startswith(samplea)][0])['stats']
            seqstat_r1['mean_gc'] = gc_from_fastqc(
                fqd_r1['files']['txt_fastqc_raw_1']['path'])
            sumd['stats']['fastq_raw_1'] = seqstat_r1

            chksum_r2 = load_chksum(
                [x for x in chksums if not x.startswith(samplea)][0])
            sumd['files']['fastq_raw_2'] = {
                'checksum_sha1': chksum_r2[0],
                'path': os.path.join(args.run_dir,
                                     os.path.basename(chksum_r2[1]))}
            fqd_r2 = fastqc2dict(
                args.run_dir,
                [x for x in fastqcs if not x.startswith(samplea)][0],
                'raw_2')
            seqstat_r2 = load_json(
                [x for x in sstats if not x.startswith(samplea)][0])['stats']
            seqstat_r2['mean_gc'] = gc_from_fastqc(
                fqd_r2['files']['txt_fastqc_raw_2']['path'])
            sumd['stats']['fastq_raw_2'] = seqstat_r2

            for k in ('files', 'stats'):
                sumd[k].update(fqd_r1[k])
                sumd[k].update(fqd_r2[k])
        else:
            assert len(fastqcs) == len(sstats) == 4

            proc_chksums = [x for x in chksums if x.endswith('.qc.sha1')]
            proc_fastqcs = [x for x in fastqcs if x.endswith('.qc.fastqc')]
            proc_sstats = [x for x in sstats
                           if x.endswith(stat_mark + '.seqstats.json')]
            raw_chksums = [x for x in chksums if x not in proc_chksums]
            raw_fastqcs = [x for x in fastqcs if x not in proc_fastqcs]
            raw_sstats = [x for x in sstats if x not in proc_sstats]

            chksum_r1 = load_chksum(
                [x for x in raw_chksums if x.startswith(samplea)][0])
            sumd['files']['fastq_raw_1'] = {
                'checksum_sha1': chksum_r1[0],
                'path': os.path.join(args.run_dir,
                                     os.path.basename(chksum_r1[1]))}
            fqd_r1 = fastqc2dict(
                args.run_dir,
                [x for x in raw_fastqcs if x.startswith(samplea)][0],
                'raw_1')
            seqstat_r1 = load_json(
                [x for x in raw_sstats if x.startswith(samplea)][0])['stats']
            seqstat_r1['mean_gc'] = gc_from_fastqc(
                fqd_r1['files']['txt_fastqc_raw_1']['path'])
            sumd['stats']['fastq_raw_1'] = seqstat_r1

            chksum_r2 = load_chksum(
                [x for x in raw_chksums if not x.startswith(samplea)][0])
            sumd['files']['fastq_raw_2'] = {
                'checksum_sha1': chksum_r2[0],
                'path': os.path.join(args.run_dir,
                                     os.path.basename(chksum_r2[1]))}
            fqd_r2 = fastqc2dict(
                args.run_dir,
                [x for x in raw_fastqcs if not x.startswith(samplea)][0],
                'raw_2')
            seqstat_r2 = load_json(
                [x for x in raw_sstats
                 if not x.startswith(samplea)][0])['stats']
            seqstat_r2['mean_gc'] = gc_from_fastqc(
                fqd_r2['files']['txt_fastqc_raw_2']['path'])
            sumd['stats']['fastq_raw_2'] = seqstat_r2

            chksum_p1 = load_chksum(
                [x for x in proc_chksums if x.startswith(samplea)][0])
            sumd['files']['fastq_proc_1'] = {
                'checksum_sha1': chksum_p1[0],
                'path': os.path.join(args.run_dir,
                                     os.path.basename(chksum_p1[1]))}
            fqd_p1 = fastqc2dict(
                args.run_dir,
                [x for x in proc_fastqcs if x.startswith(samplea)][0],
                'proc_1')
            seqstat_p1 = load_json(
                [x for x in proc_sstats if x.startswith(samplea)][0])['stats']
            seqstat_p1['mean_gc'] = gc_from_fastqc(
                fqd_p1['files']['txt_fastqc_proc_1']['path'])
            sumd['stats']['fastq_proc_1'] = seqstat_p1

            chksum_p2 = load_chksum(
                [x for x in proc_chksums if not x.startswith(samplea)][0])
            sumd['files']['fastq_proc_2'] = {
                'checksum_sha1': chksum_p2[0],
                'path': os.path.join(args.run_dir,
                                     os.path.basename(chksum_p2[1]))}
            fqd_p2 = fastqc2dict(
                args.run_dir,
                [x for x in proc_fastqcs if not x.startswith(samplea)][0],
                'proc_2')
            seqstat_p2 = load_json(
                [x for x in proc_sstats
                 if not x.startswith(samplea)][0])['stats']
            seqstat_p2['mean_gc'] = gc_from_fastqc(
                fqd_p2['files']['txt_fastqc_proc_2']['path'])
            sumd['stats']['fastq_proc_2'] = seqstat_p2

            for k in ('files', 'stats'):
                for fqd in (fqd_r1, fqd_r2, fqd_p1, fqd_p2):
                    sumd[k].update(fqd[k])

    if 'clip' in qc_mode:
        sumd['stats']['clip'] = clip2dict(run_name, samplea, sampleb,
                                          lib_type, run_dir)

    if 'trim' in qc_mode:
        sumd['stats']['trim'] = sickle2dict(run_name, qc_mode, lib_type,
                                            run_dir)

    dict2json(sumd, outf)


if __name__ == '__main__':

    usage = __doc__.split('\n\n\n')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=usage[0], epilog=usage[1])
    parser.add_argument('run_name', type=str, help='FastQC run name')
    parser.add_argument('qc_mode', type=str,
                        choices=['clip', 'cliptrim', 'trim', 'none'],
                        help='Flexiprep QC mode')
    parser.add_argument('samplea', type=str, help='Sample A name.')
    parser.add_argument('--sampleb', type=str, help='Sample B name.')
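summarize_flexiprep() collects checksums, FastQC results and seqstat JSON files into the sumd dictionary and hands it to dict2json(). A minimal sketch, with hypothetical placeholder values, of how that summary is laid out for a single-end run in qc_mode 'none':

    # Hypothetical example of the summary structure written by dict2json();
    # key names follow the code above, values are placeholders only.
    example_summary = {
        '_meta': {
            'module': 'flexiprep',
            'run_name': 'run_1',                    # hypothetical run name
            'run_time': '2014-08-11T12:00:00',      # datetime.utcnow().isoformat()
        },
        'dirs': {'run': '/path/to/run_dir'},        # hypothetical run directory
        'files': {
            'fastq_raw_1': {'checksum_sha1': 'da39a3...',
                            'path': '/path/to/reads_1.fastq'},
            'txt_fastqc_raw_1': {'checksum_sha1': None,
                                 'path': '/path/to/fastqc_data.txt'},
        },
        'stats': {
            'qc_mode': 'none',
            'lib_type': 'single',
            'fastq_raw_1': {'mean_gc': 47},         # seqstat entries plus mean_gc
        },
    }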