Commit 4a0dc089 authored by van den Berg
Browse files

Merge branch 'devel' into 'master'

Merge development into master

See merge request !7
parents 86c18017 2c1d312f
Pipeline #3879 failed with stages
in 32 minutes and 23 seconds
......@@ -28,7 +28,6 @@ config-cluster.yml
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
......
from snakemake.remote.NCBI import RemoteProvider as NCBIRemoteProvider
import os
# Remote provider for NCBI records. It is constructed with a contact e-mail
# address and is used by the download_flt3 and download_kmt2a rules below.
NCBI = NCBIRemoteProvider(email='RedmarvandenBerg@lumc.nl')
# Folder that receives the prepared dependencies, and scratch folder for the
# downloaded archives.
output_dir = '/tmp/deps'
temp_dir = '/tmp/temp'

#### SETTINGS ####

# CTAT resource bundle. The archive name is the last component of the URL and
# the unpacked folder name is the archive name up to its first '.'.
ctat_resource = 'https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_pre-StarFv1.3/GRCh38_gencode_v26_CTAT_lib_July192017.source_data.tar.gz'
ctat_resource_tar_gz = ctat_resource.rsplit('/', 1)[-1]
ctat_resource_folder = ctat_resource_tar_gz.split('.', 1)[0]

# gmap/gsnap source archive. The folder name is set by hand; it is not derived
# from the archive name.
gsnap = 'http://research-pub.gene.com/gmap/src/gmap-gsnap-2014-12-31.v2.tar.gz'
gsnap_tar_gz = gsnap.rsplit('/', 1)[-1]
gsnap_folder = 'gmap-2014-12-31'

# GRCH38_no_alt_analysis_set, see the readme for details
# https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_genbank/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/README_ANALYSIS_SETS
# The location below carries no scheme; it is handed to wget as written.
GRCH38_no_alt_analysis_set_url = 'ftp.ncbi.nlm.nih.gov/genomes/archive/old_genbank/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines'
# This is a URL, not a filesystem path, so it is joined with a literal '/'
# rather than os.path.join (whose separator depends on the host OS).
GRCH38_no_alt_analysis_set_filename = f'{GRCH38_no_alt_analysis_set_url}/GCA_000001405.15_GRCh38_no_alt_analysis_set'
GRCH38_no_alt_analysis_set_fasta_gz = f'{GRCH38_no_alt_analysis_set_filename}.fna.gz'
GRCH38_no_alt_analysis_set_fasta = f'{GRCH38_no_alt_analysis_set_filename}.fna'
GRCH38_no_alt_analysis_set_fai = f'{GRCH38_no_alt_analysis_set_filename}.fna.fai'
GRCH38_no_alt_analysis_set_dict = f'{GRCH38_no_alt_analysis_set_filename}.dict'

# The gmap index is built from the very same gzipped FASTA, so reuse the URL
# instead of repeating the literal.
gmap_index = GRCH38_no_alt_analysis_set_fasta_gz
gmap_gz = gmap_index.rsplit('/', 1)[-1]
#### RULES ####
rule all:
input:
#f'{output_dir}/FLT3.fasta',
#f'{output_dir}/KMT2A.fasta',
f'{output_dir}/{ctat_resource_folder}',
#f'{output_dir}/{gsnap_folder}',
f'{output_dir}/{gsnap_folder}/bin/gsnap',
#f'{output_dir}/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna'
f'{output_dir}/{os.path.basename(GRCH38_no_alt_analysis_set_fasta_gz)}',
f'{output_dir}/{os.path.basename(GRCH38_no_alt_analysis_set_fai)}',
f'{output_dir}/{os.path.basename(GRCH38_no_alt_analysis_set_fasta)}',
f'{output_dir}/{os.path.basename(GRCH38_no_alt_analysis_set_dict)}'
rule download_flt3:
input:
NCBI.remote('NM_004119.2.fasta', db='nuccore')
output:
f'{output_dir}/FLT3.fasta'
run:
shell('sleep 5s; cp {input} {output}')
rule download_kmt2a:
input:
NCBI.remote('NM_001197104.1.fasta', db='nuccore')
output:
f'{output_dir}/KMT2A.fasta'
run:
shell('sleep 5s; cp {input} {output}')
rule download_genome_star_fusion_lib:
params:
ctat_resource
output:
f'{temp_dir}/GRCh38_gencode_v26_CTAT_lib_July192017.source_data.tar.gz'
run:
shell('mkdir -p {temp_dir} && wget {params} --output-document={output}')
rule unpack_genome_star_fusion_lib:
input:
f'{temp_dir}/{ctat_resource_tar_gz}'
output:
f'{output_dir}/{ctat_resource_folder}'
run:
shell('tar -zxvf {input} -C {output_dir}')
rule download_SIMD_gsnap:
params:
gsnap
output:
f'{temp_dir}/{gsnap_tar_gz}'
run:
shell('mkdir -p {temp_dir} && wget {params} --output-document={output}')
rule unpack_SIMD_gsnap:
input:
f'{temp_dir}/{gsnap_tar_gz}'
output:
f'{output_dir}/{gsnap_folder}'
run:
shell('tar -zxvf {input} -C {output_dir}')
rule compile_SIMD_gsnap:
input:
f'{output_dir}/{gsnap_folder}'
output:
f'{output_dir}/{gsnap_folder}/bin/gsnap'
run:
shell('cd {input} && ./configure --prefix {input} && make && make install')
rule download_gmap_index:
params:
gmap_index
output:
f'{temp_dir}/{gmap_gz}'
run:
shell('mkdir -p {temp_dir} && wget {params} --output-document={output}')
rule build_gmap_index:
input:
gzip = f'{temp_dir}/{gmap_gz}',
gmap_bin = f'{output_dir}/{gsnap_folder}/bin'
output:
f'{output_dir}/GCA_000001405.15_GRCh38_no_alt_analysis_set.sources'
run:
shell('{input.gmap_bin}/gmap_build -D {output_dir} -d GCA_000001405.15_GRCh38_no_alt_analysis_set -g {input.gzip} -B {input.gmap_bin}')
rule download_GRCH38_no_alt_fasta:
params:
GRCH38_no_alt_analysis_set_fasta_gz
output:
f'{output_dir}/{os.path.basename(GRCH38_no_alt_analysis_set_fasta_gz)}'
run:
shell('wget {params} --output-document={output}')
rule unpack_GRCH38_no_alt:
input:
f'{output_dir}/{os.path.basename(GRCH38_no_alt_analysis_set_fasta_gz)}'
output:
f'{output_dir}/{os.path.basename(GRCH38_no_alt_analysis_set_fasta)}'
run:
shell('gunzip --decompress --keep --to-stdout {input} > {output}')
rule download_GRCH38_no_alt_fai:
params:
GRCH38_no_alt_analysis_set_fai
output:
f'{output_dir}/{os.path.basename(GRCH38_no_alt_analysis_set_fai)}'
run:
shell('wget {params} --output-document={output}')
rule picard_index_GRCH38_no_alt:
input:
f'{output_dir}/{os.path.basename(GRCH38_no_alt_analysis_set_fasta)}'
output:
f'{output_dir}/{os.path.basename(GRCH38_no_alt_analysis_set_dict)}'
conda:
srcdir('includes/snv-indels/envs/align_vars.yml')
shell:
'picard CreateSequenceDictionary R={input} O={output}'
......@@ -7,6 +7,6 @@ channels:
- conda-forge
- defaults
dependencies:
- pytest-workflow
- pytest-workflow>=1.4.0
- python>3.6
- snakemake=5.9
# Conda environment "create_summary"; every package is pinned to a version.
name: create_summary
channels:
  - bioconda
  - defaults
  - conda-forge
dependencies:
  - ca-certificates=2018.03.07
  - certifi=2018.1.18
  - crimson=0.3.0
  - click=6.7
  - libgcc-ng=7.2.0
  - openssl=1.0.2o
  - pip=9.0.3
  - python=3.6.2
  - pyyaml=3.12
  - readline=6.2
  - setuptools=39.0.1
  - sqlite=3.13.0
  - tk=8.5.18
  - wheel=0.31.0
  - xz=5.2.3
  - yaml=0.1.7
  - zlib=1.2.11
# Conda environment "calc_ratio"; every package is pinned to a version.
name: calc_ratio
channels:
  - bioconda
  - defaults
  - conda-forge
dependencies:
  - click=6.7
  - pandas=0.20.3
  - pip=9.0.1
  - python=3.6.2
  - mkl=2017.0.3
  - numpy=1.13.1
  - openssl=1.0.2l
  - python-dateutil=2.6.1
  - pytz=2017.2
  - readline=6.2
  - setuptools=27.2.0
  - six=1.10.0
  - sqlite=3.13.0
  - tk=8.5.18
  - wheel=0.29.0
  - xz=5.2.3
  - zlib=1.2.11
# Conda environment "count_bases"; every package is pinned to a version.
# Besides bioconda, defaults and conda-forge it also pulls from the r channel.
name: count_bases
channels:
  - bioconda
  - r
  - defaults
  - conda-forge
dependencies:
  - bedtools=2.17.0
  - r=3.3.2
  - r-getopt=1.20.0
  - python=2.7.13
  - pip=9.0.1
  - bzip2=1.0.6
  - cairo=1.14.8
  - curl=7.54.1
  - fontconfig=2.12.1
  - freetype=2.5.5
  - glib=2.50.2
  - gsl=2.2.1
  - harfbuzz=0.9.39
  - icu=54.1
  - jbig=2.1
  - jpeg=9b
  - krb5=1.13.2
  - libffi=3.2.1
  - libgcc=5.2.0
  - libiconv=1.14
  - libpng=1.6.27
  - libssh2=1.8.0
  - libtiff=4.0.6
  - libxml2=2.9.4
  - ncurses=5.9
  - openssl=1.0.2l
  - pango=1.40.3
  - pcre=8.39
  - pixman=0.34.0
  - r-base=3.3.2
  - r-boot=1.3_18
  - r-class=7.3_14
  - r-cluster=2.0.5
  - r-codetools=0.2_15
  - r-foreign=0.8_67
  - r-kernsmooth=2.23_15
  - r-lattice=0.20_34
  - r-mass=7.3_45
  - r-matrix=1.2_7.1
  - r-mgcv=1.8_16
  - r-nlme=3.1_128
  - r-nnet=7.3_12
  - r-recommended=3.3.2
  - r-rpart=4.1_10
  - r-spatial=7.3_11
  - r-survival=2.40_1
  - readline=6.2
  - setuptools=27.2.0
  - sqlite=3.13.0
  - tk=8.5.18
  - wheel=0.29.0
  - xz=5.2.3
  - zlib=1.2.11
# Conda environment "count_fragments"; every package is pinned to a version.
name: count_fragments
channels:
  - bioconda
  - defaults
  - conda-forge
dependencies:
  - python=2.7.13
  - bcftools=1.5
  - pysam=0.11.2.2
  - bzip2=1.0.6
  - cairo=1.14.8
  - curl=7.54.1
  - cycler=0.10.0
  - dbus=1.10.20
  - expat=2.1.0
  - fontconfig=2.12.1
  - freetype=2.5.5
  - functools32=3.2.3.2
  - glib=2.50.2
  - gst-plugins-base=1.8.0
  - gstreamer=1.8.0
  - htslib=1.5
  - icu=54.1
  - jpeg=9b
  - krb5=1.13.2
  - libffi=3.2.1
  - libgcc=5.2.0
  - libiconv=1.14
  - libpng=1.6.27
  - libssh2=1.8.0
  - libxcb=1.12
  - libxml2=2.9.4
  - matplotlib=2.0.2
  - mkl=2017.0.3
  - numpy=1.13.1
  - openssl=1.0.2l
  - pcre=8.39
  - pip=9.0.1
  - pixman=0.34.0
  - pycairo=1.10.0
  - pyparsing=2.2.0
  - pyqt=5.6.0
  - python-dateutil=2.4.1
  - pytz=2017.2
  - qt=5.6.2
  - readline=6.2
  - samtools=1.5
  - setuptools=27.2.0
  - sip=4.18
  - six=1.10.0
  - sqlite=3.13.0
  - subprocess32=3.2.7
  - tk=8.5.18
  - wheel=0.29.0
  - xz=5.2.3
  - zlib=1.2.11
  # Entries under "pip:" must be nested one level deeper than the conda
  # packages; they use pip's "==" pin syntax and are installed by pip.
  - pip:
      - htseq==0.6.1p1
# Conda environment "idsort_aln": Picard together with a pinned Java JDK.
name: idsort_aln
channels:
  - bioconda
  - defaults
  - conda-forge
dependencies:
  - picard=1.141
  - java-jdk=8.0.92
# Conda environment "combine_plots"; every package is pinned to a version.
name: combine_plots
channels:
  - bioconda
  - defaults
  - conda-forge
dependencies:
  - click=6.7
  - libxml2=2.9.4
  - libxslt=1.1.29
  - pip=9.0.1
  - python=3.5.3
  - libiconv=1.14
  - openssl=1.0.2l
  - readline=6.2
  - setuptools=27.2.0
  - sqlite=3.13.0
  - tk=8.5.18
  - wheel=0.29.0
  - xz=5.2.2
  - zlib=1.2.8
  # Entries under "pip:" must be nested one level deeper than the conda
  # packages; they use pip's "==" pin syntax and are installed by pip.
  - pip:
      - svgutils==0.2.0
      - lxml==3.8.0
# Conda environment "fusioncatcher"; every package is pinned to a version.
name: fusioncatcher
channels:
  - bioconda
  - defaults
  - conda-forge
dependencies:
  - fusioncatcher=1.00
  - fusioncatcher-seqtk=1.2
  - bbmap=38.49
  - biopython=1.68
  - blas=1.0
  - blat=35
  - bowtie=1.2.0
  - bowtie2=2.3.0
  - bwa=0.7.12
  - ca-certificates=2019.1.23
  - certifi=2019.3.9
  - et_xmlfile=1.0.1
  - freetype=2.9.1
  - intel-openmp=2019.3
  - java-jdk=8.0.92
  - jdcal=1.4.1
  - jpeg=9b
  - libedit=3.1.20181209
  - libffi=3.2.1
  - libgcc=7.2.0
  - libgcc-ng=8.2.0
  - libgfortran-ng=7.3.0
  - libpng=1.6.37
  - libstdcxx-ng=8.2.0
  - libtiff=4.0.10
  - libuuid=1.0.3
  - lzo=2.10
  - lzop=1.04
  - mkl=2018.0.3
  - mmtf-python=1.0.2
  - msgpack-python=0.6.1
  - mysql-connector-c=6.1.6
  - ncurses=6.1
  - numpy=1.13.1
  - oases=0.2.09
  - olefile=0.46
  - openjdk=8.0.152
  - openpyxl=2.5.0a2
  - openssl=1.1.1b
  - parallel=20171222
  - perl=5.26.0
  - perl-threaded=5.26.0
  - picard=2.10.6
  - pigz=2.3
  - pillow=6.0.0
  - pip=19.1.1
  - python=2.7.16
  - readline=7.0
  - reportlab=3.5.19
  - samtools=0.1.19
  - setuptools=41.0.1
  - sqlite=3.28.0
  - sra-tools=2.6.2
  - star=2.5.2b
  - system=5.8
  - tbb=2019.4
  - tk=8.6.8
  - ucsc-fatotwobit=357
  - ucsc-liftover=357
  - velvet=1.2.10
  - wheel=0.33.4
  - xlrd=1.0.0
  - xz=5.2.4
  - zip=3.0
  - zlib=1.2.11
  - zstd=1.3.7
# Conda environment "intersect_fusions"; every package is pinned to a version.
name: intersect_fusions
channels:
  - bioconda
  - defaults
  - conda-forge
dependencies:
  - pip=9.0.1
  - python=2.7
  - grep=2.14
  - fuma=3.0.5
  - bcftools=1.5
  - bzip2=1.0.6
  - cairo=1.14.8
  - curl=7.54.1
  - cycler=0.10.0
  - dbus=1.10.20
  - expat=2.1.0
  - fontconfig=2.12.1
  - freetype=2.5.5
  - functools32=3.2.3.2
  - glib=2.50.2
  - gst-plugins-base=1.8.0
  - gstreamer=1.8.0
  - htseq=0.7.2
  - htslib=1.5
  - icu=54.1
  - jpeg=9b
  - krb5=1.13.2
  - libffi=3.2.1
  - libgcc=5.2.0
  - libiconv=1.14
  - libpng=1.6.27
  - libssh2=1.8.0
  - libxcb=1.12
  - libxml2=2.9.4
  - matplotlib=2.0.2
  - mkl=2017.0.3
  - numpy=1.13.1
  - openssl=1.0.2l
  - pcre=8.39
  - pixman=0.34.0
  - pycairo=1.10.0
  - pyparsing=2.2.0
  - pyqt=5.6.0
  - pysam=0.11.2.2
  - python-dateutil=2.4.1
  - pytz=2017.2
  - qt=5.6.2
  - readline=6.2
  - samtools=1.5
  - setuptools=27.2.0
  - sip=4.18
  - six=1.10.0
  - sqlite=3.13.0
  - subprocess32=3.2.7
  - tk=8.5.18
  - wheel=0.29.0
  - xz=5.2.3
  - zlib=1.2.11
# Conda environment "plot_fusion"; every package is pinned to a version.
name: plot_fusion
channels:
  - bioconda
  - defaults
  - conda-forge
dependencies:
  - circos=0.69.4
  - icu=58
  - pip=9.0.1
  - fontconfig=2.13
  - freetype=2.9
  - giflib=5.1.4
  - jbig=2.1
  - libgcc=5.2.0
  - libiconv=1.14
  - libpng=1.6
  - libtiff=4.0
  - libwebp=0.5.2
  - libxml2=2.9
  - openssl=1.1
  - perl=5.22.0.1
  - perl-app-cpanminus=1.7039
  - perl-clone=0.38
  - perl-config-general=2.61
  - perl-data-dumper=2.161
  - perl-digest-hmac=1.03
  - perl-digest-perl-md5=1.9
  - perl-encode-locale=1.05
  - perl-exporter-tiny=0.042
  - perl-extutils-makemaker=7.24
  - perl-file-listing=6.04
  - perl-font-ttf=1.05
  - perl-gd=2.56
  - perl-html-parser=3.72
  - perl-html-tagset=3.20
  - perl-http-cookies=6.01
  - perl-http-daemon=6.01
  - perl-http-date=6.02
  - perl-http-message=6.11
  - perl-http-negotiate=6.01
  - perl-io-html=1.001
  - perl-io-socket-ssl=2.024
  - perl-io-string=1.08
  - perl-libwww-perl=6.15
  - perl-list-moreutils=0.413
  - perl-lwp-mediatypes=6.02
  - perl-math-bezier=0.01
  - perl-math-round=0.07
  - perl-math-vecstat=0.08
  - perl-module-implementation=0.09
  - perl-module-runtime=0.014
  - perl-net-http=6.09
  - perl-net-ssleay=1.74
  - perl-ntlm=1.09
  - perl-number-format=1.75
  - perl-params-validate=1.26
  - perl-pathtools=3.40
  - perl-readonly=1.04
  - perl-regexp-common=2016060801
  - perl-scalar-list-utils=1.45
  - perl-set-intspan=1.19
  - perl-statistics-basic=1.6611
  - perl-svg=2.64
  - perl-test-more=1.001002
  - perl-text-format=0.59
  - perl-threaded=5.22.0
  - perl-time-hires=1.9728
  - perl-try-tiny=0.24
  - perl-uri=1.71
  - perl-www-robotrules=6.02
  - perl-xml-parser=2.44
  - perl-xsloader=0.22
  - python=3.6
  - readline=7.0
  - setuptools=27.2.0
  - tk=8.6
  - wheel=0.29.0
  - xz=5.2
  - zlib=1.2
  - r=3.5.1
  - r-base=3.5.1
  - r-boot=1.3_22
  - r-class=7.3_15
  - r-codetools=0.2_16
  - r-foreign=0.8_71
  - r-kernsmooth=2.23_15
  - r-lattice=0.20_38
  - r-mass=7.3_51.4
  - r-matrix=1.2_17
  - r-mgcv=1.8_28
  - r-nlme=3.1_140
  - r-nnet=7.3_12
  - r-recommended=3.5.1
  - r-rpart=4.1_15
  - r-spatial=7.3_11
  - r-survival=2.44_1.1
  - tktable=2.10
  - xorg-kbproto=1.0.7
  - xorg-libice=1.0.9
  - xorg-libsm=1.2.3
  - xorg-libx11=1.6.7
  - xorg-libxau=1.0.9
  - xorg-libxdmcp=1.1.3
  - xorg-libxext=1.3.4
  - xorg-libxrender=0.9.10
  - xorg-renderproto=0.11.1
  - xorg-xextproto=7.3.0
  - xorg-xproto=7.0.31
  # Entries under "pip:" must be nested one level deeper than the conda
  # packages; they use pip's "==" pin syntax and are installed by pip.
  - pip:
      - fsnviz==0.3.0
      - click==6.7
      - crimson==0.3.0
      - jinja2==2.9.5
      - markupsafe==1.0
      - pyyaml==3.12
# Conda environment "star_fusion": STAR-Fusion with pinned STAR and BLAST.
name: star_fusion
channels:
  - bioconda
  - defaults
  - conda-forge
dependencies:
  - star-fusion=0.5.4
  - star=2.5.3a
  - blast=2.2.31