Commit 02a5d983 authored by Beatrice Tan

Support two kinds of input: either a user-provided input file, or the information needed to download the file from Firehose.

parent 840bdc75
......@@ -13,8 +13,11 @@ include: "rules/UseControl.smk"
workdir: config["workdir"]
onstart:
    # Print a short summary of the run configuration when the workflow starts.
    shell("echo Executing pipeline to identify recurrent CNAs and candidate genes from tumor copy number profiles.\n")
    # An empty or blank 'inputfile' entry (blank YAML values load as None) means
    # that no file was provided and the Firehose download settings are reported instead.
    if not config.get("inputfile"):
        shell("echo - Inputfile will be downloaded from Firehose: segmentation data from " + str(config["cancer_type"]) + " samples on " + str(config["date_data"]))
    else:
        shell("echo - Inputfile: " + config["inputfile"])
    shell("echo - Snakefile, rules and wrappers directory: {workflow.basedir}")
    shell("echo - Output directory: " + config["workdir"])
......
# Directories to be specified
workdir: /home/beatrice/CNA_analysis
gsdir: /home/beatrice/Documents/SASC/run_GISTIC2

# Input specifications
# Input details to download from Firehose (used when 'inputfile' is empty).
cancer_type: SKCM
# Quoted so the value is always read as a string; a bare 2016_07_15 can be
# parsed as an integer by YAML 1.1 loaders.
date_data: "2016_07_15"
# Or provide input file
inputfile: ""  # tumor segmentation data
normal: ""  # normal segmentation data

# Data for running and benchmarking tools.
reference: hg19
markerfile: /home/beatrice/Documents/SASC/Input_files/markers.tsv
prev_found_genes: /home/beatrice/Documents/SASC/Input_files/intogen-CM-drivers-data.tsv  # /home/beatrice/Documents/SASC/Input_files/SKCM_genes.txt
......
# Download the Level 3 segmentation archive for the configured cancer type and
# date with firehose_get, unpack it, and copy the segment file to the rule output.
rule firehose:
    output:
        "Input/Segments_all.txt"
    params:
        cancer_type=config["cancer_type"],
        date=config["date_data"],
        # Same date with the underscores removed, as used in the archive paths below.
        dateshort=str(config["date_data"]).replace("_", "")
    shell: #Add FIREHOSE conda recipe
        # The archive and directory names use {params.cancer_type} rather than a
        # fixed cohort so that they match the cohort requested from firehose_get.
        "firehose_get -tasks Merge_cna__illuminahiseq_dnaseqc__hms_harvard_edu__Level_3__segmentation__seg stddata {params.date} {params.cancer_type} && \
        DATA_DIR=stddata__{params.date}/{params.cancer_type}/{params.dateshort} && \
        tar -xzf $DATA_DIR/gdac.broadinstitute.org_{params.cancer_type}.Merge_cna__illuminahiseq_dnaseqc__hms_harvard_edu__Level_3__segmentation__seg.Level_3.{params.dateshort}00.0.0.tar.gz -C $DATA_DIR && \
        cp --no-preserve=mode,ownership $DATA_DIR/gdac.broadinstitute.org_{params.cancer_type}.Merge_cna__illuminahiseq_dnaseqc__hms_harvard_edu__Level_3__segmentation__seg.Level_3.{params.dateshort}00.0.0/{params.cancer_type}.cna__illuminahiseq_dnaseqc__hms_harvard_edu__Level_3__segmentation__seg.seg.txt {output}"
rule split_segfile:
def pipeline_input(wildcards):
    """Decide whether to use firehose data or own input file.

    Args:
        wildcards: Wildcards object passed in by Snakemake; not used here.

    Returns:
        The configured ``inputfile`` when one is provided, otherwise the
        output of the ``firehose`` rule when both ``cancer_type`` and
        ``date_data`` are configured.

    Raises:
        ValueError: If neither an input file nor complete firehose settings
            are present in the configuration.
    """
    # .get() plus a truthiness test treats a missing key, a blank YAML value
    # (loaded as None) and an empty string all as "not provided".
    provided_input = config.get("inputfile")
    if provided_input:  # use provided input file
        return provided_input
    if config.get("cancer_type") and config.get("date_data"):  # use firehose data
        return rules.firehose.output
    raise ValueError("Provide either a segmentation file as input or get firehose data based on the cancer type and date of choice.")
# Produce the tumor and normal segmentation files, either by copying the
# files named in the configuration or by splitting the firehose download.
rule define_input:
    input:
        # Input function: returns the configured input file or the firehose rule output.
        pipeline_input
    output:
        tumor="Input/Segments_tumor.txt",
        normal="Input/Segments_normal.txt"
    params:
        inputfile=config["inputfile"],
        normalfile=config["normal"]
    run:
        if input[0] == params.inputfile: #use provided input file
            shell("cp {params.inputfile} {output.tumor}")
            # Without a normal file an empty placeholder is created so that the
            # declared 'normal' output always exists.
            if params.normalfile:
                shell("cp {params.normalfile} {output.normal}")
            else:
                shell("touch {output.normal}")
        else: #split firehose data in tumor and normal files.
            split_normal_tumor(input[0], output.tumor, output.normal)
def split_normal_tumor(all_samples, out_tumor, out_normal):
"""Split a segmentation file from firehose in a segmentation file with only tumor samples and one with only normal samples"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment