diff --git a/samplesheet.wdl b/samplesheet.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..e269911ddac6c9c353dcdcee8d95a8abf90713f6
--- /dev/null
+++ b/samplesheet.wdl
@@ -0,0 +1,95 @@
+version 1.0
+
+# A single readgroup: an R1 file, an optional R2 file and one md5 String for
+# each of them (presumably the files' checksums).
+# NOTE(review): R2 is optional but R2_md5 is not, so a readgroup without R2
+# must still provide R2_md5 -- confirm whether it should be String?.
+struct Readgroup {
+    String id
+    File R1
+    String R1_md5
+    File? R2
+    String R2_md5
+}
+
+# A library: an id and a non-empty array of readgroups.
+struct Library {
+    String id
+    Array[Readgroup]+ readgroups
+}
+
+# A sample: an id and a non-empty array of libraries.
+struct Sample {
+    String id
+    Array[Library]+ libraries
+}
+
+# Converts a YAML samplesheet into a JSON file in which every nested
+# dictionary is replaced by a list of objects that carry their key as "id",
+# then reads that JSON back as Sample structs.
+#
+# Inputs:
+#   sampleConfigFile  YAML samplesheet to convert.
+#   outputJson        Name of the JSON file written by the command.
+# Outputs:
+#   map      The written JSON parsed as Map[String,Array[Sample]].
+#   samples  The "samples" entry of that map.
+task sampleConfigFileToStruct {
+    input {
+        File sampleConfigFile
+        String outputJson = "output.json"
+    }
+
+    # Below command can convert any samplesheet with a nested dictionary
+    # structure to a list of objects model.
+    # It was specifically designed to run on both python2 and python3.
+    # Only requirement is PyYAML.
+    #
+    # Code maintained in https://github.com/rhpvorderman/samplesheet-to-struct
+    # can be moved to biowdl group later.
+    command {
+        python <<CODE
+
+        import json
+
+        import yaml
+
+
+        def nested_dicts_to_lists(dictionary):
+            # Copy the mapping; every dict value becomes a list of items.
+            new_dict = dict()
+            for key, value in dictionary.items():
+                if isinstance(value, dict):
+                    new_dict[key] = dict_to_item_list_with_id(value)
+                else:
+                    new_dict[key] = value
+            return new_dict
+
+
+        def dict_to_item_list_with_id(dictionary):
+            # Each key and its sub-dictionary become one item whose "id" is the key.
+            items = []
+            for sub_key, sub_dictionary in dictionary.items():
+                item_dict = dict(id=sub_key, **nested_dicts_to_lists(sub_dictionary))
+                items.append(item_dict)
+            return items
+
+
+        # safe_load builds only plain Python objects. Calling yaml.load without
+        # an explicit Loader is deprecated and fails on PyYAML 6 and later.
+        with open("~{sampleConfigFile}", "r") as samplesheet:
+            samplesheet_dict = yaml.safe_load(samplesheet)
+
+        sample_struct = nested_dicts_to_lists(samplesheet_dict)
+
+        with open("~{outputJson}", "w") as output_json:
+            output_json.write(json.dumps(sample_struct))
+
+        CODE
+    }
+
+    output {
+        Map[String,Array[Sample]] map = read_json(outputJson)
+        Array[Sample] samples = map["samples"]
+    }
+}
\ No newline at end of file