Skip to content
Snippets Groups Projects
Unverified Commit bfabd797 authored by Ruben Vorderman's avatar Ruben Vorderman Committed by GitHub
Browse files

Merge pull request #315 from biowdl/BIOWDL-604

Allow a custom separator char in tagUmi
parents 44781d17 b3c9204b
No related branches found
No related tags found
No related merge requests found
......@@ -9,6 +9,7 @@ that users understand how the changes affect the new version.
-->
version 5.1.0-dev
---------------------------
+ Add a separatorChar input to the tagUmi task.
+ Bug fix: Add space between flag and the value provided for macs2
+ Add optional inputs to macs2, aiming to allow adherence to ENCODE ATAC-seq. Inputs added:
+ nomodel
......
......@@ -29,6 +29,7 @@ task BamReadNameToUmiTag {
File inputBam
String outputPath = "output.bam"
String umiTag = "RX"
String separatorChar = "_"
String memory = "2GiB"
Int timeMinutes = 1 + ceil(size([inputBam], "GiB") * 10)
......@@ -45,26 +46,26 @@ task BamReadNameToUmiTag {
from typing import Tuple
def split_umi_from_name(name, separator_char="_") -> Tuple[str, str]:
    """Split the trailing UMI off a read name.

    The read id is the part of ``name`` before the first whitespace. The
    UMI is everything in the id after the last occurrence of
    ``separator_char``; the separator itself is discarded.

    :param name: Read name, optionally followed by whitespace and a
        comment part that is kept as-is.
    :param separator_char: Separator between the read id and the UMI.
        May be longer than one character.
    :return: A ``(new_name, umi)`` tuple, where ``new_name`` is the read
        name without separator and UMI (comment part re-attached with a
        single space) and ``umi`` is the extracted UMI.
    :raises ValueError: If ``name`` is empty or the read id does not
        contain ``separator_char``.
    """
    id_and_rest = name.split(maxsplit=1)
    if not id_and_rest:
        raise ValueError("Cannot extract a UMI from an empty read name.")
    read_id = id_and_rest[0]
    # If there was no whitespace id_and_rest will have length 1
    other_parts = id_and_rest[1] if len(id_and_rest) == 2 else ""
    separator_index = read_id.rfind(separator_char)
    if separator_index == -1:
        # Without this check the -1 from rfind would make the whole id the
        # "UMI" and silently chop the last character off the read name.
        raise ValueError(
            f"Separator {separator_char!r} not found in read id {read_id!r}."
        )
    # Skip the full separator, not just one character, so that separators
    # longer than one character do not leak into the UMI.
    umi = read_id[separator_index + len(separator_char):]
    new_id = read_id[:separator_index]
    if other_parts:
        return " ".join([new_id, other_parts]), umi
    return new_id, umi
def annotate_umis(in_file, out_file, bam_tag="RX"):
def annotate_umis(in_file, out_file, bam_tag="RX", separator_char = "_"):
in_bam = pysam.AlignmentFile(in_file, "rb")
os.makedirs(os.path.dirname(out_file), exist_ok=True)
out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam)
# Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway.
encoded_bam_tag = bam_tag.encode('ascii')
for segment in in_bam: # type: pysam.AlignedSegment
new_name, umi = split_umi_from_name(segment.query_name)
new_name, umi = split_umi_from_name(segment.query_name, separator_char)
segment.query_name = new_name
# Encode umi as ascii. Otherwise pysam encodes it to bytes anyway.
# Value type has to be a string though, otherwise pysam crashes.
......@@ -72,7 +73,7 @@ task BamReadNameToUmiTag {
out_bam.write(segment)
# Script entry point: move the UMIs from the read names into a BAM tag, then
# index the resulting BAM file. The ~{...} placeholders are not Python; they
# are substituted into the script text before it is executed.
# NOTE(review): each placeholder is spliced into a double-quoted Python
# string literal, so a value containing a double quote or a backslash
# (e.g. a separatorChar of "\") would presumably break or alter the script
# - confirm that inputs are restricted accordingly.
if __name__ == "__main__":
annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}")
annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}", "~{separatorChar}")
pysam.index("~{outputPath}", "~{bamIndexPath}", b=True)
CODE
>>>
......@@ -93,6 +94,7 @@ task BamReadNameToUmiTag {
inputBam: {description: "The input SAM file.", category: "required"}
outputPath: {description: "Output directory path + output file.", category: "common"}
umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"}
separatorChar: {description: "Character used to separate the UMIs from the read name.", category: "common"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment