Source code for gnomad.utils.intervals

# noqa: D100

from typing import List

import hail as hl


def sort_intervals(intervals: List[hl.Interval]):
    """
    Order intervals by their start locus, breaking ties with their end locus.

    Each locus is ranked first by the index of its contig within the contig
    list of its own reference genome, and then by its position.

    :param intervals: Intervals to sort
    :return: New list holding the same intervals in sorted order
    """

    def _locus_rank(locus):
        # Contigs are ranked by their index in the reference genome's contig
        # list rather than by comparing the contig names themselves.
        return locus.reference_genome.contigs.index(locus.contig), locus.position

    def _interval_rank(interval):
        # (start contig index, start position, end contig index, end position)
        return _locus_rank(interval.start) + _locus_rank(interval.end)

    ordered = list(intervals)
    ordered.sort(key=_interval_rank)
    return ordered
def union_intervals(intervals: List[hl.Interval], is_sorted: bool = False):
    """
    Generate a list with the union of all intervals in the input list by merging overlapping intervals.

    An interval is merged into the previously merged one when its start is at
    or before that interval's end, so intervals that merely touch are merged
    as well. Endpoints are compared as (contig index, position) pairs, which
    keeps the comparison valid for intervals whose start and end lie on
    different contigs. When two intervals are merged, `includes_start` is
    taken from the interval providing the start and `includes_end` from the
    interval providing the end.

    :param intervals: Intervals to merge
    :param is_sorted: If set, assumes intervals are already sorted (in the order produced by `sort_intervals`), otherwise will sort.
    :return: List of merged intervals
    """

    def _locus_key(locus):
        # Comparing bare positions is only meaningful within one contig;
        # pairing the position with the contig's index makes loci comparable
        # across the whole reference genome.
        return locus.reference_genome.contigs.index(locus.contig), locus.position

    sorted_intervals = intervals if is_sorted else sort_intervals(intervals)

    # Slicing copies, so the caller's list is never modified.
    merged_intervals = sorted_intervals[:1]
    for interval in sorted_intervals[1:]:
        last = merged_intervals[-1]
        last_end = _locus_key(last.end)

        if _locus_key(interval.start) > last_end:
            # Starts strictly after the current merged interval: disjoint.
            merged_intervals.append(interval)
        elif _locus_key(interval.end) > last_end:
            # Overlaps (or touches) and reaches further: extend the end while
            # keeping the inclusion flags of the endpoints that are retained.
            merged_intervals[-1] = hl.Interval(
                last.start,
                interval.end,
                includes_start=last.includes_start,
                includes_end=interval.includes_end,
            )
        # Otherwise the interval is entirely covered already; nothing to do.

    return merged_intervals
def interval_length(interval: hl.Interval) -> int:
    """
    Return the total number of bases in an Interval.

    The interval may start and end on different contigs of the same reference
    genome; in that case the remainder of the start contig, every contig in
    between, and the covered part of the end contig are all counted. The
    interval's `includes_start` and `includes_end` flags are taken into
    account.

    :param interval: Input interval
    :return: Total length of the interval
    """
    start = interval.start
    end = interval.end

    # First count the bases of the half-open interval [start, end).
    if start.contig == end.contig:
        length = end.position - start.position
    else:
        ref = start.reference_genome
        contigs = ref.contigs
        # Contigs lying strictly between the start contig and the end contig.
        contigs_between = contigs[
            contigs.index(start.contig) + 1 : contigs.index(end.contig)
        ]
        length = (
            ref.contig_length(start.contig)
            - start.position
            + sum(ref.contig_length(contig) for contig in contigs_between)
            + end.position
        )

    # Then correct for bounds that differ from [start, end): an excluded
    # start drops one base, an included end adds one.
    if not interval.includes_start:
        length -= 1
    if interval.includes_end:
        length += 1

    return length