Source code for gnomad.utils.intervals

# noqa: D100

from typing import List, Union

import hail as hl


def sort_intervals(intervals: List[hl.Interval]):
    """
    Order intervals by start contig, start position, end contig, then end position.

    Contigs are ranked by their index in the contig list of each locus'
    reference genome rather than by their name, so the resulting order follows
    the reference genome's own contig ordering.

    :param intervals: Intervals to sort
    :return: New list containing the input intervals in sorted order
    """

    def _ordering_key(ivl):
        # Build the 4-tuple used for comparison:
        # (start contig rank, start position, end contig rank, end position).
        first, last = ivl.start, ivl.end
        first_rank = first.reference_genome.contigs.index(first.contig)
        last_rank = last.reference_genome.contigs.index(last.contig)
        return (first_rank, first.position, last_rank, last.position)

    return sorted(intervals, key=_ordering_key)
def union_intervals(intervals: List[hl.Interval], is_sorted: bool = False):
    """
    Generate a list with the union of all intervals in the input list by merging overlapping intervals.

    Two consecutive (sorted) intervals are merged when they start on the same
    contig and the later one starts at or before the end position of the
    current merged interval. An interval that ends at or before the end
    position of the current merged interval is absorbed without changing it.
    Only start contigs and positions are compared to decide whether to merge.

    A merged interval keeps the `includes_start` flag of the interval that
    provides its start and the `includes_end` flag of the interval that
    provides its end, so merging does not silently change endpoint inclusivity.

    :param intervals: Intervals to merge
    :param is_sorted: If set, assumes intervals are already sorted, otherwise will sort.
    :return: List of merged intervals
    """
    sorted_intervals = intervals if is_sorted else sort_intervals(intervals)
    # Seed the result with the first interval (or nothing for empty input).
    merged_intervals = sorted_intervals[:1]
    for interval in sorted_intervals[1:]:
        previous = merged_intervals[-1]
        if previous.start.contig != interval.start.contig:
            # New contig: nothing to merge with.
            merged_intervals.append(interval)
        elif previous.end.position < interval.end.position:
            if interval.start.position <= previous.end.position:
                # Overlapping or touching: extend the previous interval,
                # carrying over the endpoint inclusivity of each source.
                merged_intervals[-1] = hl.Interval(
                    previous.start,
                    interval.end,
                    includes_start=previous.includes_start,
                    includes_end=interval.includes_end,
                )
            else:
                # Gap between the two intervals on the same contig.
                merged_intervals.append(interval)
        # Otherwise the interval ends inside the previous one and is absorbed.
    return merged_intervals
def interval_length(interval: hl.Interval) -> int:
    """
    Compute the number of bases spanned by an Interval.

    For an interval within one contig this is simply end position minus start
    position. For an interval spanning several contigs it is the remainder of
    the start contig after the start position, plus the full length of every
    contig strictly between the start and end contigs (in reference genome
    order), plus the end position. The interval's inclusion flags are not
    consulted.

    :param interval: Input interval
    :return: Total length of the interval
    """
    begin, finish = interval.start, interval.end

    # Simple case: both endpoints lie on the same contig.
    if begin.contig == finish.contig:
        return finish.position - begin.position

    genome = begin.reference_genome

    # Bases left on the first contig after the start position.
    tail_of_first = genome.contig_length(begin.contig) - begin.position

    # Full lengths of the contigs strictly between the two endpoint contigs.
    first_idx = genome.contigs.index(begin.contig)
    last_idx = genome.contigs.index(finish.contig)
    whole_contigs = 0
    for name in genome.contigs[first_idx + 1 : last_idx]:
        whole_contigs += genome.contig_length(name)

    return tail_of_first + whole_contigs + finish.position
def pad_intervals(
    intervals: Union[
        hl.expr.IntervalExpression,
        hl.Interval,
        List[hl.Interval],
    ],
    padding_bp: int,
) -> Union[hl.expr.IntervalExpression, List[hl.expr.IntervalExpression]]:
    """
    Add padding to interval(s).

    The padded interval is built on the contig of the original start locus,
    with `padding_bp` subtracted from the start position and added to the end
    position. The original `includes_start` / `includes_end` flags are kept.
    Positions are not clamped to the contig bounds here.

    :param intervals: Interval(s) to add padding to. This can be a single Interval,
        a list of Intervals, or an IntervalExpression.
    :param padding_bp: Number of base pairs to add to each side of the interval.
    :return: Interval(s) with padding added. A list input yields a list with one
        padded interval per input element (an empty list yields an empty list).
    """

    def _add_padding(
        interval: Union[hl.expr.IntervalExpression, hl.Interval],
    ) -> hl.expr.IntervalExpression:
        """
        Add padding to an interval.

        :param interval: Interval to add padding to.
        :return: Interval with padding added.
        """
        start = interval.start
        if isinstance(interval, hl.Interval):
            # Python-side Interval: the start is a Locus value, which exposes
            # its reference genome directly instead of through a `dtype`.
            reference_genome = start.reference_genome
        else:
            # Expression: the reference genome lives on the locus type.
            reference_genome = start.dtype.reference_genome

        return hl.locus_interval(
            start.contig,
            start.position - padding_bp,
            interval.end.position + padding_bp,
            includes_start=interval.includes_start,
            includes_end=interval.includes_end,
            reference_genome=reference_genome,
        )

    if isinstance(intervals, list):
        return [_add_padding(i) for i in intervals]
    return _add_padding(intervals)