# noqa: D100
from typing import List
import hail as hl
[docs]def sort_intervals(intervals: List[hl.Interval]):
"""
Sort an array of intervals by start contig, then start position, then end contig, then end position.
:param intervals: Intervals to sort
:return: Sorted interval list
"""
return sorted(
intervals,
key=lambda interval: (
interval.start.reference_genome.contigs.index(interval.start.contig),
interval.start.position,
interval.end.reference_genome.contigs.index(interval.end.contig),
interval.end.position,
),
)
[docs]def union_intervals(intervals: List[hl.Interval], is_sorted: bool = False):
"""
Generate a list with the union of all intervals in the input list by merging overlapping intervals.
:param intervals: Intervals to merge
:param is_sorted: If set, assumes intervals are already sorted, otherwise will sort.
:return: List of merged intervals
"""
sorted_intervals = intervals if is_sorted else sort_intervals(intervals)
merged_intervals = sorted_intervals[:1]
for interval in sorted_intervals[1:]:
if merged_intervals[-1].start.contig == interval.start.contig:
if merged_intervals[-1].end.position < interval.end.position:
if interval.start.position <= merged_intervals[-1].end.position:
merged_intervals[-1] = hl.Interval(
merged_intervals[-1].start, interval.end
)
else:
merged_intervals.append(interval)
else:
merged_intervals.append(interval)
return merged_intervals
[docs]def interval_length(interval: hl.Interval) -> int:
"""
Return the total number of bases in an Interval.
:param interval: Input interval
:return: Total length of the interval
"""
if interval.start.contig != interval.end.contig:
ref = interval.start.reference_genome
return (
ref.contig_length(interval.start.contig)
- interval.start.position
+ sum(
ref.contig_length(contig)
for contig in ref.contigs[
ref.contigs.index(interval.start.contig)
+ 1 : ref.contigs.index(interval.end.contig)
]
)
+ interval.end.position
)
else:
return interval.end.position - interval.start.position