Intervals¶

Max-end segment tree for O(log n + k) interval overlap queries. IntervalIndex is the data structure behind IntervalTrack.regions().

Build an index from Regions, then query it with chromosome + coordinate range to get all overlapping entries.

intervals ¶

Fast interval overlap lookups and set operations.

IntervalIndex provides O(log n + k) overlap queries backed by a max-end segment tree. intersect_intervals() computes pairwise intersections of two region sets — the fundamental spatial join primitive.

No external dependencies — uses bisect from stdlib.

IntervalIndex ¶

IntervalIndex(data: dict[str, _ChromIndex])

Index for fast overlap, nearest, and proximity queries on Regions.

Backed by a per-chromosome segment tree over coordinate-sorted (start, end, region) tuples. Construction is O(n log n) via sorting; overlap queries are O(log n + k) where k is the number of results — bisect prunes the right boundary and the max-end tree prunes the left.

Examples:

>>> idx = IntervalIndex.build([
...     Region("chr1", 100, 200),
...     Region("chr1", 150, 250),
... ])
>>> len(idx.overlapping("chr1", 120, 160))
2

Source code in src/seqchain/primitives/intervals.py

def __init__(self, data: dict[str, _ChromIndex]) -> None:
    self._data = data

build `staticmethod` ¶

build(regions: Sequence[Region]) -> IntervalIndex

Construct an index from a sequence of Regions.

Regions are grouped by chromosome and sorted by start position (ties broken by end position).

Parameters:

Name	Type	Description	Default
`regions`	`Sequence[Region]`	Iterable of `Region` objects.	required

Returns:

Type	Description
`IntervalIndex`	A new `IntervalIndex` ready for queries.

Examples:

>>> idx = IntervalIndex.build([Region("chr1", 10, 20)])
>>> idx.overlapping("chr1", 15, 25)
[Region(chrom='chr1', start=10, end=20, strand='.', score=0.0, name='', tags={})]

Source code in src/seqchain/primitives/intervals.py

@staticmethod
def build(regions: Sequence[Region]) -> IntervalIndex:
    """Construct an index from a sequence of Regions.

    Regions are grouped by chromosome and sorted by start position
    (ties broken by end position).

    Args:
        regions: Iterable of `Region` objects.

    Returns:
        A new `IntervalIndex` ready for queries.

    Examples:
        >>> idx = IntervalIndex.build([Region("chr1", 10, 20)])
        >>> idx.overlapping("chr1", 15, 25)
        [Region(chrom='chr1', start=10, end=20, strand='.', score=0.0, name='', tags={})]
    """
    by_chrom: dict[str, list[tuple[int, int, Region]]] = {}
    for r in regions:
        by_chrom.setdefault(r.chrom, []).append((r.start, r.end, r))

    data: dict[str, _ChromIndex] = {}
    for chrom, entries in by_chrom.items():
        entries.sort(key=lambda t: (t[0], t[1]))
        data[chrom] = _ChromIndex(entries)

    return IntervalIndex(data)

overlapping ¶

overlapping(chrom: str, start: int, end: int) -> list[Region]

Return all regions that overlap the query interval.

A region overlaps if region.start < end and region.end > start (half-open interval intersection). Complexity is O(log n + k) where k is the result count.

Parameters:

Name	Type	Description	Default
`chrom`	`str`	Chromosome name.	required
`start`	`int`	Query interval start (inclusive).	required
`end`	`int`	Query interval end (exclusive).	required

Returns:

Type	Description
`list[Region]`	List of overlapping `Region` objects,
`list[Region]`	ordered by start position.

Examples:

>>> idx = IntervalIndex.build([Region("chr1", 10, 20)])
>>> idx.overlapping("chr1", 15, 25)
[Region(chrom='chr1', start=10, end=20, strand='.', score=0.0, name='', tags={})]

Source code in src/seqchain/primitives/intervals.py

def overlapping(self, chrom: str, start: int, end: int) -> list[Region]:
    """Return all regions that overlap the query interval.

    A region overlaps if ``region.start < end`` and
    ``region.end > start`` (half-open interval intersection).
    Complexity is O(log n + k) where *k* is the result count.

    Args:
        chrom: Chromosome name.
        start: Query interval start (inclusive).
        end: Query interval end (exclusive).

    Returns:
        List of overlapping `Region` objects,
        ordered by start position.

    Examples:
        >>> idx = IntervalIndex.build([Region("chr1", 10, 20)])
        >>> idx.overlapping("chr1", 15, 25)
        [Region(chrom='chr1', start=10, end=20, strand='.', score=0.0, name='', tags={})]
    """
    ci = self._data.get(chrom)
    if ci is None or start >= end:
        return []
    return ci.overlap(start, end)

nearest ¶

nearest(chrom: str, pos: int, max_distance: int | None = None) -> Region | None

Return the single nearest region to a position.

Distance is measured from pos to the closest edge of each region (0 if pos is inside the region).

Parameters:

Name	Type	Description	Default
`chrom`	`str`	Chromosome name.	required
`pos`	`int`	Query position.	required
`max_distance`	`int \| None`	If given, only consider regions within this many bp. `None` (default) means no limit.	`None`

Returns:

Type	Description
`Region \| None`	The nearest `Region`, or `None` if
`Region \| None`	the chromosome has no regions (or none within
`Region \| None`	max_distance).

Examples:

>>> idx = IntervalIndex.build([Region("chr1", 100, 200)])
>>> idx.nearest("chr1", 50)
Region(chrom='chr1', start=100, end=200, strand='.', score=0.0, name='', tags={})

Source code in src/seqchain/primitives/intervals.py

def nearest(
    self, chrom: str, pos: int, max_distance: int | None = None
) -> Region | None:
    """Return the single nearest region to a position.

    Distance is measured from *pos* to the closest edge of each
    region (0 if *pos* is inside the region).

    Args:
        chrom: Chromosome name.
        pos: Query position.
        max_distance: If given, only consider regions within this
            many bp. ``None`` (default) means no limit.

    Returns:
        The nearest `Region`, or ``None`` if
        the chromosome has no regions (or none within
        *max_distance*).

    Examples:
        >>> idx = IntervalIndex.build([Region("chr1", 100, 200)])
        >>> idx.nearest("chr1", 50)
        Region(chrom='chr1', start=100, end=200, strand='.', score=0.0, name='', tags={})
    """
    ci = self._data.get(chrom)
    if ci is None or ci.n == 0:
        return None

    # Find insertion point for pos
    idx = bisect.bisect_right(ci.entries, pos, key=lambda e: e[0])

    best: Region | None = None
    best_dist = float("inf")

    # Check candidates around the insertion point
    for i in (idx - 1, idx):
        if 0 <= i < ci.n:
            s, e, region = ci.entries[i]
            dist = self._point_distance(pos, s, e)
            if dist < best_dist:
                best_dist = dist
                best = region

    # Also scan backwards from idx-2 in case of ties / overlapping
    # regions with the same start
    if idx >= 2:
        s, e, region = ci.entries[idx - 2]
        dist = self._point_distance(pos, s, e)
        if dist < best_dist:
            best_dist = dist
            best = region

    if max_distance is not None and best_dist > max_distance:
        return None

    return best

within_distance ¶

within_distance(chrom: str, pos: int, distance: int) -> list[Region]

Return all regions within a given distance of a position.

Distance is measured from pos to the closest edge of each region (0 if pos is inside the region). Complexity is O(log n + k) — delegates to overlapping() with an expanded window.

Parameters:

Name	Type	Description	Default
`chrom`	`str`	Chromosome name.	required
`pos`	`int`	Query position.	required
`distance`	`int`	Maximum distance in bp (inclusive).	required

Returns:

Type	Description
`list[Region]`	List of `Region` objects within
`list[Region]`	distance bp, ordered by start position.

Examples:

>>> idx = IntervalIndex.build([
...     Region("chr1", 100, 200),
...     Region("chr1", 500, 600),
... ])
>>> len(idx.within_distance("chr1", 50, 100))
1

Source code in src/seqchain/primitives/intervals.py

def within_distance(
    self, chrom: str, pos: int, distance: int
) -> list[Region]:
    """Return all regions within a given distance of a position.

    Distance is measured from *pos* to the closest edge of each
    region (0 if *pos* is inside the region).  Complexity is
    O(log n + k) — delegates to `overlapping()` with an
    expanded window.

    Args:
        chrom: Chromosome name.
        pos: Query position.
        distance: Maximum distance in bp (inclusive).

    Returns:
        List of `Region` objects within
        *distance* bp, ordered by start position.

    Examples:
        >>> idx = IntervalIndex.build([
        ...     Region("chr1", 100, 200),
        ...     Region("chr1", 500, 600),
        ... ])
        >>> len(idx.within_distance("chr1", 50, 100))
        1
    """
    return self.overlapping(chrom, pos - distance, pos + distance + 1)

len ¶

__len__() -> int

Total number of indexed regions across all chromosomes.

Returns:

Type	Description
`int`	Region count.

Source code in src/seqchain/primitives/intervals.py

def __len__(self) -> int:
    """Total number of indexed regions across all chromosomes.

    Returns:
        Region count.
    """
    return sum(ci.n for ci in self._data.values())

chromosomes ¶

chromosomes() -> list[str]

Return the list of chromosomes present in the index.

Returns:

Type	Description
`list[str]`	Sorted list of chromosome names.

Examples:

>>> idx = IntervalIndex.build([Region("chr2", 0, 10), Region("chr1", 0, 10)])
>>> idx.chromosomes()
['chr1', 'chr2']

Source code in src/seqchain/primitives/intervals.py

def chromosomes(self) -> list[str]:
    """Return the list of chromosomes present in the index.

    Returns:
        Sorted list of chromosome names.

    Examples:
        >>> idx = IntervalIndex.build([Region("chr2", 0, 10), Region("chr1", 0, 10)])
        >>> idx.chromosomes()
        ['chr1', 'chr2']
    """
    chroms: list[str] = []
    for chrom in self._data:
        chroms.append(chrom)
    chroms.sort()
    return chroms

intersect_intervals ¶

intersect_intervals(query: Iterable[Region], reference: Iterable[Region]) -> Iterator[tuple[Region, Region]]

Compute pairwise intersections of two region sets.

For each query region, finds all overlapping reference regions and yields (fragment, ref) pairs. The fragment is the query region with coordinates clipped to the intersection — it preserves the query's name, strand, score, and tags. The ref is the original reference region (unmodified).

Uses a sorted sweep-line grouped by chromosome. Reference regions are grouped by chromosome and sorted by start; each query region scans only its own chromosome's sorted slice, stopping early when ref.start >= query.end.

Parameters:

Name	Type	Description	Default
`query`	`Iterable[Region]`	Input regions to intersect.	required
`reference`	`Iterable[Region]`	Reference regions to intersect against.	required

Yields:

Type	Description
`Region`	`(fragment, ref)` tuples where fragment is the query region
`Region`	clipped to the overlap with ref. Query regions with no
`tuple[Region, Region]`	overlapping reference regions are silently dropped.

Examples:

>>> q = [Region("chr1", 100, 300)]
>>> r = [Region("chr1", 0, 200, name="A"), Region("chr1", 200, 400, name="B")]
>>> [(f.start, f.end, ref.name) for f, ref in intersect_intervals(q, r)]
[(100, 200, 'A'), (200, 300, 'B')]

Source code in src/seqchain/primitives/intervals.py

def intersect_intervals(
    query: Iterable[Region],
    reference: Iterable[Region],
) -> Iterator[tuple[Region, Region]]:
    """Compute pairwise intersections of two region sets.

    For each query region, finds all overlapping reference regions and
    yields ``(fragment, ref)`` pairs.  The *fragment* is the query region
    with coordinates clipped to the intersection — it preserves the
    query's name, strand, score, and tags.  The *ref* is the original
    reference region (unmodified).

    Uses a sorted sweep-line grouped by chromosome.  Reference regions
    are grouped by chromosome and sorted by start; each query region
    scans only its own chromosome's sorted slice, stopping early when
    ``ref.start >= query.end``.

    Args:
        query: Input regions to intersect.
        reference: Reference regions to intersect against.

    Yields:
        ``(fragment, ref)`` tuples where *fragment* is the query region
        clipped to the overlap with *ref*.  Query regions with no
        overlapping reference regions are silently dropped.

    Examples:
        >>> q = [Region("chr1", 100, 300)]
        >>> r = [Region("chr1", 0, 200, name="A"), Region("chr1", 200, 400, name="B")]
        >>> [(f.start, f.end, ref.name) for f, ref in intersect_intervals(q, r)]
        [(100, 200, 'A'), (200, 300, 'B')]
    """
    # Group reference by chromosome without calling list() — use dict + append
    ref_by_chrom: dict[str, list[Region]] = {}
    for r in reference:
        if r.chrom not in ref_by_chrom:
            ref_by_chrom[r.chrom] = []
        ref_by_chrom[r.chrom].append(r)
    # Sort each chromosome's slice in-place (sorted() not needed — .sort() is fine)
    for chrom_refs in ref_by_chrom.values():
        chrom_refs.sort(key=lambda r: (r.start, r.end))

    for q in query:
        chrom_refs = ref_by_chrom.get(q.chrom, [])
        for ref in chrom_refs:
            if ref.start >= q.end:
                break  # sorted by start — no further refs can overlap
            if ref.end <= q.start:
                continue
            int_start = max(q.start, ref.start)
            int_end = min(q.end, ref.end)
            if int_start < int_end:
                fragment = replace(q, start=int_start, end=int_end)
                yield (fragment, ref)

overlay ¶

overlay(query: Iterable[Region], reference: Iterable[Region], *, name_tag: str | None = None) -> Iterator[Region]

Spatial join with tag merge.

For each query region, finds all overlapping reference regions via intersect_intervals(), clips to the intersection, and merges the reference region's tags into the fragment. If name_tag is given, promotes the reference region's name field into a tag on the output fragment.

Parameters:

Name	Type	Description	Default
`query`	`Iterable[Region]`	Input regions to overlay.	required
`reference`	`Iterable[Region]`	Reference regions to join against.	required
`name_tag`	`str \| None`	If given, sets `tags[name_tag] = ref.name` on each output fragment. Without this, the reference's `name` field would not transfer (it's a field, not a tag).	`None`

Yields:

Type	Description
`Region`	Regions with coordinates clipped to the intersection and tags
`Region`	merged from both query and reference. Query regions with no
`Region`	overlapping reference are silently dropped.

Examples:

>>> q = [Region("chr1", 100, 300, name="state", tags={"mark": "H3"})]
>>> r = [Region("chr1", 0, 200, name="promoter", tags={"gene": "G1"})]
>>> result = list(overlay(q, r, name_tag="feature_type"))
>>> result[0].tags["feature_type"]
'promoter'
>>> result[0].tags["gene"]
'G1'
>>> result[0].tags["mark"]
'H3'

Source code in src/seqchain/primitives/intervals.py

def overlay(
    query: Iterable[Region],
    reference: Iterable[Region],
    *,
    name_tag: str | None = None,
) -> Iterator[Region]:
    """Spatial join with tag merge.

    For each query region, finds all overlapping reference regions via
    `intersect_intervals()`, clips to the intersection, and merges
    the reference region's tags into the fragment.  If *name_tag* is
    given, promotes the reference region's ``name`` field into a tag on
    the output fragment.

    Args:
        query: Input regions to overlay.
        reference: Reference regions to join against.
        name_tag: If given, sets ``tags[name_tag] = ref.name`` on each
            output fragment.  Without this, the reference's ``name``
            field would not transfer (it's a field, not a tag).

    Yields:
        Regions with coordinates clipped to the intersection and tags
        merged from both query and reference.  Query regions with no
        overlapping reference are silently dropped.

    Examples:
        >>> q = [Region("chr1", 100, 300, name="state", tags={"mark": "H3"})]
        >>> r = [Region("chr1", 0, 200, name="promoter", tags={"gene": "G1"})]
        >>> result = list(overlay(q, r, name_tag="feature_type"))
        >>> result[0].tags["feature_type"]
        'promoter'
        >>> result[0].tags["gene"]
        'G1'
        >>> result[0].tags["mark"]
        'H3'
    """
    for fragment, ref in intersect_intervals(query, reference):
        merged = {**fragment.tags, **ref.tags}
        if name_tag and ref.name:
            merged[name_tag] = ref.name
        yield replace(fragment, tags=merged)

Intervals¶

intervals ¶

IntervalIndex ¶

build staticmethod ¶

overlapping ¶

nearest ¶

within_distance ¶

__len__ ¶

chromosomes ¶

intersect_intervals ¶

overlay ¶

build `staticmethod` ¶

len ¶