Tracks I/O¶

Load tracks from BED, narrowPeak, WIG, and BigWig files. Export tracks as BED or WIG. Gzip-compressed files are auto-detected.

from seqchain.io.tracks import load_bed, load_bigwig, load_track, write_bed

# Auto-dispatch by extension (a .narrowPeak file is routed to load_narrowpeak)
track = load_track("peaks.narrowPeak")

# Explicit loaders
bed = load_bed("intervals.bed")  # returns an IntervalTrack
signal = load_bigwig("coverage.bw")  # returns a SignalTrack; imports pyBigWig on call

# Export as BED6, sorted by (chrom, start)
write_bed(bed, "output.bed")

Auto-dispatch¶

load_track ¶

load_track(path: str | Path, name: str | None = None) -> Union[IntervalTrack, SignalTrack, TableTrack]

Auto-detect file format and load as the appropriate Track type.

Dispatches based on file extension:

.json → load_json()
.bed, .bed.gz → load_bed()
.narrowPeak, .narrowPeak.gz → load_narrowpeak()
.wig, .wig.gz → load_wig()
.bw, .bigwig, .bigWig → load_bigwig()

Parameters:

Name	Type	Description	Default
`path`	`str \| Path`	Path to a track file.	required
`name`	`str \| None`	Track name. Defaults to format-specific default.	`None`

Returns:

Type	Description
`Union[IntervalTrack, SignalTrack, TableTrack]`	An `IntervalTrack`, `SignalTrack`, or `TableTrack`.

Raises:

Type	Description
`ValueError`	If the file extension is not recognized.

Examples:

>>> track = load_track("peaks.bed")
>>> track = load_track("signal.json")

Source code in src/seqchain/io/tracks.py

def load_track(
    path: str | Path, name: str | None = None
) -> Union[IntervalTrack, SignalTrack, TableTrack]:
    """Load a track file, picking the loader from the file extension.

    A single trailing ``.gz`` is stripped before the extension is
    examined, and the remaining extension is matched case-insensitively:

    - ``.json`` → `load_json()`
    - ``.bed``, ``.bed.gz`` → `load_bed()`
    - ``.narrowPeak``, ``.narrowPeak.gz`` → `load_narrowpeak()`
    - ``.wig``, ``.wig.gz`` → `load_wig()`
    - ``.bw``, ``.bigwig``, ``.bigWig`` → `load_bigwig()`

    Args:
        path: Path to a track file.
        name: Track name, forwarded unchanged to the selected loader.
            When ``None`` the loader applies its own default.

    Returns:
        Whatever the selected loader returns: an `IntervalTrack`,
        `SignalTrack`, or `TableTrack`.

    Raises:
        ValueError: If the file extension is not recognized.

    Examples:
        >>> track = load_track("peaks.bed")  # doctest: +SKIP
        >>> track = load_track("signal.json")  # doctest: +SKIP
    """
    path = Path(path)

    # Look through one ".gz" layer so compressed files dispatch on the
    # inner extension (e.g. "x.bed.gz" is treated as ".bed").
    filename = path.name
    inner_name = filename[:-3] if filename.endswith(".gz") else filename
    ext = Path(inner_name).suffix.lower()

    # Keys are lowercase because ``ext`` has been lowercased above.
    loaders_by_ext = {
        ".json": load_json,
        ".bed": load_bed,
        ".narrowpeak": load_narrowpeak,
        ".wig": load_wig,
    }
    loader = loaders_by_ext.get(ext)
    if loader is None and ext in {e.lower() for e in _BIGWIG_EXTENSIONS}:
        loader = load_bigwig

    if loader is not None:
        return loader(path, name)

    raise ValueError(
        f"Unrecognized track format: {path.suffix!r}. "
        f"Supported: .json, .bed, .narrowPeak, .wig, .bw, .bigwig, .bigWig"
    )

Loaders¶

BED¶

load_bed ¶

load_bed(path: str | Path, name: str | None = None) -> IntervalTrack

Parse a BED file into an IntervalTrack.

Reads BED3 through BED12. Columns beyond the first three are optional and populate Region fields:

Column 4 → name
Column 5 → score
Column 6 → strand

Columns 7–12 (thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts) are stored in tags if present.

Lines starting with #, track, or browser are skipped. Supports gzip-compressed files (.gz).

Parameters:

Name	Type	Description	Default
`path`	`str \| Path`	Path to a BED file.	required
`name`	`str \| None`	Track name. Defaults to the filename stem.	`None`

Returns:

Type	Description
`IntervalTrack`	An `IntervalTrack` of parsed regions.

Examples:

>>> track = load_bed("peaks.bed")
>>> len(track)
1234

Source code in src/seqchain/io/tracks.py

def load_bed(path: str | Path, name: str | None = None) -> IntervalTrack:
    """Read a BED file and return its records as an IntervalTrack.

    BED3 through BED12 are accepted. Only the first three columns are
    mandatory; when present, the following columns fill `Region`
    fields:

    - Column 4 → ``name``
    - Column 5 → ``score``
    - Column 6 → ``strand``

    Columns 7–12 (thickStart, thickEnd, itemRgb, blockCount,
    blockSizes, blockStarts) are kept in ``tags`` if present.

    Lines starting with ``#``, ``track``, or ``browser`` are skipped.
    Gzip-compressed files (``.gz``) are supported.

    Args:
        path: Path to a BED file.
        name: Track name. When omitted or empty, the filename stem is
            used, with a trailing ``.bed`` removed so that
            ``peaks.bed.gz`` yields ``peaks``.

    Returns:
        An `IntervalTrack` of parsed regions.

    Examples:
        >>> track = load_bed("peaks.bed")  # doctest: +SKIP
        >>> len(track)  # doctest: +SKIP
        1234
    """
    bed_path = Path(path)
    if name:
        label = name
    else:
        # For "x.bed.gz" the stem is "x.bed"; strip the inner extension too.
        label = bed_path.stem.removesuffix(".bed")
    parsed = [*_parse_bed_lines(_read_lines(bed_path))]
    return IntervalTrack(TrackLabel(label), parsed)

narrowPeak¶

load_narrowpeak ¶

load_narrowpeak(path: str | Path, name: str | None = None) -> IntervalTrack

Parse a narrowPeak file into an IntervalTrack.

NarrowPeak is BED6+4 (10 columns):

1. chrom, 2. start, 3. end, 4. name, 5. score, 6. strand,
7. signalValue, 8. pValue, 9. qValue, 10. peak (summit offset).

Extra narrowPeak columns are stored in tags: signal_value, p_value, q_value, summit_offset.

Lines starting with #, track, or browser are skipped. Supports gzip-compressed files (.gz).

Parameters:

Name	Type	Description	Default
`path`	`str \| Path`	Path to a narrowPeak file.	required
`name`	`str \| None`	Track name. Defaults to the filename stem.	`None`

Returns:

Type	Description
`IntervalTrack`	An `IntervalTrack` of parsed peak regions.

Examples:

>>> track = load_narrowpeak("peaks.narrowPeak")
>>> track.name
'peaks'

Source code in src/seqchain/io/tracks.py

def load_narrowpeak(path: str | Path, name: str | None = None) -> IntervalTrack:
    """Read a narrowPeak file and return its peaks as an IntervalTrack.

    NarrowPeak is BED6+4 (10 columns):

    1. chrom, 2. start, 3. end, 4. name, 5. score, 6. strand,
    7. signalValue, 8. pValue, 9. qValue, 10. peak (summit offset).

    The four narrowPeak-specific columns are kept in ``tags`` as
    ``signal_value``, ``p_value``, ``q_value``, ``summit_offset``.

    Lines starting with ``#``, ``track``, or ``browser`` are skipped.
    Gzip-compressed files (``.gz``) are supported.

    Args:
        path: Path to a narrowPeak file.
        name: Track name. When omitted or empty, the filename stem is
            used, with a trailing ``.narrowPeak`` removed so that
            ``peaks.narrowPeak.gz`` yields ``peaks``.

    Returns:
        An `IntervalTrack` of parsed peak regions.

    Examples:
        >>> track = load_narrowpeak("peaks.narrowPeak")  # doctest: +SKIP
        >>> track.name  # doctest: +SKIP
        'peaks'
    """
    peak_path = Path(path)
    if name:
        label = name
    else:
        # For "x.narrowPeak.gz" the stem still carries ".narrowPeak".
        label = peak_path.stem.removesuffix(".narrowPeak")
    peaks = [*_parse_narrowpeak_lines(_read_lines(peak_path))]
    return IntervalTrack(TrackLabel(label), peaks)

WIG¶

load_wig ¶

load_wig(path: str | Path, name: str | None = None) -> IntervalTrack

Parse a WIG file into an IntervalTrack.

Supports both variableStep and fixedStep WIG formats. Each data position becomes a Region with start and end = start + 1 (single-base) and score set to the data value. WIG coordinates are 1-based in the file and converted to 0-based half-open on load.

Lines starting with #, track, or browser are skipped. Supports gzip-compressed files (.gz).

Parameters:

Name	Type	Description	Default
`path`	`str \| Path`	Path to a WIG file.	required
`name`	`str \| None`	Track name. Defaults to the filename stem.	`None`

Returns:

Type	Description
`IntervalTrack`	An `IntervalTrack` of parsed regions.

Examples:

>>> track = load_wig("signal.wig")
>>> len(track)
1234

Source code in src/seqchain/io/tracks.py

def load_wig(path: str | Path, name: str | None = None) -> IntervalTrack:
    """Read a WIG file and return its data points as an IntervalTrack.

    Both ``variableStep`` and ``fixedStep`` WIG formats are supported.
    Every data position becomes a single-base `Region`
    (``end = start + 1``) whose ``score`` is the data value.  The
    file's 1-based coordinates are converted to 0-based half-open on
    load.

    Lines starting with ``#``, ``track``, or ``browser`` are skipped.
    Gzip-compressed files (``.gz``) are supported.

    Args:
        path: Path to a WIG file.
        name: Track name. When omitted or empty, the filename stem is
            used, with a trailing ``.wig`` removed so that
            ``signal.wig.gz`` yields ``signal``.

    Returns:
        An `IntervalTrack` of parsed regions.

    Examples:
        >>> track = load_wig("signal.wig")  # doctest: +SKIP
        >>> len(track)  # doctest: +SKIP
        1234
    """
    wig_path = Path(path)
    label = name if name else wig_path.stem.removesuffix(".wig")
    # The parser's result is handed to IntervalTrack as-is (no list() copy).
    records = _parse_wig_lines(_read_lines(wig_path))
    return IntervalTrack(TrackLabel(label), records)

BigWig¶

load_bigwig ¶

load_bigwig(path: str | Path, name: str | None = None) -> SignalTrack

Open a BigWig file as a SignalTrack.

Uses a deferred import of pyBigWig so the rest of the library can be used without it installed.

Parameters:

Name	Type	Description	Default
`path`	`str \| Path`	Path to a BigWig file (`.bw`, `.bigwig`, `.bigWig`).	required
`name`	`str \| None`	Track name. Defaults to the filename stem.	`None`

Returns:

Type	Description
`SignalTrack`	A `SignalTrack` backed by the BigWig file.

Examples:

>>> track = load_bigwig("signal.bw")
>>> track.signal_at("chr1", 100, 200)
3.14

Source code in src/seqchain/io/tracks.py

def load_bigwig(path: str | Path, name: str | None = None) -> SignalTrack:
    """Open a BigWig file and wrap it in a SignalTrack.

    ``pyBigWig`` is imported inside the function so that the rest of
    the library remains usable when it is not installed.

    Args:
        path: Path to a BigWig file (``.bw``, ``.bigwig``, ``.bigWig``).
        name: Track name. When omitted or empty, the filename stem is
            used.

    Returns:
        A `SignalTrack` backed by the opened BigWig handle.

    Examples:
        >>> track = load_bigwig("signal.bw")  # doctest: +SKIP
        >>> track.signal_at("chr1", 100, 200)  # doctest: +SKIP
        3.14
    """
    # Deferred import: only needed when a BigWig file is actually loaded.
    import pyBigWig

    bw_path = Path(path)
    label = name if name else bw_path.stem
    # pyBigWig.open is given a plain string path.
    handle = pyBigWig.open(str(bw_path))
    return SignalTrack(TrackLabel(label), handle)

Writers¶

write_wig ¶

write_wig(track: IntervalTrack, path: str | Path, *, track_name: str | None = None) -> None

Write an IntervalTrack to variableStep WIG format.

Each Region's score is written as an integer count at its start position. WIG coordinates are 1-based. Regions are grouped by chromosome and sorted by position. All regions are written, including those with a score of zero — this is required by TRANSIT for essentiality analysis.

Parameters:

Name	Type	Description	Default
`track`	`IntervalTrack`	IntervalTrack to write.	required
`path`	`str \| Path`	Output file path.	required
`track_name`	`str \| None`	Optional track name for the header. Defaults to the track's own name.	`None`

Examples:

>>> write_wig(insertion_track, "output.wig")

Source code in src/seqchain/io/tracks.py

def write_wig(
    track: IntervalTrack,
    path: str | Path,
    *,
    track_name: str | None = None,
) -> None:
    """Write an IntervalTrack to variableStep WIG format.

    The file begins with a ``track type=wiggle_0 name="..."`` definition
    line carrying the track name, followed by one ``variableStep``
    section per chromosome.  `load_wig` skips lines starting with
    ``track``, so the header does not affect re-loading.

    Each Region's score is written as an integer count at its start
    position. WIG coordinates are 1-based.  Regions are grouped by
    chromosome and sorted by position.  All regions are written,
    including those with a score of zero — this is required by TRANSIT
    for essentiality analysis.

    Args:
        track: IntervalTrack to write.
        path: Output file path.
        track_name: Optional track name for the header.  Defaults
            to the track's own name.

    Examples:
        >>> write_wig(insertion_track, "output.wig")  # doctest: +SKIP
    """
    path = Path(path)
    name = track_name or track.name

    # Group regions by chromosome; each group is sorted by start on output.
    by_chrom: dict[str, list[Region]] = {}
    for r in track:
        by_chrom.setdefault(r.chrom, []).append(r)

    with open(path, "w") as f:
        # Previously the resolved name was computed but never written, so
        # the track_name argument had no effect on the output.
        f.write(f'track type=wiggle_0 name="{name}"\n')
        for chrom in sorted(by_chrom):
            f.write(f"variableStep chrom={chrom}\n")
            for r in sorted(by_chrom[chrom], key=lambda r: r.start):
                # Region starts are 0-based; WIG positions are 1-based.
                f.write(f"{r.start + 1}\t{int(r.score)}\n")

write_bed ¶

write_bed(track: IntervalTrack, path: str | Path) -> None

Write an IntervalTrack to BED6 format.

Columns: chrom, start (0-based), end, name, score (integer), strand. Regions are sorted by (chrom, start).

Parameters:

Name	Type	Description	Default
`track`	`IntervalTrack`	IntervalTrack to write.	required
`path`	`str \| Path`	Output file path.	required

Examples:

>>> write_bed(insertion_track, "output.bed")

Source code in src/seqchain/io/tracks.py

def write_bed(
    track: IntervalTrack,
    path: str | Path,
) -> None:
    """Write an IntervalTrack to a BED6 file.

    One tab-separated line is written per region with the columns
    chrom, start (0-based), end, name, score (truncated to an integer
    with ``int()``), and strand.  Regions are written in
    (chrom, start) order.

    Args:
        track: IntervalTrack to write.
        path: Output file path.

    Examples:
        >>> write_bed(insertion_track, "output.bed")  # doctest: +SKIP
    """

    def _position(region):
        # Sort key: chromosome first, then start coordinate.
        return region.chrom, region.start

    out_path = Path(path)
    # Sort before opening the file, so a failure here creates no output.
    ordered = sorted(track, key=_position)

    with open(out_path, "w") as handle:
        handle.writelines(
            f"{rec.chrom}\t{rec.start}\t{rec.end}\t{rec.name}"
            f"\t{int(rec.score)}\t{rec.strand}\n"
            for rec in ordered
        )