Skip to content

Tracks I/O

Load tracks from BED, narrowPeak, WIG, and BigWig files. Export tracks as BED or WIG. Gzip-compressed files are auto-detected.

from seqchain.io.tracks import load_bed, load_bigwig, load_track, write_bed

# Auto-dispatch: load_track picks the loader from the file extension
track = load_track("peaks.narrowPeak")

# Explicit loaders, for when the format is already known
bed = load_bed("intervals.bed")
signal = load_bigwig("coverage.bw")

# Export an IntervalTrack as BED6
write_bed(bed, "output.bed")

Auto-dispatch

load_track

load_track(path: str | Path, name: str | None = None) -> Union[IntervalTrack, SignalTrack, TableTrack]

Auto-detect file format and load as the appropriate Track type.

Dispatches based on file extension:

  • .json → load_json()
  • .bed, .bed.gz → load_bed()
  • .narrowPeak, .narrowPeak.gz → load_narrowpeak()
  • .wig, .wig.gz → load_wig()
  • .bw, .bigwig, .bigWig → load_bigwig()

Parameters:

Name Type Description Default
path str | Path

Path to a track file.

required
name str | None

Track name. Defaults to format-specific default.

None

Returns:

Type Description
Union[IntervalTrack, SignalTrack, TableTrack]

An IntervalTrack, SignalTrack, or TableTrack.

Raises:

Type Description
ValueError

If the file extension is not recognized.

Examples:

>>> track = load_track("peaks.bed")
>>> track = load_track("signal.json")
Source code in src/seqchain/io/tracks.py
def load_track(
    path: str | Path, name: str | None = None
) -> Union[IntervalTrack, SignalTrack, TableTrack]:
    """Auto-detect file format and load as the appropriate Track type.

    Dispatches on the file extension.  A trailing ``.gz`` is stripped
    first, and the remaining extension is compared case-insensitively:

    - ``.json`` → `load_json()`
    - ``.bed``, ``.bed.gz`` → `load_bed()`
    - ``.narrowPeak``, ``.narrowPeak.gz`` → `load_narrowpeak()`
    - ``.wig``, ``.wig.gz`` → `load_wig()`
    - ``.bw``, ``.bigwig``, ``.bigWig`` → `load_bigwig()`

    Args:
        path: Path to a track file.
        name: Track name, forwarded unchanged to the selected loader.
            Defaults to format-specific default.

    Returns:
        An `IntervalTrack`,
        `SignalTrack`, or `TableTrack`.

    Raises:
        ValueError: If the file extension is not recognized.  For
            gzip-compressed files the message shows the inner extension
            together with ``.gz`` (e.g. ``'.txt.gz'``).

    Examples:
        >>> track = load_track("peaks.bed")  # doctest: +SKIP
        >>> track = load_track("signal.json")  # doctest: +SKIP
    """
    path = Path(path)

    # Strip .gz so that "peaks.bed.gz" dispatches on ".bed".
    file_name = path.name
    is_gzipped = file_name.endswith(".gz")
    inner_name = file_name[:-3] if is_gzipped else file_name

    # Keep the original-case extension for the error message; compare on
    # the lowercased form so ".narrowPeak" and ".bigWig" match.
    extension = Path(inner_name).suffix
    suffix = extension.lower()

    if suffix == ".json":
        return load_json(path, name)
    if suffix == ".bed":
        return load_bed(path, name)
    if suffix == ".narrowpeak":
        return load_narrowpeak(path, name)
    if suffix == ".wig":
        return load_wig(path, name)
    if suffix in {s.lower() for s in _BIGWIG_EXTENSIONS}:
        return load_bigwig(path, name)

    # Report the extension that was actually tested.  ``path.suffix`` alone
    # would always read '.gz' for compressed files and hide the real cause.
    shown = f"{extension}.gz" if is_gzipped else extension
    raise ValueError(
        f"Unrecognized track format: {shown!r}. "
        f"Supported: .json, .bed, .narrowPeak, .wig, .bw, .bigwig, .bigWig"
    )

Loaders

BED

load_bed

load_bed(path: str | Path, name: str | None = None) -> IntervalTrack

Parse a BED file into an IntervalTrack.

Reads BED3 through BED12. Columns beyond the first three are optional and populate Region fields:

  • Column 4 → name
  • Column 5 → score
  • Column 6 → strand

Columns 7–12 (thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts) are stored in tags if present.

Lines starting with #, track, or browser are skipped. Supports gzip-compressed files (.gz).

Parameters:

Name Type Description Default
path str | Path

Path to a BED file.

required
name str | None

Track name. Defaults to the filename stem.

None

Returns:

Type Description
IntervalTrack

An IntervalTrack of parsed regions.

Examples:

>>> track = load_bed("peaks.bed")
>>> len(track)
1234
Source code in src/seqchain/io/tracks.py
def load_bed(path: str | Path, name: str | None = None) -> IntervalTrack:
    """Load a BED file (BED3 through BED12) as an IntervalTrack.

    Only the first three columns are mandatory.  When present, the
    following columns fill in `Region` fields:

    - Column 4 → ``name``
    - Column 5 → ``score``
    - Column 6 → ``strand``

    Columns 7–12 (thickStart, thickEnd, itemRgb, blockCount,
    blockSizes, blockStarts) end up in ``tags`` if present.

    Lines starting with ``#``, ``track``, or ``browser`` are skipped.
    Gzip-compressed input (``.gz``) is supported.

    Args:
        path: Path to a BED file.
        name: Track name. Defaults to the filename stem, with a
            remaining ``.bed`` removed (so ``peaks.bed.gz`` → ``peaks``).

    Returns:
        An `IntervalTrack` of parsed regions.

    Examples:
        >>> track = load_bed("peaks.bed")  # doctest: +SKIP
        >>> len(track)  # doctest: +SKIP
        1234
    """
    bed_path = Path(path)

    if name:
        label = name
    else:
        # For "x.bed.gz" the stem is "x.bed"; drop the leftover ".bed".
        label = bed_path.stem.removesuffix(".bed")

    parsed = [*_parse_bed_lines(_read_lines(bed_path))]
    return IntervalTrack(TrackLabel(label), parsed)

narrowPeak

load_narrowpeak

load_narrowpeak(path: str | Path, name: str | None = None) -> IntervalTrack

Parse a narrowPeak file into an IntervalTrack.

NarrowPeak is BED6+4 (10 columns):

  1. chrom, 2. start, 3. end, 4. name, 5. score, 6. strand,
  7. signalValue, 8. pValue, 9. qValue, 10. peak (summit offset).

Extra narrowPeak columns are stored in tags: signal_value, p_value, q_value, summit_offset.

Lines starting with #, track, or browser are skipped. Supports gzip-compressed files (.gz).

Parameters:

Name Type Description Default
path str | Path

Path to a narrowPeak file.

required
name str | None

Track name. Defaults to the filename stem.

None

Returns:

Type Description
IntervalTrack

An IntervalTrack of parsed peak regions.

Examples:

>>> track = load_narrowpeak("peaks.narrowPeak")
>>> track.name
'peaks'
Source code in src/seqchain/io/tracks.py
def load_narrowpeak(path: str | Path, name: str | None = None) -> IntervalTrack:
    """Parse a narrowPeak file into an IntervalTrack.

    NarrowPeak is BED6+4 (10 columns):

    1. chrom, 2. start, 3. end, 4. name, 5. score, 6. strand,
    7. signalValue, 8. pValue, 9. qValue, 10. peak (summit offset).

    Extra narrowPeak columns are stored in ``tags``:
    ``signal_value``, ``p_value``, ``q_value``, ``summit_offset``.

    Lines starting with ``#``, ``track``, or ``browser`` are skipped.
    Supports gzip-compressed files (``.gz``).

    Args:
        path: Path to a narrowPeak file.
        name: Track name. Defaults to the filename stem.  A remaining
            ``.narrowPeak`` extension (any letter case) is removed, so
            ``peaks.narrowPeak.gz`` and ``peaks.narrowpeak.gz`` both
            default to ``peaks``.

    Returns:
        An `IntervalTrack` of parsed peak
        regions.

    Examples:
        >>> track = load_narrowpeak("peaks.narrowPeak")  # doctest: +SKIP
        >>> track.name  # doctest: +SKIP
        'peaks'
    """
    path = Path(path)

    if name:
        track_name = name
    else:
        # Path.stem drops only the last suffix, so "x.narrowPeak.gz" leaves
        # "x.narrowPeak".  Strip that case-insensitively, since lowercase
        # ".narrowpeak" is a common spelling of the same extension.
        track_name = path.stem
        extension = ".narrowpeak"
        if track_name.lower().endswith(extension):
            track_name = track_name[: -len(extension)]

    regions = list(_parse_narrowpeak_lines(_read_lines(path)))
    return IntervalTrack(TrackLabel(track_name), regions)

WIG

load_wig

load_wig(path: str | Path, name: str | None = None) -> IntervalTrack

Parse a WIG file into an IntervalTrack.

Supports both variableStep and fixedStep WIG formats. Each data position becomes a Region with start and end = start + 1 (single-base) and score set to the data value. WIG coordinates are 1-based in the file and converted to 0-based half-open on load.

Lines starting with #, track, or browser are skipped. Supports gzip-compressed files (.gz).

Parameters:

Name Type Description Default
path str | Path

Path to a WIG file.

required
name str | None

Track name. Defaults to the filename stem.

None

Returns:

Type Description
IntervalTrack

An IntervalTrack of parsed regions.

Examples:

>>> track = load_wig("signal.wig")
>>> len(track)
1234
Source code in src/seqchain/io/tracks.py
def load_wig(path: str | Path, name: str | None = None) -> IntervalTrack:
    """Load a WIG file as an IntervalTrack.

    Both ``variableStep`` and ``fixedStep`` declarations are handled.
    Every data position yields a single-base `Region`
    (``end = start + 1``) whose ``score`` is the data value.  The file's
    1-based coordinates are converted to 0-based half-open on load.

    Lines starting with ``#``, ``track``, or ``browser`` are skipped.
    Gzip-compressed input (``.gz``) is supported.

    Args:
        path: Path to a WIG file.
        name: Track name. Defaults to the filename stem, with a
            remaining ``.wig`` removed (so ``signal.wig.gz`` → ``signal``).

    Returns:
        An `IntervalTrack` of parsed regions.

    Examples:
        >>> track = load_wig("signal.wig")  # doctest: +SKIP
        >>> len(track)  # doctest: +SKIP
        1234
    """
    wig_path = Path(path)

    if name:
        label = name
    else:
        # For "x.wig.gz" the stem is "x.wig"; drop the leftover ".wig".
        label = wig_path.stem.removesuffix(".wig")

    # The parser's result is handed to the track as-is.
    parsed = _parse_wig_lines(_read_lines(wig_path))
    return IntervalTrack(TrackLabel(label), parsed)

BigWig

load_bigwig

load_bigwig(path: str | Path, name: str | None = None) -> SignalTrack

Open a BigWig file as a SignalTrack.

Uses a deferred import of pyBigWig so the rest of the library can be used without it installed.

Parameters:

Name Type Description Default
path str | Path

Path to a BigWig file (.bw, .bigwig, .bigWig).

required
name str | None

Track name. Defaults to the filename stem.

None

Returns:

Type Description
SignalTrack

A SignalTrack backed by the BigWig file.

Examples:

>>> track = load_bigwig("signal.bw")
>>> track.signal_at("chr1", 100, 200)
3.14
Source code in src/seqchain/io/tracks.py
def load_bigwig(path: str | Path, name: str | None = None) -> SignalTrack:
    """Open a BigWig file as a SignalTrack.

    Uses a deferred import of ``pyBigWig`` so the rest of the library
    can be used without it installed.  The handle returned by
    ``pyBigWig.open`` is passed to the `SignalTrack` and is not closed
    by this function.

    Args:
        path: Path to a BigWig file (``.bw``, ``.bigwig``, ``.bigWig``).
        name: Track name. Defaults to the filename stem.

    Returns:
        A `SignalTrack` backed by the BigWig file.

    Raises:
        ImportError: If the optional ``pyBigWig`` package is not
            installed.

    Examples:
        >>> track = load_bigwig("signal.bw")  # doctest: +SKIP
        >>> track.signal_at("chr1", 100, 200)  # doctest: +SKIP
        3.14
    """
    try:
        import pyBigWig
    except ImportError as exc:
        # Same exception type as before, but tell the user what to install.
        raise ImportError(
            "load_bigwig() requires the optional 'pyBigWig' package; "
            "install it with `pip install pyBigWig`."
        ) from exc

    path = Path(path)
    track_name = name or path.stem
    bw = pyBigWig.open(str(path))
    return SignalTrack(TrackLabel(track_name), bw)

Writers

write_wig

write_wig(track: IntervalTrack, path: str | Path, *, track_name: str | None = None) -> None

Write an IntervalTrack to variableStep WIG format.

Each Region's score is written as an integer count at its start position. WIG coordinates are 1-based. Regions are grouped by chromosome and sorted by position. All regions are written, including those with a score of zero — this is required by TRANSIT for essentiality analysis.

Parameters:

Name Type Description Default
track IntervalTrack

IntervalTrack to write.

required
path str | Path

Output file path.

required
track_name str | None

Optional track name for the header. Defaults to the track's own name.

None

Examples:

>>> write_wig(insertion_track, "output.wig")
Source code in src/seqchain/io/tracks.py
def write_wig(
    track: IntervalTrack,
    path: str | Path,
    *,
    track_name: str | None = None,
) -> None:
    """Write an IntervalTrack to variableStep WIG format.

    Each Region's score is written as an integer count (``int(score)``,
    so fractional values are truncated) at its start position.  WIG
    coordinates are 1-based, so ``start + 1`` is written.  Regions are
    grouped by chromosome (chromosomes in sorted order, one
    ``variableStep chrom=...`` line each) and sorted by position.  All
    regions are written, including those with a score of zero — this is
    required by TRANSIT for essentiality analysis.

    Args:
        track: IntervalTrack to write.
        path: Output file path.
        track_name: Optional track name for the header.
            NOTE(review): no header line is emitted by this function, so
            this argument currently has no effect on the output.  It is
            kept for API compatibility — confirm whether a ``track``
            header is wanted before adding one, since consumers such as
            TRANSIT may be sensitive to extra lines.

    Examples:
        >>> write_wig(insertion_track, "output.wig")  # doctest: +SKIP
    """
    # Group regions by chromosome; each group is sorted when written.
    by_chrom: dict[str, list[Region]] = {}
    for region in track:
        by_chrom.setdefault(region.chrom, []).append(region)

    # Explicit encoding keeps the output independent of the locale.
    with open(Path(path), "w", encoding="utf-8") as f:
        for chrom in sorted(by_chrom):
            f.write(f"variableStep chrom={chrom}\n")
            f.writelines(
                f"{r.start + 1}\t{int(r.score)}\n"
                for r in sorted(by_chrom[chrom], key=lambda r: r.start)
            )

write_bed

write_bed(track: IntervalTrack, path: str | Path) -> None

Write an IntervalTrack to BED6 format.

Columns: chrom, start (0-based), end, name, score (integer), strand. Regions are sorted by (chrom, start).

Parameters:

Name Type Description Default
track IntervalTrack

IntervalTrack to write.

required
path str | Path

Output file path.

required

Examples:

>>> write_bed(insertion_track, "output.bed")
Source code in src/seqchain/io/tracks.py
def write_bed(
    track: IntervalTrack,
    path: str | Path,
) -> None:
    """Write an IntervalTrack to BED6 format.

    Columns: chrom, start (0-based), end, name, score (integer), strand.
    Regions are sorted by (chrom, start).  Scores are written as
    ``int(score)``, so fractional values are truncated.

    Optional fields that are missing on a region are written with the
    usual BED placeholders instead of the literal text ``None``: a
    missing or empty name or strand becomes ``.`` and a missing score
    becomes ``0``.

    Args:
        track: IntervalTrack to write.
        path: Output file path.

    Examples:
        >>> write_bed(insertion_track, "output.bed")  # doctest: +SKIP
    """
    regions = sorted(track, key=lambda r: (r.chrom, r.start))

    # Explicit encoding keeps the output independent of the locale.
    with open(Path(path), "w", encoding="utf-8") as f:
        for r in regions:
            # Regions built from BED3 input may carry no name/score/strand;
            # keep every row at six well-formed columns.
            name = r.name or "."
            strand = r.strand or "."
            score = 0 if r.score is None else int(r.score)
            f.write(
                f"{r.chrom}\t{r.start}\t{r.end}\t{name}"
                f"\t{score}\t{strand}\n"
            )