Scale¶

Scaling and normalization transforms. Use add_pseudocount before log_transform to avoid log(0). Use normalize to make tracks from different sequencing depths comparable. Use standardize for z-scores or rank for rank-based comparisons.

add_pseudocount ¶

add_pseudocount(track: Track, value: float = 1.0) -> Track

Add a constant to every score.

Parameters:

Name	Type	Description	Default
`track`	`Track`	The track to transform.	required
`value`	`float`	The pseudocount to add. Defaults to `1.0`.	`1.0`

Returns:

Type	Description
`Track`	A new Track with value added to all scores.

Examples:

>>> from seqchain.track import TableTrack, TrackLabel
>>> t = TableTrack(TrackLabel("t"), {"a": 0.0, "b": 5.0})
>>> add_pseudocount(t, 1.0).get("a")
1.0

Source code in src/seqchain/transform/scale.py

def add_pseudocount(track: Track, value: float = 1.0) -> Track:
    """Shift every score of a track by a fixed pseudocount.

    Typically applied before a log transform so that zero scores do
    not end up as ``-inf``.

    Args:
        track: The track whose scores are shifted.
        value: The constant added to each score. Defaults to ``1.0``.

    Returns:
        A new Track in which each score is the original plus *value*.

    Examples:
        >>> from seqchain.track import TableTrack, TrackLabel
        >>> t = TableTrack(TrackLabel("t"), {"a": 0.0, "b": 5.0})
        >>> add_pseudocount(t, 1.0).get("a")
        1.0
    """
    def _shift(score: float) -> float:
        return score + value

    return track.map_scores(_shift)

log_transform ¶

log_transform(track: Track, base: float = 2) -> Track

Log-transform scores.

Edge cases

log(0) → -inf
negative scores → NaN
NaN → NaN

Parameters:

Name	Type	Description	Default
`track`	`Track`	The track to transform.	required
`base`	`float`	Logarithm base. Defaults to `2`.	`2`

Returns:

Type	Description
`Track`	A new Track with log-transformed scores.

Examples:

>>> from seqchain.track import TableTrack, TrackLabel
>>> t = TableTrack(TrackLabel("t"), {"a": 8.0})
>>> log_transform(t, base=2).get("a")
3.0

Source code in src/seqchain/transform/scale.py

def log_transform(track: Track, base: float = 2) -> Track:
    """Log-transform scores.

    Edge cases:
        - ``log(0)`` → ``-inf``
        - negative scores → ``NaN``
        - ``NaN`` → ``NaN``

    Args:
        track: The track to transform.
        base: Logarithm base. Must be positive and not equal to ``1``.
            Defaults to ``2``.

    Returns:
        A new Track with log-transformed scores.

    Raises:
        ValueError: If *base* is not positive or is equal to ``1``.

    Examples:
        >>> from seqchain.track import TableTrack, TrackLabel
        >>> t = TableTrack(TrackLabel("t"), {"a": 8.0})
        >>> log_transform(t, base=2).get("a")
        3.0
    """
    # Validate up front: math.log(1) is 0, which would otherwise surface
    # as a ZeroDivisionError from inside the per-score mapper, and
    # math.log(base <= 0) fails with only "math domain error".
    if base <= 0 or base == 1:
        raise ValueError(
            f"Logarithm base must be positive and not equal to 1, got {base!r}."
        )
    log_base = math.log(base)

    def _log(s: float) -> float:
        if math.isnan(s):
            return float("nan")
        if s < 0:
            return float("nan")
        if s == 0:
            return float("-inf")
        # Change of base: log_b(s) = ln(s) / ln(b).
        return math.log(s) / log_base

    return track.map_scores(_log)

clamp ¶

clamp(track: Track, floor: float | None = None, ceiling: float | None = None) -> Track

Clip scores to a floor and/or ceiling.

NaN values pass through unchanged.

Parameters:

Name	Type	Description	Default
`track`	`Track`	The track to transform.	required
`floor`	`float \| None`	Minimum score. `None` means no lower bound.	`None`
`ceiling`	`float \| None`	Maximum score. `None` means no upper bound.	`None`

Returns:

Type	Description
`Track`	A new Track with clamped scores.

Examples:

>>> from seqchain.track import TableTrack, TrackLabel
>>> t = TableTrack(TrackLabel("t"), {"a": -5.0, "b": 50.0, "c": 3.0})
>>> t2 = clamp(t, floor=0.0, ceiling=10.0)
>>> t2.get("a"), t2.get("b"), t2.get("c")
(0.0, 10.0, 3.0)

Source code in src/seqchain/transform/scale.py

def clamp(
    track: Track,
    floor: float | None = None,
    ceiling: float | None = None,
) -> Track:
    """Clip scores to a floor and/or ceiling.

    NaN values pass through unchanged.

    Args:
        track: The track to transform.
        floor: Minimum score. ``None`` means no lower bound.
        ceiling: Maximum score. ``None`` means no upper bound.

    Returns:
        A new Track with clamped scores.

    Examples:
        >>> from seqchain.track import TableTrack, TrackLabel
        >>> t = TableTrack(TrackLabel("t"), {"a": -5.0, "b": 50.0, "c": 3.0})
        >>> t2 = clamp(t, floor=0.0, ceiling=10.0)
        >>> t2.get("a"), t2.get("b"), t2.get("c")
        (0.0, 10.0, 3.0)
    """
    def _clamp(s: float) -> float:
        if math.isnan(s):
            return s
        if floor is not None and s < floor:
            return floor
        if ceiling is not None and s > ceiling:
            return ceiling
        return s

    return track.map_scores(_clamp)

normalize ¶

normalize(track: Track, method: str = 'cpm', total: float | None = None) -> Track

Rescale scores by a normalization method.

Supported methods:

"cpm": counts per million. Each score is divided by the sum of all non-NaN scores and multiplied by 1,000,000. If total is provided, it overrides the computed sum.
"fraction": each score divided by the sum of all non-NaN scores. If total is provided, it overrides the computed sum.
"median_ratio": each score divided by the median of non-zero, non-NaN scores. This is a simple median normalization, not DESeq2-style size factors.

NaN values pass through unchanged.

Parameters:

Name	Type	Description	Default
`track`	`Track`	The track to transform.	required
`method`	`str`	Normalization method. Defaults to `"cpm"`.	`'cpm'`
`total`	`float \| None`	Override for the computed sum (cpm, fraction only).	`None`

Returns:

Type	Description
`Track`	A new Track with normalized scores.

Raises:

Type	Description
`ValueError`	If method is not recognized.

Examples:

>>> from seqchain.track import TableTrack, TrackLabel
>>> t = TableTrack(TrackLabel("t"), {"a": 500.0, "b": 500.0})
>>> t2 = normalize(t, method="cpm")
>>> t2.get("a")
500000.0

Source code in src/seqchain/transform/scale.py

def normalize(
    track: Track,
    method: str = "cpm",
    total: float | None = None,
) -> Track:
    """Rescale scores by a normalization method.

    Supported methods:

    - ``"cpm"``: counts per million. Each score is divided by the
      sum of all non-NaN scores and multiplied by 1,000,000.
      If *total* is provided, it overrides the computed sum.
    - ``"fraction"``: each score divided by the sum of all
      non-NaN scores. If *total* is provided, it overrides
      the computed sum.
    - ``"median_ratio"``: each score divided by the median of
      non-zero, non-NaN scores. This is a simple median
      normalization, **not** DESeq2-style size factors.

    NaN values pass through unchanged.

    Args:
        track: The track to transform.
        method: Normalization method. Defaults to ``"cpm"``.
        total: Override for the computed sum (cpm, fraction only).

    Returns:
        A new Track with normalized scores.

    Raises:
        ValueError: If *method* is not recognized.

    Examples:
        >>> from seqchain.track import TableTrack, TrackLabel
        >>> t = TableTrack(TrackLabel("t"), {"a": 500.0, "b": 500.0})
        >>> t2 = normalize(t, method="cpm")
        >>> t2.get("a")
        500000.0
    """
    if method not in ("cpm", "fraction", "median_ratio"):
        raise ValueError(
            f"Unknown normalization method {method!r}. "
            "Supported: 'cpm', 'fraction', 'median_ratio'."
        )

    if method in ("cpm", "fraction"):
        divisor = total if total is not None else sum(
            s for s in track.scores() if not math.isnan(s)
        )
        if divisor == 0:
            return track.map_scores(
                lambda s: s if math.isnan(s) else float("nan")
            )
        multiplier = 1_000_000 if method == "cpm" else 1.0

        def _norm(s: float) -> float:
            if math.isnan(s):
                return s
            return (s / divisor) * multiplier

        return track.map_scores(_norm)

    # median_ratio — statistics.median() accepts a generator; no sorted() needed
    try:
        med = statistics.median(
            s for s in track.scores() if not math.isnan(s) and s != 0.0
        )
    except statistics.StatisticsError:
        return track.map_scores(
            lambda s: s if math.isnan(s) else float("nan")
        )

    def _median_norm(s: float) -> float:
        if math.isnan(s):
            return s
        return s / med

    return track.map_scores(_median_norm)

standardize ¶

standardize(track: Track) -> Track

Z-score normalization: (score - mean) / std.

Computes mean and standard deviation over non-NaN scores, then transforms each score. NaN values pass through unchanged. If fewer than 2 non-NaN values exist, all scores become NaN.

Parameters:

Name	Type	Description	Default
`track`	`Track`	The track to transform.	required

Returns:

Type	Description
`Track`	A new Track with z-scored values.

Examples:

>>> from seqchain.track import TableTrack, TrackLabel
>>> t = TableTrack(TrackLabel("t"), {"a": 10.0, "b": 20.0, "c": 30.0})
>>> t2 = standardize(t)
>>> abs(t2.get("b")) < 1e-10  # mean is at center
True

Source code in src/seqchain/transform/scale.py

def standardize(track: Track) -> Track:
    """Z-score normalization: ``(score - mean) / std``.

    Computes mean and sample standard deviation
    (``statistics.stdev``) over non-NaN scores, then transforms each
    score. NaN values pass through unchanged.

    Special cases:
        - Fewer than 2 non-NaN values: all scores become NaN.
        - Standard deviation of zero (all non-NaN scores equal):
          every non-NaN score becomes ``0.0``.

    Args:
        track: The track to transform.

    Returns:
        A new Track with z-scored values.

    Examples:
        >>> from seqchain.track import TableTrack, TrackLabel
        >>> t = TableTrack(TrackLabel("t"), {"a": 10.0, "b": 20.0, "c": 30.0})
        >>> t2 = standardize(t)
        >>> abs(t2.get("b")) < 1e-10  # mean is at center
        True
    """
    # Materialize the non-NaN scores once; the count, mean and stdev
    # all need them, and re-reading track.scores() for each is wasted work.
    observed = [s for s in track.scores() if not math.isnan(s)]
    if len(observed) < 2:
        # stdev needs at least two data points.
        return track.map_scores(
            lambda s: s if math.isnan(s) else float("nan")
        )
    mean = statistics.mean(observed)
    # Passing xbar avoids recomputing the mean inside stdev.
    std = statistics.stdev(observed, xbar=mean)
    if std == 0:
        # No spread: every observed score sits exactly at the mean.
        return track.map_scores(
            lambda s: s if math.isnan(s) else 0.0
        )

    def _zscore(s: float) -> float:
        if math.isnan(s):
            return s
        return (s - mean) / std

    return track.map_scores(_zscore)

rank ¶

rank(track: Track) -> Track

Replace scores with their rank (1-based, average ties).

NaN scores receive NaN rank.

Parameters:

Name	Type	Description	Default
`track`	`Track`	The track to transform.	required

Returns:

Type	Description
`Track`	A new Track with rank values.

Examples:

>>> from seqchain.track import TableTrack, TrackLabel
>>> t = TableTrack(TrackLabel("t"), {"a": 30.0, "b": 10.0, "c": 20.0})
>>> t2 = rank(t)
>>> t2.get("b"), t2.get("c"), t2.get("a")
(1.0, 2.0, 3.0)

Source code in src/seqchain/transform/scale.py

def rank(track: Track) -> Track:
    """Swap each score for its rank among the track's scores.

    Ranks are 1-based and tied scores share the average of their
    ranks. A NaN score gets a NaN rank. The ranking itself is
    delegated to ``_compute_ranks``.

    Args:
        track: The track to transform.

    Returns:
        A new Track with rank values.

    Examples:
        >>> from seqchain.track import TableTrack, TrackLabel
        >>> t = TableTrack(TrackLabel("t"), {"a": 30.0, "b": 10.0, "c": 20.0})
        >>> t2 = rank(t)
        >>> t2.get("b"), t2.get("c"), t2.get("a")
        (1.0, 2.0, 3.0)
    """
    return track.with_scores(_compute_ranks(track.scores()))