Source code for fsh_lib.links

"""Link-shortening primitives for kiln-generated FastAPI projects.

A *short link* maps a compact, URL-safe ``code`` to a longer
``target_url``.  Useful for fitting links into SMS bodies (the
160-character single-segment limit is unforgiving) and for emitting
click counts on outbound comms.

Shortening is *explicit*: the producer calls :func:`shorten` to
swap a long URL for a short one before assembling the message body
(e.g. just before :func:`~fsh_lib.comms.send_communication`).  There
is intentionally no auto-applied Jinja filter -- the row write
happens at a predictable point, and shortened URLs that never get
sent (because :class:`~fsh_lib.comms.PreferenceResolver` filters the
recipient out, or the caller's transaction rolls back) don't leak
into the table.

The module ships three primitives, following the same pgcraft-
flavoured idiom as :mod:`fsh_lib.files` -- consumer owns the table,
we own the columns:

* :class:`ShortLinkMixin` -- pgcraft-compatible mixin supplying
  the storage columns (``code``, ``target_url``, ``click_count``).
  ``created_at`` is managed by pgcraft's
  :class:`~pgcraft.plugins.timestamps.TimestampPlugin`, which
  :class:`~pgcraft.factory.dimension.simple.PGCraftSimple`
  auto-adds; ``id`` is plugin-owned (the consumer attaches e.g.
  :class:`~pgcraft.plugins.pk.UUIDV4PKPlugin`), matching the
  :class:`~fsh_lib.files.FileMixin` pattern.

* :func:`shorten` -- producer entry point.  A single
  ``INSERT ... ON CONFLICT DO NOTHING RETURNING code`` does both
  the dedup ("shorten X twice, get the same code back" because
  ``target_url`` is ``UNIQUE``) and the code-collision check in
  one round trip -- no SELECT-then-INSERT race window.  Returns
  the full short URL ``{base_url}/{code}``.

* :func:`resolve` -- redirect-handler helper.  Single
  ``UPDATE ... RETURNING`` atomically increments
  :attr:`ShortLinkMixin.click_count` and returns the row's
  ``target_url`` (or ``None`` for unknown codes).  Consumers
  wire it into a 5-line FastAPI route:

  .. code-block:: python

      from fastapi import APIRouter, Depends, HTTPException
      from fastapi.responses import RedirectResponse
      from fsh_lib.links import resolve

      router = APIRouter()

      @router.get("/l/{code}")
      async def follow(code: str, db: AsyncSession = Depends(get_db)):
          url = await resolve(model_cls=ShortLink, db=db, code=code)
          if url is None:
              raise HTTPException(status_code=404)
          return RedirectResponse(url, status_code=302)
"""

from __future__ import annotations

import os
import secrets
from typing import TYPE_CHECKING

from pydantic import BaseModel
from sqlalchemy import (
    BigInteger,
    String,
    select,
    update,
)

# ``AsyncSession`` lives at module scope (not under TYPE_CHECKING)
# so be's action introspector can evaluate :func:`shorten_action`'s
# type hints at import time -- guarding the import would yield an
# unresolved ForwardRef and the action wouldn't classify as a
# session-taking handler.  Same workaround the consumer-facing
# fsh_lib.files actions need.
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.asyncio import AsyncSession  # noqa: TC002
from sqlalchemy.orm import Mapped, mapped_column

if TYPE_CHECKING:
    import uuid


# Short codes are drawn from lowercase ASCII letters only.  Mixed
# case or digits in a code shown on an SMS confuse recipients who
# type it by hand ("is that an O or a 0?  capital I or lowercase
# l?") -- a single-case, letters-only alphabet avoids the worst
# offenders.  Every character is URL-safe, so the path component
# never needs escaping.
_BASE26_ALPHABET = "abcdefghijklmnopqrstuvwxyz"


DEFAULT_CODE_LENGTH = 7
"""Number of letters in a short code unless the caller overrides it.

The alphabet has 26 lowercase letters, so seven characters span
26**7 ≈ 8.0e9 distinct codes.  With a million rows already stored,
a freshly generated code hits an existing one with probability
≈ 1.2e-4, which means :func:`shorten` almost never needs its
retry loop.

Pass ``code_length`` to trade keyspace for brevity (or the other
way round).  Shorter codes collide more often -- five characters
against 100k rows collide ≈ 0.8% of the time, still comfortably
inside :data:`MAX_CODE_RETRIES` -- while longer codes push the
collision rate down further.
"""


MAX_CODE_RETRIES = 5
"""Upper bound on the code-generation attempts :func:`shorten` makes.

An attempt is retried only when ``INSERT ... ON CONFLICT DO
NOTHING`` comes back empty *and* the follow-up SELECT by
``target_url`` finds nothing either -- i.e. the conflict was on
``code``, not the ``target_url`` dedup.  At the default code
length this essentially never happens; the cap is there so an
exhausted keyspace (tiny ``code_length``, lots of rows) fails
loudly rather than spinning forever.
"""


class ShortLinkMixin:
    """Column mixin for a pgcraft-mapped short-link table.

    Mix it into a pgcraft model together with a PK plugin, which
    is responsible for ``id``:

    .. code-block:: python

        from fsh_lib.links import ShortLinkMixin
        from pgcraft.factory import PGCraftSimple
        from pgcraft.plugins.pk import UUIDV4PKPlugin

        class ShortLink(Base, ShortLinkMixin):
            __tablename__ = "short_links"
            __factory__ = PGCraftSimple
            __plugins__ = [UUIDV4PKPlugin()]

    As with :class:`fsh_lib.files.FileMixin`, ``id`` is
    intentionally left undeclared here because the consumer's PK
    plugin provides it.  ``created_at`` is likewise supplied by
    pgcraft: ``PGCraftSimple`` auto-adds
    :class:`~pgcraft.plugins.timestamps.TimestampPlugin`, which
    injects ``created_at`` with ``server_default=now()``.

    :attr:`code` and :attr:`target_url` each carry a ``UNIQUE``
    constraint.  Uniqueness on the code is what allows
    :func:`shorten` to detect a random-code collision and retry;
    uniqueness on the target URL folds same-URL dedup into the
    same atomic INSERT, so there is no SELECT-then-INSERT race.
    """

    if TYPE_CHECKING:
        # Visible to the type checker only.  The real column is
        # contributed by the consumer's pgcraft PK plugin; mapping
        # one here at runtime would clash with the plugin's column
        # when the table is built.
        id: Mapped[uuid.UUID]

    code: Mapped[str] = mapped_column(String(32), unique=True)
    """Short code identifying the link.  ``UNIQUE`` so that
    :func:`shorten` sees a random-code collision through ``ON
    CONFLICT DO NOTHING`` and can try again with a new code."""

    target_url: Mapped[str] = mapped_column(
        String(2048),
        unique=True,
        nullable=False,
    )
    """Destination the short link redirects to.  ``UNIQUE`` so a
    second :func:`shorten` of the same URL hits the constraint and
    gets the existing code back instead of racing to insert a
    duplicate row."""

    click_count: Mapped[int] = mapped_column(
        BigInteger,
        default=0,
        nullable=False,
    )
    """Number of times :func:`resolve` has served this row.
    ``BigInteger`` so that popular links don't wrap in any
    realistic horizon."""
def generate_code(length: int = DEFAULT_CODE_LENGTH) -> str:
    """Build a random code of *length* lowercase letters.

    Each letter is drawn with :func:`secrets.choice`.  Code
    generation is security-adjacent -- a predictable code would
    let an attacker enumerate link targets -- so a CSPRNG is used
    even though the default keyspace is large.

    Args:
        length: Number of letters to produce.

    Returns:
        A string of *length* characters from the module's
        lowercase-letter alphabet.

    Raises:
        ValueError: When *length* is less than 1.
    """
    if length < 1:
        msg = f"code_length must be >= 1, got {length}"
        raise ValueError(msg)

    letters = [secrets.choice(_BASE26_ALPHABET) for _ in range(length)]
    return "".join(letters)
async def shorten(
    *,
    model_cls: type[ShortLinkMixin],
    db: AsyncSession,
    target_url: str,
    base_url: str,
    code_length: int = DEFAULT_CODE_LENGTH,
) -> str:
    """Produce the short URL for *target_url*, creating a row if needed.

    Shortening the same URL twice yields the same code: when a row
    for *target_url* already exists its code is reused.  The dedup
    relies on the ``UNIQUE (target_url)`` constraint rather than a
    prior SELECT, so two concurrent calls for one URL cannot both
    insert -- the second INSERT bounces off the constraint.

    Args:
        model_cls: The consumer's short-link model class (must mix
            in :class:`ShortLinkMixin`).
        db: Async SQLAlchemy session used for the statements.  The
            caller commits.
        target_url: Long URL to shorten.
        base_url: Base URL of the redirect endpoint, *without* a
            trailing slash, e.g. ``"https://l.example.com"``.  The
            result is ``{base_url}/{code}``.
        code_length: How many letters a newly generated code has.
            Defaults to :data:`DEFAULT_CODE_LENGTH`.  Irrelevant
            when the URL is already stored -- the existing row's
            code is returned as-is.

    Returns:
        The full short URL: ``{base_url}/{code}``.

    Raises:
        RuntimeError: If :data:`MAX_CODE_RETRIES` consecutive
            random codes all collided with existing rows -- almost
            certainly *code_length* is too small for the current
            row count.
    """
    for _attempt in range(MAX_CODE_RETRIES):
        candidate = generate_code(code_length)

        # One INSERT handles both kinds of unique violation -- the
        # URL is already stored, or the candidate code is taken --
        # because ``ON CONFLICT DO NOTHING`` is given no target.
        # With a fresh URL and a fresh code the new code comes
        # straight back from RETURNING in a single round trip.
        insert_stmt = (
            insert(model_cls)
            .values(
                code=candidate,
                target_url=target_url,
                click_count=0,
            )
            .on_conflict_do_nothing()
            .returning(model_cls.code)
        )
        winning_code = (await db.execute(insert_stmt)).scalar_one_or_none()

        if winning_code is None:
            # Nothing was inserted, so some constraint fired.  A
            # row found by ``target_url`` means it was the dedup
            # constraint and that row's code is the answer; no row
            # means the ``code`` constraint fired and the loop
            # tries again with a new candidate.
            lookup_stmt = select(model_cls.code).where(
                model_cls.target_url == target_url
            )
            winning_code = (
                await db.execute(lookup_stmt)
            ).scalar_one_or_none()

        if winning_code is not None:
            return f"{base_url}/{winning_code}"

    msg = (
        f"Could not generate a unique short code in "
        f"{MAX_CODE_RETRIES} attempts -- increase code_length"
    )
    raise RuntimeError(msg)
async def resolve(
    *,
    model_cls: type[ShortLinkMixin],
    db: AsyncSession,
    code: str,
) -> str | None:
    """Look up *code*, count the click, and return its target URL.

    A single ``UPDATE ... RETURNING`` statement both bumps
    :attr:`ShortLinkMixin.click_count` by one and hands back the
    row's ``target_url``, so the counter and the lookup cannot
    drift apart under concurrent redirects.

    Args:
        model_cls: The consumer's short-link model class.
        db: Async SQLAlchemy session.  The caller commits; if the
            surrounding transaction rolls back, the increment is
            rolled back too -- appropriate, since a failed
            redirect never actually served the link.
        code: The short code taken from the request path.

    Returns:
        The target URL when a row matches *code*, otherwise
        ``None``.
    """
    bump_and_fetch = (
        update(model_cls)
        .where(model_cls.code == code)
        .values(click_count=model_cls.click_count + 1)
        .returning(model_cls.target_url)
    )
    outcome = await db.execute(bump_and_fetch)
    return outcome.scalar_one_or_none()
# --- Action-shaped wrapper for be codegen --------------------------------
class ShortenRequest(BaseModel):
    """Body accepted by :func:`shorten_action`.

    The field layout follows the dotted-path pattern be's
    introspector expects for a collection-scoped action body.
    """

    target_url: str
class ShortenResponse(BaseModel):
    """Payload returned by :func:`shorten_action`."""

    short_url: str
def default_base_url() -> str:
    """Return the public origin + redirect prefix from env.

    Reads ``LINK_BASE_URL`` (e.g. ``"https://l.example.com/l"``) --
    the value :func:`shorten` joins with the random code to form
    ``{base}/{code}``.  Any trailing ``/`` on the configured value
    is stripped, so ``"https://l.example.com/l/"`` and
    ``"https://l.example.com/l"`` behave the same and the joined
    URL never contains a doubled slash.

    Mirrors :func:`fsh_lib.files.default_storage` in shape: the
    action's resource lookup is env-driven so the same generated
    handler works across environments.

    Returns:
        The configured base URL without a trailing slash.

    Raises:
        RuntimeError: when ``LINK_BASE_URL`` is not set or is empty.
    """
    base = os.environ.get("LINK_BASE_URL")
    if not base:
        msg = "LINK_BASE_URL environment variable is required"
        raise RuntimeError(msg)
    # The short URL is assembled as f"{base_url}/{code}"; leaving a
    # trailing slash in place would yield ".../l//code".
    return base.rstrip("/")
async def shorten_action(
    *,
    model_cls: type[ShortLinkMixin],
    db: AsyncSession,
    body: ShortenRequest,
) -> ShortenResponse:
    """Expose :func:`shorten` as a collection-scoped action.

    be's action codegen wires this into a generated POST route.
    The ``type[ShortLinkMixin]`` annotation matches any concrete
    subclass via the introspector's supertype check, so one
    function serves every consumer's short-link model.  The shape
    mirrors :func:`fsh_lib.files.request_upload`.

    ``base_url`` is taken from :func:`default_base_url` (i.e. the
    ``LINK_BASE_URL`` env var), which lets the same generated
    handler run in dev / staging / prod without scaffold-time
    configuration.

    Args:
        model_cls: The consumer's short-link model class.
        db: Async SQLAlchemy session, passed through to
            :func:`shorten`.
        body: Request body carrying the ``target_url`` to shorten.

    Returns:
        A :class:`ShortenResponse` holding the short URL.
    """
    return ShortenResponse(
        short_url=await shorten(
            model_cls=model_cls,
            db=db,
            target_url=body.target_url,
            base_url=default_base_url(),
        ),
    )