"""Link-shortening primitives for kiln-generated FastAPI projects.
A *short link* maps a compact, URL-safe ``code`` to a longer
``target_url``. Useful for fitting links into SMS bodies (the
160-character single-segment limit is unforgiving) and for emitting
click counts on outbound comms.
Shortening is *explicit*: the producer calls :func:`shorten` to
swap a long URL for a short one before assembling the message body
(e.g. just before :func:`~fsh_lib.comms.send_communication`). There
is intentionally no auto-applied Jinja filter -- the row write
happens at a predictable point, and shortened URLs that never get
sent (because :class:`~fsh_lib.comms.PreferenceResolver` filters the
recipient out, or the caller's transaction rolls back) don't leak
into the table.
The module ships three primitives, following the same pgcraft-
flavoured idiom as :mod:`fsh_lib.files` -- consumer owns the table,
we own the columns:
* :class:`ShortLinkMixin` -- pgcraft-compatible mixin supplying
the storage columns (``code``, ``target_url``, ``click_count``).
``created_at`` is managed by pgcraft's
:class:`~pgcraft.plugins.timestamps.TimestampPlugin`, which
:class:`~pgcraft.factory.dimension.simple.PGCraftSimple`
auto-adds; ``id`` is plugin-owned (the consumer attaches e.g.
:class:`~pgcraft.plugins.pk.UUIDV4PKPlugin`), matching the
:class:`~fsh_lib.files.FileMixin` pattern.
* :func:`shorten` -- producer entry point. A single
``INSERT ... ON CONFLICT DO NOTHING RETURNING code`` does both
the dedup ("shorten X twice, get the same code back" because
``target_url`` is ``UNIQUE``) and the code-collision check in
one round trip -- no SELECT-then-INSERT race window. Returns
the full short URL ``{base_url}/{code}``.
* :func:`resolve` -- redirect-handler helper. Single
``UPDATE ... RETURNING`` atomically increments
:attr:`ShortLinkMixin.click_count` and returns the row's
``target_url`` (or ``None`` for unknown codes). Consumers
wire it into a 5-line FastAPI route:
.. code-block:: python
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import RedirectResponse
from fsh_lib.links import resolve
router = APIRouter()
@router.get("/l/{code}")
async def follow(code: str, db: AsyncSession = Depends(get_db)):
url = await resolve(model_cls=ShortLink, db=db, code=code)
if url is None:
raise HTTPException(status_code=404)
return RedirectResponse(url, status_code=302)
"""
from __future__ import annotations
import os
import secrets
from typing import TYPE_CHECKING
from pydantic import BaseModel
from sqlalchemy import (
BigInteger,
String,
select,
update,
)
# ``AsyncSession`` lives at module scope (not under TYPE_CHECKING)
# so be's action introspector can evaluate :func:`shorten_action`'s
# type hints at import time -- guarding the import would yield an
# unresolved ForwardRef and the action wouldn't classify as a
# session-taking handler. Same workaround the consumer-facing
# fsh_lib.files actions need.
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.asyncio import AsyncSession # noqa: TC002
from sqlalchemy.orm import Mapped, mapped_column
if TYPE_CHECKING:
import uuid
# Alphabet that :func:`generate_code` draws from: the 26 lowercase
# ASCII letters and nothing else. Mixing cases or including digits
# in a short code shown on an SMS confuses recipients hand-typing it
# ("is that an O or a 0? capital I or lowercase l?"); letters-only
# removes those look-alikes. It does not remove every ambiguity
# ("rn" can still be misread as "m"), only the worst offenders.
# Every character is URL-safe, so the path component never needs
# escaping.
_BASE26_ALPHABET = "abcdefghijklmnopqrstuvwxyz"
# Code length, in characters, used by :func:`generate_code` and
# :func:`shorten` when the caller does not pass an explicit length.
DEFAULT_CODE_LENGTH = 7
"""Default short-code length.
Seven base26 (lowercase-letter) characters give 26**7 ≈ 8.0e9
possible codes. At a million rows the per-insert collision rate
is ≈ 1.2e-4 -- low enough that :func:`shorten`'s retry loop is
overwhelmingly a no-op.
Tune via the ``code_length`` argument when shorter codes are
needed (smaller keyspace, higher collision rate -- a 5-char code
with 100k rows collides ≈ 0.8% of the time, well within
:data:`MAX_CODE_RETRIES`) or when longer is acceptable.
"""
# Upper bound on the number of INSERT attempts :func:`shorten` makes
# (it loops ``range(MAX_CODE_RETRIES)`` times, so this counts the
# first attempt too) before it gives up with ``RuntimeError``.
MAX_CODE_RETRIES = 5
"""How many times :func:`shorten` retries a colliding code.
A retry triggers when ``INSERT ... ON CONFLICT DO NOTHING``
returns no row *and* the follow-up SELECT confirms it was the
``code`` collision (not the ``target_url`` dedup). With
default-length codes the loop is virtually never entered; the
bound exists so a saturated keyspace (very small ``code_length``,
many rows) raises loudly instead of looping forever.
"""
[docs]
class ShortLinkMixin:
    """Storage columns for a short link, packaged as a pgcraft mixin.

    Mix it into a pgcraft-mapped model next to a PK plugin; the plugin,
    not this mixin, contributes ``id``:

    .. code-block:: python

        from fsh_lib.links import ShortLinkMixin
        from pgcraft.factory import PGCraftSimple
        from pgcraft.plugins.pk import UUIDV4PKPlugin

        class ShortLink(Base, ShortLinkMixin):
            __tablename__ = "short_links"
            __factory__ = PGCraftSimple
            __plugins__ = [UUIDV4PKPlugin()]

    As with :class:`fsh_lib.files.FileMixin`, ``id`` is intentionally
    left undeclared so the consumer's PK plugin can own it.
    ``created_at`` is pgcraft-owned as well: ``PGCraftSimple``
    auto-adds :class:`~pgcraft.plugins.timestamps.TimestampPlugin`,
    which injects ``created_at`` with ``server_default=now()``.

    :attr:`code` and :attr:`target_url` each carry a ``UNIQUE``
    constraint. Uniqueness of the code is what lets :func:`shorten`
    detect and retry the rare random-code collision; uniqueness of
    the target URL folds same-URL dedup into the same atomic INSERT,
    so there is no SELECT-then-INSERT race.
    """

    if TYPE_CHECKING:
        # Visible to the type checker only. The real column is
        # contributed by the consumer's pgcraft PK plugin; mapping it
        # here at runtime as well would clash with the plugin's column
        # when the table is built.
        id: Mapped[uuid.UUID]

    code: Mapped[str] = mapped_column(
        String(32),
        unique=True,
    )
    """The short code. ``UNIQUE``, so a randomly generated duplicate
    is caught by :func:`shorten`'s ``ON CONFLICT DO NOTHING`` and
    retried with a fresh code."""

    target_url: Mapped[str] = mapped_column(
        String(2048),
        unique=True,
        nullable=False,
    )
    """Destination the short link redirects to. ``UNIQUE``, so
    shortening the same URL again bounces off the constraint and
    :func:`shorten` hands back the existing code instead of racing
    to insert a duplicate row."""

    click_count: Mapped[int] = mapped_column(
        BigInteger,
        default=0,
        nullable=False,
    )
    """Number of times :func:`resolve` has served this row.
    ``BigInteger`` so popular links don't wrap in any realistic
    horizon."""
[docs]
def generate_code(length: int = DEFAULT_CODE_LENGTH) -> str:
    """Build a random short code of *length* lowercase letters.

    Each character is picked independently with
    :func:`secrets.choice`. Code generation is security-adjacent --
    a guessable code would let an attacker enumerate link targets --
    so a cryptographically strong source is used even though the
    keyspace is already large at the default length.

    Args:
        length: Number of characters in the returned code.

    Returns:
        A string of exactly *length* characters drawn from the
        lowercase-letter alphabet.

    Raises:
        ValueError: When *length* is less than 1.
    """
    if length < 1:
        msg = f"code_length must be >= 1, got {length}"
        raise ValueError(msg)
    letters = [secrets.choice(_BASE26_ALPHABET) for _ in range(length)]
    return "".join(letters)
[docs]
async def shorten(
    *,
    model_cls: type[ShortLinkMixin],
    db: AsyncSession,
    target_url: str,
    base_url: str,
    code_length: int = DEFAULT_CODE_LENGTH,
) -> str:
    """Return a short URL for *target_url*, inserting a row if needed.

    A URL that was shortened before yields its existing code --
    "shorten X twice, get the same code back". The dedup rides the
    ``UNIQUE (target_url)`` constraint so it's race-free: two
    concurrent shortens of the same URL can't both insert, and the
    loser's INSERT bounces off the unique violation rather than
    racing past a stale SELECT.

    Args:
        model_cls: The consumer's short-link model class (must mix
            in :class:`ShortLinkMixin`).
        db: Async SQLAlchemy session for the insert. The caller
            commits.
        target_url: Long URL to shorten.
        base_url: Base URL of the redirect endpoint, typically a
            short domain the redirect route is mounted under, e.g.
            ``"https://l.example.com"``. Any trailing slashes are
            stripped before the code is appended, so
            ``"https://l.example.com/"`` gives the same result.
        code_length: Number of base26 letters to generate when
            inserting a new row. Defaults to
            :data:`DEFAULT_CODE_LENGTH`. On a dedup hit the existing
            row's code is returned whatever its length, but the
            value is still validated on every call.

    Returns:
        The full short URL ``{base_url}/{code}``, with exactly one
        slash between the two parts.

    Raises:
        ValueError: If *code_length* is less than 1. Raised by
            :func:`generate_code` before any statement is executed.
        RuntimeError: If :data:`MAX_CODE_RETRIES` random codes in a
            row collided with existing rows -- almost certainly
            means *code_length* is too small for the current row
            count.
    """
    # Normalise once so a caller-supplied trailing slash can't produce
    # "https://host//code" in the returned link.
    prefix = base_url.rstrip("/")

    for _ in range(MAX_CODE_RETRIES):
        code = generate_code(code_length)
        # ``ON CONFLICT DO NOTHING`` without a conflict target absorbs
        # *any* unique violation -- a same-URL dedup hit OR a
        # same-code collision -- in a single atomic INSERT. The common
        # case (fresh URL, fresh code) returns the new code in one
        # round trip; on conflict no row comes back and the SELECT
        # below tells the two cases apart.
        result = await db.execute(
            insert(model_cls)
            .values(
                code=code,
                target_url=target_url,
                click_count=0,
            )
            .on_conflict_do_nothing()
            .returning(model_cls.code)
        )
        inserted = result.scalar_one_or_none()
        if inserted is not None:
            return f"{prefix}/{inserted}"

        # Some unique constraint was violated. Look up by
        # ``target_url``: a hit means the dedup constraint fired
        # (return the existing code); a miss means it was the
        # ``code`` constraint (loop and retry with a fresh code).
        existing = await db.execute(
            select(model_cls.code).where(model_cls.target_url == target_url)
        )
        existing_code = existing.scalar_one_or_none()
        if existing_code is not None:
            return f"{prefix}/{existing_code}"

    msg = (
        f"Could not generate a unique short code in "
        f"{MAX_CODE_RETRIES} attempts -- increase code_length"
    )
    raise RuntimeError(msg)
[docs]
async def resolve(
    *,
    model_cls: type[ShortLinkMixin],
    db: AsyncSession,
    code: str,
) -> str | None:
    """Look up *code*, count the click, and return its target URL.

    The lookup and the :attr:`ShortLinkMixin.click_count` increment
    are one ``UPDATE ... RETURNING`` statement, so the counter and
    the returned URL can't drift apart even under concurrent
    redirects.

    Args:
        model_cls: The consumer's short-link model class.
        db: Async SQLAlchemy session. The caller commits -- if the
            surrounding transaction rolls back, the click increment
            rolls back with it, which is the right behaviour (a
            failed redirect didn't actually serve the link).
        code: The short code taken from the request path.

    Returns:
        The target URL when a row matches *code*; ``None`` when the
        code is unknown (the UPDATE touched no row).
    """
    bump_and_fetch = (
        update(model_cls)
        .where(model_cls.code == code)
        .values(click_count=model_cls.click_count + 1)
        .returning(model_cls.target_url)
    )
    outcome = await db.execute(bump_and_fetch)
    return outcome.scalar_one_or_none()
# --- Action-shaped wrapper for be codegen --------------------------------
[docs]
class ShortenRequest(BaseModel):
    """Body accepted by :func:`shorten_action`.

    The field layout follows the dotted-path pattern that be's
    introspector expects for the body of a collection-scoped action.
    """

    # The long URL to shorten; handed to :func:`shorten` unchanged.
    target_url: str
[docs]
class ShortenResponse(BaseModel):
    """Payload returned by :func:`shorten_action`."""

    # The full short URL produced by :func:`shorten`.
    short_url: str
[docs]
def default_base_url() -> str:
    """Return the public origin + redirect prefix from the environment.

    Reads ``LINK_BASE_URL`` (e.g. ``"https://l.example.com/l"``) --
    the value :func:`shorten` joins with the random code to form
    ``{base}/{code}``. Mirrors :func:`fsh_lib.files.default_storage`
    in shape: the action's resource lookup is env-driven so the same
    generated handler works across environments.

    The value is normalised before it is returned: surrounding
    whitespace is removed and trailing slashes are stripped, so
    ``"https://l.example.com/l/"`` and ``"https://l.example.com/l"``
    are equivalent and the joined short URL never contains ``//``
    before the code.

    Returns:
        The configured base URL without surrounding whitespace or
        trailing slashes.

    Raises:
        RuntimeError: When ``LINK_BASE_URL`` is unset, empty, or
            contains only whitespace.
    """
    # Strip whitespace first so a blank value (e.g. a stray space in
    # a .env file) is reported as missing instead of being used.
    base = os.environ.get("LINK_BASE_URL", "").strip()
    if not base:
        msg = "LINK_BASE_URL environment variable is required"
        raise RuntimeError(msg)
    # :func:`shorten` appends "/{code}" itself, so drop any trailing
    # slash here.
    return base.rstrip("/")
[docs]
async def shorten_action(
    *,
    model_cls: type[ShortLinkMixin],
    db: AsyncSession,
    body: ShortenRequest,
) -> ShortenResponse:
    """Collection-scoped action that exposes :func:`shorten`.

    be's action codegen wires this into a generated POST route. The
    ``type[ShortLinkMixin]`` annotation matches any concrete subclass
    through the introspector's supertype check, so one function
    serves every consumer's short-link model. The shape mirrors
    :func:`fsh_lib.files.request_upload`.

    ``base_url`` is not part of the request: it is taken from
    :func:`default_base_url` (i.e. the ``LINK_BASE_URL`` env var), so
    the same generated handler works across dev / staging / prod
    without scaffold-time configuration.

    Args:
        model_cls: The consumer's short-link model class.
        db: Async SQLAlchemy session passed through to
            :func:`shorten`.
        body: Request body carrying the URL to shorten.

    Returns:
        A :class:`ShortenResponse` wrapping the short URL.
    """
    return ShortenResponse(
        short_url=await shorten(
            model_cls=model_cls,
            db=db,
            target_url=body.target_url,
            base_url=default_base_url(),
        ),
    )