# Source code for pipeworks_mud_mapper.models.ollama_generation

"""Ollama generation metadata model for tracking LLM-generated descriptions.

This module defines the ``OllamaGenerationInfo`` model for storing provenance
and reproducibility information about room descriptions generated by Ollama.

Design Philosophy
-----------------
The metadata serves two purposes:

1. **Reproducibility**: With the same model, seed, and parameters, Ollama
   should produce identical output. Storing ``actual_seed`` (even when the
   user requested random) enables exact reproduction later.

2. **Provenance**: Authors can see how a description was generated, what
   prompts were used, and when. This is valuable for iterating on templates
   and understanding the creative process.

Storage and Export
------------------
The metadata follows the same pattern as room coordinates:

- **Stored in .map.json exports**: Preserved for authoring purposes
- **Stripped on zone export**: Not part of game truth

This separation reflects the pipe-works philosophy that authoring scaffolding
(coordinates, LLM metadata) supports the creation process but is not part of
the final game state consumed by the MUD server.

Example Usage
-------------
The model is typically created during generation and attached to room data::

    >>> from datetime import UTC, datetime
    >>> from pipeworks_mud_mapper.models import OllamaGenerationInfo
    >>>
    >>> # After successful Ollama generation
    >>> info = OllamaGenerationInfo(
    ...     model="gemma2:2b",
    ...     actual_seed=1706234567,  # Even if user requested -1 (random)
    ...     template_id="ledgerfall_goblin",
    ...     temperature=0.7,
    ...     top_k=40,
    ...     top_p=0.9,
    ...     num_ctx=4096,
    ...     num_predict=512,
    ...     system_prompt="You are a creative writer for a MUD...",
    ...     user_prompt="Describe a quiet alley in Ledgerfall",
    ...     generated_at=datetime.now(UTC),  # timezone-aware, matches the model default
    ... )
    >>>
    >>> # Attach to room data
    >>> room_data["llm_generation"] = info.model_dump()

See Also
--------
- ``models/room.py``: MapRoom model that contains this metadata
- ``callbacks/ollama_callbacks.py``: Callbacks that create and store metadata
- ``services/zone_service.py``: Export function that strips metadata
"""

from datetime import UTC, datetime

from pydantic import BaseModel, Field



class OllamaGenerationInfo(BaseModel):
    """Metadata for an LLM-generated room description.

    This model captures everything needed to reproduce or understand the
    provenance of a generated description. It is stored per-room in map
    JSON exports (.map.json) but stripped during zone export (.json).

    The data structure mirrors the parameters sent to Ollama's ``/api/chat``
    endpoint, plus additional context about the prompts used.

    All fields are required except ``target_words`` and ``generated_at``,
    which have defaults.

    Attributes
    ----------
    model : str
        The Ollama model identifier used for generation. Must be a non-empty
        string.

        Examples: ``"gemma2:2b"``, ``"llama3:8b"``, ``"mistral:7b"``

    actual_seed : int
        The seed value actually used for generation.

        **Critical for reproducibility**: If the user specified ``-1`` (random
        mode), this field contains the randomly-generated seed that was used.
        Storing this enables exact reproduction later - using this seed with
        the same parameters should produce identical output.

        Range: 0 to 2^31-1 (always non-negative, even if -1 was requested)

        NOTE(review): only the lower bound (``>= 0``) is validated by this
        model; the 2^31-1 upper bound is not enforced here - confirm it
        against the code that generates the seed.

    template_id : str
        Identifier of the template used for generation.

        Templates are loaded from ``data/ollama/templates/`` and compiled into
        system prompts. The template_id allows tracing which template was used,
        though the full ``system_prompt`` is also stored for exact reproduction.

        Examples: ``"ledgerfall_goblin"``

    temperature : float
        Temperature parameter controlling randomness/creativity.

        - ``0.0``: Deterministic, always picks most likely token
        - ``0.7``: Default, balanced creativity (recommended)
        - ``2.0``: Maximum creativity, more unexpected outputs

        Constraints: Must be in range [0.0, 2.0]

    top_k : int
        Top-K sampling parameter limiting vocabulary.

        Restricts the model to only consider the K most probable next tokens
        at each step. Lower values produce more focused, predictable output.

        - ``1``: Only most likely token (deterministic)
        - ``40``: Default, good balance
        - ``100``: Maximum vocabulary diversity

        Constraints: Must be in range [1, 100]

    top_p : float
        Top-P (nucleus sampling) probability threshold.

        Instead of a fixed K, samples from the smallest set of tokens whose
        cumulative probability exceeds P. Adapts to the probability distribution.

        - ``0.1``: Very focused, only highest probability tokens
        - ``0.9``: Default, includes most reasonable options
        - ``1.0``: Consider all tokens (no filtering)

        Constraints: Must be in range [0.0, 1.0]

    num_ctx : int
        Context window size in tokens.

        How many tokens of context the model can "see" (prompt + history).
        Larger values allow longer prompts but use more memory.

        - ``512``: Minimum, suitable for short prompts
        - ``4096``: Default, good for most use cases
        - ``8192``: Maximum, for very long prompts

        Constraints: Must be in range [512, 8192]

    num_predict : int
        Maximum number of tokens to generate.

        Limits the length of generated output. Room descriptions typically
        need 100-500 tokens.

        - ``30``: Minimum, very short descriptions
        - ``512``: Default, good for room descriptions
        - ``2048``: Maximum, for very long content

        Constraints: Must be in range [30, 2048]

    target_words : int
        Target word count used in system prompt compilation.

        Optional; defaults to ``300`` when not supplied.

        Constraints: Must be in range [25, 500]

    system_prompt : str
        The full compiled system prompt used for generation.

        For template-based generation, this is the compiled output of the
        template (theme + voice + constraints + examples). For custom prompts,
        this is whatever the user entered.

        **Stored in full for reproducibility**: Even if templates change later,
        the exact prompt used is preserved.

    user_prompt : str
        The user's prompt text describing what to generate.

        This is the content entered in the "User Prompt" field, typically
        describing the room to generate (e.g., "Describe a quiet alley").

    generated_at : datetime
        UTC timestamp when the generation occurred.

        Defaults to current UTC time if not specified. Stored in ISO 8601
        format in JSON (e.g., ``"2024-01-15T10:30:00Z"``).

    Notes
    -----
    **Reproducibility Guarantee**: Given the same ``model``, ``actual_seed``,
    and all parameters, Ollama should produce identical output. However, this
    assumes:

    - Same model weights (model hasn't been updated)
    - Same Ollama version
    - Same hardware (some models may have platform-specific behavior)

    **Field Naming**: The field is called ``actual_seed`` (not ``seed``) to
    emphasize that it contains the seed that was actually used, which may
    differ from what the user requested if they chose random mode (``-1``).

    Examples
    --------
    Create metadata for a generation with random seed::

        >>> info = OllamaGenerationInfo(
        ...     model="gemma2:2b",
        ...     actual_seed=1706234567,  # Random seed that was generated
        ...     template_id="ledgerfall_goblin",
        ...     temperature=0.7,
        ...     top_k=40,
        ...     top_p=0.9,
        ...     num_ctx=4096,
        ...     num_predict=512,
        ...     system_prompt="You are a creative writer...",
        ...     user_prompt="Describe a dark cellar",
        ... )
        >>> info.model
        'gemma2:2b'
        >>> info.actual_seed
        1706234567

    Create metadata for a fixed-seed generation::

        >>> info = OllamaGenerationInfo(
        ...     model="llama3:8b",
        ...     actual_seed=42,  # User specified seed 42 explicitly
        ...     template_id="ledgerfall_goblin",
        ...     temperature=0.5,
        ...     top_k=30,
        ...     top_p=0.8,
        ...     num_ctx=2048,
        ...     num_predict=256,
        ...     system_prompt="Write concise descriptions.",
        ...     user_prompt="Describe the main hall",
        ... )

    Serialize to dictionary for storage (``info`` is the fixed-seed instance
    created just above)::

        >>> data = info.model_dump()
        >>> data["model"]
        'llama3:8b'
        >>> isinstance(data["generated_at"], datetime)
        True

    Serialize to JSON-compatible dictionary (datetime as ISO string)::

        >>> data = info.model_dump(mode="json")
        >>> isinstance(data["generated_at"], str)
        True
    """

    # =========================================================================
    # Model Identification
    # =========================================================================

    model: str = Field(
        ...,
        min_length=1,
        description="Ollama model identifier (e.g., 'gemma2:2b', 'llama3:8b')",
    )

    # =========================================================================
    # Reproducibility Parameters
    # =========================================================================

    # Only non-negativity is validated: a stored seed must never be the
    # "-1 = random" request sentinel, it must be the seed that was really used.
    actual_seed: int = Field(
        ...,
        ge=0,
        description=(
            "Seed value actually used for generation. "
            "If user requested -1 (random), this is the generated seed."
        ),
    )

    # =========================================================================
    # Template Information
    # =========================================================================

    template_id: str = Field(
        ...,
        description="Template identifier used for generation",
    )

    # =========================================================================
    # Generation Parameters
    # =========================================================================

    temperature: float = Field(
        ...,
        ge=0.0,
        le=2.0,
        description="Temperature controlling randomness (0.0=focused, 2.0=creative)",
    )

    top_k: int = Field(
        ...,
        ge=1,
        le=100,
        description="Top-K sampling parameter (vocabulary filtering)",
    )

    top_p: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Top-P nucleus sampling threshold (cumulative probability)",
    )

    num_ctx: int = Field(
        ...,
        ge=512,
        le=8192,
        description="Context window size in tokens",
    )

    num_predict: int = Field(
        ...,
        ge=30,
        le=2048,
        description="Maximum number of tokens to generate",
    )

    # The only generation parameter with a default, so it may be omitted
    # when constructing the model.
    target_words: int = Field(
        default=300,
        ge=25,
        le=500,
        description="Target word count used in system prompt compilation",
    )

    # =========================================================================
    # Prompt Content
    # =========================================================================

    system_prompt: str = Field(
        ...,
        description="Full compiled system prompt used for generation",
    )

    user_prompt: str = Field(
        ...,
        description="User's prompt text describing what to generate",
    )

    # =========================================================================
    # Timestamp
    # =========================================================================

    # default_factory (rather than a plain default) so the timestamp is taken
    # at instance creation time, and is timezone-aware UTC.
    generated_at: datetime = Field(
        default_factory=lambda: datetime.now(UTC),
        description="UTC timestamp when generation occurred (ISO 8601)",
    )