Source code for text_machina.src.types

from enum import Enum
from typing import List

from pydantic import BaseModel, Field, model_validator


class DetectionLabels(Enum):
    """
    Labels for detection tasks.

    Members are intentionally left without type annotations so that
    type checkers infer them as enum members instead of plain ``str``
    attributes.
    """

    GENERATED = "generated"
    HUMAN = "human"


class Placeholders(Enum):
    """
    Placeholders to be used across TextMachina.

    Members are intentionally left without type annotations so that
    type checkers infer them as enum members instead of plain ``str``
    attributes.
    """

    NO_PROMPT = "NO-PROMPT"
    NO_EXTRACTOR = "NO-EXTRACTOR"


class Prompt(BaseModel):
    """
    Wrapper for a prompt.

    Both fields are required strings validated by pydantic.
    """

    # Text of the prompt template.
    template: str
    # NOTE(review): presumably the name of the extractor paired with
    # this template -- confirm against the code that consumes Prompt.
    extractor: str


class PromptedDataset(BaseModel):
    """
    Wrapper for a prompted dataset used to generate MGT texts.

    Both fields are required lists of strings validated by pydantic.
    """

    # Prompted texts, one string per entry.
    prompted_texts: List[str]
    # Human-written texts, one string per entry.
    # NOTE(review): presumably aligned index-by-index with
    # `prompted_texts`; no length check is enforced here -- confirm.
    human_texts: List[str]


class TaskType(str, Enum):
    """
    Task types.

    Mixes in ``str`` so that every member is itself a string and
    compares equal to its raw value (e.g. ``TaskType.DETECTION ==
    "detection"``).
    """

    DETECTION = "detection"
    ATTRIBUTION = "attribution"
    BOUNDARY = "boundary"
    MIXCASE = "mixcase"


class LabeledSpan(BaseModel):
    """
    A labeled span delimited by integer ``start`` and ``end`` positions.

    Validation enforces ``0 <= start < end``.
    """

    # Start position; must be non-negative.
    start: int = Field(ge=0)
    # End position; must be strictly greater than `start`.
    # NOTE(review): whether `end` is inclusive or exclusive is not
    # established here -- confirm against callers.
    end: int
    # Label assigned to the span.
    label: str

    @model_validator(mode="after")
    def check_valid_positions(self) -> "LabeledSpan":
        """
        Ensure that ``start`` is strictly lower than ``end``.

        Runs after field validation, so both positions are already
        integers when they are compared.

        Returns:
            The validated model instance, unchanged.

        Raises:
            ValueError: if ``start`` is greater than or equal to ``end``.
        """
        if self.start >= self.end:
            raise ValueError(
                "`start` must be lower than `end` in a LabeledSpan"
                f" (`start`={self.start}, `end`={self.end})"
            )
        return self