# Source code for text_machina.src.tokenizers.vertex
from typing import List
import tiktoken
from .base import Tokenizer
class VertexTokenizer(Tokenizer):
    """
    Tokenizer for VertexAI models.

    VertexAI does not offer tokenizers. GPT-4 Tokenizer is used instead.
    """

    def __init__(self, model_name: str) -> None:
        """
        Set up the tokenizer for the given VertexAI model.

        Args:
            model_name (str): name of the VertexAI model. It is forwarded
                to the base class and does not influence which encoding
                is loaded.
        """
        super().__init__(model_name)
        # The encoding is always the GPT-4 one, whatever `model_name` is.
        self.tokenizer = tiktoken.encoding_for_model("gpt-4")