Source code for text_machina.src.tokenizers.azure_openai
from typing import List
import tiktoken
from .base import Tokenizer
[docs]class AzureOpenAITokenizer(Tokenizer):
"""
Tokenizer for AzureOpenAI models.
The tokenizer cannot be inferred from the deployment name
of a model, so the GPT-4 tokenizer is always used instead.
"""
def __init__(self, model_name: str):
    """
    Set up the tokenizer for an AzureOpenAI model.

    Args:
        model_name (str): name forwarded unchanged to the base
            ``Tokenizer`` initializer; it is not used to choose
            the encoding.
    """
    # Let the base class handle the model name first.
    super().__init__(model_name)
    # The encoding is fixed to GPT-4 regardless of the name given.
    gpt4_encoding = tiktoken.encoding_for_model("gpt-4")
    self.tokenizer = gpt4_encoding