ollama · Ghraven · Apr 29, 2026
diff --git a/ollama/_types.py b/ollama/_types.py
@@ -186,6 +186,34 @@ def serialize_model(self):
         raise ValueError('Invalid image data, expected base64 string or path to image file') from Exception
 
 
+class Audio(BaseModel):
+  """Audio payload (file path, raw bytes or base64 string) that serializes to a base64 string."""
+
+  value: Union[str, bytes, Path]
+
+  @model_serializer
+  def serialize_model(self):
+    """Return `value` base64-encoded; raise ValueError when a str is neither an existing file nor base64."""
+    if isinstance(self.value, (Path, bytes)):
+      return b64encode(self.value.read_bytes() if isinstance(self.value, Path) else self.value).decode()
+
+    if isinstance(self.value, str):
+      try:
+        if Path(self.value).exists():
+          return b64encode(Path(self.value).read_bytes()).decode()
+      except Exception:
+        pass  # Long base64 strings cannot be probed as paths; fall through and treat as base64
+
+      # Looks like an audio file name (extension matched case-insensitively) but no such file exists
+      if self.value.split('.')[-1].lower() in ('mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'ogg', 'wav', 'webm'):
+        raise ValueError(f'File {self.value} does not exist')
+      try:
+        b64decode(self.value)  # Decoded only to validate; the caller's string is returned unchanged
+        return self.value
+      except Exception as err:
+        raise ValueError('Invalid audio data, expected base64 string or path to audio file') from err
+
+
 class GenerateRequest(BaseGenerateRequest):
   prompt: Optional[str] = None
   'Prompt to generate response from.'
@@ -327,6 +355,18 @@ class Message(SubscriptableBaseModel):
   Valid image formats depend on the model. See the model card for more information.
   """
 
+  audio: Optional[Sequence[Audio]] = None
+  """
+  Optional list of audio data for multimodal models.
+
+  Valid input types are:
+
+  - `str` or path-like object: path to audio file (a `str` may also be base64-encoded audio data)
+  - `bytes` or bytes-like object: raw audio data
+
+  Valid audio formats depend on the model. See the model card for more information.
+  """
+
   tool_name: Optional[str] = None
   'Name of the executed tool.'