diff --git a/RAG/src/chain_server/configuration.py b/RAG/src/chain_server/configuration.py
index ba0dde217..faea92314 100644
--- a/RAG/src/chain_server/configuration.py
+++ b/RAG/src/chain_server/configuration.py
@@ -61,7 +61,7 @@ class LLMConfig(ConfigWizard):
     model_engine: str = configfield(
         "model_engine",
         default="nvidia-ai-endpoints",
-        help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints",
+        help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints and minimax",
     )
     model_name_pandas_ai: str = configfield(
         "model_name_pandas_ai",
diff --git a/RAG/src/chain_server/requirements.txt b/RAG/src/chain_server/requirements.txt
index 217b864ae..bd785cb58 100644
--- a/RAG/src/chain_server/requirements.txt
+++ b/RAG/src/chain_server/requirements.txt
@@ -19,6 +19,7 @@ psycopg2-binary==2.9.9
 pgvector==0.2.5
 langchain-core==0.1.29
 langchain-nvidia-ai-endpoints==0.1.6
+langchain-openai>=0.0.6
 opentelemetry-sdk==1.23.0
 opentelemetry-api==1.23.0
 opentelemetry-exporter-otlp-proto-grpc==1.23.0
diff --git a/RAG/src/chain_server/tests/__init__.py b/RAG/src/chain_server/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/RAG/src/chain_server/tests/test_minimax_provider.py b/RAG/src/chain_server/tests/test_minimax_provider.py
new file mode 100644
index 000000000..fc320fc8b
--- /dev/null
+++ b/RAG/src/chain_server/tests/test_minimax_provider.py
@@ -0,0 +1,365 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Tests for the MiniMax LLM provider integration.
+
+These tests are self-contained and do not require the full chain_server
+dependency stack (llama_index, torch, etc.).
+"""
+
+import os
+import sys
+import types
+import unittest
+from unittest.mock import MagicMock, patch
+
+
+def _ensure_stub(name, attrs=None):
+    """Ensure a stub module exists in sys.modules. Does NOT overwrite real packages."""
+    parts = name.split(".")
+    for i in range(1, len(parts) + 1):
+        partial = ".".join(parts[:i])
+        if partial not in sys.modules:
+            sys.modules[partial] = types.ModuleType(partial)
+    mod = sys.modules[name]
+    if attrs:
+        for k, v in attrs.items():
+            setattr(mod, k, v)
+    return mod
+
+
+def _load_utils_with_mocks():
+    """Import utils.py with heavy dependencies mocked out."""
+
+    # Remove any cached version of utils
+    for key in list(sys.modules.keys()):
+        if "RAG.src.chain_server.utils" in key:
+            del sys.modules[key]
+
+    # --- Stub out ALL modules that utils.py imports -----------------------
+    # torch
+    torch_mod = _ensure_stub("torch")
+    torch_mod.cuda = MagicMock()
+    torch_mod.cuda.is_available = MagicMock(return_value=False)
+
+    # psycopg2
+    _ensure_stub("psycopg2")
+
+    # sqlalchemy
+    _ensure_stub("sqlalchemy")
+    _ensure_stub("sqlalchemy.engine")
+    _ensure_stub("sqlalchemy.engine.url", {"make_url": MagicMock()})
+
+    # llama_index (full hierarchy)
+    _ensure_stub("llama_index")
+    _ensure_stub("llama_index.core")
+    _ensure_stub("llama_index.core.indices", {"VectorStoreIndex": MagicMock()})
+    _ensure_stub("llama_index.core.postprocessor")
+    _ensure_stub("llama_index.core.postprocessor.types", {
+        "BaseNodePostprocessor": type("BaseNodePostprocessor", (), {
+            "_postprocess_nodes": lambda self, **kw: []
+        })
+    })
+    _ensure_stub("llama_index.core.schema", {"MetadataMode": MagicMock()})
+    _ensure_stub("llama_index.core.service_context", {
+        "ServiceContext": MagicMock(),
+        "set_global_service_context": MagicMock(),
+    })
+    _ensure_stub("llama_index.core.utils", {
+        "get_tokenizer": MagicMock(),
+        "globals_helper": MagicMock(),
+    })
+    _ensure_stub("llama_index.core.indices.base_retriever")
+    _ensure_stub("llama_index.core.indices.query")
+    _ensure_stub("llama_index.core.indices.query.schema")
+    _ensure_stub("llama_index.core.callbacks", {"CallbackManager": MagicMock()})
+    _ensure_stub("llama_index.embeddings")
+    _ensure_stub("llama_index.embeddings.langchain", {"LangchainEmbedding": MagicMock()})
+    _ensure_stub("llama_index.llms")
+    _ensure_stub("llama_index.llms.langchain", {"LangChainLLM": MagicMock()})
+    _ensure_stub("llama_index.vector_stores")
+    _ensure_stub("llama_index.vector_stores.milvus", {"MilvusVectorStore": MagicMock()})
+    _ensure_stub("llama_index.vector_stores.postgres", {"PGVectorStore": MagicMock()})
+
+    # langchain (bare imports in utils.py lines 93-96, 66-69)
+    _ensure_stub("langchain")
+    _ensure_stub("langchain.llms")
+    _ensure_stub("langchain.llms.base", {"LLM": MagicMock()})
+    _ensure_stub("langchain.text_splitter", {
+        "SentenceTransformersTokenTextSplitter": MagicMock()
+    })
+
+    # langchain_core (bare imports)
+    _ensure_stub("langchain_core")
+    _ensure_stub("langchain_core.documents")
+    _ensure_stub("langchain_core.documents.compressor", {"BaseDocumentCompressor": MagicMock()})
+    _ensure_stub("langchain_core.embeddings", {"Embeddings": MagicMock()})
+    _ensure_stub("langchain_core.language_models")
+    _ensure_stub("langchain_core.language_models.chat_models", {"SimpleChatModel": MagicMock()})
+    _ensure_stub("langchain_core.vectorstores", {"VectorStore": MagicMock()})
+
+    # langchain_community
+    _ensure_stub("langchain_community")
+    _ensure_stub("langchain_community.embeddings", {"HuggingFaceEmbeddings": MagicMock()})
+    _ensure_stub("langchain_community.vectorstores", {
+        "FAISS": MagicMock(), "Milvus": MagicMock(), "PGVector": MagicMock()
+    })
+    _ensure_stub("langchain_community.docstore")
+    _ensure_stub("langchain_community.docstore.in_memory", {"InMemoryDocstore": MagicMock()})
+
+    # langchain_nvidia_ai_endpoints
+    _ensure_stub("langchain_nvidia_ai_endpoints", {
+        "ChatNVIDIA": MagicMock(), "NVIDIAEmbeddings": MagicMock(), "NVIDIARerank": MagicMock()
+    })
+
+    # langchain_openai
+    _ensure_stub("langchain_openai", {"ChatOpenAI": MagicMock()})
+
+    # faiss
+    _ensure_stub("faiss", {"IndexFlatL2": MagicMock()})
+
+    # tracing module
+    _ensure_stub("RAG")
+    _ensure_stub("RAG.src")
+    _ensure_stub("RAG.src.chain_server")
+    _ensure_stub("RAG.src.chain_server.tracing", {"llama_index_cb_handler": MagicMock()})
+
+    # yaml is already installed, no need to stub
+
+    # Now import utils fresh
+    from RAG.src.chain_server import utils
+    return utils
+
+
+# ---------------------------------------------------------------------------
+# Unit Tests
+# ---------------------------------------------------------------------------
+
+
+class TestMiniMaxConfiguration(unittest.TestCase):
+    """Test MiniMax configuration through LLMConfig."""
+
+    def test_model_engine_help_text_includes_minimax(self):
+        """Verify that the LLMConfig model_engine help text mentions minimax."""
+        from RAG.src.chain_server.configuration import LLMConfig
+        fields = LLMConfig.__dataclass_fields__
+        help_txt = fields["model_engine"].metadata.get("help", "")
+        self.assertIn("minimax", help_txt)
+
+    def test_default_model_engine_unchanged(self):
+        """Verify that the default model_engine is still nvidia-ai-endpoints."""
+        from RAG.src.chain_server.configuration import LLMConfig
+        config = LLMConfig()
+        self.assertEqual(config.model_engine, "nvidia-ai-endpoints")
+
+
+class TestGetLlmMiniMax(unittest.TestCase):
+    """Test get_llm() function with MiniMax engine."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.utils = _load_utils_with_mocks()
+        # Access the raw function, bypassing utils_cache and lru_cache decorators
+        # Store in a list to prevent Python descriptor protocol from binding `self`
+        raw = cls.utils.get_llm
+        while hasattr(raw, "__wrapped__"):
+            raw = raw.__wrapped__
+        cls._raw_fn = [raw]
+
+    def _make_config(self, model_engine="minimax", model_name="MiniMax-M2.7", server_url=""):
+        cfg = MagicMock()
+        cfg.llm.model_engine = model_engine
+        cfg.llm.model_name = model_name
+        cfg.llm.server_url = server_url
+        return cfg
+
+    def _call_get_llm(self, **kwargs):
+        return self._raw_fn[0](**kwargs)
+
+    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"})
+    def test_minimax_creates_chat_openai(self):
+        with patch.object(self.utils, "get_config", return_value=self._make_config()), \
+             patch.object(self.utils, "ChatOpenAI") as mock_chat:
+            mock_chat.return_value = MagicMock()
+            self._call_get_llm()
+            mock_chat.assert_called_once()
+            kw = mock_chat.call_args.kwargs
+            self.assertEqual(kw["model"], "MiniMax-M2.7")
+            self.assertEqual(kw["openai_api_key"], "test-key-123")
+            self.assertEqual(kw["openai_api_base"], "https://api.minimax.io/v1")
+
+    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"})
+    def test_minimax_default_model_when_ensemble(self):
+        with patch.object(self.utils, "get_config", return_value=self._make_config(model_name="ensemble")), \
+             patch.object(self.utils, "ChatOpenAI") as mock_chat:
+            mock_chat.return_value = MagicMock()
+            self._call_get_llm()
+            self.assertEqual(mock_chat.call_args.kwargs["model"], "MiniMax-M2.7")
+
+    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"})
+    def test_minimax_custom_model_name(self):
+        with patch.object(self.utils, "get_config", return_value=self._make_config(model_name="MiniMax-M2.5-highspeed")), \
+             patch.object(self.utils, "ChatOpenAI") as mock_chat:
+            mock_chat.return_value = MagicMock()
+            self._call_get_llm()
+            self.assertEqual(mock_chat.call_args.kwargs["model"], "MiniMax-M2.5-highspeed")
+
+    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"})
+    def test_minimax_custom_server_url(self):
+        with patch.object(self.utils, "get_config", return_value=self._make_config(
+            server_url="https://custom-proxy.example.com/v1"
+        )), patch.object(self.utils, "ChatOpenAI") as mock_chat:
+            mock_chat.return_value = MagicMock()
+            self._call_get_llm()
+            self.assertEqual(
+                mock_chat.call_args.kwargs["openai_api_base"],
+                "https://custom-proxy.example.com/v1",
+            )
+
+    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"})
+    def test_minimax_temperature_clamping_high(self):
+        with patch.object(self.utils, "get_config", return_value=self._make_config()), \
+             patch.object(self.utils, "ChatOpenAI") as mock_chat:
+            mock_chat.return_value = MagicMock()
+            self._call_get_llm(temperature=2.5)
+            self.assertEqual(mock_chat.call_args.kwargs["temperature"], 1.0)
+
+    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"})
+    def test_minimax_temperature_clamping_low(self):
+        with patch.object(self.utils, "get_config", return_value=self._make_config()), \
+             patch.object(self.utils, "ChatOpenAI") as mock_chat:
+            mock_chat.return_value = MagicMock()
+            self._call_get_llm(temperature=-0.5)
+            self.assertEqual(mock_chat.call_args.kwargs["temperature"], 0.0)
+
+    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"})
+    def test_minimax_temperature_none_passthrough(self):
+        with patch.object(self.utils, "get_config", return_value=self._make_config()), \
+             patch.object(self.utils, "ChatOpenAI") as mock_chat:
+            mock_chat.return_value = MagicMock()
+            self._call_get_llm()
+            self.assertIsNone(mock_chat.call_args.kwargs["temperature"])
+
+    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"})
+    def test_minimax_valid_temperature_passthrough(self):
+        with patch.object(self.utils, "get_config", return_value=self._make_config()), \
+             patch.object(self.utils, "ChatOpenAI") as mock_chat:
+            mock_chat.return_value = MagicMock()
+            self._call_get_llm(temperature=0.7)
+            self.assertAlmostEqual(mock_chat.call_args.kwargs["temperature"], 0.7)
+
+    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"})
+    def test_minimax_passes_top_p_and_max_tokens(self):
+        with patch.object(self.utils, "get_config", return_value=self._make_config()), \
+             patch.object(self.utils, "ChatOpenAI") as mock_chat:
+            mock_chat.return_value = MagicMock()
+            self._call_get_llm(top_p=0.9, max_tokens=512)
+            kw = mock_chat.call_args.kwargs
+            self.assertEqual(kw["top_p"], 0.9)
+            self.assertEqual(kw["max_tokens"], 512)
+
+    @patch.dict(os.environ, {"MINIMAX_API_KEY": ""})
+    def test_minimax_empty_api_key(self):
+        with patch.object(self.utils, "get_config", return_value=self._make_config()), \
+             patch.object(self.utils, "ChatOpenAI") as mock_chat:
+            mock_chat.return_value = MagicMock()
+            self._call_get_llm()
+            mock_chat.assert_called_once()
+            self.assertEqual(mock_chat.call_args.kwargs["openai_api_key"], "")
+
+    def test_unsupported_engine_raises_runtime_error(self):
+        cfg = MagicMock()
+        cfg.llm.model_engine = "unsupported-engine"
+        with patch.object(self.utils, "get_config", return_value=cfg):
+            with self.assertRaises(RuntimeError) as ctx:
+                self._call_get_llm()
+            self.assertIn("nvidia-ai-endpoints", str(ctx.exception))
+            self.assertIn("minimax", str(ctx.exception))
+
+
+class TestGetLlmNvidiaUnchanged(unittest.TestCase):
+    """Ensure the existing NVIDIA AI endpoints path is not broken."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.utils = _load_utils_with_mocks()
+        raw = cls.utils.get_llm
+        while hasattr(raw, "__wrapped__"):
+            raw = raw.__wrapped__
+        cls._raw_fn = [raw]
+
+    def test_nvidia_engine_still_works(self):
+        cfg = MagicMock()
+        cfg.llm.model_engine = "nvidia-ai-endpoints"
+        cfg.llm.model_name = "meta/llama3-70b-instruct"
+        cfg.llm.server_url = ""
+        with patch.object(self.utils, "get_config", return_value=cfg), \
+             patch.object(self.utils, "ChatNVIDIA") as mock_nvidia:
+            mock_nvidia.return_value = MagicMock()
+            self._raw_fn[0]()
+            mock_nvidia.assert_called_once()
+            self.assertEqual(mock_nvidia.call_args.kwargs["model"], "meta/llama3-70b-instruct")
+
+    def test_nvidia_engine_with_server_url(self):
+        cfg = MagicMock()
+        cfg.llm.model_engine = "nvidia-ai-endpoints"
+        cfg.llm.model_name = "meta/llama3-70b-instruct"
+        cfg.llm.server_url = "localhost:8000"
+        with patch.object(self.utils, "get_config", return_value=cfg), \
+             patch.object(self.utils, "ChatNVIDIA") as mock_nvidia:
+            mock_nvidia.return_value = MagicMock()
+            self._raw_fn[0]()
+            mock_nvidia.assert_called_once()
+            self.assertEqual(mock_nvidia.call_args.kwargs["base_url"], "http://localhost:8000/v1")
+
+
+# ---------------------------------------------------------------------------
+# Integration Tests (require MINIMAX_API_KEY)
+# ---------------------------------------------------------------------------
+
+
+@unittest.skipUnless(
+    os.environ.get("MINIMAX_API_KEY"),
+    "MINIMAX_API_KEY not set; skipping integration tests",
+)
+class TestMiniMaxIntegration(unittest.TestCase):
+    """Integration tests that call the real MiniMax API via langchain-openai."""
+
+    def test_minimax_chat_completion(self):
+        from langchain_openai import ChatOpenAI
+        llm = ChatOpenAI(
+            model="MiniMax-M2.7",
+            openai_api_key=os.environ["MINIMAX_API_KEY"],
+            openai_api_base="https://api.minimax.io/v1",
+            temperature=0.1,
+        )
+        response = llm.invoke("Say 'hello' and nothing else.")
+        content = response.content if hasattr(response, "content") else str(response)
+        self.assertTrue(len(content) > 0)
+
+    def test_minimax_streaming(self):
+        from langchain_openai import ChatOpenAI
+        llm = ChatOpenAI(
+            model="MiniMax-M2.7",
+            openai_api_key=os.environ["MINIMAX_API_KEY"],
+            openai_api_base="https://api.minimax.io/v1",
+            temperature=0.1,
+        )
+        chunks = list(llm.stream("Say 'world' and nothing else."))
+        self.assertTrue(len(chunks) > 0)
+
+    def test_minimax_m25_highspeed(self):
+        from langchain_openai import ChatOpenAI
+        llm = ChatOpenAI(
+            model="MiniMax-M2.5-highspeed",
+            openai_api_key=os.environ["MINIMAX_API_KEY"],
+            openai_api_base="https://api.minimax.io/v1",
+            temperature=0.1,
+        )
+        response = llm.invoke("Say 'test' and nothing else.")
+        content = response.content if hasattr(response, "content") else str(response)
+        self.assertTrue(len(content) > 0)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/RAG/src/chain_server/utils.py b/RAG/src/chain_server/utils.py
index 0a7ee247d..4999e71e5 100644
--- a/RAG/src/chain_server/utils.py
+++ b/RAG/src/chain_server/utils.py
@@ -79,6 +79,11 @@
 except Exception as e:
     logger.error(f"Langchain nvidia ai endpoints import failed with error: {e}")
 
+try:
+    from langchain_openai import ChatOpenAI
+except Exception as e:
+    logger.error(f"Langchain openai import failed with error: {e}")
+
 try:
     from langchain_community.docstore.in_memory import InMemoryDocstore
     from langchain_community.vectorstores import Milvus, PGVector
@@ -397,9 +402,31 @@ def get_llm(**kwargs) -> LLM | SimpleChatModel:
                 top_p=kwargs.get('top_p', None),
                 max_tokens=kwargs.get('max_tokens', None),
             )
+    elif settings.llm.model_engine == "minimax":
+        unused_params = [key for key in kwargs.keys() if key not in ['temperature', 'top_p', 'max_tokens']]
+        if unused_params:
+            logger.warning(
+                f"The following parameters from kwargs are not supported: {unused_params} for {settings.llm.model_engine}"
+            )
+        # MiniMax provides an OpenAI-compatible API at https://api.minimax.io/v1
+        model_name = settings.llm.model_name if settings.llm.model_name != "ensemble" else "MiniMax-M2.7"
+        base_url = settings.llm.server_url if settings.llm.server_url else "https://api.minimax.io/v1"
+        temperature = kwargs.get('temperature', None)
+        if temperature is not None:
+            temperature = max(0.0, min(1.0, temperature))
+        logger.info(f"Using MiniMax model {model_name} via {base_url}")
+        return ChatOpenAI(
+            model=model_name,
+            openai_api_key=os.environ.get("MINIMAX_API_KEY", ""),
+            openai_api_base=base_url,
+            temperature=temperature,
+            top_p=kwargs.get('top_p', None),
+            max_tokens=kwargs.get('max_tokens', None),
+        )
     else:
         raise RuntimeError(
-            "Unable to find any supported Large Language Model server. Supported engine name is nvidia-ai-endpoints."
+            "Unable to find any supported Large Language Model server."
+            " Supported engine names are nvidia-ai-endpoints and minimax."
         )
 
 
diff --git a/docs/change-model.md b/docs/change-model.md
index ef9d78d04..a5a5c17d5 100644
--- a/docs/change-model.md
+++ b/docs/change-model.md
@@ -52,6 +52,28 @@ You can determine the available model names using one of the following methods:
   Refer to the package web page for sample code to list the models.
 
 
+## Alternative LLM Providers
+
+### Using MiniMax
+
+In addition to NVIDIA AI endpoints, you can use [MiniMax](https://www.minimax.io/) as the LLM provider. MiniMax offers an OpenAI-compatible API with models such as `MiniMax-M2.7` and `MiniMax-M2.5-highspeed` (204K context window).
+
+1. Get a MiniMax API key from [MiniMax Platform](https://platform.minimaxi.com/).
+
+2. Set the environment variables and start the Chain Server:
+
+   ```console
+   APP_LLM_MODELENGINE='minimax' \
+   APP_LLM_MODELNAME='MiniMax-M2.7' \
+   MINIMAX_API_KEY='your-minimax-api-key' \
+   docker compose up -d --build
+   ```
+
+   Available MiniMax models:
+   - `MiniMax-M2.7` — Latest flagship model with 1M context
+   - `MiniMax-M2.5` — Previous generation flagship
+   - `MiniMax-M2.5-highspeed` — Optimized for speed, 204K context
+
 ## On Premises Microservices
 
 You can specify the model for NVIDIA NIM containers to use in the `docker-compose-nim-ms.yaml` file.