ProjectTech4DevAI · rajagopalmotivate · Feb 23, 2026 · Feb 23, 2026 · coderabbitai · Feb 23, 2026
diff --git a/backend/app/core/providers.py b/backend/app/core/providers.py
@@ -13,6 +13,7 @@ class Provider(str, Enum):
     AWS = "aws"
     LANGFUSE = "langfuse"
     GOOGLE = "google"
+    SARVAMAI = "sarvamai"
 
 
 @dataclass
@@ -32,6 +33,7 @@ class ProviderConfig:
         required_fields=["secret_key", "public_key", "host"]
     ),
     Provider.GOOGLE: ProviderConfig(required_fields=["api_key"]),
+    Provider.SARVAMAI: ProviderConfig(required_fields=["api_key"]),
 }
 
 

diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py
@@ -154,7 +154,7 @@ class NativeCompletionConfig(SQLModel):
     Supports any LLM provider's native API format.
     """
 
-    provider: Literal["openai-native", "google-native"] = Field(
+    provider: Literal["openai-native", "google-native", "sarvamai-native"] = Field(
         ...,
         description="Native provider type (e.g., openai-native)",
     )

diff --git a/backend/app/services/llm/providers/registry.py b/backend/app/services/llm/providers/registry.py
@@ -7,6 +7,7 @@
 from app.services.llm.providers.base import BaseProvider
 from app.services.llm.providers.oai import OpenAIProvider
 from app.services.llm.providers.gai import GoogleAIProvider
+from app.services.llm.providers.sai import SarvamAIProvider
 
 logger = logging.getLogger(__name__)
 
@@ -17,13 +18,15 @@ class LLMProvider:
     # Future constants for native providers:
     # CLAUDE_NATIVE = "claude-native"
     GOOGLE_NATIVE = "google-native"
+    SARVAMAI_NATIVE = "sarvamai-native"
 
     _registry: dict[str, type[BaseProvider]] = {
         OPENAI_NATIVE: OpenAIProvider,
         OPENAI: OpenAIProvider,
         # Future native providers:
         # CLAUDE_NATIVE: ClaudeProvider,
         GOOGLE_NATIVE: GoogleAIProvider,
+        SARVAMAI_NATIVE: SarvamAIProvider,
     }
 
     @classmethod

diff --git a/backend/app/services/llm/providers/sai.py b/backend/app/services/llm/providers/sai.py
@@ -0,0 +1,154 @@
+import logging
+import os
+from typing import Any
+
+from sarvamai import SarvamAI   
+
+
+
+from app.models.llm import (
+    NativeCompletionConfig,
+    LLMCallResponse,
+    QueryParams,
+    TextOutput,
+    LLMResponse,
+    Usage,
+    TextOutput,
+    TextContent,
+)
-from app.models.llm import (
-    NativeCompletionConfig,
-    LLMCallResponse,
-    QueryParams,
-    TextOutput,
-    LLMResponse,
-    Usage,
-    TextOutput,
-    TextContent,
-)
+from app.models.llm import (
+    NativeCompletionConfig,
+    LLMCallResponse,
+    QueryParams,
+    TextOutput,
+    LLMResponse,
+    Usage,
+    TextContent,
+)
-from app.models.llm import (
-    NativeCompletionConfig,
-    LLMCallResponse,
-    QueryParams,
-    TextOutput,
-    LLMResponse,
-    Usage,
-    TextOutput,
-    TextContent,
-)
+from app.models.llm import (
+    NativeCompletionConfig,
+    LLMCallResponse,
+    QueryParams,
+    TextOutput,
+    LLMResponse,
+    Usage,
+    TextContent,
+)
+from app.services.llm.providers.base import BaseProvider
+
+
+logger = logging.getLogger(__name__)
+
+
+class SarvamAIProvider(BaseProvider):
+    def __init__(self, client: SarvamAI):
+        """Initialize SarvamAI provider with client.
+
+        Args:
+            client: SarvamAI client instance
+        """
+        super().__init__(client)
+        self.client = client
-class SarvamAIProvider(BaseProvider):
-    def __init__(self, client: SarvamAI):
-        """Initialize SarvamAI provider with client.
-
-        Args:
-            client: SarvamAI client instance
-        """
-        super().__init__(client)
-        self.client = client
+class SarvamAIProvider(BaseProvider):
+    def __init__(self, client: SarvamAI) -> None:
+        """Initialize SarvamAI provider with client.
+
+        Args:
+            client: SarvamAI client instance
+        """
+        super().__init__(client)
+        self.client = client
-class SarvamAIProvider(BaseProvider):
-    def __init__(self, client: SarvamAI):
-        """Initialize SarvamAI provider with client.
-
-        Args:
-            client: SarvamAI client instance
-        """
-        super().__init__(client)
-        self.client = client
+class SarvamAIProvider(BaseProvider):
+    """Provider that runs speech-to-text requests through a SarvamAI client.
+
+    Only the ``"stt"`` completion type is handled; every other type is
+    reported back to the caller as an error string.
+    """
+
+    # Sent as ``language_code`` when the request names no input language.
+    # Per the original author's note, SarvamAI treats "unknown" as a request
+    # for automatic language detection; otherwise a code such as "hi-IN" is
+    # expected (TODO confirm against the SarvamAI API reference).
+    _DEFAULT_LANGUAGE_CODE = "unknown"
+
+    def __init__(self, client: SarvamAI) -> None:
+        """Initialize SarvamAI provider with client.
+
+        Args:
+            client: SarvamAI client instance
+        """
+        super().__init__(client)
+        self.client = client
+
+    @staticmethod
+    def create_client(credentials: dict[str, Any]) -> Any:
+        """Build a SarvamAI client from stored credentials.
+
+        Args:
+            credentials: Mapping that must carry a non-empty ``api_key``.
+
+        Returns:
+            A ``SarvamAI`` client constructed with that key.
+
+        Raises:
+            ValueError: If ``api_key`` is missing, ``None`` or empty.
+        """
+        # .get() + truthiness also rejects an empty/None key, which would
+        # otherwise only surface later as a failed API call.
+        api_key = credentials.get("api_key")
+        if not api_key:
+            raise ValueError("API Key for SarvamAI Not Set")
+        return SarvamAI(api_subscription_key=api_key)
+
+    def _parse_input(self, query_input: Any, completion_type: str, provider: str) -> str:
+        """Validate the resolved input for a completion and return it.
+
+        Args:
+            query_input: Candidate input; for STT it must be a path to an existing file.
+            completion_type: Completion type being executed; only ``"stt"`` is accepted.
+            provider: Provider name, used only in error messages.
+
+        Returns:
+            ``query_input`` unchanged once it has been validated.
+
+        Raises:
+            ValueError: If the completion type is not ``"stt"`` or the input
+                is not a string naming an existing regular file.
+        """
+        if completion_type != "stt":
+            raise ValueError(f"Unsupported completion type '{completion_type}' for {provider}")
+
+        # isfile (rather than exists) so a directory is rejected here instead
+        # of failing later inside open().
+        if not (isinstance(query_input, str) and os.path.isfile(query_input)):
+            raise ValueError(f"{provider} STT requires a valid file path as input")
+
+        return query_input
+
+    def _execute_stt(
+        self,
+        completion_config: NativeCompletionConfig,
+        resolved_input: str,
+        include_provider_raw_response: bool = False,
+    ) -> tuple[LLMCallResponse | None, str | None]:
+        """Execute speech-to-text completion using SarvamAI.
+
+        Args:
+            completion_config: Configuration for the completion request; its
+                ``params`` must contain ``model`` and may contain ``input_language``.
+            resolved_input: File path to the audio input
+            include_provider_raw_response: Whether to include raw provider response
+
+        Returns:
+            Tuple of (response, error_message); exactly one of the two is ``None``.
+
+        Raises:
+            ValueError: Propagated from ``_parse_input`` when ``resolved_input``
+                is not a valid file path (validation runs outside the ``try``).
+        """
+        provider_name = self.get_provider_name()
+        generation_params = completion_config.params
+
+        model = generation_params.get("model")
+        if not model:
+            return None, "Missing 'model' in native params for SarvamAI STT"
+
+        # The language is optional in the request; fall back to the default code.
+        language_code = generation_params.get("input_language") or self._DEFAULT_LANGUAGE_CODE
+
+        # Parse and validate input
+        parsed_input_path = self._parse_input(
+            query_input=resolved_input,
+            completion_type="stt",
+            provider=provider_name,
+        )
+
+        try:
+            with open(parsed_input_path, "rb") as audio_file:
+                sarvam_response = self.client.speech_to_text.transcribe(
+                    file=audio_file,
+                    model=model,
+                    language_code=language_code,
+                )
+
+            transcript = sarvam_response.transcript
+
+            # The STT response is not read for token usage here, so the figures
+            # below are placeholders: input is 0 and output is a whitespace
+            # word count of the transcript.
+            input_tokens_estimate = 0
+            output_tokens_estimate = len(transcript.split())
+            total_tokens_estimate = input_tokens_estimate + output_tokens_estimate
+
+            llm_response = LLMCallResponse(
+                response=LLMResponse(
+                    provider_response_id=sarvam_response.request_id or "unknown",
+                    conversation_id=None,  # SarvamAI STT doesn't have conversation_id
+                    provider=provider_name,
+                    model=model,
+                    output=TextOutput(content=TextContent(value=transcript)),
+                ),
+                usage=Usage(
+                    input_tokens=input_tokens_estimate,
+                    output_tokens=output_tokens_estimate,
+                    total_tokens=total_tokens_estimate,
+                    reasoning_tokens=None,  # Not provided by SarvamAI
+                ),
+            )
+
+            if include_provider_raw_response:
+                llm_response.provider_raw_response = sarvam_response.model_dump()
+
+            logger.info(
+                f"[{provider_name}.execute_stt] Successfully transcribed audio: {sarvam_response.request_id}"
+            )
+            return llm_response, None
+
+        except Exception as e:
+            # Provider boundary: any failure is logged and returned as an error string.
+            error_message = f"SarvamAI STT transcription failed: {e}"
+            logger.error(f"[{provider_name}.execute_stt] {error_message}", exc_info=True)
+            return None, error_message
+
+    def execute(
+        self,
+        completion_config: NativeCompletionConfig,
+        query: QueryParams,
+        resolved_input: str,
+        include_provider_raw_response: bool = False,
+    ) -> tuple[LLMCallResponse | None, str | None]:
+        """Run a completion request and return ``(response, error_message)``.
+
+        Args:
+            completion_config: Configuration whose ``type`` selects the completion path.
+            query: Query parameters; not read by this method.
+            resolved_input: Resolved input; for STT, the audio file path.
+            include_provider_raw_response: Whether to include raw provider response
+
+        Returns:
+            ``(response, None)`` on success, otherwise ``(None, error_message)``.
+            Exceptions are logged and converted to an error message rather
+            than propagated.
+        """
+        try:
+            completion_type = completion_config.type
+
+            if completion_type != "stt":
+                return None, f"Unsupported completion type '{completion_type}' for SarvamAIProvider"
+
+            return self._execute_stt(
+                completion_config=completion_config,
+                resolved_input=resolved_input,
+                include_provider_raw_response=include_provider_raw_response,
+            )
+
+        except ValueError as e:
+            # Raised by input validation in the STT path.
+            error_message = f"Input validation error: {e}"
+            logger.error(f"[SarvamAIProvider.execute] {error_message}", exc_info=True)
+            return None, error_message
+        except Exception as e:
+            error_message = "Unexpected error occurred during SarvamAI execution"
+            logger.error(f"[SarvamAIProvider.execute] {error_message}: {e}", exc_info=True)
+            return None, error_message
+
diff --git a/backend/app/services/llm/providers/tests_data.py b/backend/app/services/llm/providers/tests_data.py
diff --git a/backend/app/tests/services/llm/providers/STTproviders/test_STT_SarvamProvider.py b/backend/app/tests/services/llm/providers/STTproviders/test_STT_SarvamProvider.py
@@ -0,0 +1,193 @@
+import os
+from dotenv import load_dotenv
+import logging
+
+from sqlmodel import Session
+from openai import OpenAI
+
+from app.crud import get_provider_credential
+from app.services.llm.providers.base import BaseProvider
+from app.services.llm.providers.oai import OpenAIProvider
+from app.services.llm.providers.sai import SarvamAIProvider
+from app.services.llm.providers.gai import GoogleAIProvider
+
+from app.tests.services.llm.providers.STTproviders.test_data_speechsamples import mydata
+
+import tempfile
+
+
+# ad hoc testing code for SarvamAIProvider
+import os
+import tempfile
+
+# temporary import
-# ad hoc testing code for SarvamAIProvider
-import os
-import tempfile
-
-# temporary import
+# ad hoc testing code for SarvamAIProvider
+# temporary import
-# ad hoc testing code for SarvamAIProvider
-import os
-import tempfile
-
-# temporary import
+# ad hoc testing code for SarvamAIProvider
+# temporary import
+
+from app.models.llm import (
+    NativeCompletionConfig,
+    LLMCallResponse,
+    QueryParams,
+    LLMOutput,
+    LLMResponse,
+    Usage,
+)
+
+load_dotenv()
+
+logger = logging.getLogger(__name__)
+
+
+
+
+class LLMProvider:
+    """Registry mapping provider-type strings to provider classes."""
+
+    OPENAI_NATIVE = "openai-native"
+    OPENAI = "openai"
+    # Placeholder for a later native provider:
+    # CLAUDE_NATIVE = "claude-native"
+    GOOGLE_NATIVE = "google-native"
+    SARVAMAI_NATIVE = "sarvamai-native"
+
+    # Insertion order is the order in which names appear in error messages.
+    _registry: dict[str, type[BaseProvider]] = {
+        OPENAI_NATIVE: OpenAIProvider,
+        OPENAI: OpenAIProvider,
+        # Placeholder for a later native provider:
+        # CLAUDE_NATIVE: ClaudeProvider,
+        GOOGLE_NATIVE: GoogleAIProvider,
+        SARVAMAI_NATIVE: SarvamAIProvider,
+    }
+
+    @classmethod
+    def get_provider_class(cls, provider_type: str) -> type[BaseProvider]:
+        """Look up the provider class registered under ``provider_type``.
+
+        Raises:
+            ValueError: If no class is registered under that name; the
+                message lists every registered name.
+        """
+        registry = cls._registry
+        provider_cls = registry.get(provider_type)
+        if provider_cls:
+            return provider_cls
+
+        supported = ", ".join(registry)
+        raise ValueError(
+            f"Provider '{provider_type}' is not supported. "
+            f"Supported providers: {supported}"
+        )
+
+    @classmethod
+    def supported_providers(cls) -> list[str]:
+        """List every registered provider name, in registration order."""
+        return [*cls._registry]
+
+
+def get_llm_provider(
+    session: Session, provider_type: str, project_id: int, organization_id: int
+) -> BaseProvider:
+    """Build a ready-to-use provider instance for a project.
+
+    Args:
+        session: Database session passed to the credential lookup.
+        provider_type: Registered provider name, e.g. ``"sarvamai-native"``.
+        project_id: Project whose credentials are looked up.
+        organization_id: Organization whose credentials are looked up.
+
+    Returns:
+        An instance of the registered provider class wrapping a new client.
+
+    Raises:
+        ValueError: If the provider type is not registered, no credentials
+            are configured, or client creation rejects the credentials.
+        RuntimeError: If client creation fails for any other reason; the
+            original exception is attached as the cause.
+    """
+    provider_class = LLMProvider.get_provider_class(provider_type)
+
+    # e.g., "openai-native" → "openai", "claude-native" → "claude"
+    credential_provider = provider_type.replace("-native", "")
+
+    credentials = get_provider_credential(
+        session=session,
+        provider=credential_provider,
+        project_id=project_id,
+        org_id=organization_id,
+    )
+
+    if not credentials:
+        raise ValueError(
+            f"Credentials for provider '{credential_provider}' not configured for this project."
+        )
+
+    try:
+        client = provider_class.create_client(credentials=credentials)
+        return provider_class(client=client)
+    except ValueError:
+        # Re-raise ValueError for credential/configuration errors
+        raise
+    except Exception as e:
+        logger.error(
+            f"[get_llm_provider] Failed to initialize {provider_type} client: {e}",
+            exc_info=True,
+        )
+        # Chain the cause so the underlying failure stays visible in tracebacks.
+        raise RuntimeError(f"Could not connect to {provider_type} services.") from e
+
+
+
+if __name__ == "__main__":
+    # Ad hoc manual run of SarvamAIProvider STT against a bundled audio sample.
+    # Not collected by pytest; it calls the real SarvamAI service.
+
+    # 1. Credentials come from the environment (load_dotenv() above has already
+    #    run), so no key is ever committed to the repository.
+    SARVAM_API_KEY = os.getenv("SARVAM_API_KEY", "")
+
+    if not SARVAM_API_KEY:
+        print("SARVAM_API_KEY is not set.")
+        # SystemExit instead of exit(): exit() is a helper injected by the
+        # site module and is not guaranteed to exist.
+        raise SystemExit(1)
+
+    # This dictionary mimics what get_provider_credential would return from the DB
+    mock_credentials = {"api_key": SARVAM_API_KEY}
+
+    # 2. Initialization via the registry
+    provider_type = "sarvamai-native"
+    if provider_type not in LLMProvider._registry:
+        LLMProvider._registry[provider_type] = SarvamAIProvider
+        print("SarvamAIProvider registered successfully in LLMProvider.")
+    else:
+        print("SarvamAIProvider was already registered.")
+
+    print(f"Initializing provider: {provider_type}...")
+
+    # This block mimics the core logic of get_llm_provider without the DB lookup
+    ProviderClass = LLMProvider.get_provider_class(provider_type)
+    client = ProviderClass.create_client(credentials=mock_credentials)
+    instance = ProviderClass(client=client)
+
+    # Write the sample audio data to a temporary file, because the provider
+    # takes a file path as its STT input.
+    temp_audio_file_path = None
+    try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio_file:
+            temp_audio_file.write(mydata)
+            temp_audio_file_path = temp_audio_file.name
+
+        # 3. Setup Config and Query
+        test_config = NativeCompletionConfig(
+            provider="sarvamai-native",
+            type="stt",
+            params={
+                # Alternative model: "saarika:v2.5"
+                "model": "saaras:v3",
+                # "unknown" asks for auto-detection; use a code such as
+                # "ta-IN" or "hi-IN" when the language is known.
+                "input_language": "unknown",
+            },
+        )
+
+        test_query = QueryParams(
+            input={"type": "text", "content": {"value": "Transcription request"}}
+        )
+
+        # 4. Execution
+        print("Executing STT with SarvamAIProvider...")
+        # For STT, resolved_input needs to be the file path
+        result, error = instance.execute(
+            completion_config=test_config,
+            query=test_query,
+            resolved_input=temp_audio_file_path,
+        )
+
+        if error:
+            print(f"Error: {error}")
+        else:
+            print("\n--- SarvamAI STT Result ---")
+            print(f"Transcribed Text: {result.response.output.content.value}")
+            print(f"Provider Model: {result.response.model}")
+            print("\n--- Usage Information ---")
+            print(f"Input Tokens: {result.usage.input_tokens}")
+            print(f"Output Tokens: {result.usage.output_tokens}")
+            print(f"Total Tokens: {result.usage.total_tokens}")
+            if result.usage.reasoning_tokens:
+                print(f"Reasoning Tokens: {result.usage.reasoning_tokens}")
+            # To inspect the raw provider payload, pass
+            # include_provider_raw_response=True to execute() and print
+            # result.provider_raw_response.
+
+    finally:
+        # Clean up the temporary file
+        if temp_audio_file_path and os.path.exists(temp_audio_file_path):
+            os.remove(temp_audio_file_path)
+            print(f"Cleaned up temporary file: {temp_audio_file_path}")
+