gemini-python-tutor/llm_client.py at main · kangwonlee/gemini-python-tutor · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# begin llm_client.py
import logging
import time
from typing import Optional

import requests

from llm_configs import LLMConfig


class LLMAPIClient:
    """Generic client for interacting with LLM APIs using a configuration.

    This class provides a robust interface for making API calls to various
    Large Language Model services, handling retries, timeouts, and errors
    in a standardized way.

    Attributes:
        config (LLMConfig): Configuration object containing API-specific details
        retry_delay_sec (float): Base delay between retry attempts in seconds
        max_retry_attempt (int): Maximum number of retry attempts
        timeout_sec (int): Request timeout duration in seconds
        logger (logging.Logger): Logger instance for tracking operations
    """

    def __init__(self, config: LLMConfig, retry_delay_sec: float = 5.0,
                 max_retry_attempt: int = 3, timeout_sec: int = 60):
        """Initialize the LLM API client with retry and timeout settings.

        Args:
            config (LLMConfig): Configuration object containing API endpoint, key, and model details
            retry_delay_sec (float, optional): Base delay between retries in seconds. Defaults to 5.0
            max_retry_attempt (int, optional): Maximum number of retry attempts. Defaults to 3
            timeout_sec (int, optional): Maximum time allowed per request in seconds. Defaults to 60

        Raises:
            ValueError: If retry_delay_sec or timeout_sec is not positive, or max_retry_attempt is negative
        """
        # Validate input parameters
        if retry_delay_sec <= 0:
            raise ValueError("retry_delay_sec must be a positive number")
        if max_retry_attempt < 0:
            raise ValueError("max_retry_attempt must be a non-negative integer")
        if timeout_sec <= 0:
            raise ValueError("timeout_sec must be a positive integer")

        # Assign instance variables
        self.config = config
        self.retry_delay_sec = retry_delay_sec
        self.max_retry_attempt = max_retry_attempt
        self.timeout_sec = timeout_sec
        self.logger = logging.getLogger(__name__)  # Logger for this module
        self.last_raw_response = None  # Store last API response for token usage extraction

    def call_api(self, question: str) -> Optional[str]:
        """Send a question to the LLM API with retry and timeout handling.

        Implements a robust API calling mechanism with:
        - Exponential backoff for rate limit (429) retries
        - Timeout handling
        - Network error handling
        - Response parsing with error logging

        Args:
            question (str): The input prompt or question to send to the API

        Returns:
            Optional[str]: The parsed answer from the API, or None if the request fails after all retries

        Notes:
            - Retries only on HTTP 429 (Too Many Requests) with exponential backoff
            - Logs detailed errors for debugging and monitoring
            - Returns None for any unrecoverable error (timeout, network, parsing, etc.)
        """
        # Prepare request components from config
        headers = self.config.get_headers()
        data = self.config.format_request_data(question)

        # Retry loop for handling rate limits and transient failures
        for attempt in range(self.max_retry_attempt + 1):
            try:
                # Make the POST request with timeout
                response = requests.post(
                    self.config.api_url,
                    headers=headers,
                    json=data,
                    timeout=self.timeout_sec
                )
            except requests.Timeout:
                # Log timeout errors and fail immediately
                self.logger.error(f"Request timed out after {self.timeout_sec}s for question: {question if len(question) < 100 else question[:10]}")
                return None
            except requests.RequestException as e:
                # Log general network errors (connection issues, etc.) and fail
                self.logger.error(f"Network error occurred for question '{question if len(question) < 100 else question[:10]}': {str(e)}")
                return None

            # Handle successful response
            if response.status_code == 200:
                try:
                    # Parse JSON and extract response using config-specific method
                    result = response.json()
                    self.last_raw_response = result
                    return self.config.parse_response(result)
                except (ValueError, KeyError) as e:
                    # Log parsing errors (invalid JSON or unexpected structure)
                    self.logger.exception(f"Failed to parse API response for question '{question if len(question) < 100 else question[:10]}': {str(e)}")
                    return None
            elif response.status_code == 429:  # Rate limit exceeded
                if attempt < self.max_retry_attempt:
                    # Calculate exponential backoff delay: base_delay * 2^attempt
                    delay = self.retry_delay_sec * (2 ** attempt)
                    self.logger.warning(
                        f"Rate limit (429) hit. Retrying in {delay:.1f}s "
                        f"(attempt {attempt + 1}/{self.max_retry_attempt})"
                    )
                    time.sleep(delay)
                    continue  # Retry the request
                else:
                    # Log final failure after exhausting retries
                    self.logger.error(f"Max retries ({self.max_retry_attempt}) exceeded for rate limit on question: {question if len(question) < 100 else question[:10]}")
                    return None
            else:
                # Log unexpected status codes with response details
                self.logger.error(
                    f"API request failed with status {response.status_code} "
                    f"{response.text}"
                )
                return None

        # This line is theoretically unreachable due to the loop structure,
        # but included for completeness and static analysis tools
        return None

# end llm_client.py