diff --git a/litellm/passthrough/utils.py b/litellm/passthrough/utils.py index ef4357d1ca2..1f149313793 100644 --- a/litellm/passthrough/utils.py +++ b/litellm/passthrough/utils.py @@ -52,6 +52,7 @@ def forward_headers_from_request( # Header We Should NOT forward request_headers.pop("content-length", None) request_headers.pop("host", None) + request_headers.pop("x-litellm-api-key", None) # Combine request headers with custom headers headers = {**request_headers, **headers} diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py index 4e3e04a8474..29f1e5980d8 100644 --- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py @@ -596,11 +596,24 @@ async def anthropic_proxy_route( base_url = httpx.URL(base_target_url) updated_url = base_url.copy_with(path=encoded_endpoint) - # Add or update query parameters - anthropic_api_key = passthrough_endpoint_router.get_credentials( - custom_llm_provider="anthropic", - region_name=None, - ) + # Credential priority: client-provided credentials take precedence over + # server credentials. This allows mixed mode where some users bring their + # own key (BYOK) or OAuth token (Claude Code Max) while others use the + # server's API key. 
+ x_api_key_header = request.headers.get("x-api-key", "") + auth_header = request.headers.get("authorization", "") + + if x_api_key_header or auth_header: + custom_headers = {} + else: + anthropic_api_key = passthrough_endpoint_router.get_credentials( + custom_llm_provider="anthropic", + region_name=None, + ) + if anthropic_api_key: + custom_headers = {"x-api-key": anthropic_api_key} + else: + custom_headers = {} ## check for streaming is_streaming_request = await is_streaming_request_fn(request) @@ -609,7 +622,7 @@ async def anthropic_proxy_route( endpoint_func = create_pass_through_route( endpoint=endpoint, target=str(updated_url), - custom_headers={"x-api-key": "{}".format(anthropic_api_key)}, + custom_headers=custom_headers, _forward_headers=True, is_streaming_request=is_streaming_request, ) # dynamically construct pass-through endpoint based on incoming path diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py b/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py index 97ef05100de..b7e7359e9ff 100644 --- a/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py +++ b/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py @@ -10,7 +10,7 @@ from fastapi import Request, Response from fastapi.testclient import TestClient -from litellm.passthrough.utils import CommonUtils +from litellm.passthrough.utils import BasePassthroughUtils, CommonUtils sys.path.insert( 0, os.path.abspath("../../../..") @@ -95,4 +95,42 @@ def test_encode_bedrock_runtime_modelid_arn_edge_cases(): endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile.v1/invoke" expected = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile%2Ftest-profile.v1/invoke" result = CommonUtils.encode_bedrock_runtime_modelid_arn(endpoint) - assert result == expected \ No newline at end of 
file
+    assert result == expected
+
+
+def test_forward_headers_strips_litellm_api_key():
+    """x-litellm-api-key should not be forwarded to upstream providers."""
+    request_headers = {
+        "x-litellm-api-key": "sk-litellm-secret-key",  # expected to be stripped
+        "content-type": "application/json",  # ordinary header, expected to pass through
+        "x-api-key": "sk-ant-api-key",  # client-supplied provider key, expected to pass through
+    }
+
+    result = BasePassthroughUtils.forward_headers_from_request(
+        request_headers=request_headers.copy(),  # copy: the helper pops keys from the dict it is given
+        headers={},  # no custom headers to merge on top of the request headers
+        forward_headers=True,
+    )
+
+    assert "x-litellm-api-key" not in result  # the header under test is gone
+    assert result.get("content-type") == "application/json"  # unrelated header untouched
+    assert result.get("x-api-key") == "sk-ant-api-key"  # similarly named header is not stripped
+
+
+def test_forward_headers_strips_host_and_content_length():
+    """host and content-length should not be forwarded."""
+    request_headers = {
+        "host": "api.anthropic.com",  # expected to be stripped
+        "content-length": "1234",  # expected to be stripped
+        "content-type": "application/json",  # control: expected to pass through
+    }
+
+    result = BasePassthroughUtils.forward_headers_from_request(
+        request_headers=request_headers.copy(),  # copy: the helper pops keys from the dict it is given
+        headers={},  # no custom headers to merge on top of the request headers
+        forward_headers=True,
+    )
+
+    assert "host" not in result  # popped before the merge
+    assert "content-length" not in result  # popped before the merge
+    assert result.get("content-type") == "application/json"  # control header survives
\ No newline at end of file