diff --git a/src/strands/agent/agent_result.py b/src/strands/agent/agent_result.py
index 63b7a0d4a..f0a399f81 100644
--- a/src/strands/agent/agent_result.py
+++ b/src/strands/agent/agent_result.py
@@ -35,6 +35,15 @@ class AgentResult:
     interrupts: Sequence[Interrupt] | None = None
     structured_output: BaseModel | None = None
 
+    @property
+    def context_size(self) -> int | None:
+        """Most recent context size in tokens, delegated to metrics.latest_context_size.
+
+        Returns:
+            Whatever metrics.latest_context_size reports: a token count, or None if no data is available.
+        """
+        return self.metrics.latest_context_size
+
     def __str__(self) -> str:
         """Return a string representation of the agent result.
 
diff --git a/src/strands/telemetry/metrics.py b/src/strands/telemetry/metrics.py
index 163df803a..dae05965e 100644
--- a/src/strands/telemetry/metrics.py
+++ b/src/strands/telemetry/metrics.py
@@ -202,6 +202,19 @@ class EventLoopMetrics:
     accumulated_usage: Usage = field(default_factory=lambda: Usage(inputTokens=0, outputTokens=0, totalTokens=0))
     accumulated_metrics: Metrics = field(default_factory=lambda: Metrics(latencyMs=0))
 
+    @property
+    def latest_context_size(self) -> int | None:
+        """Most recent context size: the input token count of the latest recorded cycle.
+
+        Read from the "inputTokens" entry of the last cycle's usage in the last agent invocation.
+
+        Returns:
+            The latest cycle's inputTokens, or None if there is no invocation, no cycle, or no inputTokens entry.
+        """
+        if self.agent_invocations and self.agent_invocations[-1].cycles:
+            return self.agent_invocations[-1].cycles[-1].usage.get("inputTokens")
+        return None
+
     @property
     def _metrics_client(self) -> "MetricsClient":
         """Get the singleton MetricsClient instance."""
diff --git a/tests/strands/agent/test_agent_result.py b/tests/strands/agent/test_agent_result.py
index a4478c3ca..64391f299 100644
--- a/tests/strands/agent/test_agent_result.py
+++ b/tests/strands/agent/test_agent_result.py
@@ -370,3 +370,17 @@ def test__str__empty_interrupts_returns_agent_message(mock_metrics, simple_messa
 
     # Empty list is falsy, should fall through to text content
     assert message_string == "Hello world!\n"
+
+
+def test_context_size_delegates_to_metrics(mock_metrics, simple_message: Message):
+    """Test that context_size returns the value of metrics.latest_context_size."""
+    mock_metrics.latest_context_size = 12345
+    result = AgentResult(stop_reason="end_turn", message=simple_message, metrics=mock_metrics, state={})
+    assert result.context_size == 12345
+
+
+def test_context_size_none_when_no_data(mock_metrics, simple_message: Message):
+    """Test that context_size returns None when metrics.latest_context_size is None."""
+    mock_metrics.latest_context_size = None
+    result = AgentResult(stop_reason="end_turn", message=simple_message, metrics=mock_metrics, state={})
+    assert result.context_size is None
diff --git a/tests/strands/telemetry/test_metrics.py b/tests/strands/telemetry/test_metrics.py
index 800bcebc4..c38fa6a18 100644
--- a/tests/strands/telemetry/test_metrics.py
+++ b/tests/strands/telemetry/test_metrics.py
@@ -566,3 +566,50 @@ def test_reset_usage_metrics(usage, event_loop_metrics, mock_get_meter_provider)
 
     # Verify accumulated_usage is NOT cleared
     assert event_loop_metrics.accumulated_usage["inputTokens"] == 11
+
+
+def test_latest_context_size_no_invocations(event_loop_metrics):
+    assert event_loop_metrics.latest_context_size is None
+
+
+def test_latest_context_size_invocation_with_no_cycles(event_loop_metrics):
+    event_loop_metrics.reset_usage_metrics()
+    assert event_loop_metrics.latest_context_size is None
+
+
+def test_latest_context_size_returns_last_cycle(event_loop_metrics, mock_get_meter_provider):
+    event_loop_metrics.reset_usage_metrics()
+    event_loop_metrics.start_cycle(attributes={"event_loop_cycle_id": "c1"})
+    event_loop_metrics.update_usage(Usage(inputTokens=100, outputTokens=50, totalTokens=150))
+
+    event_loop_metrics.start_cycle(attributes={"event_loop_cycle_id": "c2"})
+    event_loop_metrics.update_usage(Usage(inputTokens=250, outputTokens=80, totalTokens=330))
+
+    assert event_loop_metrics.latest_context_size == 250
+
+
+def test_latest_context_size_returns_from_latest_invocation(event_loop_metrics, mock_get_meter_provider):
+    # First invocation: a single cycle reporting 100 input tokens
+    event_loop_metrics.reset_usage_metrics()
+    event_loop_metrics.start_cycle(attributes={"event_loop_cycle_id": "c1"})
+    event_loop_metrics.update_usage(Usage(inputTokens=100, outputTokens=50, totalTokens=150))
+
+    # Second invocation: its 500 input tokens must be reported instead of the first invocation's 100
+    event_loop_metrics.reset_usage_metrics()
+    event_loop_metrics.start_cycle(attributes={"event_loop_cycle_id": "c2"})
+    event_loop_metrics.update_usage(Usage(inputTokens=500, outputTokens=80, totalTokens=580))
+
+    assert event_loop_metrics.latest_context_size == 500
+
+
+def test_latest_context_size_missing_input_tokens_key(event_loop_metrics):
+    """Returns None when the latest cycle's usage dict has no inputTokens key (e.g. provider bug)."""
+    event_loop_metrics.reset_usage_metrics()
+    invocation = event_loop_metrics.agent_invocations[-1]
+    invocation.cycles.append(
+        strands.telemetry.metrics.EventLoopCycleMetric(
+            event_loop_cycle_id="c1",
+            usage={"outputTokens": 50, "totalTokens": 50},
+        )
+    )
+    assert event_loop_metrics.latest_context_size is None