Skip to content

Commit b206669

Browse files
fix(llma): use distinct_id from outer context if not provided (#449)
* fix(llma): use distinct_id from outer context if not provided
* fix(llma): distinct_id from context is now explicitly passed to capture method
* fix(llma): fix $process_person_profile with outer context distinct_id, add tests
  - Fix personless check to consider outer context distinct_id (not just the explicit param), so events from users who set distinct_id via outer context are not incorrectly marked as personless.
  - Fix typo: "district_id" -> "distinct_id" in comments.
  - Add test coverage for distinct_id resolution: no id (personless), explicit param, outer context, and explicit overriding outer context.
* chore: add sampo changeset for distinct_id context fix
* style: ruff format

---------

Co-authored-by: Andrew Maguire <andrewm4894@gmail.com>
1 parent 16e1802 commit b206669

3 files changed

Lines changed: 128 additions & 5 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
pypi/posthog: patch
3+
---
4+
5+
fix(llma): use distinct_id from outer context if not provided, fix $process_person_profile for context-based identity

posthog/ai/utils.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import uuid
33
from typing import Any, Callable, Dict, List, Optional, cast
44

5-
from posthog import get_tags, identify_context, new_context, tag
5+
from posthog import get_tags, identify_context, new_context, tag, contexts
66
from posthog.ai.sanitization import (
77
sanitize_anthropic,
88
sanitize_gemini,
@@ -366,6 +366,16 @@ def call_llm_and_track_usage(
366366
if posthog_trace_id is None:
367367
posthog_trace_id = str(uuid.uuid4())
368368

369+
# Check if we have a real user distinct_id (from param or outer context)
370+
has_person_distinct_id = (
371+
posthog_distinct_id is not None
372+
or contexts.get_context_distinct_id() is not None
373+
)
374+
375+
if not has_person_distinct_id:
376+
# Fall back to trace_id as distinct_id when no real user id is available.
377+
identify_context(posthog_trace_id)
378+
369379
if response and (
370380
hasattr(response, "usage")
371381
or (provider == "gemini" and hasattr(response, "usage_metadata"))
@@ -421,7 +431,7 @@ def call_llm_and_track_usage(
421431
# Already serialized by converters
422432
tag("$ai_usage", raw_usage)
423433

424-
if posthog_distinct_id is None:
434+
if not has_person_distinct_id:
425435
tag("$process_person_profile", False)
426436

427437
# Process instructions for Responses API
@@ -445,7 +455,7 @@ def call_llm_and_track_usage(
445455
sdk_tags, posthog_properties
446456
)
447457
ph_client.capture(
448-
distinct_id=posthog_distinct_id or posthog_trace_id,
458+
distinct_id=contexts.get_context_distinct_id(),
449459
event="$ai_generation",
450460
properties=merged_properties,
451461
groups=posthog_groups,
@@ -501,6 +511,16 @@ async def call_llm_and_track_usage_async(
501511
if posthog_trace_id is None:
502512
posthog_trace_id = str(uuid.uuid4())
503513

514+
# Check if we have a real user distinct_id (from param or outer context)
515+
has_person_distinct_id = (
516+
posthog_distinct_id is not None
517+
or contexts.get_context_distinct_id() is not None
518+
)
519+
520+
if not has_person_distinct_id:
521+
# Fall back to trace_id as distinct_id when no real user id is available.
522+
identify_context(posthog_trace_id)
523+
504524
if response and (
505525
hasattr(response, "usage")
506526
or (provider == "gemini" and hasattr(response, "usage_metadata"))
@@ -556,7 +576,7 @@ async def call_llm_and_track_usage_async(
556576
# Already serialized by converters
557577
tag("$ai_usage", raw_usage)
558578

559-
if posthog_distinct_id is None:
579+
if not has_person_distinct_id:
560580
tag("$process_person_profile", False)
561581

562582
# Process instructions for Responses API
@@ -580,7 +600,7 @@ async def call_llm_and_track_usage_async(
580600
sdk_tags, posthog_properties
581601
)
582602
ph_client.capture(
583-
distinct_id=posthog_distinct_id or posthog_trace_id,
603+
distinct_id=contexts.get_context_distinct_id(),
584604
event="$ai_generation",
585605
properties=merged_properties,
586606
groups=posthog_groups,

posthog/test/ai/anthropic/test_anthropic.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
import pytest
55

6+
from posthog import identify_context, new_context
7+
68
try:
79
from anthropic.types import Message, Usage
810

@@ -1302,3 +1304,99 @@ async def run_test():
13021304
assert props["$ai_web_search_count"] == 2
13031305
assert props["$ai_input_tokens"] == 50
13041306
assert props["$ai_output_tokens"] == 25
1307+
1308+
1309+
# =======================
1310+
# Distinct ID Context Tests
1311+
# =======================
1312+
1313+
1314+
def test_no_distinct_id_uses_trace_id_and_personless(
1315+
mock_client, mock_anthropic_response
1316+
):
1317+
"""When no distinct_id is provided and no outer context, trace_id is used and event is personless."""
1318+
with patch(
1319+
"anthropic.resources.Messages.create", return_value=mock_anthropic_response
1320+
):
1321+
client = Anthropic(api_key="test-key", posthog_client=mock_client)
1322+
client.messages.create(
1323+
model="claude-3-opus-20240229",
1324+
messages=[{"role": "user", "content": "Hello"}],
1325+
posthog_trace_id="trace-123",
1326+
)
1327+
1328+
call_args = mock_client.capture.call_args[1]
1329+
props = call_args["properties"]
1330+
1331+
assert call_args["distinct_id"] == "trace-123"
1332+
assert props["$process_person_profile"] is False
1333+
1334+
1335+
def test_explicit_distinct_id_creates_person_profile(
1336+
mock_client, mock_anthropic_response
1337+
):
1338+
"""When posthog_distinct_id is explicitly passed, it is used and event is not personless."""
1339+
with patch(
1340+
"anthropic.resources.Messages.create", return_value=mock_anthropic_response
1341+
):
1342+
client = Anthropic(api_key="test-key", posthog_client=mock_client)
1343+
client.messages.create(
1344+
model="claude-3-opus-20240229",
1345+
messages=[{"role": "user", "content": "Hello"}],
1346+
posthog_distinct_id="user-123",
1347+
posthog_trace_id="trace-123",
1348+
)
1349+
1350+
call_args = mock_client.capture.call_args[1]
1351+
props = call_args["properties"]
1352+
1353+
assert call_args["distinct_id"] == "user-123"
1354+
assert (
1355+
"$process_person_profile" not in props
1356+
or props["$process_person_profile"] is not False
1357+
)
1358+
1359+
1360+
def test_outer_context_distinct_id_is_used(mock_client, mock_anthropic_response):
1361+
"""When an outer context has a distinct_id, it should be used instead of trace_id."""
1362+
with patch(
1363+
"anthropic.resources.Messages.create", return_value=mock_anthropic_response
1364+
):
1365+
client = Anthropic(api_key="test-key", posthog_client=mock_client)
1366+
with new_context():
1367+
identify_context("outer-user-456")
1368+
client.messages.create(
1369+
model="claude-3-opus-20240229",
1370+
messages=[{"role": "user", "content": "Hello"}],
1371+
posthog_trace_id="trace-123",
1372+
)
1373+
1374+
call_args = mock_client.capture.call_args[1]
1375+
props = call_args["properties"]
1376+
1377+
assert call_args["distinct_id"] == "outer-user-456"
1378+
assert (
1379+
"$process_person_profile" not in props
1380+
or props["$process_person_profile"] is not False
1381+
)
1382+
1383+
1384+
def test_explicit_distinct_id_overrides_outer_context(
1385+
mock_client, mock_anthropic_response
1386+
):
1387+
"""When both outer context and explicit posthog_distinct_id are set, explicit wins."""
1388+
with patch(
1389+
"anthropic.resources.Messages.create", return_value=mock_anthropic_response
1390+
):
1391+
client = Anthropic(api_key="test-key", posthog_client=mock_client)
1392+
with new_context():
1393+
identify_context("outer-user-456")
1394+
client.messages.create(
1395+
model="claude-3-opus-20240229",
1396+
messages=[{"role": "user", "content": "Hello"}],
1397+
posthog_distinct_id="explicit-user-789",
1398+
posthog_trace_id="trace-123",
1399+
)
1400+
1401+
call_args = mock_client.capture.call_args[1]
1402+
assert call_args["distinct_id"] == "explicit-user-789"

0 commit comments

Comments
 (0)