Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions defaults/code-talk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ steps:
# ──────────────────────────────────────────────────────────────────
skip_code_context: true
enableDelegate: true
enableTasks: true
enableExecutePlan: false
max_iterations: 100
prompt_type: code-explorer
Expand Down Expand Up @@ -463,8 +464,19 @@ steps:

Delegate usage:

Task protocol:
- Before substantive work begins, create the task or tasks first.
- This applies to long-running single-goal investigations too, not only multi-goal requests.
- If there is a clear list of independent investigation jobs, create one task per job before starting.
- When a task starts, mark it in_progress immediately.
- When a task is actually done, complete it immediately before moving on.
- Do not leave finished tasks pending or in_progress.
- Prefer one active in_progress task at a time unless work is truly parallel.
- Before the final answer, every created task must be completed or cancelled.

- Each delegate should answer ONE specific question (not "look at the code")
- Run multiple delegates in PARALLEL for different hypotheses or components
- If you spawn delegates for independent jobs, create matching tasks first and keep them updated in real time
- Ask delegates to return specific file paths and line numbers
- Do NOT delegate or re-search the same question twice in one investigation
- If a delegate returns enough evidence for the current claim, stop and use it
Expand All @@ -473,6 +485,7 @@ steps:
delegate 4 for "session context metadata" again. Use the results you have.
- Before spawning a delegate, review results from ALL prior delegates.
If the information is already available, use it instead of re-delegating.
- If jobs are not truly independent, do not parallelize them. Keep work sequential and keep task state accurate.

Relay complete data from tools — do not summarize or compress tool output.

Expand Down
4 changes: 4 additions & 0 deletions deploy/observability/local/Dockerfile.otelcol
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Stage a static busybox binary: the contrib collector image is
# distroless-style (no shell, no wget), so healthchecks need a helper.
FROM busybox:1.36.1-musl AS busybox

FROM otel/opentelemetry-collector-contrib:0.147.0
# Lets docker-compose healthchecks run `/bin/busybox wget` against the
# collector's health_check extension endpoint.
COPY --from=busybox /bin/busybox /bin/busybox
4 changes: 4 additions & 0 deletions deploy/observability/local/Dockerfile.tempo
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Stage a static busybox binary: the Tempo image ships without a shell
# or wget, so healthchecks need a helper.
FROM busybox:1.36.1-musl AS busybox

FROM grafana/tempo:2.10.1
# Lets docker-compose healthchecks run `/bin/busybox wget` against
# Tempo's /ready endpoint.
COPY --from=busybox /bin/busybox /bin/busybox
41 changes: 41 additions & 0 deletions deploy/observability/local/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Visor Local Observability

This is the canonical local observability stack for Visor.

It replaces the single-container `grafana/otel-lgtm` setup with separate services:
- `tempo`
- `otelcol`
- `prometheus`
- `grafana`
- `autoheal`

Ports:
- `8001` Grafana
- `4317` OTLP gRPC
- `4318` OTLP HTTP
- `3200` Tempo HTTP API
- `9091` Prometheus (host port; maps to container port 9090)

Start from the Visor repo root:

```bash
docker compose -f deploy/observability/local/docker-compose.yml up -d
```

Stop:

```bash
docker compose -f deploy/observability/local/docker-compose.yml down
```

If the old all-in-one LGTM container is still running, remove it first:

```bash
docker rm -f grafana-otel
```

Point Visor-based apps at this stack with:
- `OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318`
- `GRAFANA_URL=http://localhost:8001`

This stack is generic Visor infrastructure. Project-specific apps like Oel should reference it rather than owning their own copy.
134 changes: 134 additions & 0 deletions deploy/observability/local/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
services:
  # Watches container health and restarts any container carrying the
  # `autoheal` label that reports unhealthy.
  autoheal:
    image: willfarrell/autoheal:1.2.0
    container_name: visor-autoheal
    restart: unless-stopped
    environment:
      AUTOHEAL_CONTAINER_LABEL: autoheal
      # Environment values are strings at runtime; quote number-looking
      # values so YAML does not type them as integers.
      AUTOHEAL_INTERVAL: "30"
      AUTOHEAL_START_PERIOD: "120"
      CURL_TIMEOUT: "10"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - observability

  # Trace backend. Image is Tempo plus a static busybox (Dockerfile.tempo)
  # so the healthcheck has a wget applet.
  tempo:
    build:
      context: .
      dockerfile: Dockerfile.tempo
    image: visor/tempo-with-busybox:2.10.1
    container_name: visor-tempo
    restart: unless-stopped
    command: ["-config.file=/etc/tempo/tempo.yaml"]
    labels:
      autoheal: "true"
    volumes:
      - ./tempo.yaml:/etc/tempo/tempo.yaml:ro
      - tempo-data:/var/tempo
    ports:
      - "3200:3200"
    healthcheck:
      test: ["CMD", "/bin/busybox", "wget", "-qO-", "http://127.0.0.1:3200/ready"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 20s
    networks:
      - observability

  # OTLP entry point for apps (4317 gRPC / 4318 HTTP). Image includes a
  # static busybox (Dockerfile.otelcol) for the healthcheck.
  otelcol:
    build:
      context: .
      dockerfile: Dockerfile.otelcol
    image: visor/otelcol-with-busybox:0.147.0
    container_name: visor-otelcol
    restart: unless-stopped
    command: ["--config=/etc/otelcol/config.yaml"]
    labels:
      autoheal: "true"
    volumes:
      - ./otelcol.yaml:/etc/otelcol/config.yaml:ro
    ports:
      - "4317:4317"
      - "4318:4318"
    depends_on:
      tempo:
        condition: service_healthy
    healthcheck:
      # 13133 is the collector's health_check extension endpoint.
      test: ["CMD", "/bin/busybox", "wget", "-qO-", "http://127.0.0.1:13133/"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 20s
    networks:
      - observability

  # Metrics store. Scrapes the collector's prometheus exporter; the
  # --web.enable-otlp-receiver flag also accepts direct OTLP pushes.
  prometheus:
    image: prom/prometheus:v3.10.0
    container_name: visor-prometheus
    restart: unless-stopped
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --web.enable-otlp-receiver
    labels:
      autoheal: "true"
    volumes:
      - ./prometheus.yaml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    ports:
      # Host 9091 to avoid clashing with anything on the default 9090.
      - "9091:9090"
    depends_on:
      otelcol:
        condition: service_healthy
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:9090/-/healthy | grep -q 'Prometheus' || wget -qO- http://127.0.0.1:9090/api/v1/status/runtimeinfo >/dev/null"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 20s
    networks:
      - observability

  # Dashboards. Anonymous admin access — local development only.
  grafana:
    image: grafana/grafana:12.4.0
    container_name: visor-grafana
    restart: unless-stopped
    environment:
      GF_SERVER_HTTP_PORT: "3000"
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: admin
      GF_AUTH_ANONYMOUS_ENABLED: "true"
      GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
      GF_AUTH_DISABLE_LOGIN_FORM: "true"
      GF_USERS_DEFAULT_THEME: light
    labels:
      autoheal: "true"
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - grafana-data:/var/lib/grafana
    ports:
      - "8001:3000"
    depends_on:
      tempo:
        condition: service_healthy
      prometheus:
        condition: service_healthy
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:3000/api/health | grep -q 'ok'"]
      interval: 30s
      timeout: 5s
      retries: 5
      start_period: 30s
    networks:
      - observability

volumes:
  tempo-data:
  prometheus-data:
  grafana-data:

networks:
  observability:
    name: visor-observability
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Grafana datasource provisioning (mounted read-only by docker-compose).
apiVersion: 1

datasources:
  # Tempo is the default datasource; its trace views link out to
  # Prometheus for service maps and span metrics.
  - name: Tempo
    uid: tempo
    type: tempo
    access: proxy
    url: http://tempo:3200
    isDefault: true
    jsonData:
      httpMethod: GET
      serviceMap:
        datasourceUid: prometheus
      nodeGraph:
        enabled: true
      tracesToMetrics:
        datasourceUid: prometheus
      search:
        hide: false
      spanBar:
        type: none

  - name: Prometheus
    uid: prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    jsonData:
      httpMethod: POST
49 changes: 49 additions & 0 deletions deploy/observability/local/otelcol.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# OpenTelemetry Collector: receives OTLP from apps, forwards traces to
# Tempo, re-exposes metrics for Prometheus, and logs to the debug exporter.
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318

processors:
  batch:
    send_batch_size: 2048
    timeout: 5s
  # memory_limiter must run first in every pipeline (see `processors`
  # ordering below) so it can drop data before batching.
  memory_limiter:
    check_interval: 1s
    limit_mib: 1024
    spike_limit_mib: 256

exporters:
  otlp/tempo:
    # Tempo ingests OTLP through its distributor receiver (4317).
    # Port 9095 is Tempo's internal server gRPC API and does not accept
    # OTLP — tempo.yaml must define distributor.receivers.otlp on 4317.
    endpoint: tempo:4317
    tls:
      insecure: true
  # Scrape endpoint for Prometheus (job `otelcol` targets :8889).
  prometheus:
    endpoint: 0.0.0.0:8889
    send_timestamps: true
    metric_expiration: 5m
    enable_open_metrics: true
  debug:
    verbosity: basic

extensions:
  # Health endpoint used by the docker-compose healthcheck.
  health_check:
    endpoint: 0.0.0.0:13133

service:
  extensions: [health_check]
  pipelines:
    traces:
      receivers: [otlp]
      processors: [memory_limiter, batch]
      exporters: [otlp/tempo]
    metrics:
      receivers: [otlp]
      processors: [memory_limiter, batch]
      exporters: [prometheus]
    # Logs are only echoed to the collector's own stdout for now.
    logs:
      receivers: [otlp]
      processors: [memory_limiter, batch]
      exporters: [debug]
12 changes: 12 additions & 0 deletions deploy/observability/local/prometheus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Prometheus scrape configuration for the local observability stack.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # App metrics re-exported by the collector's prometheus exporter.
  - job_name: otelcol
    static_configs:
      - targets: ['otelcol:8889']

  # Tempo's own operational metrics (served on its HTTP port).
  - job_name: tempo
    static_configs:
      - targets: ['tempo:3200']
65 changes: 65 additions & 0 deletions deploy/observability/local/tempo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Tempo single-binary configuration for the local observability stack.
server:
  http_listen_port: 3200
  grpc_listen_port: 9095
  log_level: info

# Without a distributor receiver Tempo ingests nothing: the server gRPC
# port (9095) is Tempo's internal API, not an OTLP endpoint. The
# collector's otlp/tempo exporter targets tempo:4317.
distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          endpoint: 0.0.0.0:4317

query_frontend:
  search:
    duration_slo: 5s
    throughput_bytes_slo: 1.073741824e+09
  trace_by_id:
    duration_slo: 5s
  max_outstanding_per_tenant: 8192

querier:
  frontend_worker:
    # Single-binary mode: the worker dials the frontend over the local
    # server gRPC port.
    frontend_address: 127.0.0.1:9095
    parallelism: 4
  max_concurrent_queries: 20

compactor:
  compaction:
    block_retention: 168h  # keep trace blocks for 7 days

metrics_generator:
  storage:
    path: /var/tempo/generator/wal
  traces_storage:
    path: /var/tempo/generator/traces
  processor:
    local_blocks:
      filter_server_spans: false
    span_metrics:
      dimensions:
        - service.name
        - operation
        - status.code
  registry:
    external_labels:
      # Generic Visor stack label (this stack is not Oel-specific; see
      # the README — project apps reference it rather than own it).
      source: visor-local

ingester:
  max_block_duration: 5m
  trace_idle_period: 10s
  flush_check_period: 30s
  lifecycler:
    ring:
      kvstore:
        store: inmemory
    replication_factor: 1

storage:
  trace:
    backend: local
    wal:
      path: /var/tempo/wal
    local:
      path: /var/tempo/blocks

memberlist:
  abort_if_cluster_join_fails: false
  bind_addr:
    - 127.0.0.1

# New-format per-tenant overrides. max_bytes_per_trace moved under
# defaults.global (legacy top-level keys must not be mixed with
# `defaults:`), and the local-blocks processor is activated here —
# without this line the metrics_generator section above is inert.
overrides:
  defaults:
    global:
      max_bytes_per_trace: 5000000
    metrics_generator:
      processors: [local-blocks]
Loading
Loading