Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions defaults/code-talk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ steps:
# ──────────────────────────────────────────────────────────────────
skip_code_context: true
enableDelegate: true
enableTasks: true
enableExecutePlan: false
max_iterations: 100
prompt_type: code-explorer
Expand Down Expand Up @@ -463,8 +464,19 @@ steps:

Delegate usage:

Task protocol:
- Before substantive work begins, create the task or tasks first.
- This applies to long-running single-goal investigations too, not only multi-goal requests.
- If there is a clear list of independent investigation jobs, create one task per job before starting.
- When a task starts, mark it in_progress immediately.
- When a task is actually done, complete it immediately before moving on.
- Do not leave finished tasks pending or in_progress.
- Prefer one active in_progress task at a time unless work is truly parallel.
- Before the final answer, every created task must be completed or cancelled.

- Each delegate should answer ONE specific question (not "look at the code")
- Run multiple delegates in PARALLEL for different hypotheses or components
- If you spawn delegates for independent jobs, create matching tasks first and keep them updated in real time
- Ask delegates to return specific file paths and line numbers
- Do NOT delegate or re-search the same question twice in one investigation
- If a delegate returns enough evidence for the current claim, stop and use it
Expand All @@ -473,6 +485,7 @@ steps:
delegate 4 for "session context metadata" again. Use the results you have.
- Before spawning a delegate, review results from ALL prior delegates.
If the information is already available, use it instead of re-delegating.
- If jobs are not truly independent, do not parallelize them. Keep work sequential and keep task state accurate.

Relay complete data from tools — do not summarize or compress tool output.

Expand Down
4 changes: 4 additions & 0 deletions deploy/observability/local/Dockerfile.otelcol
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Stage a static busybox binary: the contrib collector image is
# distroless-style (no shell, no wget), so healthchecks need a helper.
FROM busybox:1.36.1-musl AS busybox

FROM otel/opentelemetry-collector-contrib:0.147.0
# Lets docker-compose healthchecks run `/bin/busybox wget` against the
# collector's health_check extension endpoint.
COPY --from=busybox /bin/busybox /bin/busybox
4 changes: 4 additions & 0 deletions deploy/observability/local/Dockerfile.tempo
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Stage a static busybox binary: the Tempo image ships without a shell
# or wget, so healthchecks need a helper.
FROM busybox:1.36.1-musl AS busybox

FROM grafana/tempo:2.10.1
# Lets docker-compose healthchecks run `/bin/busybox wget` against
# Tempo's /ready endpoint.
COPY --from=busybox /bin/busybox /bin/busybox
41 changes: 41 additions & 0 deletions deploy/observability/local/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Visor Local Observability

This is the canonical local observability stack for Visor.

It replaces the single-container `grafana/otel-lgtm` setup with separate services:
- `tempo`
- `otelcol`
- `prometheus`
- `grafana`
- `autoheal`

Ports:
- `8001` Grafana
- `4317` OTLP gRPC
- `4318` OTLP HTTP
- `3200` Tempo HTTP API
- `9091` Prometheus (host port; maps to container port 9090)

Start from the Visor repo root:

```bash
docker compose -f deploy/observability/local/docker-compose.yml up -d
```

Stop:

```bash
docker compose -f deploy/observability/local/docker-compose.yml down
```

If the old all-in-one LGTM container is still running, remove it first:

```bash
docker rm -f grafana-otel
```

Point Visor-based apps at this stack with:
- `OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318`
- `GRAFANA_URL=http://localhost:8001`

This stack is generic Visor infrastructure. Project-specific apps like Oel should reference it rather than owning their own copy.
134 changes: 134 additions & 0 deletions deploy/observability/local/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
services:
  # Watches container health and restarts any container carrying the
  # `autoheal` label that reports unhealthy.
  autoheal:
    image: willfarrell/autoheal:1.2.0
    container_name: visor-autoheal
    restart: unless-stopped
    environment:
      AUTOHEAL_CONTAINER_LABEL: autoheal
      # Environment values are strings at runtime; quote number-looking
      # values so YAML does not type them as integers.
      AUTOHEAL_INTERVAL: "30"
      AUTOHEAL_START_PERIOD: "120"
      CURL_TIMEOUT: "10"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - observability

  # Trace backend. Image is Tempo plus a static busybox (Dockerfile.tempo)
  # so the healthcheck has a wget applet.
  tempo:
    build:
      context: .
      dockerfile: Dockerfile.tempo
    image: visor/tempo-with-busybox:2.10.1
    container_name: visor-tempo
    restart: unless-stopped
    command: ["-config.file=/etc/tempo/tempo.yaml"]
    labels:
      autoheal: "true"
    volumes:
      - ./tempo.yaml:/etc/tempo/tempo.yaml:ro
      - tempo-data:/var/tempo
    ports:
      - "3200:3200"
    healthcheck:
      test: ["CMD", "/bin/busybox", "wget", "-qO-", "http://127.0.0.1:3200/ready"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 20s
    networks:
      - observability

  # OTLP entry point for apps (4317 gRPC / 4318 HTTP). Image includes a
  # static busybox (Dockerfile.otelcol) for the healthcheck.
  otelcol:
    build:
      context: .
      dockerfile: Dockerfile.otelcol
    image: visor/otelcol-with-busybox:0.147.0
    container_name: visor-otelcol
    restart: unless-stopped
    command: ["--config=/etc/otelcol/config.yaml"]
    labels:
      autoheal: "true"
    volumes:
      - ./otelcol.yaml:/etc/otelcol/config.yaml:ro
    ports:
      - "4317:4317"
      - "4318:4318"
    depends_on:
      tempo:
        condition: service_healthy
    healthcheck:
      # 13133 is the collector's health_check extension endpoint.
      test: ["CMD", "/bin/busybox", "wget", "-qO-", "http://127.0.0.1:13133/"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 20s
    networks:
      - observability

  # Metrics store. Scrapes the collector's prometheus exporter; the
  # --web.enable-otlp-receiver flag also accepts direct OTLP pushes.
  prometheus:
    image: prom/prometheus:v3.10.0
    container_name: visor-prometheus
    restart: unless-stopped
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --web.enable-otlp-receiver
    labels:
      autoheal: "true"
    volumes:
      - ./prometheus.yaml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    ports:
      # Host 9091 to avoid clashing with anything on the default 9090.
      - "9091:9090"
    depends_on:
      otelcol:
        condition: service_healthy
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:9090/-/healthy | grep -q 'Prometheus' || wget -qO- http://127.0.0.1:9090/api/v1/status/runtimeinfo >/dev/null"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 20s
    networks:
      - observability

  # Dashboards. Anonymous admin access — local development only.
  grafana:
    image: grafana/grafana:12.4.0
    container_name: visor-grafana
    restart: unless-stopped
    environment:
      GF_SERVER_HTTP_PORT: "3000"
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: admin
      GF_AUTH_ANONYMOUS_ENABLED: "true"
      GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
      GF_AUTH_DISABLE_LOGIN_FORM: "true"
      GF_USERS_DEFAULT_THEME: light
    labels:
      autoheal: "true"
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - grafana-data:/var/lib/grafana
    ports:
      - "8001:3000"
    depends_on:
      tempo:
        condition: service_healthy
      prometheus:
        condition: service_healthy
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:3000/api/health | grep -q 'ok'"]
      interval: 30s
      timeout: 5s
      retries: 5
      start_period: 30s
    networks:
      - observability

volumes:
  tempo-data:
  prometheus-data:
  grafana-data:

networks:
  observability:
    name: visor-observability
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Grafana datasource provisioning (mounted read-only by docker-compose).
apiVersion: 1

datasources:
  # Tempo is the default datasource; its trace views link out to
  # Prometheus for service maps and span metrics.
  - name: Tempo
    uid: tempo
    type: tempo
    access: proxy
    url: http://tempo:3200
    isDefault: true
    jsonData:
      httpMethod: GET
      serviceMap:
        datasourceUid: prometheus
      nodeGraph:
        enabled: true
      tracesToMetrics:
        datasourceUid: prometheus
      search:
        hide: false
      spanBar:
        type: none

  - name: Prometheus
    uid: prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    jsonData:
      httpMethod: POST
49 changes: 49 additions & 0 deletions deploy/observability/local/otelcol.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# OpenTelemetry Collector: receives OTLP from apps, forwards traces to
# Tempo, re-exposes metrics for Prometheus, and logs to the debug exporter.
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318

processors:
  batch:
    send_batch_size: 2048
    timeout: 5s
  # memory_limiter must run first in every pipeline (see `processors`
  # ordering below) so it can drop data before batching.
  memory_limiter:
    check_interval: 1s
    limit_mib: 1024
    spike_limit_mib: 256

exporters:
  otlp/tempo:
    # Tempo ingests OTLP through its distributor receiver (4317).
    # Port 9095 is Tempo's internal server gRPC API and does not accept
    # OTLP — tempo.yaml must define distributor.receivers.otlp on 4317.
    endpoint: tempo:4317
    tls:
      insecure: true
  # Scrape endpoint for Prometheus (job `otelcol` targets :8889).
  prometheus:
    endpoint: 0.0.0.0:8889
    send_timestamps: true
    metric_expiration: 5m
    enable_open_metrics: true
  debug:
    verbosity: basic

extensions:
  # Health endpoint used by the docker-compose healthcheck.
  health_check:
    endpoint: 0.0.0.0:13133

service:
  extensions: [health_check]
  pipelines:
    traces:
      receivers: [otlp]
      processors: [memory_limiter, batch]
      exporters: [otlp/tempo]
    metrics:
      receivers: [otlp]
      processors: [memory_limiter, batch]
      exporters: [prometheus]
    # Logs are only echoed to the collector's own stdout for now.
    logs:
      receivers: [otlp]
      processors: [memory_limiter, batch]
      exporters: [debug]
12 changes: 12 additions & 0 deletions deploy/observability/local/prometheus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Prometheus scrape configuration for the local observability stack.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # App metrics re-exported by the collector's prometheus exporter.
  - job_name: otelcol
    static_configs:
      - targets: ['otelcol:8889']

  # Tempo's own operational metrics (served on its HTTP port).
  - job_name: tempo
    static_configs:
      - targets: ['tempo:3200']
65 changes: 65 additions & 0 deletions deploy/observability/local/tempo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Tempo single-binary configuration for the local observability stack.
server:
  http_listen_port: 3200
  grpc_listen_port: 9095
  log_level: info

# Without a distributor receiver Tempo ingests nothing: the server gRPC
# port (9095) is Tempo's internal API, not an OTLP endpoint. The
# collector's otlp/tempo exporter targets tempo:4317.
distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          endpoint: 0.0.0.0:4317

query_frontend:
  search:
    duration_slo: 5s
    throughput_bytes_slo: 1.073741824e+09
  trace_by_id:
    duration_slo: 5s
  max_outstanding_per_tenant: 8192

querier:
  frontend_worker:
    # Single-binary mode: the worker dials the frontend over the local
    # server gRPC port.
    frontend_address: 127.0.0.1:9095
    parallelism: 4
  max_concurrent_queries: 20

compactor:
  compaction:
    block_retention: 168h  # keep trace blocks for 7 days

metrics_generator:
  storage:
    path: /var/tempo/generator/wal
  traces_storage:
    path: /var/tempo/generator/traces
  processor:
    local_blocks:
      filter_server_spans: false
    span_metrics:
      dimensions:
        - service.name
        - operation
        - status.code
  registry:
    external_labels:
      # Generic Visor stack label (this stack is not Oel-specific; see
      # the README — project apps reference it rather than own it).
      source: visor-local

ingester:
  max_block_duration: 5m
  trace_idle_period: 10s
  flush_check_period: 30s
  lifecycler:
    ring:
      kvstore:
        store: inmemory
    replication_factor: 1

storage:
  trace:
    backend: local
    wal:
      path: /var/tempo/wal
    local:
      path: /var/tempo/blocks

memberlist:
  abort_if_cluster_join_fails: false
  bind_addr:
    - 127.0.0.1

# New-format per-tenant overrides. max_bytes_per_trace moved under
# defaults.global (legacy top-level keys must not be mixed with
# `defaults:`), and the local-blocks processor is activated here —
# without this line the metrics_generator section above is inert.
overrides:
  defaults:
    global:
      max_bytes_per_trace: 5000000
    metrics_generator:
      processors: [local-blocks]
Loading
Loading