Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions backend/app/alembic/versions/047_add_tts_evaluation_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
"""add tts evaluation tables

Revision ID: 047
Revises: 046
Create Date: 2026-02-14 12:00:00.000000

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "047"
down_revision = "046"
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``tts_result`` table and its supporting indexes.

    One row per synthesized sample in a TTS evaluation run. Each row records
    the input text, the synthesized audio location, synthesis status, and
    optional human feedback (``is_correct`` / ``comment``).
    """
    # Create tts_result table
    op.create_table(
        "tts_result",
        sa.Column(
            "id",
            sa.Integer(),
            nullable=False,
            comment="Unique identifier for the TTS result",
        ),
        sa.Column(
            "sample_text",
            sa.Text(),
            nullable=False,
            comment="Input text that was synthesized to speech",
        ),
        sa.Column(
            "object_store_url",
            sa.String(),
            nullable=True,
            comment="S3 URL of the generated WAV audio file",
        ),
        # NOTE(review): "metadata" is a reserved attribute name on SQLAlchemy
        # declarative classes; the ORM model must map this column under a
        # different attribute name (e.g. via Column("metadata", key=...)) —
        # confirm against the model definition.
        sa.Column(
            "metadata",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
            comment="Audio metadata: {duration_seconds, size_bytes}",
        ),
        sa.Column(
            "provider",
            sa.String(length=100),
            nullable=False,
            comment="TTS provider used (e.g., gemini-2.5-pro-preview-tts)",
        ),
        sa.Column(
            "status",
            sa.String(length=20),
            nullable=False,
            server_default="PENDING",
            comment="Result status: PENDING, SUCCESS, FAILED",
        ),
        sa.Column(
            "score",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
            comment="Extensible evaluation metrics (null in Phase 1)",
        ),
        sa.Column(
            "is_correct",
            sa.Boolean(),
            nullable=True,
            comment="Human feedback: audio quality correctness (null=not reviewed)",
        ),
        sa.Column(
            "comment",
            sa.Text(),
            nullable=True,
            comment="Human feedback comment",
        ),
        sa.Column(
            "error_message",
            sa.Text(),
            nullable=True,
            comment="Error message if synthesis failed",
        ),
        sa.Column(
            "evaluation_run_id",
            sa.Integer(),
            nullable=False,
            comment="Reference to the evaluation run",
        ),
        sa.Column(
            "organization_id",
            sa.Integer(),
            nullable=False,
            comment="Reference to the organization",
        ),
        sa.Column(
            "project_id",
            sa.Integer(),
            nullable=False,
            comment="Reference to the project",
        ),
        # NOTE(review): naive DateTime — presumably UTC; confirm against the
        # app-wide timestamp convention.
        sa.Column(
            "inserted_at",
            sa.DateTime(),
            nullable=False,
            comment="Timestamp when the result was created",
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(),
            nullable=False,
            comment="Timestamp when the result was last updated",
        ),
        sa.ForeignKeyConstraint(
            ["evaluation_run_id"],
            ["evaluation_run.id"],
            name="fk_tts_result_run_id",
            ondelete="CASCADE",
        ),
        # Name the remaining FK constraints explicitly (matching the run FK
        # above) so constraint names are stable across environments instead of
        # relying on backend-generated names.
        sa.ForeignKeyConstraint(
            ["organization_id"],
            ["organization.id"],
            name="fk_tts_result_organization_id",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["project_id"],
            ["project.id"],
            name="fk_tts_result_project_id",
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    # Lookup of all results belonging to a run.
    op.create_index(
        "ix_tts_result_run_id",
        "tts_result",
        ["evaluation_run_id"],
        unique=False,
    )
    # Filtering a run's results by human-feedback state.
    op.create_index(
        "idx_tts_result_feedback",
        "tts_result",
        ["evaluation_run_id", "is_correct"],
        unique=False,
    )
    # Filtering a run's results by synthesis status.
    op.create_index(
        "idx_tts_result_status",
        "tts_result",
        ["evaluation_run_id", "status"],
        unique=False,
    )


def downgrade():
    """Drop the ``tts_result`` indexes and table (reverse of upgrade)."""
    # Drop indexes in the reverse order of their creation, then the table.
    for index_name in (
        "idx_tts_result_status",
        "idx_tts_result_feedback",
        "ix_tts_result_run_id",
    ):
        op.drop_index(index_name, table_name="tts_result")
    op.drop_table("tts_result")
4 changes: 4 additions & 0 deletions backend/app/api/docs/tts_evaluation/create_dataset.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Create a new TTS evaluation dataset with text samples.

Each sample requires:
- **text**: Text string to be synthesized into speech
3 changes: 3 additions & 0 deletions backend/app/api/docs/tts_evaluation/get_dataset.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Get a TTS evaluation dataset by ID.

Returns dataset metadata including sample count.
3 changes: 3 additions & 0 deletions backend/app/api/docs/tts_evaluation/get_result.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Get a single TTS synthesis result by ID.

Returns the result including audio URL, metadata, and human feedback status.
6 changes: 6 additions & 0 deletions backend/app/api/docs/tts_evaluation/get_run.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Get a TTS evaluation run by ID with optional results.

Use query parameters to control result inclusion and pagination:
- `include_results`: Include synthesis results (default: true)
- `result_limit` / `result_offset`: Paginate results
- `provider` / `status`: Filter results
3 changes: 3 additions & 0 deletions backend/app/api/docs/tts_evaluation/list_datasets.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
List all TTS evaluation datasets for the current project.

Supports pagination with `limit` and `offset` parameters.
3 changes: 3 additions & 0 deletions backend/app/api/docs/tts_evaluation/list_runs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
List TTS evaluation runs for the current project.

Supports filtering by `dataset_id` and `status`, with pagination via `limit` and `offset`.
8 changes: 8 additions & 0 deletions backend/app/api/docs/tts_evaluation/start_evaluation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Start a TTS evaluation run on a dataset.

The evaluation will:
1. Process each text sample through the specified TTS providers
2. Generate speech audio using the Gemini Batch API
3. Store WAV audio files in S3 for human review

**Supported providers:** gemini-2.5-pro-preview-tts
5 changes: 5 additions & 0 deletions backend/app/api/docs/tts_evaluation/update_feedback.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Update human feedback on a TTS synthesis result.

Fields:
- **is_correct**: Whether the synthesized audio quality is acceptable (null to clear)
- **comment**: Optional feedback comment
2 changes: 2 additions & 0 deletions backend/app/api/routes/evaluations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

from app.api.routes.evaluations import dataset, evaluation
from app.api.routes.stt_evaluations.router import router as stt_router
from app.api.routes.tts_evaluations.router import router as tts_router

# Aggregate evaluation router: mounts dataset, STT, TTS, and evaluation
# sub-routers under a single APIRouter for inclusion by the app.
router = APIRouter()

router.include_router(dataset.router)
router.include_router(stt_router)
router.include_router(tts_router)
router.include_router(evaluation.router)
1 change: 1 addition & 0 deletions backend/app/api/routes/tts_evaluations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""TTS Evaluation API routes."""
144 changes: 144 additions & 0 deletions backend/app/api/routes/tts_evaluations/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
"""TTS dataset API routes."""

import logging

from fastapi import APIRouter, Body, Depends, HTTPException, Query

from app.api.deps import AuthContextDep, SessionDep
from app.api.permissions import Permission, require_permission
from app.crud.language import get_language_by_id
from app.crud.tts_evaluations import (
get_tts_dataset_by_id,
list_tts_datasets,
)
from app.models.tts_evaluation import (
TTSDatasetCreate,
TTSDatasetPublic,
)
from app.services.tts_evaluations.dataset import upload_tts_dataset
from app.utils import APIResponse, load_description

logger = logging.getLogger(__name__)

router = APIRouter()


@router.post(
    "/datasets",
    response_model=APIResponse[TTSDatasetPublic],
    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
    summary="Create TTS dataset",
    description=load_description("tts_evaluation/create_dataset.md"),
)
def create_dataset(
    _session: SessionDep,
    auth_context: AuthContextDep,
    dataset_create: TTSDatasetCreate = Body(...),
) -> APIResponse[TTSDatasetPublic]:
    """Create a TTS evaluation dataset for the current project.

    Rejects the request with HTTP 400 when an explicit ``language_id`` does
    not resolve to an existing language row; otherwise delegates persistence
    to the dataset service and returns the stored dataset.
    """
    language_id = dataset_create.language_id
    # Guard: a provided language reference must point at a real language.
    if language_id is not None and not get_language_by_id(
        session=_session, language_id=language_id
    ):
        raise HTTPException(
            status_code=400, detail="Invalid language_id: language not found"
        )

    dataset = upload_tts_dataset(
        session=_session,
        name=dataset_create.name,
        samples=dataset_create.samples,
        organization_id=auth_context.organization_.id,
        project_id=auth_context.project_.id,
        description=dataset_create.description,
        language_id=language_id,
    )

    # Project the stored row into the public response schema field by field.
    payload = TTSDatasetPublic(
        id=dataset.id,
        name=dataset.name,
        description=dataset.description,
        type=dataset.type,
        language_id=dataset.language_id,
        object_store_url=dataset.object_store_url,
        dataset_metadata=dataset.dataset_metadata,
        organization_id=dataset.organization_id,
        project_id=dataset.project_id,
        inserted_at=dataset.inserted_at,
        updated_at=dataset.updated_at,
    )
    return APIResponse.success_response(data=payload)


@router.get(
    "/datasets",
    response_model=APIResponse[list[TTSDatasetPublic]],
    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
    summary="List TTS datasets",
    description=load_description("tts_evaluation/list_datasets.md"),
)
def list_datasets(
    _session: SessionDep,
    auth_context: AuthContextDep,
    limit: int = Query(50, ge=1, le=100, description="Maximum results to return"),
    offset: int = Query(0, ge=0, description="Number of results to skip"),
) -> APIResponse[list[TTSDatasetPublic]]:
    """Return a paginated page of TTS datasets for the current project."""
    # Pagination arguments are shared between the query and the response
    # metadata, so build them once.
    page = {"limit": limit, "offset": offset}
    datasets, total = list_tts_datasets(
        session=_session,
        org_id=auth_context.organization_.id,
        project_id=auth_context.project_.id,
        **page,
    )
    return APIResponse.success_response(
        data=datasets,
        metadata={"total": total, **page},
    )


@router.get(
    "/datasets/{dataset_id}",
    response_model=APIResponse[TTSDatasetPublic],
    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
    summary="Get TTS dataset",
    description=load_description("tts_evaluation/get_dataset.md"),
)
def get_dataset(
    _session: SessionDep,
    auth_context: AuthContextDep,
    dataset_id: int,
) -> APIResponse[TTSDatasetPublic]:
    """Fetch a single TTS dataset scoped to the caller's org and project.

    Raises HTTP 404 when no matching dataset exists.
    """
    dataset = get_tts_dataset_by_id(
        session=_session,
        dataset_id=dataset_id,
        org_id=auth_context.organization_.id,
        project_id=auth_context.project_.id,
    )

    if not dataset:
        raise HTTPException(status_code=404, detail="Dataset not found")

    # The public schema mirrors the stored row attribute-for-attribute, so
    # build it by copying the named fields across.
    field_names = (
        "id",
        "name",
        "description",
        "type",
        "language_id",
        "object_store_url",
        "dataset_metadata",
        "organization_id",
        "project_id",
        "inserted_at",
        "updated_at",
    )
    payload = TTSDatasetPublic(
        **{field: getattr(dataset, field) for field in field_names}
    )
    sample_count = (dataset.dataset_metadata or {}).get("sample_count", 0)
    return APIResponse.success_response(
        data=payload,
        metadata={"sample_count": sample_count},
    )
Loading
Loading