From d81e8f50b9d4ea76cbe2ee051d4f353dda854d2c Mon Sep 17 00:00:00 2001 From: nishika26 Date: Fri, 13 Feb 2026 01:02:21 +0530 Subject: [PATCH 1/5] UI: STT Evals --- .../stt/datasets/[dataset_id]/route.ts | 26 ++++++++ app/api/evaluations/stt/datasets/route.ts | 64 +++++++++++++++++++ app/api/evaluations/stt/files/route.ts | 49 ++++++++++++++ .../stt/results/[result_id]/route.ts | 26 ++++++++ .../evaluations/stt/runs/[run_id]/route.ts | 26 ++++++++ app/api/evaluations/stt/runs/route.ts | 64 +++++++++++++++++++ 6 files changed, 255 insertions(+) create mode 100644 app/api/evaluations/stt/datasets/[dataset_id]/route.ts create mode 100644 app/api/evaluations/stt/datasets/route.ts create mode 100644 app/api/evaluations/stt/files/route.ts create mode 100644 app/api/evaluations/stt/results/[result_id]/route.ts create mode 100644 app/api/evaluations/stt/runs/[run_id]/route.ts create mode 100644 app/api/evaluations/stt/runs/route.ts diff --git a/app/api/evaluations/stt/datasets/[dataset_id]/route.ts b/app/api/evaluations/stt/datasets/[dataset_id]/route.ts new file mode 100644 index 0000000..b01336b --- /dev/null +++ b/app/api/evaluations/stt/datasets/[dataset_id]/route.ts @@ -0,0 +1,26 @@ +import { NextResponse, NextRequest } from 'next/server'; + +export async function GET( + request: Request, + { params }: { params: Promise<{ dataset_id: string }> } +) { + const { dataset_id } = await params; + const backendUrl = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; + const apiKey = request.headers.get('X-API-KEY'); + + try { + const response = await fetch(`${backendUrl}/api/v1/evaluations/stt/datasets/${dataset_id}`, { + headers: { + 'X-API-KEY': apiKey || '', + }, + }); + + const data = await response.json(); + return NextResponse.json(data, { status: response.status }); + } catch (error) { + return NextResponse.json( + { success: false, error: 'Failed to fetch config', data: null }, + { status: 500 } + ); + } +} \ No newline at end of file diff --git 
a/app/api/evaluations/stt/datasets/route.ts b/app/api/evaluations/stt/datasets/route.ts new file mode 100644 index 0000000..ba41c5b --- /dev/null +++ b/app/api/evaluations/stt/datasets/route.ts @@ -0,0 +1,64 @@ +import { NextResponse, NextRequest } from 'next/server'; + + + +export async function GET(request: Request) { + const backendUrl = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; + const apiKey = request.headers.get('X-API-KEY'); + + try { + const response = await fetch(`${backendUrl}/api/v1/evaluations/stt/datasets`, { + headers: { + 'X-API-KEY': apiKey || '', + }, + }); + + const data = await response.json(); + return NextResponse.json(data); + } catch (error) { + return NextResponse.json( + { success: false, error: error.message, data: null }, + { status: 500 } + ); + } +} + + +export async function POST(request: NextRequest) { + try { + const apiKey = request.headers.get('X-API-KEY'); + if (!apiKey) { + return NextResponse.json({ + error: 'Missing X-API-KEY. Either generate an API Key. 
Contact Kaapi team for more details' + }, + { + status: 401 + } + + ) + } + const body=await request.json(); + const backendUrl=process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; + + const response=await fetch(`${backendUrl}/api/v1/evaluations/stt/datasets`, { + method:'POST', + body:JSON.stringify(body), + headers:{ + 'X-API-KEY':apiKey, + 'Content-Type':'application/json' + }, + }); + const data=await response.json(); + return NextResponse.json(data, {status:response.status}) + + + + } catch (error) { + console.error('Proxy error:', error); + return NextResponse.json( + {error:'Failed to forward request', details:error.message}, + {status:500} + ); + } + +} \ No newline at end of file diff --git a/app/api/evaluations/stt/files/route.ts b/app/api/evaluations/stt/files/route.ts new file mode 100644 index 0000000..ee93781 --- /dev/null +++ b/app/api/evaluations/stt/files/route.ts @@ -0,0 +1,49 @@ +import { NextRequest, NextResponse } from 'next/server'; + + +export async function POST(request: NextRequest) { + try { + // Get the API key from request headers + const apiKey = request.headers.get('X-API-KEY'); + + if (!apiKey) { + return NextResponse.json( + { error: 'Missing X-API-KEY header' }, + { status: 401 } + ); + } + + // Get the form data from the request + const formData = await request.formData(); + + // Get backend URL from environment variable + const backendUrl = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; + + // Forward the request to the actual backend + const response = await fetch(`${backendUrl}/api/v1/evaluations/stt/files`, { + method: 'POST', + body: formData, + headers: { + 'X-API-KEY': apiKey, + + }, + }); + + // Handle empty responses (204 No Content, etc.) + const text = await response.text(); + const data = text ? 
JSON.parse(text) : { success: true }; + + // Return the response with the same status code + if (!response.ok) { + return NextResponse.json(data, { status: response.status }); + } + + return NextResponse.json(data, { status: response.status }); + } catch (error: any) { + console.error('Proxy error:', error); + return NextResponse.json( + { error: 'Failed to forward request to backend', details: error.message }, + { status: 500 } + ); + } +} diff --git a/app/api/evaluations/stt/results/[result_id]/route.ts b/app/api/evaluations/stt/results/[result_id]/route.ts new file mode 100644 index 0000000..95ec3a0 --- /dev/null +++ b/app/api/evaluations/stt/results/[result_id]/route.ts @@ -0,0 +1,26 @@ +import { NextResponse, NextRequest } from 'next/server'; + +export async function GET( + request: Request, + { params }: { params: Promise<{ result_id: string }> } +) { + const { result_id } = await params; + const backendUrl = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; + const apiKey = request.headers.get('X-API-KEY'); + + try { + const response = await fetch(`${backendUrl}/api/v1/evaluations/stt/results/${result_id}`, { + headers: { + 'X-API-KEY': apiKey || '', + }, + }); + + const data = await response.json(); + return NextResponse.json(data, { status: response.status }); + } catch (error) { + return NextResponse.json( + { success: false, error: 'Failed to fetch config', data: null }, + { status: 500 } + ); + } +} \ No newline at end of file diff --git a/app/api/evaluations/stt/runs/[run_id]/route.ts b/app/api/evaluations/stt/runs/[run_id]/route.ts new file mode 100644 index 0000000..d7d708d --- /dev/null +++ b/app/api/evaluations/stt/runs/[run_id]/route.ts @@ -0,0 +1,26 @@ +import { NextResponse, NextRequest } from 'next/server'; + +export async function GET( + request: Request, + { params }: { params: Promise<{ run_id: string }> } +) { + const { run_id } = await params; + const backendUrl = process.env.NEXT_PUBLIC_BACKEND_URL || 
'http://localhost:8000'; + const apiKey = request.headers.get('X-API-KEY'); + + try { + const response = await fetch(`${backendUrl}/api/v1/evaluations/stt/runs/${run_id}`, { + headers: { + 'X-API-KEY': apiKey || '', + }, + }); + + const data = await response.json(); + return NextResponse.json(data, { status: response.status }); + } catch (error) { + return NextResponse.json( + { success: false, error: 'Failed to fetch config', data: null }, + { status: 500 } + ); + } +} \ No newline at end of file diff --git a/app/api/evaluations/stt/runs/route.ts b/app/api/evaluations/stt/runs/route.ts new file mode 100644 index 0000000..df41f40 --- /dev/null +++ b/app/api/evaluations/stt/runs/route.ts @@ -0,0 +1,64 @@ +import { NextResponse, NextRequest } from 'next/server'; + + + +export async function GET(request: Request) { + const backendUrl = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; + const apiKey = request.headers.get('X-API-KEY'); + + try { + const response = await fetch(`${backendUrl}/api/v1/evaluations/stt/runs`, { + headers: { + 'X-API-KEY': apiKey || '', + }, + }); + + const data = await response.json(); + return NextResponse.json(data); + } catch (error) { + return NextResponse.json( + { success: false, error: error.message, data: null }, + { status: 500 } + ); + } +} + + +export async function POST(request: NextRequest) { + try { + const apiKey = request.headers.get('X-API-KEY'); + if (!apiKey) { + return NextResponse.json({ + error: 'Missing X-API-KEY. Either generate an API Key. 
Contact Kaapi team for more details' + }, + { + status: 401 + } + + ) + } + const body=await request.json(); + const backendUrl=process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; + + const response=await fetch(`${backendUrl}/api/v1/evaluations/stt/runs`, { + method:'POST', + body:JSON.stringify(body), + headers:{ + 'X-API-KEY':apiKey, + 'Content-Type':'application/json' + }, + }); + const data=await response.json(); + return NextResponse.json(data, {status:response.status}) + + + + } catch (error) { + console.error('Proxy error:', error); + return NextResponse.json( + {error:'Failed to forward request', details:error.message}, + {status:500} + ); + } + +} \ No newline at end of file From bfbb91bde646acc1a9d3d280031f598aaae9c6e8 Mon Sep 17 00:00:00 2001 From: nishika26 Date: Mon, 23 Feb 2026 14:09:00 +0530 Subject: [PATCH 2/5] New UI for speech to text --- .../datasets/[dataset_id]/route.ts | 2 +- app/api/evaluations/datasets/route.ts | 4 +- .../stt/datasets/[dataset_id]/route.ts | 2 +- app/api/evaluations/stt/datasets/route.ts | 9 +- app/api/evaluations/stt/files/route.ts | 2 +- .../stt/results/[result_id]/route.ts | 2 +- .../evaluations/stt/runs/[run_id]/route.ts | 2 +- app/api/evaluations/stt/runs/route.ts | 6 +- app/api/speech-to-text/evaluate/route.ts | 49 - app/api/speech-to-text/parse-csv/route.ts | 69 - app/api/v1/audio/transcriptions/route.ts | 202 -- app/api/v1/evaluations/stt/wer/route.ts | 191 -- app/components/Sidebar.tsx | 4 +- app/speech-to-text/page.tsx | 3012 +++++++---------- 14 files changed, 1293 insertions(+), 2263 deletions(-) delete mode 100644 app/api/speech-to-text/evaluate/route.ts delete mode 100644 app/api/speech-to-text/parse-csv/route.ts delete mode 100644 app/api/v1/audio/transcriptions/route.ts delete mode 100644 app/api/v1/evaluations/stt/wer/route.ts diff --git a/app/api/evaluations/datasets/[dataset_id]/route.ts b/app/api/evaluations/datasets/[dataset_id]/route.ts index ae533ab..fb66ff6 100644 --- 
a/app/api/evaluations/datasets/[dataset_id]/route.ts +++ b/app/api/evaluations/datasets/[dataset_id]/route.ts @@ -51,7 +51,7 @@ export async function DELETE( } catch (error: any) { console.error('Proxy error:', error); return NextResponse.json( - { error: 'Failed to forward request to backend', details: error.message }, + { error: 'Failed to forward request to backend', details: error }, { status: 500 } ); } diff --git a/app/api/evaluations/datasets/route.ts b/app/api/evaluations/datasets/route.ts index 0952446..68b8437 100644 --- a/app/api/evaluations/datasets/route.ts +++ b/app/api/evaluations/datasets/route.ts @@ -40,7 +40,7 @@ export async function GET(request: NextRequest) { } catch (error: any) { console.error('Proxy error:', error); return NextResponse.json( - { error: 'Failed to forward request to backend', details: error.message }, + { error: 'Failed to forward request to backend', details: error }, { status: 500 } ); } @@ -92,7 +92,7 @@ export async function POST(request: NextRequest) { } catch (error: any) { console.error('Proxy error:', error); return NextResponse.json( - { error: 'Failed to forward request to backend', details: error.message }, + { error: 'Failed to forward request to backend', details: error }, { status: 500 } ); } diff --git a/app/api/evaluations/stt/datasets/[dataset_id]/route.ts b/app/api/evaluations/stt/datasets/[dataset_id]/route.ts index b01336b..f5f8284 100644 --- a/app/api/evaluations/stt/datasets/[dataset_id]/route.ts +++ b/app/api/evaluations/stt/datasets/[dataset_id]/route.ts @@ -19,7 +19,7 @@ export async function GET( return NextResponse.json(data, { status: response.status }); } catch (error) { return NextResponse.json( - { success: false, error: 'Failed to fetch config', data: null }, + { success: false, error: 'Failed to fetch dataset', data: null }, { status: 500 } ); } diff --git a/app/api/evaluations/stt/datasets/route.ts b/app/api/evaluations/stt/datasets/route.ts index ba41c5b..de42fc3 100644 --- 
a/app/api/evaluations/stt/datasets/route.ts +++ b/app/api/evaluations/stt/datasets/route.ts @@ -2,7 +2,8 @@ import { NextResponse, NextRequest } from 'next/server'; -export async function GET(request: Request) { +export async function GET(request: + Request) { const backendUrl = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; const apiKey = request.headers.get('X-API-KEY'); @@ -14,10 +15,10 @@ export async function GET(request: Request) { }); const data = await response.json(); - return NextResponse.json(data); + return NextResponse.json(data, { status: response.status }); } catch (error) { return NextResponse.json( - { success: false, error: error.message, data: null }, + { success: false, error: error, data: null }, { status: 500 } ); } @@ -56,7 +57,7 @@ export async function POST(request: NextRequest) { } catch (error) { console.error('Proxy error:', error); return NextResponse.json( - {error:'Failed to forward request', details:error.message}, + {error:'Failed to forward request', details:error}, {status:500} ); } diff --git a/app/api/evaluations/stt/files/route.ts b/app/api/evaluations/stt/files/route.ts index ee93781..8d8b8d6 100644 --- a/app/api/evaluations/stt/files/route.ts +++ b/app/api/evaluations/stt/files/route.ts @@ -42,7 +42,7 @@ export async function POST(request: NextRequest) { } catch (error: any) { console.error('Proxy error:', error); return NextResponse.json( - { error: 'Failed to forward request to backend', details: error.message }, + { error: 'Failed to forward request to backend', details: error }, { status: 500 } ); } diff --git a/app/api/evaluations/stt/results/[result_id]/route.ts b/app/api/evaluations/stt/results/[result_id]/route.ts index 95ec3a0..19e83c4 100644 --- a/app/api/evaluations/stt/results/[result_id]/route.ts +++ b/app/api/evaluations/stt/results/[result_id]/route.ts @@ -19,7 +19,7 @@ export async function GET( return NextResponse.json(data, { status: response.status }); } catch (error) { return 
NextResponse.json( - { success: false, error: 'Failed to fetch config', data: null }, + { success: false, error: 'Failed to fetch results', data: null }, { status: 500 } ); } diff --git a/app/api/evaluations/stt/runs/[run_id]/route.ts b/app/api/evaluations/stt/runs/[run_id]/route.ts index d7d708d..1e0368c 100644 --- a/app/api/evaluations/stt/runs/[run_id]/route.ts +++ b/app/api/evaluations/stt/runs/[run_id]/route.ts @@ -19,7 +19,7 @@ export async function GET( return NextResponse.json(data, { status: response.status }); } catch (error) { return NextResponse.json( - { success: false, error: 'Failed to fetch config', data: null }, + { success: false, error: 'Failed to fetch the run', data: null }, { status: 500 } ); } diff --git a/app/api/evaluations/stt/runs/route.ts b/app/api/evaluations/stt/runs/route.ts index df41f40..3072c0e 100644 --- a/app/api/evaluations/stt/runs/route.ts +++ b/app/api/evaluations/stt/runs/route.ts @@ -14,10 +14,10 @@ export async function GET(request: Request) { }); const data = await response.json(); - return NextResponse.json(data); + return NextResponse.json(data, { status: response.status }); } catch (error) { return NextResponse.json( - { success: false, error: error.message, data: null }, + { success: false, error: error, data: null }, { status: 500 } ); } @@ -56,7 +56,7 @@ export async function POST(request: NextRequest) { } catch (error) { console.error('Proxy error:', error); return NextResponse.json( - {error:'Failed to forward request', details:error.message}, + {error:'Failed to forward request', details:error}, {status:500} ); } diff --git a/app/api/speech-to-text/evaluate/route.ts b/app/api/speech-to-text/evaluate/route.ts deleted file mode 100644 index 8d05cd2..0000000 --- a/app/api/speech-to-text/evaluate/route.ts +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Speech-to-Text Evaluation API Route - * - * POST: Run STT evaluation on audio files with selected models - * Returns transcriptions and WER scores - */ - -import { 
NextRequest, NextResponse } from 'next/server'; - -const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; - -export async function POST(request: NextRequest) { - try { - const apiKey = request.headers.get('X-API-KEY'); - - if (!apiKey) { - return NextResponse.json( - { error: 'API key is required' }, - { status: 401 } - ); - } - - const body = await request.json(); - - // Forward the request to the backend - const response = await fetch(`${BACKEND_URL}/api/v1/speech-to-text/evaluate`, { - method: 'POST', - headers: { - 'X-API-KEY': apiKey, - 'Content-Type': 'application/json', - }, - body: JSON.stringify(body), - }); - - if (!response.ok) { - const errorData = await response.json().catch(() => ({ error: 'Unknown error' })); - return NextResponse.json(errorData, { status: response.status }); - } - - const data = await response.json(); - return NextResponse.json(data); - } catch (error) { - console.error('STT evaluation error:', error); - return NextResponse.json( - { error: 'Failed to run STT evaluation' }, - { status: 500 } - ); - } -} diff --git a/app/api/speech-to-text/parse-csv/route.ts b/app/api/speech-to-text/parse-csv/route.ts deleted file mode 100644 index ba4d6f5..0000000 --- a/app/api/speech-to-text/parse-csv/route.ts +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Speech-to-Text CSV Parsing API Route - * - * POST: Parse CSV file with audio_url, ground_truth columns - * Downloads audio files and returns base64 encoded data - */ - -import { NextRequest, NextResponse } from 'next/server'; - -const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; - -export async function POST(request: NextRequest) { - try { - const apiKey = request.headers.get('X-API-KEY'); - - if (!apiKey) { - console.error('[STT Parse CSV] Missing API key'); - return NextResponse.json( - { error: 'API key is required' }, - { status: 401 } - ); - } - - const formData = await request.formData(); - console.log('[STT Parse CSV] Forwarding 
request to backend...'); - - // Forward the request to the backend - const response = await fetch(`${BACKEND_URL}/api/v1/evaluations/stt/dataset`, { - method: 'POST', - headers: { - 'X-API-KEY': apiKey, - }, - body: formData, - }); - - console.log('[STT Parse CSV] Backend response status:', response.status); - - if (!response.ok) { - const errorData = await response.json().catch(() => ({ error: 'Unknown error' })); - console.error('[STT Parse CSV] Backend error:', errorData); - return NextResponse.json(errorData, { status: response.status }); - } - - const data = await response.json(); - console.log('[STT Parse CSV] Successfully parsed CSV, rows:', data.data?.success?.length || 0, 'errors:', data.data?.errors?.length || 0); - - // Transform the response to match frontend expectations - const transformedData = { - rows: [ - ...(data.data?.success || []), - ...(data.data?.errors || []).map((err: { row: number; audio_url: string; error: string }) => ({ - status: 'error', - row: err.row, - audio_url: err.audio_url, - error: err.error, - })), - ].sort((a, b) => a.row - b.row), - }; - - console.log('[STT Parse CSV] Transformed data:', transformedData.rows.length, 'total rows'); - return NextResponse.json(transformedData); - } catch (error) { - console.error('[STT Parse CSV] Error:', error); - return NextResponse.json( - { error: 'Failed to parse CSV file' }, - { status: 500 } - ); - } -} diff --git a/app/api/v1/audio/transcriptions/route.ts b/app/api/v1/audio/transcriptions/route.ts deleted file mode 100644 index 61357ad..0000000 --- a/app/api/v1/audio/transcriptions/route.ts +++ /dev/null @@ -1,202 +0,0 @@ -/** - * Audio Transcriptions API Route - * - * POST: Transcribe audio files using multiple STT providers/models - * - * Request body: - * { - * "files": [ - * { - * "file_id": "abc123", - * "audio_base64": "base64_encoded_audio..." 
- * } - * ], - * "providers": [ - * {"provider": "openai", "model": "gpt-4o-transcribe"}, - * {"provider": "gemini", "model": "gemini-2.5-flash"} - * ] - * } - * - * Response: - * { - * "success": true, - * "data": { - * "success": [...transcription results...], - * "errors": [...failed transcriptions...], - * "total_tasks": 8, - * "processed": 8, - * "failed": 0 - * } - * } - */ - -import { NextRequest, NextResponse } from 'next/server'; - -const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; - -interface AudioFile { - file_id: string; - audio_base64: string; -} - -interface Provider { - provider: string; - model: string; -} - -interface TranscriptionRequest { - files: AudioFile[]; - providers: Provider[]; -} - -interface TranscriptionResult { - status: 'success' | 'error'; - file_id: string; - ground_truth: string | null; - provider: string; - model: string; - transcript?: string; - error?: string; -} - -interface TranscriptionResponse { - success: boolean; - data: { - success: TranscriptionResult[]; - errors: TranscriptionResult[]; - total_tasks: number; - processed: number; - failed: number; - }; - error: string | null; - metadata: Record | null; -} - -export async function POST(request: NextRequest) { - try { - const apiKey = request.headers.get('X-API-KEY'); - - if (!apiKey) { - console.error('[Audio Transcriptions] Missing API key'); - return NextResponse.json( - { - success: false, - error: 'API key is required', - data: null, - metadata: null - }, - { status: 401 } - ); - } - - const body: TranscriptionRequest = await request.json(); - - // Validate request body - if (!body.files || !Array.isArray(body.files) || body.files.length === 0) { - return NextResponse.json( - { - success: false, - error: 'At least one audio file is required', - data: null, - metadata: null - }, - { status: 400 } - ); - } - - if (!body.providers || !Array.isArray(body.providers) || body.providers.length === 0) { - return NextResponse.json( - { - 
success: false, - error: 'At least one provider/model is required', - data: null, - metadata: null - }, - { status: 400 } - ); - } - - // Validate each file has required fields - for (const file of body.files) { - if (!file.file_id || !file.audio_base64) { - return NextResponse.json( - { - success: false, - error: 'Each file must have file_id and audio_base64', - data: null, - metadata: null - }, - { status: 400 } - ); - } - } - - // Validate each provider has required fields - for (const provider of body.providers) { - if (!provider.provider || !provider.model) { - return NextResponse.json( - { - success: false, - error: 'Each provider must have provider and model fields', - data: null, - metadata: null - }, - { status: 400 } - ); - } - } - - console.log('[Audio Transcriptions] Processing request:', { - fileCount: body.files.length, - providerCount: body.providers.length, - totalTasks: body.files.length * body.providers.length, - }); - - // Forward the request to the backend - const response = await fetch(`${BACKEND_URL}/api/v1/audio/transcriptions`, { - method: 'POST', - headers: { - 'X-API-KEY': apiKey, - 'Content-Type': 'application/json', - }, - body: JSON.stringify(body), - }); - - console.log('[Audio Transcriptions] Backend response status:', response.status); - - if (!response.ok) { - const errorData = await response.json().catch(() => ({ error: 'Unknown error' })); - console.error('[Audio Transcriptions] Backend error:', errorData); - return NextResponse.json( - { - success: false, - error: errorData.error || errorData.detail || 'Backend error', - data: null, - metadata: null - }, - { status: response.status } - ); - } - - const data: TranscriptionResponse = await response.json(); - - console.log('[Audio Transcriptions] Success:', { - totalTasks: data.data?.total_tasks, - processed: data.data?.processed, - failed: data.data?.failed, - }); - - return NextResponse.json(data); - } catch (error) { - console.error('[Audio Transcriptions] Error:', error); - 
return NextResponse.json( - { - success: false, - error: 'Failed to process transcription request', - data: null, - metadata: null - }, - { status: 500 } - ); - } -} diff --git a/app/api/v1/evaluations/stt/wer/route.ts b/app/api/v1/evaluations/stt/wer/route.ts deleted file mode 100644 index 10f0e8f..0000000 --- a/app/api/v1/evaluations/stt/wer/route.ts +++ /dev/null @@ -1,191 +0,0 @@ -/** - * WER (Word Error Rate) Evaluation API Route - * - * POST: Calculate WER between ground truth and hypothesis transcriptions - * - * Request body: - * { - * "items": [ - * { - * "id": "unique_id", - * "ground_truth": "reference text", - * "hypothesis": "transcribed text", - * "model": "provider/model-name" - * } - * ], - * "mode": "both" // "strict", "lenient", or "both" - * } - * - * Response: - * { - * "success": true, - * "data": { - * "results": [...per item WER results...], - * "summary": { strict: {...}, lenient: {...} }, - * "total_items": 2, - * "processed": 2 - * } - * } - */ - -import { NextRequest, NextResponse } from 'next/server'; - -const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'; - -interface WerItem { - id: string; - ground_truth: string; - hypothesis: string; - model?: string; -} - -interface WerRequest { - items: WerItem[]; - mode?: 'strict' | 'lenient' | 'both'; -} - -interface WerMetrics { - wer: number; - substitutions: number; - deletions: number; - insertions: number; - semantic_errors: number; - reference_word_count: number; - hypothesis_word_count: number; -} - -interface WerResult { - id: string; - ground_truth: string; - hypothesis: string; - strict: WerMetrics; - lenient: WerMetrics; -} - -interface WerSummary { - avg_wer: number; - min_wer: number; - max_wer: number; - avg_substitutions: number; - avg_deletions: number; - avg_insertions: number; - avg_semantic_errors: number; - total_reference_words: number; - total_hypothesis_words: number; -} - -interface WerResponse { - success: boolean; - data: { - results: 
WerResult[]; - summary: { - strict: WerSummary; - lenient: WerSummary; - }; - total_items: number; - processed: number; - }; - error: string | null; - metadata: Record | null; -} - -export async function POST(request: NextRequest) { - try { - const apiKey = request.headers.get('X-API-KEY'); - - if (!apiKey) { - console.error('[WER Evaluation] Missing API key'); - return NextResponse.json( - { - success: false, - error: 'API key is required', - data: null, - metadata: null - }, - { status: 401 } - ); - } - - const body: WerRequest = await request.json(); - - // Validate request body - if (!body.items || !Array.isArray(body.items) || body.items.length === 0) { - return NextResponse.json( - { - success: false, - error: 'At least one item is required', - data: null, - metadata: null - }, - { status: 400 } - ); - } - - // Validate each item has required fields - for (const item of body.items) { - if (!item.id || item.ground_truth === undefined || item.hypothesis === undefined) { - return NextResponse.json( - { - success: false, - error: 'Each item must have id, ground_truth, and hypothesis', - data: null, - metadata: null - }, - { status: 400 } - ); - } - } - - console.log('[WER Evaluation] Processing request:', { - itemCount: body.items.length, - }); - - // Forward the request to the backend - const response = await fetch(`${BACKEND_URL}/api/v1/evaluations/stt/wer`, { - method: 'POST', - headers: { - 'X-API-KEY': apiKey, - 'Content-Type': 'application/json', - }, - body: JSON.stringify(body), - }); - - console.log('[WER Evaluation] Backend response status:', response.status); - - if (!response.ok) { - const errorData = await response.json().catch(() => ({ error: 'Unknown error' })); - console.error('[WER Evaluation] Backend error:', errorData); - return NextResponse.json( - { - success: false, - error: errorData.error || errorData.detail || 'Backend error', - data: null, - metadata: null - }, - { status: response.status } - ); - } - - const data: WerResponse = await 
response.json(); - - console.log('[WER Evaluation] Success:', { - totalItems: data.data?.total_items, - processed: data.data?.processed, - avgStrictWer: data.data?.summary?.strict?.avg_wer, - avgLenientWer: data.data?.summary?.lenient?.avg_wer, - }); - - return NextResponse.json(data); - } catch (error) { - console.error('[WER Evaluation] Error:', error); - return NextResponse.json( - { - success: false, - error: 'Failed to process WER evaluation request', - data: null, - metadata: null - }, - { status: 500 } - ); - } -} diff --git a/app/components/Sidebar.tsx b/app/components/Sidebar.tsx index c9e060a..0f0c6d6 100644 --- a/app/components/Sidebar.tsx +++ b/app/components/Sidebar.tsx @@ -1,5 +1,5 @@ /** - * Sidebar - Navigation sidebar with collapse/expand functionality + * Sidebar - Navigation sidebar with collapse/expand functionality * Provides hierarchical navigation with expandable submenus */ @@ -68,7 +68,7 @@ export default function Sidebar({ collapsed, activeRoute = '/evaluations' }: Sid name: 'Evaluations', submenu: [ { name: 'Text Generation', route: '/evaluations' }, - // { name: 'Speech-to-Text', route: '/speech-to-text', comingSoon:true }, + { name: 'Speech-to-Text', route: '/speech-to-text' }, // { name: 'Text-to-Speech', route: '/text-to-speech', comingSoon: true }, ] }, diff --git a/app/speech-to-text/page.tsx b/app/speech-to-text/page.tsx index b5bfc75..db40705 100644 --- a/app/speech-to-text/page.tsx +++ b/app/speech-to-text/page.tsx @@ -1,44 +1,22 @@ /** - * Speech-to-Text Evaluation Page - Redesigned + * Speech-to-Text Evaluation Page * - * Split-panel workbench layout: - * - Left Panel: Input configuration (audio files, model selection) - * - Right Panel: Live results with model comparison cards and diff viewer - * - * Features: - * - Real-time streaming results - * - Model comparison cards with best performer highlighting - * - Inline diff viewer for transcription comparison - * - Floating action bar + * Tab 1 - Datasets: Create datasets with 
audio uploads + * Tab 2 - Evaluations: Run and monitor STT evaluations */ "use client" -import { useState, useEffect, useRef, useCallback, useMemo } from 'react'; +import { useState, useEffect, useRef } from 'react'; import { colors } from '@/app/lib/colors'; import Sidebar from '@/app/components/Sidebar'; import { useToast } from '@/app/components/Toast'; import { APIKey, STORAGE_KEY } from '@/app/keystore/page'; -import ModelComparisonCard from '@/app/components/speech-to-text/ModelComparisonCard'; -import TranscriptionDiffViewer from '@/app/components/speech-to-text/TranscriptionDiffViewer'; import WaveformVisualizer from '@/app/components/speech-to-text/WaveformVisualizer'; -// Types -interface AudioNerdStats { - format: string; - codec: string; - mimeType: string; - durationMs: number; - durationFormatted: string; - sampleRate: number | null; - channels: number | null; - bitrate: number | null; - fileSize: number; - fileSizeFormatted: string; - base64Length: number; - compressionRatio: number | null; -} +type Tab = 'datasets' | 'evaluations'; -interface UploadedAudioFile { +// Types +interface AudioFile { id: string; file: File; name: string; @@ -46,78 +24,66 @@ interface UploadedAudioFile { base64: string; mediaType: string; groundTruth: string; + fileId?: string; // Backend file ID after upload } -interface ParsedRow { - status: 'success' | 'error'; - row: number; - audio_url: string; - ground_truth: string; - audio_base64?: string; - media_type?: string; - file_size?: number; - error?: string; -} - -interface ModelConfig { - id: string; +interface Dataset { + id: number; name: string; - provider: string; -} - -interface WerMetrics { - wer: number; - substitutions: number; - deletions: number; - insertions: number; - semantic_errors: number; - reference_word_count: number; - hypothesis_word_count: number; + description?: string; + type: string; + language_id: number | null; + object_store_url: string | null; + dataset_metadata: { + sample_count?: number; + 
[key: string]: any; + }; + organization_id: number; + project_id: number; + inserted_at: string; + updated_at: string; } -interface TranscriptionResult { - model: string; - text: string; - strict?: WerMetrics; - lenient?: WerMetrics; - status: 'success' | 'error' | 'pending'; - error?: string; +interface STTRun { + id: number; + run_name: string; + dataset_name: string; + dataset_id: number; + type: string; + language_id: number | null; + models: string[] | null; + status: string; + total_items: number; + score: { + [key: string]: any; + } | null; + error_message: string | null; + organization_id: number; + project_id: number; + inserted_at: string; + updated_at: string; } -interface EvaluationResult { - row: number; - fileId: string; - audio_url: string; - ground_truth: string; - transcriptions: Record; +interface STTResult { + id: number; + transcription: string | null; + provider: string; + status: string; + score: { + [key: string]: any; + } | null; + is_correct: boolean | null; + comment: string | null; + error_message: string | null; + stt_sample_id: number; + evaluation_run_id: number; + organization_id: number; + project_id: number; + inserted_at: string; + updated_at: string; + sampleName?: string; // Enriched field } -type InputMode = 'single' | 'batch'; -type ResultsView = 'cards' | 'table' | 'diff'; - -// Available STT Models -const STT_MODELS: ModelConfig[] = [ - { id: 'gemini:gemini-2.5-flash', name: 'Gemini 2.5 Flash', provider: 'Google' }, - { id: 'gemini:gemini-2.5-pro', name: 'Gemini 2.5 Pro', provider: 'Google' }, - { id: 'google-stt:chirp_3', name: 'Chirp 3', provider: 'Google' }, - { id: 'openai:gpt-4o-transcribe', name: 'GPT-4o Transcribe', provider: 'OpenAI' }, - { id: 'openai:whisper-1', name: 'Whisper-1', provider: 'OpenAI' }, - { id: 'ai4b:indic-conformer-600m-multilingual', name: 'Indic Conformer 600M', provider: 'AI4Bharat' }, -]; - -// Group models by provider -const MODEL_GROUPS = STT_MODELS.reduce((acc, model) => { - if 
(!acc[model.provider]) acc[model.provider] = []; - acc[model.provider].push(model); - return acc; -}, {} as Record); - -// Parse model ID -const parseModelId = (modelId: string) => { - const [provider, model] = modelId.split(':'); - return { provider, model }; -}; - // Audio Player Component with Waveform function AudioPlayer({ audioBase64, @@ -227,44 +193,78 @@ function AudioPlayer({ ); } +// Helper function to map language ID to language name +const getLanguageName = (languageId: number | null): string => { + const languageMap: Record = { + 1: 'English', + 2: 'Hindi', + }; + return languageId ? languageMap[languageId] || 'Unknown' : 'N/A'; +}; + +// Helper function to map language code to language ID +const getLanguageId = (languageCode: string): number => { + const languageCodeToIdMap: Record = { + 'en': 1, // English + 'hi': 2, // Hindi + }; + return languageCodeToIdMap[languageCode] || 1; // Default to English if not found +}; + +// Helper function to format status +const getStatusColor = (status: string) => { + switch (status.toLowerCase()) { + case 'completed': + return { bg: colors.bg.primary, border: colors.status.success, text: colors.status.success }; + case 'failed': + return { bg: colors.bg.primary, border: colors.status.error, text: colors.status.error }; + case 'running': + case 'processing': + return { bg: colors.bg.primary, border: colors.accent.primary, text: colors.accent.primary }; + default: + return { bg: colors.bg.primary, border: colors.border, text: colors.text.secondary }; + } +}; + export default function SpeechToTextPage() { const toast = useToast(); + // Tab state + const [activeTab, setActiveTab] = useState('datasets'); + // UI State const [sidebarCollapsed, setSidebarCollapsed] = useState(false); - const [leftPanelWidth, setLeftPanelWidth] = useState(380); - const [inputMode, setInputMode] = useState('single'); - const [resultsView, setResultsView] = useState('cards'); + const [leftPanelWidth] = useState(450); // API Keys const 
[apiKeys, setApiKeys] = useState([]); - // Single file mode - const [uploadedFiles, setUploadedFiles] = useState([]); + // Dataset form (Tab 1) + const [datasetName, setDatasetName] = useState(''); + const [datasetDescription, setDatasetDescription] = useState(''); + const [datasetLanguage, setDatasetLanguage] = useState('en'); + const [audioFiles, setAudioFiles] = useState([]); const [playingFileId, setPlayingFileId] = useState(null); + const [isCreating, setIsCreating] = useState(false); - // Batch mode - const [csvFile, setCsvFile] = useState(null); - const [parsedRows, setParsedRows] = useState([]); - const [isParsing, setIsParsing] = useState(false); - const [selectedRows, setSelectedRows] = useState>(new Set()); - - // Model selection - const [selectedModels, setSelectedModels] = useState(['openai:whisper-1']); + // Datasets list (both tabs) + const [datasets, setDatasets] = useState([]); + const [isLoadingDatasets, setIsLoadingDatasets] = useState(false); - // Processing state - const [isTranscribing, setIsTranscribing] = useState(false); - const [isEvaluating, setIsEvaluating] = useState(false); - const [evaluationResults, setEvaluationResults] = useState([]); - const [transcriptionProgress, setTranscriptionProgress] = useState({ current: 0, total: 0 }); + // Evaluation form (Tab 2) + const [evaluationName, setEvaluationName] = useState(''); + const [selectedDatasetId, setSelectedDatasetId] = useState(null); + const [selectedModel, setSelectedModel] = useState('gemini-2.5-pro'); + const [isRunning, setIsRunning] = useState(false); - // Selected result for diff view - const [selectedResultIndex, setSelectedResultIndex] = useState(0); - const [selectedModelForDiff, setSelectedModelForDiff] = useState(null); + // Evaluation runs (Tab 2) + const [runs, setRuns] = useState([]); + const [isLoadingRuns, setIsLoadingRuns] = useState(false); - // Expanded file details in batch mode - const [expandedFileId, setExpandedFileId] = useState(null); - const [fileNerdStats, 
setFileNerdStats] = useState>(new Map()); + // Result viewing + const [selectedRunId, setSelectedRunId] = useState(null); + const [results, setResults] = useState([]); + const [isLoadingResults, setIsLoadingResults] = useState(false); // Load API keys useEffect(() => { @@ -278,677 +278,402 @@ export default function SpeechToTextPage() { } }, []); - // Get ready file count - const readyFileCount = useMemo(() => { - if (inputMode === 'single') { - return uploadedFiles.filter(f => f.groundTruth.trim()).length; + // Load datasets + const loadDatasets = async () => { + if (apiKeys.length === 0) return; + + setIsLoadingDatasets(true); + try { + const response = await fetch('/api/evaluations/stt/datasets', { + headers: { 'X-API-KEY': apiKeys[0].key }, + }); + + if (!response.ok) throw new Error('Failed to load datasets'); + + const data = await response.json(); + + let datasetsList = []; + if (Array.isArray(data)) { + datasetsList = data; + } else if (data.datasets && Array.isArray(data.datasets)) { + datasetsList = data.datasets; + } else if (data.data && Array.isArray(data.data)) { + datasetsList = data.data; + } + + setDatasets(datasetsList); + } catch (error) { + console.error('Failed to load datasets:', error); + toast.error('Failed to load datasets'); + setDatasets([]); + } finally { + setIsLoadingDatasets(false); } - return parsedRows.filter(r => r.status === 'success' && selectedRows.has(r.row)).length; - }, [inputMode, uploadedFiles, parsedRows, selectedRows]); + }; - // Find best/worst model for a result - const getBestWorstModels = useCallback((result: EvaluationResult) => { - const models = Object.entries(result.transcriptions) - .filter(([_, t]) => t.status === 'success' && t.strict) - .map(([id, t]) => ({ id, wer: t.strict!.wer })); + // Load evaluation runs + const loadRuns = async () => { + if (apiKeys.length === 0) return; - if (models.length === 0) return { best: null, worst: null }; + setIsLoadingRuns(true); + try { + const response = await 
fetch('/api/evaluations/stt/runs', { + headers: { 'X-API-KEY': apiKeys[0].key }, + }); - const sorted = models.sort((a, b) => a.wer - b.wer); - return { - best: sorted[0]?.id || null, - worst: sorted[sorted.length - 1]?.id || null, - }; - }, []); + if (!response.ok) throw new Error('Failed to load runs'); - // Handle file upload - const handleAudioFileSelect = (event: React.ChangeEvent) => { - const files = event.target.files; - if (!files || files.length === 0) return; + const data = await response.json(); - const validTypes = ['.mp3', '.wav', '.m4a', '.ogg', '.flac', '.webm']; - const validFiles = Array.from(files).filter(file => - validTypes.some(ext => file.name.toLowerCase().endsWith(ext)) + let runsList = []; + if (Array.isArray(data)) { + runsList = data; + } else if (data.runs && Array.isArray(data.runs)) { + runsList = data.runs; + } else if (data.data && Array.isArray(data.data)) { + runsList = data.data; + } + + setRuns(runsList); + } catch (error) { + console.error('Failed to load runs:', error); + toast.error('Failed to load evaluation runs'); + setRuns([]); + } finally { + setIsLoadingRuns(false); + } + }; + + useEffect(() => { + loadDatasets(); + if (activeTab === 'evaluations') { + loadRuns(); + } + }, [apiKeys, activeTab]); + + // Auto-refresh runs every 10 seconds if there are running evaluations + useEffect(() => { + if (activeTab !== 'evaluations') return; + + const hasRunningEvals = runs.some(run => + run.status === 'running' || run.status === 'processing' || run.status === 'pending' ); - if (validFiles.length === 0) { - toast.error('Please select valid audio files (mp3, wav, m4a, ogg, flac, webm)'); + if (hasRunningEvals) { + const interval = setInterval(() => { + loadRuns(); + }, 10000); + return () => clearInterval(interval); + } + }, [runs, activeTab]); + + // Handle audio file selection and upload + const handleAudioFileSelect = async (event: React.ChangeEvent) => { + const files = event.target.files; + + if (!files || files.length === 0) 
return; + + if (apiKeys.length === 0) { + toast.error('Please add an API key in Keystore first'); return; } - validFiles.forEach(file => { + const validTypes = ['.mp3', '.wav', '.m4a', '.ogg', '.flac', '.webm']; + + for (const file of Array.from(files)) { + if (!validTypes.some(ext => file.name.toLowerCase().endsWith(ext))) { + toast.error(`${file.name}: Invalid file type`); + continue; + } + const reader = new FileReader(); - reader.onload = () => { - const result = reader.result as string; - const base64 = result.split(',')[1]; + const base64Promise = new Promise((resolve, reject) => { + reader.onload = () => { + const result = reader.result as string; + const base64 = result.split(',')[1]; + resolve(base64); + }; + reader.onerror = reject; + reader.readAsDataURL(file); + }); + + try { + const base64 = await base64Promise; + const localId = `${Date.now()}-${Math.random().toString(36).substring(2, 11)}`; - setUploadedFiles(prev => [...prev, { - id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, + setAudioFiles(prev => [...prev, { + id: localId, file, name: file.name, size: file.size, base64, mediaType: file.type || 'audio/mpeg', groundTruth: '', + fileId: undefined, }]); - }; - reader.readAsDataURL(file); - }); - event.target.value = ''; - }; + const formData = new FormData(); + formData.append('file', file); - // Handle CSV upload - const handleCsvFileSelect = async (event: React.ChangeEvent) => { - const file = event.target.files?.[0]; - if (!file || !file.name.toLowerCase().endsWith('.csv')) { - toast.error('Please select a CSV file'); - return; - } + const uploadResponse = await fetch('/api/evaluations/stt/files', { + method: 'POST', + headers: { 'X-API-KEY': apiKeys[0].key }, + body: formData, + }); - if (apiKeys.length === 0) { - toast.error('Please add an API key in Keystore first'); - return; - } + if (!uploadResponse.ok) { + const errorText = await uploadResponse.text(); + console.error(`Upload failed with status ${uploadResponse.status}:`, 
errorText); + throw new Error(`Upload failed: ${uploadResponse.status}`); + } - setCsvFile(file); - setParsedRows([]); - setSelectedRows(new Set()); - setIsParsing(true); + const uploadData = await uploadResponse.json(); + console.log('Backend upload response:', uploadData); - try { - const formData = new FormData(); - formData.append('file', file); + const backendFileId = uploadData.file_id || uploadData.id || uploadData.data?.file_id || uploadData.data?.id; - const response = await fetch('/api/speech-to-text/parse-csv', { - method: 'POST', - headers: { 'X-API-KEY': apiKeys[0].key }, - body: formData, - }); + if (!backendFileId) { + console.error('No file ID found in response. Full response:', JSON.stringify(uploadData, null, 2)); + throw new Error(`No file ID returned from backend. Response: ${JSON.stringify(uploadData)}`); + } - if (!response.ok) throw new Error(`Failed to parse CSV: ${response.status}`); + setAudioFiles(prev => prev.map(f => + f.id === localId ? { ...f, fileId: backendFileId } : f + )); - const data = await response.json(); - setParsedRows(data.rows || []); + toast.success(`${file.name} uploaded`); + } catch (error) { + console.error(`Error uploading ${file.name}:`, error); + toast.error(`Failed to upload ${file.name}`); + setAudioFiles(prev => prev.filter(f => f.name !== file.name)); + } + } - const successfulRows = (data.rows || []).filter((r: ParsedRow) => r.status === 'success'); - setSelectedRows(new Set(successfulRows.map((r: ParsedRow) => r.row))); + event.target.value = ''; + }; - toast.success(`Parsed ${successfulRows.length} audio files`); - } catch (error) { - toast.error('Failed to parse CSV file'); - } finally { - setIsParsing(false); - } + const triggerAudioUpload = () => { + const input = document.getElementById('audio-upload') as HTMLInputElement; + if (input) input.click(); }; - // Toggle model selection - triggers evaluation for new models if results exist - const toggleModel = async (modelId: string) => { - const isAdding = 
!selectedModels.includes(modelId); + const removeAudioFile = (id: string) => { + setAudioFiles(prev => prev.filter(f => f.id !== id)); + if (playingFileId === id) setPlayingFileId(null); + }; - // Update selection immediately - setSelectedModels(prev => - prev.includes(modelId) ? prev.filter(id => id !== modelId) : [...prev, modelId] - ); + const updateGroundTruth = (id: string, groundTruth: string) => { + setAudioFiles(prev => prev.map(f => + f.id === id ? { ...f, groundTruth } : f + )); + }; - // If we're adding a model and have existing results, run evaluation for this model - if (isAdding && evaluationResults.length > 0) { - await evaluateNewModel(modelId); + const handleCreateDataset = async () => { + if (!datasetName.trim()) { + toast.error('Please enter a dataset name'); + return; + } + + if (audioFiles.length === 0) { + toast.error('Please add at least one audio file'); + return; } - }; - // Evaluate a single new model against existing files - const evaluateNewModel = async (modelId: string) => { if (apiKeys.length === 0) { toast.error('Please add an API key in Keystore first'); return; } - // Get files from existing results - const files = evaluationResults.map(result => { - // Find the original file data - if (inputMode === 'single') { - const file = uploadedFiles.find(f => f.id === result.fileId); - return file ? { - file_id: result.fileId, - audio_base64: file.base64, - ground_truth: result.ground_truth, - } : null; - } else { - const row = parsedRows.find(r => `row-${r.row}` === result.fileId); - return row ? 
{ - file_id: result.fileId, - audio_base64: row.audio_base64 || '', - ground_truth: result.ground_truth, - } : null; - } - }).filter(Boolean) as { file_id: string; audio_base64: string; ground_truth: string }[]; - - if (files.length === 0) return; - - // Add pending state for new model to existing results - setEvaluationResults(prev => prev.map(result => ({ - ...result, - transcriptions: { - ...result.transcriptions, - [modelId]: { - model: modelId, - text: '', - status: 'pending' as const, - }, - }, - }))); + const filesNotUploaded = audioFiles.filter(f => !f.fileId); + if (filesNotUploaded.length > 0) { + toast.error(`${filesNotUploaded.length} file(s) still uploading. Please wait...`); + return; + } - const provider = parseModelId(modelId); + setIsCreating(true); try { - // Step 1: Transcription for new model only - const transcribeResponse = await fetch('/api/v1/audio/transcriptions', { + const samples = audioFiles.map(audioFile => ({ + file_id: audioFile.fileId!, + ground_truth: audioFile.groundTruth.trim() || undefined, + })); + + const createDatasetResponse = await fetch('/api/evaluations/stt/datasets', { method: 'POST', headers: { - 'Content-Type': 'application/json', 'X-API-KEY': apiKeys[0].key, + 'Content-Type': 'application/json', }, body: JSON.stringify({ - files: files.map(f => ({ file_id: f.file_id, audio_base64: f.audio_base64 })), - providers: [provider], + name: datasetName.trim(), + description: datasetDescription.trim() || undefined, + language_id: getLanguageId(datasetLanguage), + samples: samples, }), }); - if (!transcribeResponse.ok) { - throw new Error('Transcription failed'); + if (!createDatasetResponse.ok) { + const errorData = await createDatasetResponse.json(); + throw new Error(errorData.error || 'Failed to create dataset'); } - const transcribeData = await transcribeResponse.json(); - - // Update results with transcriptions - setEvaluationResults(prev => { - const updated = [...prev]; - - (transcribeData.data?.success || 
[]).forEach((result: any) => { - const resultIdx = updated.findIndex(r => r.fileId === result.file_id); - if (resultIdx !== -1) { - const modelKey = `${result.provider}:${result.model}`; - updated[resultIdx] = { - ...updated[resultIdx], - transcriptions: { - ...updated[resultIdx].transcriptions, - [modelKey]: { - model: modelKey, - text: result.transcript, - status: 'success', - }, - }, - }; - } - }); + await createDatasetResponse.json(); - (transcribeData.data?.errors || []).forEach((result: any) => { - const resultIdx = updated.findIndex(r => r.fileId === result.file_id); - if (resultIdx !== -1) { - const modelKey = `${result.provider}:${result.model}`; - updated[resultIdx] = { - ...updated[resultIdx], - transcriptions: { - ...updated[resultIdx].transcriptions, - [modelKey]: { - model: modelKey, - text: '', - status: 'error', - error: result.error, - }, - }, - }; - } - }); - - return updated; - }); + toast.success(`Dataset "${datasetName}" created successfully!`); - // Step 2: WER Evaluation for new model - // Get updated results to build WER items - const currentResults = evaluationResults; - const werItems = files.map(f => { - const result = currentResults.find(r => r.fileId === f.file_id); - // Find the transcription from the API response - const successResult = (transcribeData.data?.success || []).find( - (s: any) => s.file_id === f.file_id - ); - - if (!successResult || !f.ground_truth) return null; - - return { - id: `${f.file_id}_${modelId}`, - ground_truth: f.ground_truth, - hypothesis: successResult.transcript, - model: modelId.replace(':', '/'), - }; - }).filter(Boolean); - - if (werItems.length > 0) { - const werResponse = await fetch('/api/v1/evaluations/stt/wer', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'X-API-KEY': apiKeys[0].key, - }, - body: JSON.stringify({ - items: werItems, - mode: 'both', - }), - }); + setDatasetName(''); + setDatasetDescription(''); + setDatasetLanguage('en'); + setAudioFiles([]); - if 
(werResponse.ok) { - const werData = await werResponse.json(); - - const werResultsMap = new Map(); - (werData.data?.results || []).forEach((result: any) => { - werResultsMap.set(result.id, { strict: result.strict, lenient: result.lenient }); - }); - - // Update with WER metrics - setEvaluationResults(prev => prev.map(result => { - const werId = `${result.fileId}_${modelId}`; - const werResult = werResultsMap.get(werId); - - if (werResult && result.transcriptions[modelId]) { - return { - ...result, - transcriptions: { - ...result.transcriptions, - [modelId]: { - ...result.transcriptions[modelId], - strict: werResult.strict, - lenient: werResult.lenient, - }, - }, - }; - } - return result; - })); - } - } - - toast.success(`Added ${STT_MODELS.find(m => m.id === modelId)?.name || modelId}`); + await loadDatasets(); } catch (error) { - // Mark as error on failure - setEvaluationResults(prev => prev.map(result => ({ - ...result, - transcriptions: { - ...result.transcriptions, - [modelId]: { - model: modelId, - text: '', - status: 'error', - error: error instanceof Error ? error.message : 'Failed to evaluate', - }, - }, - }))); - toast.error(`Failed to evaluate ${STT_MODELS.find(m => m.id === modelId)?.name || modelId}`); + console.error('Failed to create dataset:', error); + toast.error(error); + } finally { + setIsCreating(false); } }; - // Run transcription and evaluation - const runEvaluation = async () => { - if (selectedModels.length === 0) { - toast.error('Please select at least one model'); - return; - } - + const handleRunEvaluation = async () => { if (apiKeys.length === 0) { toast.error('Please add an API key in Keystore first'); return; } - // Prepare files - const files = inputMode === 'single' - ? 
uploadedFiles.filter(f => f.groundTruth.trim()).map(f => ({ - file_id: f.id, - audio_base64: f.base64, - ground_truth: f.groundTruth, - audio_url: f.name, - })) - : parsedRows - .filter(r => r.status === 'success' && selectedRows.has(r.row)) - .map(r => ({ - file_id: `row-${r.row}`, - audio_base64: r.audio_base64 || '', - ground_truth: r.ground_truth, - audio_url: r.audio_url, - })); + if (!selectedDatasetId) { + toast.error('Please select a dataset'); + return; + } - if (files.length === 0) { - toast.error('No files ready for evaluation'); + if (!evaluationName.trim()) { + toast.error('Please enter an evaluation name'); return; } - setIsTranscribing(true); - setEvaluationResults([]); - setSelectedResultIndex(0); - setSelectedModelForDiff(null); - - // Initialize results with pending state - const initialResults: EvaluationResult[] = files.map((f, idx) => ({ - row: idx + 1, - fileId: f.file_id, - audio_url: f.audio_url, - ground_truth: f.ground_truth, - transcriptions: Object.fromEntries( - selectedModels.map(modelId => [modelId, { - model: modelId, - text: '', - status: 'pending' as const, - }]) - ), - })); - setEvaluationResults(initialResults); - - const providers = selectedModels.map(modelId => parseModelId(modelId)); - const totalTasks = files.length * providers.length; - setTranscriptionProgress({ current: 0, total: totalTasks }); + setIsRunning(true); try { - // Step 1: Transcription - const transcribeResponse = await fetch('/api/v1/audio/transcriptions', { + const response = await fetch('/api/evaluations/stt/runs', { method: 'POST', headers: { - 'Content-Type': 'application/json', 'X-API-KEY': apiKeys[0].key, + 'Content-Type': 'application/json', }, body: JSON.stringify({ - files: files.map(f => ({ file_id: f.file_id, audio_base64: f.audio_base64 })), - providers, + run_name: evaluationName.trim(), + dataset_id: selectedDatasetId, + model: selectedModel, }), }); - if (!transcribeResponse.ok) { - throw new Error('Transcription failed'); + if (!response.ok) { 
+ const errorData = await response.json(); + throw new Error(errorData.error || 'Failed to start evaluation'); } - const transcribeData = await transcribeResponse.json(); - setTranscriptionProgress({ current: totalTasks, total: totalTasks }); - - // Update results with transcriptions - const updatedResults = [...initialResults]; - - (transcribeData.data?.success || []).forEach((result: any) => { - const resultIdx = updatedResults.findIndex(r => r.fileId === result.file_id); - if (resultIdx !== -1) { - const modelKey = `${result.provider}:${result.model}`; - updatedResults[resultIdx].transcriptions[modelKey] = { - model: modelKey, - text: result.transcript, - status: 'success', - }; - } - }); - - (transcribeData.data?.errors || []).forEach((result: any) => { - const resultIdx = updatedResults.findIndex(r => r.fileId === result.file_id); - if (resultIdx !== -1) { - const modelKey = `${result.provider}:${result.model}`; - updatedResults[resultIdx].transcriptions[modelKey] = { - model: modelKey, - text: '', - status: 'error', - error: result.error, - }; - } - }); + await response.json(); - setEvaluationResults(updatedResults); - setIsTranscribing(false); - - // Step 2: WER Evaluation - setIsEvaluating(true); - - const werItems = updatedResults.flatMap(result => - Object.entries(result.transcriptions) - .filter(([_, t]) => t.status === 'success' && result.ground_truth) - .map(([modelKey, t]) => ({ - id: `${result.fileId}_${modelKey}`, - ground_truth: result.ground_truth, - hypothesis: t.text, - model: modelKey.replace(':', '/'), - file_id: result.fileId, - model_key: modelKey, - })) - ); - - if (werItems.length > 0) { - const werResponse = await fetch('/api/v1/evaluations/stt/wer', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'X-API-KEY': apiKeys[0].key, - }, - body: JSON.stringify({ - items: werItems.map(item => ({ - id: item.id, - ground_truth: item.ground_truth, - hypothesis: item.hypothesis, - model: item.model, - })), - mode: 'both', - 
}), - }); + toast.success(`Evaluation "${evaluationName}" started successfully!`); + setSelectedModel('gemini-2.5-pro'); - if (werResponse.ok) { - const werData = await werResponse.json(); - - const werResultsMap = new Map(); - (werData.data?.results || []).forEach((result: any) => { - werResultsMap.set(result.id, { strict: result.strict, lenient: result.lenient }); - }); - - // Update with WER metrics - const finalResults = updatedResults.map(result => { - const newTranscriptions = { ...result.transcriptions }; - Object.keys(newTranscriptions).forEach(modelKey => { - const werId = `${result.fileId}_${modelKey}`; - const werResult = werResultsMap.get(werId); - if (werResult) { - newTranscriptions[modelKey] = { - ...newTranscriptions[modelKey], - strict: werResult.strict, - lenient: werResult.lenient, - }; - } - }); - return { ...result, transcriptions: newTranscriptions }; - }); - - setEvaluationResults(finalResults); - } - } + setEvaluationName(''); + setSelectedDatasetId(null); - toast.success('Evaluation completed'); - setResultsView('table'); // Switch to table view after evaluation + await loadRuns(); } catch (error) { - toast.error(error instanceof Error ? 
error.message : 'Evaluation failed'); + console.error('Failed to run evaluation:', error); + toast.error(error); } finally { - setIsTranscribing(false); - setIsEvaluating(false); + setIsRunning(false); } }; - // Download results - const downloadResultsCSV = () => { - if (evaluationResults.length === 0) return; - - const escapeCSV = (value: any) => { - if (value === undefined || value === null) return ''; - const str = String(value); - if (str.includes(',') || str.includes('"') || str.includes('\n')) { - return `"${str.replace(/"/g, '""')}"`; - } - return str; - }; - - // Build comprehensive headers for each model - const modelHeaders = selectedModels.flatMap(m => { - const name = STT_MODELS.find(model => model.id === m)?.name || m.split(':')[1]; - return [ - // Strict mode metrics - `${name}_Strict_WER`, - `${name}_Strict_Substitutions`, - `${name}_Strict_Deletions`, - `${name}_Strict_Insertions`, - `${name}_Strict_Semantic_Errors`, - // Lenient mode metrics - `${name}_Lenient_WER`, - `${name}_Lenient_Substitutions`, - `${name}_Lenient_Deletions`, - `${name}_Lenient_Insertions`, - `${name}_Lenient_Semantic_Errors`, - // Word counts - `${name}_Reference_Words`, - `${name}_Hypothesis_Words`, - // Transcription - `${name}_Transcription`, - ]; - }); - - const headers = ['Row', 'Audio_URL', 'Ground_Truth', ...modelHeaders]; - - const rows = evaluationResults.map(result => { - const modelValues = selectedModels.flatMap(m => { - const t = result.transcriptions[m]; - const strict = t?.strict; - const lenient = t?.lenient; - - return [ - // Strict mode metrics - strict?.wer !== undefined ? (strict.wer * 100).toFixed(2) : 'N/A', - strict?.substitutions ?? 'N/A', - strict?.deletions ?? 'N/A', - strict?.insertions ?? 'N/A', - strict?.semantic_errors ?? 'N/A', - // Lenient mode metrics - lenient?.wer !== undefined ? (lenient.wer * 100).toFixed(2) : 'N/A', - lenient?.substitutions ?? 'N/A', - lenient?.deletions ?? 'N/A', - lenient?.insertions ?? 
'N/A', - lenient?.semantic_errors ?? 'N/A', - // Word counts - strict?.reference_word_count ?? 'N/A', - strict?.hypothesis_word_count ?? 'N/A', - // Transcription - escapeCSV(t?.text || ''), - ]; - }); - - return [result.row, escapeCSV(result.audio_url), escapeCSV(result.ground_truth), ...modelValues].join(','); - }); - - const csvContent = [headers.join(','), ...rows].join('\n'); - const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' }); - const url = URL.createObjectURL(blob); - const link = document.createElement('a'); - link.href = url; - link.download = `stt-evaluation-${new Date().toISOString().slice(0, 10)}.csv`; - document.body.appendChild(link); - link.click(); - document.body.removeChild(link); - URL.revokeObjectURL(url); - }; - - // Format file size const formatFileSize = (bytes: number) => { if (bytes < 1024) return `${bytes} B`; if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; }; - // Compute nerd stats from audio base64 (client-side) - const computeNerdStats = async (fileId: string, audioBase64: string, mediaType: string) => { - if (fileNerdStats.has(fileId)) return; // Already computed + // Load results for a specific run + const loadResults = async (runId: number) => { + if (apiKeys.length === 0) return; + setIsLoadingResults(true); try { - // Get codec info from media type - const getCodecInfo = (type: string): { format: string; codec: string } => { - const typeMap: Record = { - 'audio/ogg': { format: 'OGG', codec: 'Opus/Vorbis' }, - 'audio/opus': { format: 'OGG', codec: 'Opus' }, - 'audio/mpeg': { format: 'MP3', codec: 'MPEG Layer III' }, - 'audio/mp3': { format: 'MP3', codec: 'MPEG Layer III' }, - 'audio/wav': { format: 'WAV', codec: 'PCM' }, - 'audio/wave': { format: 'WAV', codec: 'PCM' }, - 'audio/x-wav': { format: 'WAV', codec: 'PCM' }, - 'audio/webm': { format: 'WebM', codec: 'Opus/Vorbis' }, - 'audio/flac': { format: 'FLAC', codec: 'FLAC' }, - 
'audio/x-flac': { format: 'FLAC', codec: 'FLAC' }, - 'audio/aac': { format: 'AAC', codec: 'AAC-LC' }, - 'audio/mp4': { format: 'M4A', codec: 'AAC' }, - 'audio/x-m4a': { format: 'M4A', codec: 'AAC' }, - }; - return typeMap[type.toLowerCase()] || { format: type.split('/')[1]?.toUpperCase() || 'Unknown', codec: 'Unknown' }; - }; + // Fetch run details with results + const runResponse = await fetch(`/api/evaluations/stt/runs/${runId}?include_results=true`, { + headers: { 'X-API-KEY': apiKeys[0].key }, + }); - // Decode base64 to get file size - const binaryString = atob(audioBase64); - const fileSize = binaryString.length; + if (!runResponse.ok) throw new Error('Failed to load results'); + + const runData = await runResponse.json(); + console.log('Run API Response:', runData); + + // Extract results + let resultsList = []; + if (Array.isArray(runData)) { + resultsList = runData; + } else if (runData.results && Array.isArray(runData.results)) { + resultsList = runData.results; + } else if (runData.data && Array.isArray(runData.data)) { + resultsList = runData.data; + } else if (runData.data && runData.data.results && Array.isArray(runData.data.results)) { + resultsList = runData.data.results; + } - // Create audio element to get duration - const audio = new Audio(`data:${mediaType};base64,${audioBase64}`); + // Get dataset_id from the run + const datasetId = runData.dataset_id || runData.data?.dataset_id; - await new Promise((resolve, reject) => { - audio.addEventListener('loadedmetadata', () => resolve()); - audio.addEventListener('error', () => reject(new Error('Failed to load audio'))); - }); + if (datasetId) { + // Fetch dataset with samples + const datasetResponse = await fetch(`/api/evaluations/stt/datasets/${datasetId}?include_samples=true`, { + headers: { 'X-API-KEY': apiKeys[0].key }, + }); - const { format, codec } = getCodecInfo(mediaType); - const durationMs = Math.round(audio.duration * 1000); - const bitrate = audio.duration > 0 ? 
Math.round((fileSize * 8) / audio.duration) : null; - - // Format duration as mm:ss.ms - const mins = Math.floor(audio.duration / 60); - const secs = Math.floor(audio.duration % 60); - const ms = Math.round((audio.duration % 1) * 1000); - const durationFormatted = `${mins}:${secs.toString().padStart(2, '0')}.${ms.toString().padStart(3, '0')}`; - - // Format file size - const formatSize = (bytes: number) => { - if (bytes < 1024) return `${bytes} B`; - if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; - return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; - }; - - const stats: AudioNerdStats = { - format, - codec, - mimeType: mediaType, - durationMs, - durationFormatted, - sampleRate: null, // Web Audio API doesn't expose this easily - channels: null, - bitrate, - fileSize, - fileSizeFormatted: formatSize(fileSize), - base64Length: audioBase64.length, - compressionRatio: null, - }; - - setFileNerdStats(prev => new Map(prev).set(fileId, stats)); - } catch (error) { - console.error('Failed to compute nerd stats:', error); - } - }; + if (datasetResponse.ok) { + const datasetData = await datasetResponse.json(); + console.log('Dataset API Response:', datasetData); - // Toggle file details expansion - const toggleFileDetails = (fileId: string, audioBase64?: string, mediaType?: string) => { - if (expandedFileId === fileId) { - setExpandedFileId(null); - } else { - setExpandedFileId(fileId); - if (audioBase64 && mediaType && !fileNerdStats.has(fileId)) { - computeNerdStats(fileId, audioBase64, mediaType); + // Extract samples + let samples = []; + if (datasetData.samples && Array.isArray(datasetData.samples)) { + samples = datasetData.samples; + } else if (datasetData.data && datasetData.data.samples && Array.isArray(datasetData.data.samples)) { + samples = datasetData.data.samples; + } + + // Create a map of sample_id to sample name + const sampleMap = new Map(); + samples.forEach((sample: any) => { + const sampleName = 
sample.sample_metadata?.original_filename || + sample.metadata?.original_filename || + `Sample ${sample.id}`; + sampleMap.set(sample.id, sampleName); + }); + + // Enrich results with sample names + resultsList = resultsList.map((result: any) => ({ + ...result, + sampleName: sampleMap.get(result.stt_sample_id) || '-' + })); + } } + + console.log('Enriched results:', resultsList); + setResults(resultsList); + setSelectedRunId(runId); + } catch (error) { + console.error('Failed to load results:', error); + toast.error('Failed to load evaluation results'); + setResults([]); + } finally { + setIsLoadingResults(false); } }; - // Current result for diff view - const currentResult = evaluationResults[selectedResultIndex]; + const selectedDataset = datasets.find(d => d.id === selectedDatasetId); return (
@@ -980,1109 +705,924 @@ export default function SpeechToTextPage() {

+ - {/* Results View Toggle */} - {evaluationResults.length > 0 && ( -
- {[ - { id: 'table', label: 'Table', icon: 'M3 4a1 1 0 011-1h12a1 1 0 011 1v2a1 1 0 01-1 1H4a1 1 0 01-1-1V4zm0 6a1 1 0 011-1h12a1 1 0 011 1v2a1 1 0 01-1 1H4a1 1 0 01-1-1v-2zm0 6a1 1 0 011-1h12a1 1 0 011 1v2a1 1 0 01-1 1H4a1 1 0 01-1-1v-2z' }, - { id: 'cards', label: 'Cards', icon: 'M4 5a1 1 0 011-1h4a1 1 0 011 1v4a1 1 0 01-1 1H5a1 1 0 01-1-1V5zm10 0a1 1 0 011-1h4a1 1 0 011 1v4a1 1 0 01-1 1h-4a1 1 0 01-1-1V5zM4 15a1 1 0 011-1h4a1 1 0 011 1v4a1 1 0 01-1 1H5a1 1 0 01-1-1v-4zm10 0a1 1 0 011-1h4a1 1 0 011 1v4a1 1 0 01-1 1h-4a1 1 0 01-1-1v-4z' }, - { id: 'diff', label: 'Diff', icon: 'M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2' }, - ].map(view => ( - - ))} -
- )} + {/* Tab Navigation */} +
+ +
- {/* Main Content - Split Panel */} -
- {/* Left Panel - Configuration */} -
setActiveTab('evaluations')} + /> + ) : ( + + )} +
+
+ + ); +} + +// ============ DATASETS TAB COMPONENT ============ +interface DatasetsTabProps { + leftPanelWidth: number; + datasetName: string; + setDatasetName: (name: string) => void; + datasetDescription: string; + setDatasetDescription: (desc: string) => void; + datasetLanguage: string; + setDatasetLanguage: (lang: string) => void; + audioFiles: AudioFile[]; + playingFileId: string | null; + setPlayingFileId: (id: string | null) => void; + handleAudioFileSelect: (event: React.ChangeEvent) => void; + triggerAudioUpload: () => void; + removeAudioFile: (id: string) => void; + updateGroundTruth: (id: string, groundTruth: string) => void; + formatFileSize: (bytes: number) => string; + isCreating: boolean; + handleCreateDataset: () => void; + datasets: Dataset[]; + isLoadingDatasets: boolean; + loadDatasets: () => void; + onRunEvaluation: () => void; +} + +function DatasetsTab({ + leftPanelWidth, + datasetName, + setDatasetName, + datasetDescription, + setDatasetDescription, + datasetLanguage, + setDatasetLanguage, + audioFiles, + playingFileId, + setPlayingFileId, + handleAudioFileSelect, + triggerAudioUpload, + removeAudioFile, + updateGroundTruth, + formatFileSize, + isCreating, + handleCreateDataset, + datasets, + isLoadingDatasets, + loadDatasets, + onRunEvaluation, +}: DatasetsTabProps) { + return ( +
+ {/* Left Panel - Dataset Creation Form */} +
+
+ {/* Dataset Information */} +
+ + setDatasetName(e.target.value)} + placeholder="e.g., English Podcast Dataset" + className="w-full px-3 py-2 border rounded-md text-sm" style={{ - width: `${leftPanelWidth}px`, backgroundColor: colors.bg.primary, borderColor: colors.border, + color: colors.text.primary, }} - > - {/* Input Mode Tabs */} -
- {(['single', 'batch'] as InputMode[]).map(mode => ( - - ))} -
- - {/* Panel Content */} -
- {/* Upload Section */} - {inputMode === 'single' ? ( -
-
document.getElementById('audio-upload')?.click()} - > - - - - -

- Add audio files -

-
+ /> +
- {/* Uploaded Files */} - {uploadedFiles.map((file, idx) => { - const isExpanded = expandedFileId === file.id; - const stats = fileNerdStats.get(file.id); +
+ +