@@ -192,78 +192,87 @@ router.post('/:type/:id/v1/chat/completions', async (req, res, next) => {
192192 choices : [ { index : 0 , delta : { role : 'assistant' , content : '' } , finish_reason : null } ]
193193 } ) } \n\n`)
194194
// Stream the model output to the client as OpenAI-compatible SSE chunks.
// By this point an initial chunk has already been written, so any failure is
// reported in-band as an SSE `error` event instead of through an HTTP status.

/** Writes one JSON payload as a single SSE `data:` event. */
const writeEvent = (payload: unknown): void => {
  res.write(`data: ${JSON.stringify(payload)}\n\n`)
}

/**
 * Emits one `chat.completion.chunk` carrying the shared envelope fields.
 *
 * @param choice - Fields of the single choice (`delta`, `finish_reason`).
 * @param extra - Optional top-level fields appended after `choices` (e.g. `usage`).
 */
const writeChunk = (choice: Record<string, unknown>, extra: Record<string, unknown> = {}): void => {
  writeEvent({
    id: completionId,
    object: 'chat.completion.chunk',
    created,
    model: modelId,
    choices: [{ index: 0, ...choice }],
    ...extra
  })
}

// Clients reassemble streamed tool calls by `index`, so every tool call needs
// its own stable index; map the SDK's tool-call id to the order it first appeared.
const toolCallIndexes = new Map<string, number>()

try {
  for await (const part of result.fullStream) {
    switch (part.type) {
      case 'text-delta':
        writeChunk({ delta: { content: part.text }, finish_reason: null })
        break

      case 'tool-input-start': {
        const toolIndex = toolCallIndexes.size
        toolCallIndexes.set(part.id, toolIndex)
        writeChunk({
          delta: {
            tool_calls: [
              {
                index: toolIndex,
                id: part.id,
                type: 'function',
                function: { name: part.toolName, arguments: '' }
              }
            ]
          },
          finish_reason: null
        })
        break
      }

      case 'tool-input-delta':
        writeChunk({
          delta: {
            tool_calls: [
              {
                // Falls back to 0 if a delta arrives without a preceding start part.
                index: toolCallIndexes.get(part.id) ?? 0,
                function: { arguments: part.delta }
              }
            ]
          },
          finish_reason: null
        })
        break

      case 'error':
        // The AI SDK reports stream failures as `error` parts rather than
        // rejecting the iterator; rethrow so the catch below reports them.
        throw part.error

      case 'finish': {
        const usage = part.totalUsage

        // Record usage for streaming responses (apply ratio for quota accounting)
        const inputTokens = Math.round((usage?.inputTokens ?? 0) * ratio)
        const outputTokens = Math.round((usage?.outputTokens ?? 0) * ratio)
        if (inputTokens || outputTokens) {
          await recordUsage(owner, inputTokens, outputTokens, usageUserId)
        }

        // The usage reported to the client is the raw token count (no ratio applied).
        writeChunk(
          { delta: {}, finish_reason: mapFinishReason(part.finishReason as FinishReason) },
          {
            usage: usage
              ? {
                  prompt_tokens: usage.inputTokens ?? 0,
                  completion_tokens: usage.outputTokens ?? 0,
                  total_tokens: (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)
                }
              : undefined
          }
        )
        break
      }
    }
  }

  res.write('data: [DONE]\n\n')
  res.end()
} catch (streamErr: unknown) {
  const rawMessage = (streamErr as { message?: unknown } | null | undefined)?.message
  const message = typeof rawMessage === 'string' && rawMessage ? rawMessage : 'Stream error'

  // Only report in-band while the response is still open; writing after
  // `end()` would raise a secondary write-after-end error.
  if (!res.writableEnded) {
    writeEvent({ error: { message, type: 'server_error', code: null } })
    res.write('data: [DONE]\n\n')
    res.end()
  }
}
267276 } else {
268277 const result = await generateText ( {
269278 model,
0 commit comments