From 7d4030cf7af7c7b28602c5df2b1a510e3bcac1e8 Mon Sep 17 00:00:00 2001
From: Kwaroran
Date: Sat, 14 Dec 2024 20:10:07 +0900
Subject: [PATCH] Re-add Gemini stream

---
 src/ts/model/modellist.ts | 24 ++++++++++++------------
 src/ts/process/request.ts | 56 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/src/ts/model/modellist.ts b/src/ts/model/modellist.ts
index d319f127..680ac909 100644
--- a/src/ts/model/modellist.ts
+++ b/src/ts/model/modellist.ts
@@ -768,7 +768,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-exp-1121',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.poolSupported],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.poolSupported, LLMFlags.hasStreaming],
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud,
     },
@@ -777,7 +777,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-exp-1206',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.poolSupported],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.poolSupported, LLMFlags.hasStreaming],
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud
     },
@@ -786,7 +786,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-2.0-flash-exp',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.poolSupported, LLMFlags.hasAudioInput, LLMFlags.hasVideoInput],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.poolSupported, LLMFlags.hasAudioInput, LLMFlags.hasVideoInput, LLMFlags.hasStreaming],
         parameters: ['temperature', 'top_k', 'top_p', 'presence_penalty', 'frequency_penalty'],
         tokenizer: LLMTokenizer.GoogleCloud,
         recommended: true
@@ -796,7 +796,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-1.5-pro-latest',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.hasStreaming],
         recommended: true,
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud
@@ -806,7 +806,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-1.5-flash',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.hasStreaming],
         recommended: true,
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud
@@ -846,7 +846,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-exp-1114',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.hasStreaming],
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud
     },
@@ -855,7 +855,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-1.5-pro-002',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.hasStreaming],
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud
     },
@@ -864,7 +864,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-1.5-flash-002',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.hasStreaming],
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud
     },
@@ -873,7 +873,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-pro',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.hasStreaming],
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud
     },
@@ -882,7 +882,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-pro-vision',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.hasStreaming],
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud
     },
@@ -891,7 +891,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-ultra',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.hasStreaming],
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud
     },
@@ -900,7 +900,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-ultra-vision',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt],
+        flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.hasStreaming],
         parameters: ['temperature', 'top_k', 'top_p'],
         tokenizer: LLMTokenizer.GoogleCloud
     },
diff --git a/src/ts/process/request.ts b/src/ts/process/request.ts
index fdbf48f9..2942bf5d 100644
--- a/src/ts/process/request.ts
+++ b/src/ts/process/request.ts
@@ -1609,9 +1609,65 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
     else if(arg.modelInfo.format === LLMFormat.VertexAIGemini){
         url =`https://${REGION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${arg.modelInfo.internalID}:streamGenerateContent`
     }
+    else if(arg.modelInfo.format === LLMFormat.GoogleCloud && arg.useStreaming){
+        url = `https://generativelanguage.googleapis.com/v1beta/models/${arg.modelInfo.internalID}:streamGenerateContent?key=${db.google.accessToken}`
+    }
     else{
         url = `https://generativelanguage.googleapis.com/v1beta/models/${arg.modelInfo.internalID}:generateContent?key=${db.google.accessToken}`
     }
+
+
+    if(arg.modelInfo.format === LLMFormat.GoogleCloud && arg.useStreaming){
+        headers['Content-Type'] = 'application/json'
+        const f = await fetchNative(url, {
+            headers: headers,
+            body: JSON.stringify(body),
+            method: 'POST',
+            chatId: arg.chatId,
+        })
+
+        if(f.status !== 200){
+            return {
+                type: 'fail',
+                result: await textifyReadableStream(f.body)
+            }
+        }
+
+        let fullResult:string = '' // raw response text accumulated across chunks
+
+        const stream = new TransformStream({
+            async transform(chunk, control) {
+                fullResult += chunk // chunks arrive pre-decoded via the TextDecoderStream below
+                try {
+                    let reformatted = fullResult // the body is one JSON array; close it so the prefix parses
+                    if(reformatted.endsWith(',')){
+                        reformatted = fullResult.slice(0, -1) + ']' // drop the trailing comma, close the array
+                    }
+                    if(!reformatted.endsWith(']')){
+                        reformatted = fullResult + ']' // mid-element: this parse fails and is retried next chunk
+                    }
+
+                    const data = JSON.parse(reformatted)
+
+                    let r = ''
+                    for(const d of data){
+                        r += d.candidates[0].content.parts[0].text
+                    }
+                    control.enqueue({
+                        '0': r // cumulative text so far, keyed by response index
+                    })
+                } catch (error) {
+                    console.log(error) // incomplete JSON prefix; wait for the next chunk
+                }
+            }
+        })
+
+        return {
+            type: 'streaming',
+            result: f.body.pipeThrough(new TextDecoderStream()).pipeThrough(stream) // decode first so multi-byte characters split across chunks survive
+        }
+    }
+
     const res = await globalFetch(url, {
         headers: headers,
         body: body,
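
Note on the streaming path: without `alt=sse`, the REST `streamGenerateContent`
endpoint returns one JSON array of response objects that grows as tokens are
generated, so at any moment the buffered body is that array missing at most its
closing bracket, or ending in a comma after a completed element. The
TransformStream above exploits this by closing the buffer and reparsing it on
every chunk. A minimal standalone sketch of the same trick, mirroring the
patch's logic (`parseGrowingArray` and the sample chunks are illustrative, not
names from the codebase):

    // Close off a growing JSON-array prefix and try to parse it.
    // Returns undefined while the buffer still ends mid-element.
    function parseGrowingArray(buffered: string): unknown[] | undefined {
        let candidate = buffered
        if (candidate.endsWith(',')) {
            // a comma after a completed element: swap it for the closing bracket
            candidate = candidate.slice(0, -1) + ']'
        } else if (!candidate.endsWith(']')) {
            candidate = candidate + ']'
        }
        try {
            return JSON.parse(candidate) as unknown[]
        } catch {
            return undefined // incomplete prefix; caller waits for more bytes
        }
    }

    // Feeding it cumulative text yields the latest complete snapshot:
    let collected = ''
    for (const chunk of ['[{"n":1}', ',{"n":2},', '{"n":3}]']) {
        collected += chunk
        const parsed = parseGrowingArray(collected)
        if (parsed) console.log(parsed.length) // logs 1, then 2, then 3
    }

Because the parse always covers the whole buffer, each enqueue carries the full
text generated so far rather than a delta, so a consumer should replace its
display text rather than append to it.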