feat: add BGE-m3-ko embedding

This commit is contained in:
Bo26fhmC5M
2025-05-18 14:31:35 +09:00
parent a41ac074db
commit ba150a0913
4 changed files with 40 additions and 15 deletions

View File

@@ -5,6 +5,7 @@
import SelectInput from "../UI/GUI/SelectInput.svelte"; import SelectInput from "../UI/GUI/SelectInput.svelte";
import Button from "../UI/GUI/Button.svelte"; import Button from "../UI/GUI/Button.svelte";
import { HypaProcesser } from "src/ts/process/memory/hypamemory"; import { HypaProcesser } from "src/ts/process/memory/hypamemory";
import { DBState } from "src/ts/stores.svelte"
let query = $state(""); let query = $state("");
let model = $state("MiniLM"); let model = $state("MiniLM");
@@ -27,24 +28,43 @@
<h2 class="text-4xl text-textcolor my-6 font-black relative">{language.embedding}</h2> <h2 class="text-4xl text-textcolor my-6 font-black relative">{language.embedding}</h2>
<span class="text-textcolor text-lg">Model</span> <span class="text-textcolor text-lg">Model</span>
<SelectInput bind:value={model}> <SelectInput bind:value={model} className="mb-4">
{#if 'gpu' in navigator}
<OptionInput value="MiniLMGPU">MiniLM L6 v2 (GPU)</OptionInput>
<OptionInput value="nomicGPU">Nomic Embed Text v1.5 (GPU)</OptionInput>
<OptionInput value="bgeSmallEnGPU">BGE Small English (GPU)</OptionInput>
<OptionInput value="bgem3GPU">BGE Medium 3 (GPU)</OptionInput>
<OptionInput value="multiMiniLMGPU">Multilingual MiniLM L12 v2 (GPU)</OptionInput>
<OptionInput value="bgeM3KoGPU">BGE Medium 3 Korean (GPU)</OptionInput>
{/if}
<OptionInput value="MiniLM">MiniLM L6 v2 (CPU)</OptionInput> <OptionInput value="MiniLM">MiniLM L6 v2 (CPU)</OptionInput>
<OptionInput value="nomic">Nomic Embed Text v1.5 (CPU)</OptionInput> <OptionInput value="nomic">Nomic Embed Text v1.5 (CPU)</OptionInput>
<OptionInput value="nomicGPU">Nomic Embed Text v1.5 (GPU)</OptionInput>
<OptionInput value="bgeSmallEn">BGE Small English (CPU)</OptionInput> <OptionInput value="bgeSmallEn">BGE Small English (CPU)</OptionInput>
<OptionInput value="bgeSmallEnGPU">BGE Small English (GPU)</OptionInput>
<OptionInput value="bgem3">BGE Medium 3 (CPU)</OptionInput> <OptionInput value="bgem3">BGE Medium 3 (CPU)</OptionInput>
<OptionInput value="bgem3GPU">BGE Medium 3 (GPU)</OptionInput> <OptionInput value="multiMiniLM">Multilingual MiniLM L12 v2 (CPU)</OptionInput>
<OptionInput value="bgeM3Ko">BGE Medium 3 Korean (CPU)</OptionInput>
<OptionInput value="openai3small">OpenAI text-embedding-3-small</OptionInput> <OptionInput value="openai3small">OpenAI text-embedding-3-small</OptionInput>
<OptionInput value="openai3large">OpenAI text-embedding-3-large</OptionInput> <OptionInput value="openai3large">OpenAI text-embedding-3-large</OptionInput>
<OptionInput value="ada">OpenAI Ada</OptionInput>
<OptionInput value="custom">Custom (OpenAI-compatible)</OptionInput> <OptionInput value="custom">Custom (OpenAI-compatible)</OptionInput>
</SelectInput> </SelectInput>
{#if model === "custom"} {#if model === 'openai3small' || model === 'openai3large' || model === 'ada'}
<span class="text-textcolor text-lg">Custom Server URL</span> <span class="text-textcolor text-lg">OpenAI API Key</span>
<TextInput bind:value={customEmbeddingUrl} size="lg" fullwidth /> <TextInput size="sm" marginBottom bind:value={DBState.db.supaMemoryKey}/>
{/if} {/if}
{#if model === "custom"}
<span class="text-textcolor text-lg">URL</span>
<TextInput size="sm" marginBottom bind:value={DBState.db.hypaCustomSettings.url}/>
<span class="text-textcolor text-lg">Key/Password</span>
<TextInput size="sm" marginBottom bind:value={DBState.db.hypaCustomSettings.key}/>
<span class="text-textcolor text-lg">Request Model</span>
<TextInput size="sm" marginBottom bind:value={DBState.db.hypaCustomSettings.model}/>
{/if}
<div class="mb-4"></div>
<span class="text-textcolor text-lg">Query</span> <span class="text-textcolor text-lg">Query</span>
<TextInput bind:value={query} size="lg" fullwidth /> <TextInput bind:value={query} size="lg" fullwidth />

View File

@@ -917,19 +917,21 @@
{/if} {/if}
<span class="text-textcolor">{language.embedding}</span> <span class="text-textcolor">{language.embedding}</span>
<SelectInput className="mt-2 mb-2" bind:value={DBState.db.hypaModel}> <SelectInput className="mb-4" bind:value={DBState.db.hypaModel}>
{#if 'gpu' in navigator} {#if 'gpu' in navigator}
<OptionInput value="MiniLMGPU">MiniLM L6 v2 (GPU)</OptionInput> <OptionInput value="MiniLMGPU">MiniLM L6 v2 (GPU)</OptionInput>
<OptionInput value="nomicGPU">Nomic Embed Text v1.5 (GPU)</OptionInput> <OptionInput value="nomicGPU">Nomic Embed Text v1.5 (GPU)</OptionInput>
<OptionInput value="bgeSmallEnGPU">BGE Small English (GPU)</OptionInput> <OptionInput value="bgeSmallEnGPU">BGE Small English (GPU)</OptionInput>
<OptionInput value="bgem3GPU">BGE Medium 3 (GPU)</OptionInput> <OptionInput value="bgem3GPU">BGE Medium 3 (GPU)</OptionInput>
<OptionInput value="multiMiniLMGPU">Multilingual MiniLM L12 v2 (GPU)</OptionInput> <OptionInput value="multiMiniLMGPU">Multilingual MiniLM L12 v2 (GPU)</OptionInput>
<OptionInput value="bgeM3KoGPU">BGE Medium 3 Korean (GPU)</OptionInput>
{/if} {/if}
<OptionInput value="MiniLM">MiniLM L6 v2 (CPU)</OptionInput> <OptionInput value="MiniLM">MiniLM L6 v2 (CPU)</OptionInput>
<OptionInput value="nomic">Nomic Embed Text v1.5 (CPU)</OptionInput> <OptionInput value="nomic">Nomic Embed Text v1.5 (CPU)</OptionInput>
<OptionInput value="bgeSmallEn">BGE Small English (CPU)</OptionInput> <OptionInput value="bgeSmallEn">BGE Small English (CPU)</OptionInput>
<OptionInput value="bgem3">BGE Medium 3 (CPU)</OptionInput> <OptionInput value="bgem3">BGE Medium 3 (CPU)</OptionInput>
<OptionInput value="multiMiniLM">Multilingual MiniLM L12 v2 (CPU)</OptionInput> <OptionInput value="multiMiniLM">Multilingual MiniLM L12 v2 (CPU)</OptionInput>
<OptionInput value="bgeM3Ko">BGE Medium 3 Korean (CPU)</OptionInput>
<OptionInput value="openai3small">OpenAI text-embedding-3-small</OptionInput> <OptionInput value="openai3small">OpenAI text-embedding-3-small</OptionInput>
<OptionInput value="openai3large">OpenAI text-embedding-3-large</OptionInput> <OptionInput value="openai3large">OpenAI text-embedding-3-large</OptionInput>
<OptionInput value="ada">OpenAI Ada</OptionInput> <OptionInput value="ada">OpenAI Ada</OptionInput>

View File

@@ -21,6 +21,8 @@ export const localModels = {
'bgem3GPU': 'Xenova/bge-m3', 'bgem3GPU': 'Xenova/bge-m3',
'multiMiniLM': 'Xenova/paraphrase-multilingual-MiniLM-L12-v2', 'multiMiniLM': 'Xenova/paraphrase-multilingual-MiniLM-L12-v2',
'multiMiniLMGPU': 'Xenova/paraphrase-multilingual-MiniLM-L12-v2', 'multiMiniLMGPU': 'Xenova/paraphrase-multilingual-MiniLM-L12-v2',
'bgeM3Ko': 'HyperBlaze/BGE-m3-ko',
'bgeM3KoGPU': 'HyperBlaze/BGE-m3-ko',
}, },
gpuModels:[ gpuModels:[
'MiniLMGPU', 'MiniLMGPU',
@@ -28,6 +30,7 @@ export const localModels = {
'bgeSmallEnGPU', 'bgeSmallEnGPU',
'bgem3GPU', 'bgem3GPU',
'multiMiniLMGPU', 'multiMiniLMGPU',
'bgeM3KoGPU',
] ]
} }

View File

@@ -66,7 +66,7 @@ export const runEmbedding = async (texts: string[], model:EmbeddingModel = 'Xeno
} }
extractor = await pipeline('feature-extraction', model, { extractor = await pipeline('feature-extraction', model, {
// Default dtype for webgpu is fp32, so we can use q8, which is the default dtype in wasm. // Default dtype for webgpu is fp32, so we can use q8, which is the default dtype in wasm.
...(device === 'webgpu' ? { dtype: "q8" } : {}), dtype: "q8",
device: device, device: device,
progress_callback: (progress) => { progress_callback: (progress) => {
console.log(progress) console.log(progress)