From 376fa1641b6bf6c9a3efa8b32ad5aeceaf328fe1 Mon Sep 17 00:00:00 2001
From: kwaroran
Date: Tue, 23 Apr 2024 20:41:08 +0900
Subject: [PATCH] Add nomic embedding

---
 src/ts/process/memory/hypamemory.ts | 8 ++++----
 src/ts/process/memory/termMemory.ts | 2 +-
 src/ts/process/transformers.ts      | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/ts/process/memory/hypamemory.ts b/src/ts/process/memory/hypamemory.ts
index 94ec6a88..9904885a 100644
--- a/src/ts/process/memory/hypamemory.ts
+++ b/src/ts/process/memory/hypamemory.ts
@@ -8,9 +8,9 @@ export class HypaProcesser{
     oaikey:string
     vectors:memoryVector[]
     forage:LocalForage
-    model:'ada'|'MiniLM'
+    model:'ada'|'MiniLM'|'nomic'
 
-    constructor(model:'ada'|'MiniLM'){
+    constructor(model:'ada'|'MiniLM'|'nomic'){
         this.forage = localforage.createInstance({
             name: "hypaVector"
         })
@@ -36,11 +36,11 @@ export class HypaProcesser{
 
     async getEmbeds(input:string[]|string) {
 
-        if(this.model === 'MiniLM'){
+        if(this.model === 'MiniLM' || this.model === 'nomic'){
            const inputs:string[] = Array.isArray(input) ? input : [input]
            let results:Float32Array[] = []
            for(let i=0;i<inputs.length;i++){
diff --git a/src/ts/process/memory/termMemory.ts b/src/ts/process/memory/termMemory.ts
     chat.content))
     let scoredResults:{[key:string]:number}
diff --git a/src/ts/process/transformers.ts b/src/ts/process/transformers.ts
index 27827777..1c64a547 100644
--- a/src/ts/process/transformers.ts
+++ b/src/ts/process/transformers.ts
@@ -50,12 +50,12 @@ export const runSummarizer = async (text: string) => {
 }
 
 let extractor:FeatureExtractionPipeline = null
-export const runEmbedding = async (text: string):Promise<Float32Array> => {
+export const runEmbedding = async (text: string, model:'Xenova/all-MiniLM-L6-v2'|'nomic-ai/nomic-embed-text-v1.5' = 'Xenova/all-MiniLM-L6-v2'):Promise<Float32Array> => {
     await initTransformers()
     if(!extractor){
-        extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
+        extractor = await pipeline('feature-extraction', model);
     }
-    const tokenizer = await AutoTokenizer.from_pretrained('Xenova/all-MiniLM-L6-v2');
+    const tokenizer = await AutoTokenizer.from_pretrained(model);
     const tokens = tokenizer.encode(text)
     if (tokens.length > 1024) {
         let chunks:string[] = []
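
The hunk that actually forwards the chosen model from HypaProcesser.getEmbeds into runEmbedding is cut off above. A minimal sketch of what that call path could look like under the new runEmbedding signature is below; it is an assumption, not the literal patch content, and the names LocalEmbedModel, modelIdFor and getLocalEmbeds are illustrative only.

```ts
// Hedged sketch: route the 'nomic' model through the updated runEmbedding.
// runEmbedding lives in src/ts/process/transformers.ts, so from the memory
// folder the relative import path would be '../transformers'.
import { runEmbedding } from '../transformers'

type LocalEmbedModel = 'MiniLM' | 'nomic'

// Illustrative helper: map the HypaProcesser model name to a transformers.js model id.
function modelIdFor(model: LocalEmbedModel) {
    return model === 'nomic'
        ? 'nomic-ai/nomic-embed-text-v1.5' as const
        : 'Xenova/all-MiniLM-L6-v2' as const
}

// Illustrative stand-in for the MiniLM/nomic branch of getEmbeds.
async function getLocalEmbeds(input: string[] | string, model: LocalEmbedModel) {
    const inputs: string[] = Array.isArray(input) ? input : [input]
    const results: Float32Array[] = []
    for (let i = 0; i < inputs.length; i++) {
        // Forward the chosen model id to the updated runEmbedding.
        results.push(await runEmbedding(inputs[i], modelIdFor(model)))
    }
    return results
}
```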
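Call sites would then opt in per model; a minimal usage sketch follows, with the wrapper function being purely illustrative. Note that, as visible in the diff, the pipeline is cached behind the if(!extractor) guard, so whichever model is requested first is the one that stays loaded for subsequent calls.

```ts
import { runEmbedding } from './transformers' // path as in the patched repo

// Illustrative wrapper showing both the default and the explicit nomic call.
async function embedBothWays(text: string) {
    // Default: same MiniLM behaviour as before this patch.
    const miniLM = await runEmbedding(text)
    // Explicit opt-in to the new nomic embedding model.
    const nomic = await runEmbedding(text, 'nomic-ai/nomic-embed-text-v1.5')
    return { miniLM, nomic }
}
```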