Remove unnecessary conversions
@@ -17,10 +17,10 @@ export class HypaProcesser{
         this.model = model
     }
 
-    async embedDocuments(texts: string[]): Promise<number[][]> {
+    async embedDocuments(texts: string[]): Promise<VectorArray[]> {
         const subPrompts = chunkArray(texts,512);
 
-        const embeddings: number[][] = [];
+        const embeddings: VectorArray[] = [];
 
         for (let i = 0; i < subPrompts.length; i += 1) {
             const input = subPrompts[i];
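
chunkArray isn't shown in this diff; a minimal sketch of what it presumably does (split the texts into batches of 512 so each embedding request stays bounded). The helper below is hypothetical, not taken from this commit:

    // Hypothetical helper: split an array into fixed-size batches.
    function chunkArray<T>(arr: T[], size: number): T[][] {
        const chunks: T[][] = [];
        for (let i = 0; i < arr.length; i += size) {
            chunks.push(arr.slice(i, i + size));
        }
        return chunks;
    }
    // chunkArray(["a", "b", "c"], 2) -> [["a", "b"], ["c"]]
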
@@ -37,22 +37,8 @@ export class HypaProcesser{
     async getEmbeds(input:string[]|string) {
         if(this.model === 'MiniLM' || this.model === 'nomic'){
             const inputs:string[] = Array.isArray(input) ? input : [input]
-            let results:Float32Array[] = []
-            for(let i=0;i<inputs.length;i++){
-                const res = await runEmbedding(inputs[i], this.model === 'nomic' ? 'nomic-ai/nomic-embed-text-v1.5' : 'Xenova/all-MiniLM-L6-v2')
-                results.push(res)
-            }
-            //convert to number[][]
-            const result:number[][] = []
-            for(let i=0;i<results.length;i++){
-                const res = results[i]
-                const arr:number[] = []
-                for(let j=0;j<res.length;j++){
-                    arr.push(res[j])
-                }
-                result.push(arr)
-            }
-            return result
+            let results:Float32Array[] = await runEmbedding(inputs, this.model === 'nomic' ? 'nomic-ai/nomic-embed-text-v1.5' : 'Xenova/all-MiniLM-L6-v2')
+            return results
         }
         const gf = await globalFetch("https://api.openai.com/v1/embeddings", {
             headers: {
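
For context, the non-local branch above posts to the OpenAI embeddings endpoint. A rough sketch of the request and response shape that call presumably relies on; the API key, model name, and response handling below are placeholders, not taken from this diff:

    // Standard OpenAI /v1/embeddings call shape; key and model are placeholders.
    const OPENAI_KEY = "sk-placeholder";
    const res = await fetch("https://api.openai.com/v1/embeddings", {
        method: "POST",
        headers: {
            "Authorization": `Bearer ${OPENAI_KEY}`,
            "Content-Type": "application/json",
        },
        body: JSON.stringify({
            model: "text-embedding-ada-002",      // placeholder model
            input: ["some text", "more text"],    // batched, like a subPrompts chunk
        }),
    });
    const json = await res.json();
    // Each json.data[i].embedding is a plain number[] per input string.
    const embeddings: number[][] = json.data.map((d: any) => d.embedding);
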
@@ -138,7 +124,7 @@ export class HypaProcesser{
     }
 
     private async similaritySearchVectorWithScore(
-        query: number[],
+        query: VectorArray,
     ): Promise<[string, number][]> {
         const memoryVectors = this.vectors
         const searches = memoryVectors
@@ -160,12 +146,18 @@ export class HypaProcesser{
             return similarity(query1, query2)
         }
 }
-function similarity(a:number[], b:number[]) {
-    return a.reduce((acc, val, i) => acc + val * b[i], 0);
+function similarity(a:VectorArray, b:VectorArray) {
+    let dot = 0;
+    for(let i=0;i<a.length;i++){
+        dot += a[i] * b[i]
+    }
+    return dot
 }
 
+type VectorArray = number[]|Float32Array
+
 type memoryVector = {
-    embedding:number[]
+    embedding:number[]|Float32Array,
     content:string,
     alreadySaved?:boolean
 }
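
A quick check on the rewritten similarity(): it is a plain dot product, which works for either half of the VectorArray union (number[] and Float32Array are indexed the same way), and it equals cosine similarity whenever the embeddings are unit length, which is what the normalize: true option in runEmbedding below requests. A small self-contained sketch, with the type copied from this commit:

    type VectorArray = number[] | Float32Array

    // Same shape as the similarity() added in this commit: a plain dot product.
    function similarity(a: VectorArray, b: VectorArray): number {
        let dot = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
        }
        return dot;
    }

    // Both operands here are unit vectors, so the dot product is their cosine.
    const query: number[] = [0.6, 0.8, 0];
    const doc = new Float32Array([1, 0, 0]);
    console.log(similarity(query, doc)); // 0.6
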
@@ -51,14 +51,13 @@ export const runSummarizer = async (text: string) => {
 
 let extractor:FeatureExtractionPipeline = null
 type EmbeddingModel = 'Xenova/all-MiniLM-L6-v2'|'nomic-ai/nomic-embed-text-v1.5'
-export const runEmbedding = async (text: string, model:EmbeddingModel = 'Xenova/all-MiniLM-L6-v2'):Promise<Float32Array> => {
+export const runEmbedding = async (texts: string[], model:EmbeddingModel = 'Xenova/all-MiniLM-L6-v2'):Promise<Float32Array[]> => {
     await initTransformers()
     if(!extractor){
         extractor = await pipeline('feature-extraction', model);
     }
-    const tokenizer = await AutoTokenizer.from_pretrained(model);
-    let result = await extractor(text, { pooling: 'mean', normalize: true });
-    return (result?.data as Float32Array) ?? null;
+    let result = await extractor(texts, { pooling: 'mean', normalize: true });
+    return (result.data as Float32Array[]) ?? null;
 }
 
 export const runImageEmbedding = async (dataurl:string) => {
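
Putting the two files together: one call to the batched runEmbedding now covers a whole list of texts, and its Float32Array results can be passed straight to similarity() without the removed number[] conversion. A rough usage sketch written against the declared signatures above; the texts and model choice are placeholders, not from this commit:

    // Hypothetical usage, assuming runEmbedding and similarity are imported
    // from the two files changed in this commit.
    const vectors = await runEmbedding(
        ["first memory", "second memory"],
        'nomic-ai/nomic-embed-text-v1.5'
    );
    const [queryVec] = await runEmbedding(["what was the first memory?"]);
    // VectorArray covers Float32Array, so no per-element copy is needed.
    const scores = vectors.map(v => similarity(queryVec, v));
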