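Fix HypaV2 issue where the 'search_document: ' prefix was not stripped correctly: the hard-coded substring(14) removed only 14 of the prefix's 17 characters, so the prefix length is now used instead.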

This commit is contained in:
Bo26fhmC5M
2024-12-03 22:12:53 +09:00
parent 8d1e7cc5a8
commit dbd7485c50

View File

@@ -230,6 +230,7 @@ export async function hypaMemoryV2(
}
// Fetch additional memory from chunks
const searchDocumentPrefix = "search_document: ";
const processor = new HypaProcesser(db.hypaModel);
processor.oaikey = db.supaMemoryKey;
@@ -249,7 +250,7 @@ export async function hypaMemoryV2(
console.log("Older Chunks:", olderChunks);
// Add older chunks to processor for similarity search
await processor.addText(olderChunks.filter(v => v.text.trim().length > 0).map(v => "search_document: " + v.text.trim()));
await processor.addText(olderChunks.filter(v => v.text.trim().length > 0).map(v => searchDocumentPrefix + v.text.trim()));
let scoredResults: { [key: string]: number } = {};
for (let i = 0; i < 3; i++) {
@@ -267,9 +268,10 @@ export async function hypaMemoryV2(
let chunkResultTokens = 0;
while (allocatedTokens - mainPromptTokens - chunkResultTokens > 0 && scoredArray.length > 0) {
const [text] = scoredArray.shift();
const tokenized = await tokenizer.tokenizeChat({ role: 'system', content: text.substring(14) });
const tokenized = await tokenizer.tokenizeChat({ role: 'system', content: text.substring(searchDocumentPrefix.length) });
if (tokenized > allocatedTokens - mainPromptTokens - chunkResultTokens) break;
chunkResultPrompts += text.substring(14) + '\n\n';
// Ensure strings are truncated correctly using searchDocumentPrefix.length
chunkResultPrompts += text.substring(searchDocumentPrefix.length) + '\n\n';
chunkResultTokens += tokenized;
}