Experimental llamacpp support
@@ -2,7 +2,7 @@ import { invoke } from "@tauri-apps/api/tauri";
 import { globalFetch } from "src/ts/storage/globalApi";
 import { sleep } from "src/ts/util";
 import * as path from "@tauri-apps/api/path";
-import { exists } from "@tauri-apps/api/fs";
+import { exists, readTextFile } from "@tauri-apps/api/fs";
 import { alertClear, alertError, alertMd, alertWait } from "src/ts/alert";
 import { get } from "svelte/store";
 import { DataBase } from "src/ts/storage/database";
@@ -130,7 +130,7 @@ export async function loadExllamaFull(){
 }


-export async function runLocalModel(prompt:string){
+async function runLocalModelOld(prompt:string){
     const db = get(DataBase)

     if(!serverRunning){
@@ -155,48 +155,139 @@ export async function runLocalModel(prompt:string){
     console.log(gen)
 }

 let initPython = false
 export async function installPython(){
     if(initPython){
         return
     }
     initPython = true
     const appDir = await path.appDataDir()
     const completedPath = await path.join(appDir, 'python', 'completed.txt')
     if(await exists(completedPath)){
-        alertWait("Python is already installed, skipping")
-    }
-    else{
-        alertWait("Installing Python")
-        await invoke("install_python", {
-            path: appDir
-        })
-        alertWait("Installing Pip")
-        await invoke("install_pip", {
-            path: appDir
-        })
-        alertWait("Rewriting requirements")
-        await invoke('post_py_install', {
-            path: appDir
-        })
-    }
-    const dependencies = [
-        'pydantic',
-        'scikit-build',
-        'scikit-build-core',
-        'pyproject_metadata',
-        'pathspec',
-        'llama-cpp-python',
-        'uvicorn[standard]',
-        'fastapi'
-    ]
-    for(const dep of dependencies){
-        alertWait("Installing Python Dependencies (" + dep + ")")
-        await invoke('install_py_dependencies', {
-            path: appDir,
-            dependency: dep
-        })
-    }
-    const srvPath = await resolveResource('/src-python/')
-    await invoke('run_py_server', {
-        pyPath: appDir,
-    })
-    alertMd("Python Server is running at: " + srvPath)
-    alertClear()
-}
+        alertWait("Python is already installed, skipping")
+        await invoke('run_py_server', {
+            pyPath: appDir,
+        })
+        await sleep(4000)
+        alertClear()
+        return
+    }
+    alertWait("Installing Python")
+    await invoke("install_python", {
+        path: appDir
+    })
+    alertWait("Installing Pip")
+    await invoke("install_pip", {
+        path: appDir
+    })
+    alertWait("Rewriting requirements")
+    await invoke('post_py_install', {
+        path: appDir
+    })
+    const dependencies = [
+        'pydantic',
+        'scikit-build',
+        'scikit-build-core',
+        'pyproject_metadata',
+        'pathspec',
+        'llama-cpp-python',
+        'uvicorn[standard]',
+        'fastapi'
+    ]
+    for(const dep of dependencies){
+        alertWait("Installing Python Dependencies (" + dep + ")")
+        await invoke('install_py_dependencies', {
+            path: appDir,
+            dependency: dep
+        })
+    }
+    await invoke('run_py_server', {
+        pyPath: appDir,
+    })
+    await sleep(4000)
+    alertClear()
+    return
+}
+
+export async function getLocalKey(retry = true) {
+    try {
+        const ft = await fetch("http://localhost:10026/")
+        const keyJson = await ft.json()
+        const keyPath = keyJson.dir
+        const key = await readTextFile(keyPath)
+        return key
+    } catch (error) {
+        if(!retry){
+            throw `Error when getting local key: ${error}`
+        }
+        //if is cors error
+        if(
+            error.message.includes("NetworkError when attempting to fetch resource.")
+            || error.message.includes("Failed to fetch")
+        ){
+            await installPython()
+            return await getLocalKey(false)
+        }
+        else{
+            throw `Error when getting local key: ${error}`
+        }
+    }
+}
+
+export async function runGGUFModel(arg:{
+    prompt: string
+    modelPath: string
+    temperature: number
+    top_p: number
+    top_k: number
+    maxTokens: number
+    presencePenalty: number
+    frequencyPenalty: number
+    repeatPenalty: number
+    maxContext: number
+    stop: string[]
+}) {
+    const key = await getLocalKey()
+    const b = await fetch("http://localhost:10026/llamacpp", {
+        method: "POST",
+        headers: {
+            "Content-Type": "application/json",
+            "x-risu-auth": key
+        },
+        body: JSON.stringify({
+            prompt: arg.prompt,
+            model_path: arg.modelPath,
+            temperature: arg.temperature,
+            top_p: arg.top_p,
+            top_k: arg.top_k,
+            max_tokens: arg.maxTokens,
+            presence_penalty: arg.presencePenalty,
+            frequency_penalty: arg.frequencyPenalty,
+            repeat_penalty: arg.repeatPenalty,
+            n_ctx: arg.maxContext,
+            stop: arg.stop
+        })
+    })
+
+    alertClear()
+    return b.body
+}
+
+export async function tokenizeGGUFModel(prompt:string):Promise<number[]> {
+    const key = await getLocalKey()
+    const db = get(DataBase)
+    const modelPath = db.aiModel.replace('local_', '')
+    const b = await fetch("http://localhost:10026/llamacpp/tokenize", {
+        method: "POST",
+        headers: {
+            "Content-Type": "application/json",
+            "x-risu-auth": key
+        },
+        body: JSON.stringify({
+            prompt: prompt,
+            n_ctx: db.maxContext,
+            model_path: modelPath
+        })
+    })
+
+    return await b.json()
+}
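For orientation: getLocalKey bootstraps authentication by fetching http://localhost:10026/, which returns the path of a key file that the client then reads through Tauri's readTextFile; being able to read that file proves the caller runs on the same machine, and the key travels back in the x-risu-auth header. runGGUFModel returns the raw response body, a ReadableStream<Uint8Array>. A minimal consumer sketch (this helper is hypothetical, not part of the commit):

// Hypothetical helper: drains the stream runGGUFModel returns and
// accumulates the generated text. Assumes the server emits UTF-8 bytes.
async function readGenerated(stream: ReadableStream<Uint8Array>): Promise<string> {
    const reader = stream.getReader()
    const decoder = new TextDecoder()
    let text = ''
    while(true){
        const { done, value } = await reader.read()
        if(done){
            break
        }
        // {stream: true} keeps multi-byte characters that span chunks intact
        text += decoder.decode(value, { stream: true })
    }
    text += decoder.decode() // flush any buffered partial character
    return text
}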
@@ -10,7 +10,7 @@ import { createDeep } from "./deepai";
 import { hubURL } from "../characterCards";
 import { NovelAIBadWordIds, stringlizeNAIChat } from "./models/nai";
 import { strongBan, tokenizeNum } from "../tokenizer";
-import { runLocalModel } from "./models/local";
+import { runGGUFModel } from "./models/local";
 import { risuChatParser } from "../parser";
 import { SignatureV4 } from "@smithy/signature-v4";
 import { HttpRequest } from "@smithy/protocol-http";
@@ -1685,7 +1685,36 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
         const suggesting = model === "submodel"
         const proompt = stringlizeChatOba(formated, currentChar.name, suggesting, arg.continue)
         const stopStrings = getStopStrings(suggesting)
-        await runLocalModel(proompt)
+        const modelPath = aiModel.replace('local_', '')
+        const res = await runGGUFModel({
+            prompt: proompt,
+            modelPath: modelPath,
+            temperature: temperature,
+            top_p: db.top_p,
+            top_k: db.top_k,
+            maxTokens: maxTokens,
+            presencePenalty: arg.PresensePenalty || (db.PresensePenalty / 100),
+            frequencyPenalty: arg.frequencyPenalty || (db.frequencyPenalty / 100),
+            repeatPenalty: 0,
+            maxContext: db.maxContext,
+            stop: stopStrings,
+        })
+        let decoded = ''
+        const transtream = new TransformStream<Uint8Array, StreamResponseChunk>({
+            async transform(chunk, control) {
+                const decodedChunk = new TextDecoder().decode(chunk)
+                decoded += decodedChunk
+                control.enqueue({
+                    "0": decoded
+                })
+            }
+        })
+        res.pipeTo(transtream.writable)
+
+        return {
+            type: 'streaming',
+            result: transtream.readable
+        }
     }
     return {
         type: 'fail',
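Note that each enqueued StreamResponseChunk carries the whole text decoded so far under key "0", not a delta, so consumers can simply replace their buffer. One caveat a reviewer might raise: constructing a fresh TextDecoder per chunk can mangle a multi-byte UTF-8 character that is split across two chunks. A possible variant (a sketch, not what this commit ships) keeps a single decoder in streaming mode:

// Sketch of an alternative transform: one shared TextDecoder in streaming
// mode, so UTF-8 sequences split across chunk boundaries still decode cleanly.
const decoder = new TextDecoder()
let decoded = ''
const transtream = new TransformStream<Uint8Array, StreamResponseChunk>({
    async transform(chunk, control) {
        decoded += decoder.decode(chunk, { stream: true })
        control.enqueue({
            "0": decoded
        })
    }
})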
@@ -5,6 +5,7 @@ import { get } from "svelte/store";
 import type { OpenAIChat } from "./process";
 import { supportsInlayImage } from "./image";
 import { risuChatParser } from "./parser";
+import { tokenizeGGUFModel } from "./process/models/local";

 async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
     let db = get(DataBase)
@@ -21,12 +22,14 @@ async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
     if(db.aiModel.startsWith('mistral')){
         return await tokenizeWebTokenizers(data, 'mistral')
     }
-    if(db.aiModel.startsWith('local_') ||
-        db.aiModel === 'mancer' ||
+    if(db.aiModel === 'mancer' ||
         db.aiModel === 'textgen_webui' ||
        (db.aiModel === 'reverse_proxy' && db.reverseProxyOobaMode)){
         return await tokenizeWebTokenizers(data, 'llama')
     }
+    if(db.aiModel.startsWith('local_')){
+        return await tokenizeGGUFModel(data)
+    }
     if(db.aiModel === 'ooba'){
         if(db.reverseProxyOobaArgs.tokenizer === 'mixtral' || db.reverseProxyOobaArgs.tokenizer === 'mistral'){
             return await tokenizeWebTokenizers(data, 'mistral')
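With this routing, local_-prefixed models are tokenized by the llama.cpp server itself rather than the bundled web tokenizers, so token counts match the loaded GGUF model exactly. A hypothetical call (the model path is invented):

// Hypothetical: with db.aiModel === 'local_/models/example.q5_k_m.gguf',
// encode() strips the 'local_' prefix inside tokenizeGGUFModel and asks
// the local server for the prompt's token ids.
const ids = await encode('Once upon a time')
console.log(ids.length) // prompt length in model-native tokens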