risuai/src/ts/process/prompt.ts

import { get } from "svelte/store";
import { tokenizeAccurate } from "../tokenizer";
import { DataBase, presetTemplate, setDatabase, type Database } from "../storage/database";
import { alertError, alertNormal } from "../alert";
import type { OobaChatCompletionRequestParams } from "../model/ooba";
export type PromptItem = PromptItemPlain|PromptItemTyped|PromptItemChat|PromptItemAuthorNote;
export type PromptType = PromptItem['type'];
export type PromptSettings = {
assistantPrefill: string
postEndInnerFormat: string
sendChatAsSystem: boolean
sendName: boolean
utilOverride: boolean
customChainOfThought?: boolean
maxThoughtTagDepth?: number
}
export interface PromptItemPlain {
type: 'plain'|'jailbreak'|'cot';
type2: 'normal'|'globalNote'|'main'
text: string;
role: 'user'|'bot'|'system';
}
export interface PromptItemTyped {
type: 'persona'|'description'|'lorebook'|'postEverything'|'memory'
innerFormat?: string
}
export interface PromptItemAuthorNote {
type: 'authornote'
innerFormat?: string
defaultText?: string
}
export interface PromptItemChat {
type: 'chat';
rangeStart: number;
rangeEnd: number|'end';
chatAsOriginalOnSystem?: boolean;
}
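//estimates the token count of a prompt template:
//text-bearing items are tokenized directly, typed items only contribute their innerFormat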
export async function tokenizePreset(prompts:PromptItem[], consti:boolean = false){
let total = 0
for(const prompt of prompts){
switch(prompt.type){
case 'plain':
case 'jailbreak':
case 'cot':{
total += await tokenizeAccurate(prompt.text, consti)
break
}
case 'persona':
case 'description':
case 'lorebook':
case 'postEverything':
case 'authornote':
case 'memory':{
if(prompt.innerFormat){
total += await tokenizeAccurate(prompt.innerFormat, consti)
}
break
}
}
}
return total
}
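//detects which SillyTavern JSON export a string contains by checking for characteristic keys
//e.g. detectPromptJSONType('{"temp":1,"rep_pen":1.1,"min_length":0}') === 'PARAMETERS'
//returns 'NOTSUPPORTED' when no known shape matches or the JSON fails to parse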
export function detectPromptJSONType(text:string){
function notNull<T>(x:T|null):x is T{
return x !== null && x !== undefined
}
try {
const parsed = JSON.parse(text)
if(notNull(parsed.chat_completion_source) && Array.isArray(parsed.prompts)&& Array.isArray(parsed.prompt_order)){
return "STCHAT"
}
else if(notNull(parsed.temp) && notNull(parsed.rep_pen) && notNull(parsed.min_length)){
return "PARAMETERS"
}
else if(notNull(parsed.story_string) && notNull(parsed.chat_start)){
return "STCONTEXT"
}
else if(notNull(parsed.input_sequence) && notNull(parsed.output_sequence)){
return "STINST"
}
} catch (e) {}
return 'NOTSUPPORTED'
}
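//processing priority when multiple files are imported; earlier entries are converted first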
const typePriority = [
'STINST',
'PARAMETERS',
'STCONTEXT',
'STCHAT',
]
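//shape of a SillyTavern instruct template export (STINST)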
type InstData = {
"system_prompt": string,
"input_sequence": string,
"output_sequence": string,
"last_output_sequence": string,
"system_sequence": string,
"stop_sequence": string,
"system_sequence_prefix": string,
"system_sequence_suffix": string,
"first_output_sequence": string,
"output_suffix": string,
"input_suffix": string,
"system_suffix": string,
"user_alignment_message": string,
"system_same_as_user": boolean,
"last_system_sequence": string,
"first_input_sequence": string,
"last_input_sequence": string,
"name": string
}
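//converts the prompt_order of a SillyTavern chat completion preset into a RisuAI prompt template
//known identifiers map to typed items, unknown ones fall back to plain system prompts,
//and assistant_prefill is appended after a postEverything item, wrapped in a {{prefill_supported}} check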
export function stChatConvert(pre:any){
//ST preset
let promptTemplate = []
function findPrompt(identifier:string){
return pre.prompts.find((p:any) => p.identifier === identifier)
}
for(const prompt of pre?.prompt_order?.[0]?.order ?? []){
if(!prompt?.enabled){
continue
}
const p = findPrompt(prompt?.identifier ?? '')
if(p){
switch(p.identifier){
case 'main':{
promptTemplate.push({
type: 'plain',
type2: 'main',
text: p.content ?? "",
role: p.role ?? "system"
})
break
}
case 'jailbreak':
case 'nsfw':{
promptTemplate.push({
type: 'jailbreak',
type2: 'normal',
text: p.content ?? "",
role: p.role ?? "system"
})
break
}
case 'dialogueExamples':
case 'charPersonality':
case 'scenario':{
break //ignore
}
case 'chatHistory':{
promptTemplate.push({
type: 'chat',
rangeEnd: 'end',
rangeStart: 0
})
break
}
case 'worldInfoBefore':{
promptTemplate.push({
type: 'lorebook'
})
break
}
case 'worldInfoAfter':{
break
}
case 'charDescription':{
promptTemplate.push({
type: 'description'
})
break
}
case 'personaDescription':{
promptTemplate.push({
type: 'persona'
})
break
}
default:{
console.log(p)
promptTemplate.push({
type: 'plain',
type2: 'normal',
text: p.content ?? "",
role: p.role ?? "system"
})
}
}
}
else{
console.log("Prompt not found", prompt)
}
}
if(pre?.assistant_prefill){
promptTemplate.push({
type: 'postEverything'
})
promptTemplate.push({
type: 'plain',
type2: 'main',
text: `{{#if {{prefill_supported}}}}${pre?.assistant_prefill}{{/if}}`,
role: 'bot'
})
}
return promptTemplate
}
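//sampler and generation keys copied from imported parameter files into the preset's reverseProxyOobaArgs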
export const OobaParams = [
"tokenizer",
"min_p",
"top_k",
"repetition_penalty",
"repetition_penalty_range",
"typical_p",
"tfs",
"top_a",
"epsilon_cutoff",
"eta_cutoff",
"guidance_scale",
"negative_prompt",
"penalty_alpha",
"mirostat_mode",
"mirostat_tau",
"mirostat_eta",
"temperature_last",
"do_sample",
"seed",
"encoder_repetition_penalty",
"no_repeat_ngram_size",
"min_length",
"num_beams",
"length_penalty",
"early_stopping",
"truncation_length",
"max_tokens_second",
"custom_token_bans",
"auto_max_new_tokens",
"ban_eos_token",
"add_bos_token",
"skip_special_tokens",
"grammar_string"
]
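//converts imported SillyTavern JSON files into a RisuAI bot preset and saves it to the database
//STCHAT presets become a chat completion prompt template, while STINST/STCONTEXT/PARAMETERS
//files are merged into an instruct preset with a generated jinja template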
export function promptConvertion(files:{ name: string, content: string, type:string }[]){
let preset = structuredClone(presetTemplate)
let instData = {
"system_prompt": "",
"input_sequence": "",
"output_sequence": "",
"last_output_sequence": "",
"system_sequence": "",
"stop_sequence": "",
"system_sequence_prefix": "",
"system_sequence_suffix": "",
"first_output_sequence": "",
"output_suffix": "",
"input_suffix": "",
"system_suffix": "",
"user_alignment_message": "",
"system_same_as_user": false,
"last_system_sequence": "",
"first_input_sequence": "",
"last_input_sequence": "",
"name": ""
}
let story_string = ''
let chat_start = ''
preset.name = ''
let type = ''
files = files.filter(x=>x.type !== 'NOTSUPPORTED').sort((a,b)=>{
return typePriority.indexOf(a.type) - typePriority.indexOf(b.type)
})
if(files.findIndex(x=>x.type === 'STINST') !== -1){
type = 'STINST'
}
if(files.findIndex(x=>x.type === 'STCHAT') !== -1){
if(type !== ''){
alertError(`Converting ${type} and STCHAT presets together is not supported.`)
return
}
type = 'STCHAT'
}
let samplers:string[] = []
let oobaData:OobaChatCompletionRequestParams = {
mode: 'instruct',
}
for(let i=0;i<files.length;i++){
const file = files[i]
const data = JSON.parse(file.content)
const getParam = (setname:keyof(typeof preset), getname:string = '', arg:{
multiplier?: number
}={}) => {
if(getname === ''){
getname = setname
}
let multiplier = arg.multiplier ?? 1
if(samplers.includes(getname)){
//@ts-ignore
preset[setname] = data[getname] * multiplier
}
else{
// @ts-ignore
preset[setname] = -1000
}
if(OobaParams.includes(getname)){
oobaData[getname] = data[getname]
}
}
preset.name ||= instData.name ?? ''
switch(file.type){
case 'STINST':{
instData = data as InstData
if(data.system_same_as_user){
instData.system_sequence = ''
instData.system_sequence_prefix = instData.input_sequence
instData.system_sequence_suffix = instData.output_sequence
}
break
}
case 'PARAMETERS':{
samplers = data.samplers
getParam('temperature', 'temp', {multiplier: 100})
getParam('top_p')
getParam('top_k')
getParam('top_a')
getParam('min_p')
getParam('repetition_penalty', 'rep_pen')
getParam('frequencyPenalty', 'freq_pen', {multiplier: 100})
getParam('PresensePenalty', 'presence_penalty', {multiplier: 100})
for(const key of OobaParams){
if(samplers.includes(key) && (data[key] !== undefined) && (data[key] !== null)){
oobaData[key] = data[key]
}
}
break
}
case 'STCONTEXT':{
story_string = data.story_string
chat_start = data.chat_start
break
}
case 'STCHAT':{
samplers = []
getParam('temperature', 'temperature', {multiplier: 100})
getParam('top_p')
getParam('top_k')
getParam('top_a')
getParam('min_p')
getParam('repetition_penalty', 'repetition_penalty')
getParam('frequencyPenalty', 'frequency_penalty', {multiplier: 100})
getParam('PresensePenalty', 'presence_penalty', {multiplier: 100})
const prompts = stChatConvert(data)
preset.promptTemplate = prompts
}
}
}
if(type === 'STCHAT'){
preset.aiModel = 'openrouter'
preset.subModel = 'openrouter'
const db = get(DataBase)
db.botPresets.push(preset)
setDatabase(db)
alertNormal('Preset converted successfully. You can find it in bot setting presets')
return
}
preset.reverseProxyOobaArgs = oobaData
preset.promptTemplate = [{
type: 'plain',
type2: 'main',
text: '',
role: 'system'
},{
type: 'description',
},{
type: 'persona',
},{
type: 'lorebook',
},{
type: 'chat',
rangeStart: 0,
rangeEnd: 'end',
}, {
type: 'authornote',
}, {
type: 'plain',
type2: 'globalNote',
text: '',
role: 'system'
}]
//build a jinja template from the instData
let jinja = ''
jinja += story_string
.replace(/{{user}}/gi, '{{risu_user}}')
.replace(/{{system_prompt}}/gi, instData.system_prompt)
.replace(/{{system}}/gi, instData.system_prompt)
.replace(/{{#if (.+?){{\/if}}/gis, '')
.replace(/{{(.+?)}}/gi, '')
.replace(/\n\n+/g, '\n\n')
jinja += chat_start
jinja += `{% for message in messages %}`
jinja += `{% if message.role == 'user' %}`
jinja += instData.input_sequence
jinja += `{{ message.content }}`
jinja += instData.input_suffix
jinja += `{% endif %}`
jinja += `{% if message.role == 'assistant' %}`
jinja += instData.output_sequence
jinja += `{{ message.content }}`
jinja += instData.output_suffix
jinja += `{% endif %}`
jinja += `{% if message.role == 'system' %}`
jinja += instData.system_sequence
jinja += instData.system_sequence_prefix
jinja += `{{ message.content }}`
jinja += instData.system_sequence_suffix
jinja += instData.system_suffix
jinja += `{% endif %}`
jinja += `{% endfor %}`
jinja += instData.output_sequence
preset.instructChatTemplate = "jinja"
preset.JinjaTemplate = jinja
preset.aiModel = 'openrouter'
preset.subModel = 'openrouter'
preset.useInstructPrompt = true
preset.name ||= 'Converted from JSON'
const db = get(DataBase)
db.botPresets.push(preset)
setDatabase(db)
alertNormal('Preset converted successfully. You can find it in bot setting presets')
}