[feat] new fast parser

This commit is contained in:
kwaroran
2023-07-17 16:30:55 +09:00
parent 1c409a04f7
commit 297568e704
5 changed files with 126 additions and 20 deletions

View File

@@ -18,6 +18,7 @@
"@dqbd/tiktoken": "^1.0.4", "@dqbd/tiktoken": "^1.0.4",
"@mlc-ai/web-tokenizers": "^0.1.0", "@mlc-ai/web-tokenizers": "^0.1.0",
"@tauri-apps/api": "1.4.0", "@tauri-apps/api": "1.4.0",
"@types/marked": "^5.0.1",
"blueimp-md5": "^2.19.0", "blueimp-md5": "^2.19.0",
"body-parser": "^1.20.2", "body-parser": "^1.20.2",
"buffer": "^6.0.3", "buffer": "^6.0.3",
@@ -33,6 +34,7 @@
"localforage": "^1.10.0", "localforage": "^1.10.0",
"lodash": "^4.17.21", "lodash": "^4.17.21",
"lucide-svelte": "^0.260.0", "lucide-svelte": "^0.260.0",
"marked": "^5.1.1",
"ml-distance": "^4.0.1", "ml-distance": "^4.0.1",
"mobile-drag-drop": "3.0.0-rc.0", "mobile-drag-drop": "3.0.0-rc.0",
"msgpackr": "^1.9.5", "msgpackr": "^1.9.5",

16
pnpm-lock.yaml generated
View File

@@ -17,6 +17,9 @@ dependencies:
'@tauri-apps/api': '@tauri-apps/api':
specifier: 1.4.0 specifier: 1.4.0
version: 1.4.0 version: 1.4.0
'@types/marked':
specifier: ^5.0.1
version: 5.0.1
blueimp-md5: blueimp-md5:
specifier: ^2.19.0 specifier: ^2.19.0
version: 2.19.0 version: 2.19.0
@@ -62,6 +65,9 @@ dependencies:
lucide-svelte: lucide-svelte:
specifier: ^0.260.0 specifier: ^0.260.0
version: 0.260.0(svelte@3.58.0) version: 0.260.0(svelte@3.58.0)
marked:
specifier: ^5.1.1
version: 5.1.1
ml-distance: ml-distance:
specifier: ^4.0.1 specifier: ^4.0.1
version: 4.0.1 version: 4.0.1
@@ -805,6 +811,10 @@ packages:
resolution: {integrity: sha512-r22s9tAS7imvBt2lyHC9B8AGwWnXaYb1tY09oyLkXDs4vArpYJzw09nj8MLx5VfciBPGIb+ZwG0ssYnEPJxn/g==} resolution: {integrity: sha512-r22s9tAS7imvBt2lyHC9B8AGwWnXaYb1tY09oyLkXDs4vArpYJzw09nj8MLx5VfciBPGIb+ZwG0ssYnEPJxn/g==}
dev: true dev: true
/@types/marked@5.0.1:
resolution: {integrity: sha512-Y3pAUzHKh605fN6fvASsz5FDSWbZcs/65Q6xYRmnIP9ZIYz27T4IOmXfH9gWJV1dpi7f1e7z7nBGUTx/a0ptpA==}
dev: false
/@types/node@18.15.11: /@types/node@18.15.11:
resolution: {integrity: sha512-E5Kwq2n4SbMzQOn6wnmBjuK9ouqlURrcZDVfbo9ftDDTFt3nk7ZKK4GMOzoYgnpQJKcxwQw+lGaBvvlMo0qN/Q==} resolution: {integrity: sha512-E5Kwq2n4SbMzQOn6wnmBjuK9ouqlURrcZDVfbo9ftDDTFt3nk7ZKK4GMOzoYgnpQJKcxwQw+lGaBvvlMo0qN/Q==}
@@ -1922,6 +1932,12 @@ packages:
'@jridgewell/sourcemap-codec': 1.4.14 '@jridgewell/sourcemap-codec': 1.4.14
dev: true dev: true
/marked@5.1.1:
resolution: {integrity: sha512-bTmmGdEINWmOMDjnPWDxGPQ4qkDLeYorpYbEtFOXzOruTwUE671q4Guiuchn4N8h/v6NGd7916kXsm3Iz4iUSg==}
engines: {node: '>= 18'}
hasBin: true
dev: false
/media-typer@0.3.0: /media-typer@0.3.0:
resolution: {integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==} resolution: {integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==}
engines: {node: '>= 0.6'} engines: {node: '>= 0.6'}

View File

@@ -1,5 +1,5 @@
<script lang="ts"> <script lang="ts">
import { ParseMarkdown } from "src/ts/parser"; import { ParseMarkdown, risuChatParser } from "src/ts/parser";
import { DataBase, type Database, type character, type groupChat } from "src/ts/storage/database"; import { DataBase, type Database, type character, type groupChat } from "src/ts/storage/database";
import { selectedCharID } from "src/ts/stores"; import { selectedCharID } from "src/ts/stores";
import { onDestroy } from "svelte"; import { onDestroy } from "svelte";
@@ -43,7 +43,7 @@
{#if backgroundHTML} {#if backgroundHTML}
<div class="absolute top-0 left-0 w-full h-full"> <div class="absolute top-0 left-0 w-full h-full">
{#await ParseMarkdown(backgroundHTML, currentChar, 'back') then md} {#await ParseMarkdown(risuChatParser(backgroundHTML, {chara:currentChar}), currentChar, 'back') then md}
{@html md} {@html md}
{/await} {/await}
</div> </div>

View File

@@ -45,6 +45,10 @@ html, body{
color: var(--FontColorItalicBold); color: var(--FontColorItalicBold);
} }
.chattext em strong{
color: var(--FontColorItalicBold);
}
::-webkit-scrollbar { ::-webkit-scrollbar {
width: 5px; width: 5px;
height: 5px; height: 5px;

View File

@@ -1,5 +1,7 @@
import DOMPurify from 'isomorphic-dompurify'; import DOMPurify from 'isomorphic-dompurify';
import showdown from 'showdown'; import showdown from 'showdown';
import { Marked } from 'marked';
import { DataBase, type Database, type character, type groupChat } from './storage/database'; import { DataBase, type Database, type character, type groupChat } from './storage/database';
import { getFileSrc } from './storage/globalApi'; import { getFileSrc } from './storage/globalApi';
import { processScript, processScriptFull } from './process/scripts'; import { processScript, processScriptFull } from './process/scripts';
@@ -7,13 +9,23 @@ import { get } from 'svelte/store';
import css from '@adobe/css-tools' import css from '@adobe/css-tools'
import { selectedCharID } from './stores'; import { selectedCharID } from './stores';
import { calcString } from './process/infunctions'; import { calcString } from './process/infunctions';
import { findCharacterbyId } from './util';
const convertor = new showdown.Converter({ const convertora = new showdown.Converter({
simpleLineBreaks: true, simpleLineBreaks: true,
strikethrough: true, strikethrough: true,
tables: true tables: true
}) })
// Shared Marked instance; ParseMarkdown calls mconverted.parse(data) and passes
// the resulting HTML through DOMPurify.sanitize.
// NOTE(review): option meanings below follow the marked documentation — confirm
// against the installed version (5.1.1 per the lockfile).
const mconverted = new Marked({
// GitHub Flavored Markdown parsing.
gfm: true,
// Treat single line breaks as <br>.
breaks: true,
// Do not throw on parse errors.
silent: true,
// Empty tokenizer override object: no custom tokenizers are registered here.
tokenizer: {
}
})
const safeConvertor = new showdown.Converter({ const safeConvertor = new showdown.Converter({
simpleLineBreaks: true, simpleLineBreaks: true,
strikethrough: true, strikethrough: true,
@@ -90,7 +102,7 @@ export async function ParseMarkdown(data:string, char:(character | groupChat) =
if(firstParsed !== data && char && char.type !== 'group'){ if(firstParsed !== data && char && char.type !== 'group'){
data = await parseAdditionalAssets(data, char, mode) data = await parseAdditionalAssets(data, char, mode)
} }
return DOMPurify.sanitize(convertor.makeHtml(data), { return DOMPurify.sanitize(mconverted.parse(data), {
ADD_TAGS: ["iframe"], ADD_TAGS: ["iframe"],
ADD_ATTR: ["allow", "allowfullscreen", "frameborder", "scrolling"], ADD_ATTR: ["allow", "allowfullscreen", "frameborder", "scrolling"],
}) })
@@ -242,15 +254,30 @@ function wppParser(data:string){
return characterDetails; return characterDetails;
} }
const rgx = /(?:{{|<)(.+?)(?:}}|>)/gm const rgx = /(?:{{|<)(.+?)(?:}}|>)/gm
export function risuChatParser(da:string, arg:{ export function risuChatParser(da:string, arg:{
chatID?:number chatID?:number
db?:Database db?:Database
chara?:string|character chara?:string|character|groupChat
} = {}):string{ } = {}):string{
const chatID = arg.chatID ?? -1 const chatID = arg.chatID ?? -1
const db = arg.db ?? get(DataBase) const db = arg.db ?? get(DataBase)
return da.replace(rgx, (v, p1:string) => { const aChara = arg.chara
let chara:character|string = null
if(aChara){
if(typeof(aChara) !== 'string' && aChara.type === 'group'){
const gc = findCharacterbyId(aChara.chats[aChara.chatPage].message.at(-1).saying ?? '')
if(gc.name !== 'Unknown Character'){
chara = gc
}
}
else{
chara = aChara
}
}
const matcher = (p1:string) => {
const lowerCased = p1.toLocaleLowerCase() const lowerCased = p1.toLocaleLowerCase()
switch(lowerCased){ switch(lowerCased){
case 'previous_char_chat':{ case 'previous_char_chat':{
@@ -285,7 +312,6 @@ export function risuChatParser(da:string, arg:{
} }
case 'char': case 'char':
case 'bot':{ case 'bot':{
const chara = arg.chara
if(chara){ if(chara){
if(typeof(chara) === 'string'){ if(typeof(chara) === 'string'){
return chara return chara
@@ -303,21 +329,21 @@ export function risuChatParser(da:string, arg:{
} }
case 'personality': case 'personality':
case 'char_persona':{ case 'char_persona':{
const argChara = arg.chara const argChara = chara
const chara = (argChara && typeof(argChara) !== 'string') ? argChara : (db.characters[get(selectedCharID)]) const achara = (argChara && typeof(argChara) !== 'string') ? argChara : (db.characters[get(selectedCharID)])
if(chara.type === 'group'){ if(achara.type === 'group'){
return "" return ""
} }
return chara.personality return achara.personality
} }
case 'persona': case 'persona':
case 'user_persona':{ case 'user_persona':{
const argChara = arg.chara const argChara = chara
const chara = (argChara && typeof(argChara) !== 'string') ? argChara : (db.characters[get(selectedCharID)]) const achara = (argChara && typeof(argChara) !== 'string') ? argChara : (db.characters[get(selectedCharID)])
if(chara.type === 'group'){ if(achara.type === 'group'){
return "" return ""
} }
return chara.personality return achara.personality
} }
case 'ujb': case 'ujb':
case 'global_note':{ case 'global_note':{
@@ -350,7 +376,7 @@ export function risuChatParser(da:string, arg:{
return `<button style="padding" x-risu-prompt="${arra[2]}">${arra[1]}</button>` return `<button style="padding" x-risu-prompt="${arra[2]}">${arra[1]}</button>`
} }
case 'risu':{ case 'risu':{
return `<img src="/logo2.png" />` return `<img src="/logo2.png" style="height:${v || 45}px;width:${v || 45}px" />`
} }
} }
} }
@@ -365,11 +391,69 @@ export function risuChatParser(da:string, arg:{
return arr[randomIndex] return arr[randomIndex]
} }
} }
return v return null
}) }
// Single-pass scanner over `da` that expands `{{...}}` and `<...>` tokens,
// including tokens nested inside other tokens.
//
// `nested` is a stack of text buffers: nested[0] is the innermost buffer
// currently being filled, the last element is the top-level output.
// `v[depth]` records which opener created the level at stack size `depth`
// (1 = "{{", 2 = "<"), so a closer only matches an opener of the same kind.
// `v` has 255 slots; writes past the end of a typed array are ignored, so
// levels deeper than that are never recognised as matching their closer.
let pointer = 0;
let nested:string[] = [""]
let v = new Uint8Array(255)
while(pointer < da.length){
    switch(da[pointer]){
        case '{':{
            // A lone "{" is ordinary text; only "{{" opens a level.
            if(da[pointer + 1] !== '{'){
                nested[0] += da[pointer]
                break
            }
            pointer++
            nested.unshift('')
            v[nested.length] = 1
            break
        }
        case '<':{
            // Every "<" opens a level; if it is never closed, the tail code
            // below puts the literal "<" back.
            nested.unshift('')
            v[nested.length] = 2
            break
        }
        case '}':{
            // Only "}}" closing a "{{" level counts; anything else is text.
            if(da[pointer + 1] !== '}' || nested.length === 1 || v[nested.length] !== 1){
                nested[0] += da[pointer]
                break
            }
            pointer++
            const dat = nested.shift()
            const mc = matcher(dat)
            // matcher returns null for unknown tokens: restore the original text.
            nested[0] += mc ?? `{{${dat}}}`
            break
        }
        case '>':{
            // A ">" that does not close a "<" level is ordinary text
            // (comparisons, arrows, markdown quotes) and must be kept,
            // mirroring how an unmatched "}" is handled above.
            if(nested.length === 1 || v[nested.length] !== 2){
                nested[0] += da[pointer]
                break
            }
            const dat = nested.shift()
            const mc = matcher(dat)
            // matcher returns null for unknown tokens: restore the original text.
            nested[0] += mc ?? `<${dat}>`
            break
        }
        default:{
            nested[0] += da[pointer]
            break
        }
    }
    pointer++
}
// Everything was closed: the only remaining buffer is the finished output.
if(nested.length === 1){
    return nested[0]
}
// Some openers were never closed. Unwind from the innermost level outwards,
// re-attaching each level's literal opener in front of its collected text.
let result = ''
while(nested.length > 1){
    let dat = (v[nested.length] === 1) ? '{{' : "<"
    dat += nested.shift()
    result = dat + result
}
return nested[0] + result
} }
function getVarChat(targetIndex = -1){ function getVarChat(targetIndex = -1){
const db = get(DataBase) const db = get(DataBase)
const selchar = db.characters[get(selectedCharID)] const selchar = db.characters[get(selectedCharID)]
@@ -430,7 +514,7 @@ function getVarChat(targetIndex = -1){
break break
} }
case "min":{ case "min":{
if(parseInt(vars[rule.key]) > parseInt(rule.arg)){ if(parseInt(vars[rule.key]) < parseInt(rule.arg)){
vars[rule.key] = rule.arg vars[rule.key] = rule.arg
} }
break break