[feat] new fast parser

This commit is contained in:
kwaroran
2023-07-17 16:30:55 +09:00
parent 1c409a04f7
commit 297568e704
5 changed files with 126 additions and 20 deletions

View File

@@ -18,6 +18,7 @@
"@dqbd/tiktoken": "^1.0.4",
"@mlc-ai/web-tokenizers": "^0.1.0",
"@tauri-apps/api": "1.4.0",
"@types/marked": "^5.0.1",
"blueimp-md5": "^2.19.0",
"body-parser": "^1.20.2",
"buffer": "^6.0.3",
@@ -33,6 +34,7 @@
"localforage": "^1.10.0",
"lodash": "^4.17.21",
"lucide-svelte": "^0.260.0",
"marked": "^5.1.1",
"ml-distance": "^4.0.1",
"mobile-drag-drop": "3.0.0-rc.0",
"msgpackr": "^1.9.5",

16
pnpm-lock.yaml generated
View File

@@ -17,6 +17,9 @@ dependencies:
'@tauri-apps/api':
specifier: 1.4.0
version: 1.4.0
'@types/marked':
specifier: ^5.0.1
version: 5.0.1
blueimp-md5:
specifier: ^2.19.0
version: 2.19.0
@@ -62,6 +65,9 @@ dependencies:
lucide-svelte:
specifier: ^0.260.0
version: 0.260.0(svelte@3.58.0)
marked:
specifier: ^5.1.1
version: 5.1.1
ml-distance:
specifier: ^4.0.1
version: 4.0.1
@@ -805,6 +811,10 @@ packages:
resolution: {integrity: sha512-r22s9tAS7imvBt2lyHC9B8AGwWnXaYb1tY09oyLkXDs4vArpYJzw09nj8MLx5VfciBPGIb+ZwG0ssYnEPJxn/g==}
dev: true
/@types/marked@5.0.1:
resolution: {integrity: sha512-Y3pAUzHKh605fN6fvASsz5FDSWbZcs/65Q6xYRmnIP9ZIYz27T4IOmXfH9gWJV1dpi7f1e7z7nBGUTx/a0ptpA==}
dev: false
/@types/node@18.15.11:
resolution: {integrity: sha512-E5Kwq2n4SbMzQOn6wnmBjuK9ouqlURrcZDVfbo9ftDDTFt3nk7ZKK4GMOzoYgnpQJKcxwQw+lGaBvvlMo0qN/Q==}
@@ -1922,6 +1932,12 @@ packages:
'@jridgewell/sourcemap-codec': 1.4.14
dev: true
/marked@5.1.1:
resolution: {integrity: sha512-bTmmGdEINWmOMDjnPWDxGPQ4qkDLeYorpYbEtFOXzOruTwUE671q4Guiuchn4N8h/v6NGd7916kXsm3Iz4iUSg==}
engines: {node: '>= 18'}
hasBin: true
dev: false
/media-typer@0.3.0:
resolution: {integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==}
engines: {node: '>= 0.6'}

View File

@@ -1,5 +1,5 @@
<script lang="ts">
import { ParseMarkdown } from "src/ts/parser";
import { ParseMarkdown, risuChatParser } from "src/ts/parser";
import { DataBase, type Database, type character, type groupChat } from "src/ts/storage/database";
import { selectedCharID } from "src/ts/stores";
import { onDestroy } from "svelte";
@@ -43,7 +43,7 @@
{#if backgroundHTML}
<div class="absolute top-0 left-0 w-full h-full">
{#await ParseMarkdown(backgroundHTML, currentChar, 'back') then md}
{#await ParseMarkdown(risuChatParser(backgroundHTML, {chara:currentChar}), currentChar, 'back') then md}
{@html md}
{/await}
</div>

View File

@@ -45,6 +45,10 @@ html, body{
color: var(--FontColorItalicBold);
}
/* Bold text nested inside italic text within .chattext uses the italic-bold font colour variable. */
.chattext em strong{
color: var(--FontColorItalicBold);
}
::-webkit-scrollbar {
width: 5px;
height: 5px;

View File

@@ -1,5 +1,7 @@
import DOMPurify from 'isomorphic-dompurify';
import showdown from 'showdown';
import { Marked } from 'marked';
import { DataBase, type Database, type character, type groupChat } from './storage/database';
import { getFileSrc } from './storage/globalApi';
import { processScript, processScriptFull } from './process/scripts';
@@ -7,13 +9,23 @@ import { get } from 'svelte/store';
import css from '@adobe/css-tools'
import { selectedCharID } from './stores';
import { calcString } from './process/infunctions';
import { findCharacterbyId } from './util';
const convertor = new showdown.Converter({
const convertora = new showdown.Converter({
simpleLineBreaks: true,
strikethrough: true,
tables: true
})
// Shared `marked` instance whose parse() output ParseMarkdown passes to DOMPurify.
// Options set here: gfm, breaks and silent are all enabled.
// NOTE(review): `tokenizer` is an empty override object — presumably a placeholder
// for custom tokenizer rules; confirm it is intentional.
const mconverted = new Marked({
gfm: true,
breaks: true,
silent: true,
tokenizer: {
}
})
const safeConvertor = new showdown.Converter({
simpleLineBreaks: true,
strikethrough: true,
@@ -90,7 +102,7 @@ export async function ParseMarkdown(data:string, char:(character | groupChat) =
if(firstParsed !== data && char && char.type !== 'group'){
data = await parseAdditionalAssets(data, char, mode)
}
return DOMPurify.sanitize(convertor.makeHtml(data), {
return DOMPurify.sanitize(mconverted.parse(data), {
ADD_TAGS: ["iframe"],
ADD_ATTR: ["allow", "allowfullscreen", "frameborder", "scrolling"],
})
@@ -242,15 +254,30 @@ function wppParser(data:string){
return characterDetails;
}
const rgx = /(?:{{|<)(.+?)(?:}}|>)/gm
export function risuChatParser(da:string, arg:{
chatID?:number
db?:Database
chara?:string|character
chara?:string|character|groupChat
} = {}):string{
const chatID = arg.chatID ?? -1
const db = arg.db ?? get(DataBase)
return da.replace(rgx, (v, p1:string) => {
const aChara = arg.chara
let chara:character|string = null
if(aChara){
if(typeof(aChara) !== 'string' && aChara.type === 'group'){
const gc = findCharacterbyId(aChara.chats[aChara.chatPage].message.at(-1).saying ?? '')
if(gc.name !== 'Unknown Character'){
chara = gc
}
}
else{
chara = aChara
}
}
const matcher = (p1:string) => {
const lowerCased = p1.toLocaleLowerCase()
switch(lowerCased){
case 'previous_char_chat':{
@@ -285,7 +312,6 @@ export function risuChatParser(da:string, arg:{
}
case 'char':
case 'bot':{
const chara = arg.chara
if(chara){
if(typeof(chara) === 'string'){
return chara
@@ -303,21 +329,21 @@ export function risuChatParser(da:string, arg:{
}
case 'personality':
case 'char_persona':{
const argChara = arg.chara
const chara = (argChara && typeof(argChara) !== 'string') ? argChara : (db.characters[get(selectedCharID)])
if(chara.type === 'group'){
const argChara = chara
const achara = (argChara && typeof(argChara) !== 'string') ? argChara : (db.characters[get(selectedCharID)])
if(achara.type === 'group'){
return ""
}
return chara.personality
return achara.personality
}
case 'persona':
case 'user_persona':{
const argChara = arg.chara
const chara = (argChara && typeof(argChara) !== 'string') ? argChara : (db.characters[get(selectedCharID)])
if(chara.type === 'group'){
const argChara = chara
const achara = (argChara && typeof(argChara) !== 'string') ? argChara : (db.characters[get(selectedCharID)])
if(achara.type === 'group'){
return ""
}
return chara.personality
return achara.personality
}
case 'ujb':
case 'global_note':{
@@ -350,7 +376,7 @@ export function risuChatParser(da:string, arg:{
return `<button style="padding" x-risu-prompt="${arra[2]}">${arra[1]}</button>`
}
case 'risu':{
return `<img src="/logo2.png" />`
return `<img src="/logo2.png" style="height:${v || 45}px;width:${v || 45}px" />`
}
}
}
@@ -365,10 +391,68 @@ export function risuChatParser(da:string, arg:{
return arr[randomIndex]
}
}
return v
})
}
return null
}
// Single left-to-right scan over `da` that expands `{{...}}` and `<...>` tags
// through `matcher`, supporting nested tags via a stack of text buffers.
//
// `nested` is used as a stack whose top is index 0: nested[0] is the buffer
// currently being filled, and the last element is the top-level output.
// Opening a tag pushes a fresh buffer; closing one pops it, passes its text to
// `matcher`, and appends the replacement (or the original tag text when
// `matcher` returns null/undefined) to the buffer underneath.
let pointer = 0;
let nested:string[] = [""]
// NOTE(review): `pf` is not read anywhere in the visible code — confirm before removing.
let pf = performance.now()
// v[depth] records which opener created the buffer at stack depth `depth`
// (depth === nested.length right after the push): 1 for "{{", 2 for "<".
// NOTE(review): the array has 255 slots; typed arrays silently ignore
// out-of-range writes, so tags nested deeper than that would not be matched.
// NOTE(review): the 'risu' branch of `matcher` interpolates an identifier named
// `v`; it appears to resolve to this array rather than a size — verify.
let v = new Uint8Array(255)
while(pointer < da.length){
    switch(da[pointer]){
        case '{':{
            // A lone "{" is ordinary text; only "{{" opens a tag.
            if(da[pointer + 1] !== '{'){
                nested[0] += da[pointer]
                break
            }
            pointer++
            nested.unshift('')
            v[nested.length] = 1
            break
        }
        case '<':{
            nested.unshift('')
            v[nested.length] = 2
            break
        }
        case '}':{
            // Only "}}" closing a buffer opened by "{{" ends a tag; anything else is text.
            if(da[pointer + 1] !== '}' || nested.length === 1 || v[nested.length] !== 1){
                nested[0] += da[pointer]
                break
            }
            pointer++
            const dat = nested.shift()
            const mc = matcher(dat)
            nested[0] += mc ?? `{{${dat}}}`
            break
        }
        case '>':{
            // A ">" that does not close a buffer opened by "<" is ordinary text.
            // Fix: keep the character instead of silently dropping it, mirroring
            // how the '}' case treats an unmatched closer.
            if(nested.length === 1 || v[nested.length] !== 2){
                nested[0] += da[pointer]
                break
            }
            const dat = nested.shift()
            const mc = matcher(dat)
            nested[0] += mc ?? `<${dat}>`
            break
        }
        default:{
            nested[0] += da[pointer]
            break
        }
    }
    pointer++
}
// Every opened tag was closed: the single remaining buffer is the result.
if(nested.length === 1){
    return nested[0]
}
// Some tags were never closed: put each pending buffer back behind its opener,
// innermost first, so the unterminated text is emitted unchanged.
let result = ''
while(nested.length > 1){
    let dat = (v[nested.length] === 1) ? '{{' : "<"
    dat += nested.shift()
    result = dat + result
}
return nested[0] + result
}
function getVarChat(targetIndex = -1){
const db = get(DataBase)
@@ -430,7 +514,7 @@ function getVarChat(targetIndex = -1){
break
}
case "min":{
if(parseInt(vars[rule.key]) > parseInt(rule.arg)){
if(parseInt(vars[rule.key]) < parseInt(rule.arg)){
vars[rule.key] = rule.arg
}
break