diff --git a/package.json b/package.json index 826f974c..cad419b9 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "fflate": "^0.8.1", "gpt-3-encoder": "^1.1.4", "gpt3-tokenizer": "^1.1.5", + "highlight.js": "^11.9.0", "html-to-image": "^1.11.11", "isomorphic-dompurify": "^1.13.0", "jszip": "^3.10.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 29001994..3a66ddc5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -92,6 +92,9 @@ importers: gpt3-tokenizer: specifier: ^1.1.5 version: 1.1.5 + highlight.js: + specifier: ^11.9.0 + version: 11.9.0 html-to-image: specifier: ^1.11.11 version: 1.11.11 @@ -2024,6 +2027,10 @@ packages: resolution: {integrity: sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==} hasBin: true + highlight.js@11.9.0: + resolution: {integrity: sha512-fJ7cW7fQGCYAkgv4CPfwFHrfd/cLS4Hau96JuJ+ZTOWhjnhoeN1ub1tFmALm/+lW5z4WCAuAV9bm05AP0mS6Gw==} + engines: {node: '>=12.0.0'} + hosted-git-info@2.8.9: resolution: {integrity: sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==} @@ -5617,6 +5624,8 @@ snapshots: he@1.2.0: {} + highlight.js@11.9.0: {} + hosted-git-info@2.8.9: {} hosted-git-info@4.1.0: diff --git a/src/ts/parser.ts b/src/ts/parser.ts index 4f356c2a..68bc30f6 100644 --- a/src/ts/parser.ts +++ b/src/ts/parser.ts @@ -9,23 +9,43 @@ import { CurrentChat, SizeStore, selectedCharID } from './stores'; import { calcString } from './process/infunctions'; import { findCharacterbyId, parseKeyValue, sfc32, sleep, uuidtoNumber } from './util'; import { getInlayImage, writeInlayImage } from './process/files/image'; -import { risuFormater } from './plugins/automark'; import { getModuleLorebooks } from './process/modules'; import { HypaProcesser } from './process/memory/hypamemory'; import { generateAIImage } from './process/stableDiff'; import { requestChatData } from './process/request'; import type { OpenAIChat } from './process'; import { alertInput, alertNormal } from './alert'; +import hljs from 'highlight.js/lib/core' +import hljavascript from 'highlight.js/lib/languages/javascript'; +import hlpython from 'highlight.js/lib/languages/python'; +import hlcss from 'highlight.js/lib/languages/css'; +import hlhtml from 'highlight.js/lib/languages/xml'; +import hllua from 'highlight.js/lib/languages/lua'; +import 'highlight.js/styles/atom-one-dark.min.css' + +hljs.registerLanguage('javascript', hljavascript); +hljs.registerLanguage('python', hlpython); +hljs.registerLanguage('css', hlcss); +hljs.registerLanguage('html', hlhtml); +hljs.registerLanguage('lua', hllua); const mconverted = markdownit({ html: true, breaks: true, - linkify: false + linkify: false, + typographer: true, + quotes: '\u{E9b0}\u{E9b1}\u{E9b2}\u{E9b3}', //placeholder characters to convert to real quotes + highlight: function (str, lang) { + if (lang && hljs.getLanguage(lang)) { + try { + return '
' + hljs.highlight(lang, str, true).value + '
'; + } catch (__) {} + } + return '' + } }) mconverted.disable(['code']) - - DOMPurify.addHook("uponSanitizeElement", (node: HTMLElement, data) => { if (data.tagName === "iframe") { const src = node.getAttribute("src") || ""; @@ -44,6 +64,9 @@ DOMPurify.addHook("uponSanitizeAttribute", (node, data) => { case 'class':{ if(data.attrValue){ data.attrValue = data.attrValue.split(' ').map((v) => { + if(v.startsWith('hljs')){ + return v + } return "x-risu-" + v }).join(' ') } @@ -60,6 +83,14 @@ DOMPurify.addHook("uponSanitizeAttribute", (node, data) => { } }) +function renderMarkdown(data:string){ + return mconverted.render(data) + .replace(/\uE9b0/gu, '“') + .replace(/\uE9b1/gu, '”') + .replace(/\uE9b2/gu, '‘') + .replace(/\uE9b3/gu, '’') +} + export const assetRegex = /{{(raw|img|video|audio|bg|emotion|asset|video-img)::(.+?)}}/g @@ -180,8 +211,7 @@ export async function ParseMarkdown(data:string, charArg:(character|simpleCharac data = encodeStyle(data) if(mode === 'normal'){ - data = risuFormater(data) - data = mconverted.render(data) + data = renderMarkdown(data) } return decodeStyle(DOMPurify.sanitize(data, { ADD_TAGS: ["iframe", "style", "risu-style", "x-em"], @@ -190,7 +220,7 @@ export async function ParseMarkdown(data:string, charArg:(character|simpleCharac } export function postTranslationParse(data:string){ - let lines = risuFormater(data).split('\n') + let lines = data.split('\n') for(let i=0;i','{','}','[',']','(',')','-',':',';','…','—','–','_','*','+','/','\\','|','!','?','.',',',' '] -const symbols = ['<','>','{','}','[',']','(',')','-',':',';','…','—','–','_','*','+','/','\\','|','!','?','.',',',' ', '\n', '。', '、', '!', '?', ',', ';', ':', '(', ')', '【', '】', '「', '」', '『', '』', '“', '”', '‘', '’', '《', '》', '〈', '〉', '‹', '›', '«', '»', '‟', '„'] - -const selfClosingTags = [ - 'br','hr','img','input','meta','link','base','area','col','command','embed','keygen','param','source','track','wbr', - //self closing tags defined by HTML5 - '!', - //for doctype and comment - 'user', 'bot', 'char' - //special tags for user, bot, and char -] - -const checkSelfClosingTag = (dat:string) => { - dat = dat.substring(0, 10) //we only need to check the first 10 characters, to avoid checking the whole string - dat = dat.toLowerCase() //we don't care about the case - for(const tag of selfClosingTags){ - if(dat.startsWith(tag)){ - return true - } - } - return false -} - -export function risuFormater(dat:string){ - const lines:[string,string][] = [['','']] // [type, content] - let htmlType = 0 // 0: not inside tag, 1: closing tag, 2: opening tag - for(let i=0;i { - return lines[lines.length-1] ?? [ - 'not-found', '' - ] - } - //html tag handling - if(dat[i] === '<' && getLastLine()[0] !== 'code-block'){ - lines.push(['html-tag','']) - if(dat[i+1] === '/'){ - htmlType = 1 - } - else{ - htmlType = 2 - } - } - - if(dat[i] === '>' && getLastLine()[0] === 'html-tag'){ - const pop = lines.pop() - const tagAttr = pop[1].substring(1).trim() - if(htmlType === 1){ - const pop2 = lines.pop() //probably html-inner - const chunk = pop2[1] + pop[1] + '>' - if(getLastLine()[0] === ''){ - lines.push(['html-chunk',chunk]) - lines.push(['','']) - } - else{ - getLastLine()[1] += chunk - } - continue - } - else if(checkSelfClosingTag(tagAttr)){ - const chunk = pop[1] + '>' - if(getLastLine()[0] === ''){ - lines.push(['html-chunk',chunk]) - lines.push(['','']) - } - else{ - getLastLine()[1] += chunk - } - continue - } - else{ - lines.push(['html-inner',pop[1]]) - } - htmlType = 0 - } - - //code block handling - - if(dat[i] === '`' && dat[i+1] === '`' && dat[i+2] === '`' && getLastLine()[0] === ''){ - if(getLastLine()[0] === 'code-block'){ - getLastLine()[1] += '```' - lines.push(['','']) - } - else{ - lines.push(['code-block','```']) - } - i += 2 - continue - } - - - if(dat[i] === '\n' && getLastLine()[0] === ''){ - lines.push(['newline','\n']) - lines.push(['','']) - } - else if(lines[lines.length-1]){ - lines[lines.length-1][1] += dat[i] - } - } - - let result = '' - for(let i=0;i') || line.endsWith('}') || line.startsWith('<')){ - endMarked = true - } - - if(isNumbered || endMarked){ - result += line - continue - } - - let depth = 0 - let depthChunk:string[] = [''] - let depthChunkType:string[] = [''] - - //spaces for detection - line = ' ' + line + ' ' - - const isNotCharacter = (t:string) => { - return symbols.includes(t) - } - - for(let j=0;j${pop}${line[j]}` - } - else{ - depthChunkType.push('"') - depthChunk.push(line[j]) - depth++ - } - break - } - case "'": - case '‘': - case '’':{ - if(depthChunkType[depth] === "'"){ - if(isNotCharacter(line[j-1]) || !isNotCharacter(line[j+1]) || (line[j-2] === 'i' && line[j-1] === 'n')){ - //this is not a quote - depthChunk[depth] += line[j] - } - else{ - depthChunkType.pop() - const pop = depthChunk.pop() - depth-- - depthChunk[depth] += `${pop}${line[j]}` - } - } - else{ - if(!isNotCharacter(line[j-1]) || isNotCharacter(line[j+1])){ - //this is not a quote - depthChunk[depth] += line[j] - } - else{ - depthChunkType.push("'") - depthChunk.push(line[j]) - depth++ - } - } - break - - } - - default:{ - depthChunk[depth] += line[j] - } - } - } - - let lineResult = '' - - while(depthChunk.length > 0){ - lineResult = depthChunk.pop() + lineResult - } - - if(lineResult.startsWith(' ')){ - lineResult = lineResult.substring(1) - } - if(lineResult.endsWith(' ')){ - lineResult = lineResult.substring(0,lineResult.length-1) - } - - result += lineResult - } - - return result.trim() -} \ No newline at end of file