Add Llama3 tokenizer support
This commit is contained in:
410504
public/token/llama/llama3.json
Normal file
410504
public/token/llama/llama3.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -14,6 +14,7 @@ export const tokenizerList = [
|
|||||||
['novelai', 'NovelAI'],
|
['novelai', 'NovelAI'],
|
||||||
['claude', 'Claude'],
|
['claude', 'Claude'],
|
||||||
['llama', 'Llama'],
|
['llama', 'Llama'],
|
||||||
|
['llama3', 'Llama3'],
|
||||||
['novellist', 'Novellist'],
|
['novellist', 'Novellist'],
|
||||||
] as const
|
] as const
|
||||||
|
|
||||||
@@ -31,6 +32,8 @@ async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
|
|||||||
return await tokenizeWebTokenizers(data, 'claude')
|
return await tokenizeWebTokenizers(data, 'claude')
|
||||||
case 'novellist':
|
case 'novellist':
|
||||||
return await tokenizeWebTokenizers(data, 'novellist')
|
return await tokenizeWebTokenizers(data, 'novellist')
|
||||||
|
case 'llama3':
|
||||||
|
return await tokenizeWebTokenizers(data, 'llama3')
|
||||||
default:
|
default:
|
||||||
return await tikJS(data)
|
return await tikJS(data)
|
||||||
}
|
}
|
||||||
@@ -71,7 +74,7 @@ async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
|
|||||||
return await tikJS(data)
|
return await tikJS(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'|'mistral'
|
type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'|'mistral'|'llama3'
|
||||||
|
|
||||||
let tikParser:Tiktoken = null
|
let tikParser:Tiktoken = null
|
||||||
let tokenizersTokenizer:Tokenizer = null
|
let tokenizersTokenizer:Tokenizer = null
|
||||||
@@ -105,6 +108,11 @@ async function tokenizeWebTokenizers(text:string, type:tokenizerType) {
|
|||||||
await (await fetch("/token/claude/claude.json")
|
await (await fetch("/token/claude/claude.json")
|
||||||
).arrayBuffer())
|
).arrayBuffer())
|
||||||
break
|
break
|
||||||
|
case 'llama3':
|
||||||
|
tokenizersTokenizer = await webTokenizer.Tokenizer.fromJSON(
|
||||||
|
await (await fetch("/token/llama/llama3.json")
|
||||||
|
).arrayBuffer())
|
||||||
|
break
|
||||||
case 'novelai':
|
case 'novelai':
|
||||||
tokenizersTokenizer = await webTokenizer.Tokenizer.fromSentencePiece(
|
tokenizersTokenizer = await webTokenizer.Tokenizer.fromSentencePiece(
|
||||||
await (await fetch("/token/nai/nerdstash_v2.model")
|
await (await fetch("/token/nai/nerdstash_v2.model")
|
||||||
|
|||||||
Reference in New Issue
Block a user