Comments (2)
I've created another small hack, perhaps it's useful to others too.
In the packaged script, search for `mainStreaming`. Replace that part with:
/**
 * Chat completion (OpenAI style) with streaming, where delta is sent while
 * generating the response.
 *
 * `my_webllm` is a small global wrapper around the WebLLM engine. The default
 * callbacks below only log to the console; they can be replaced through the
 * set*Callback helpers defined further down.
 */
let my_webllm = {};

// Handle to the WebLLM engine; populated by loadModel(), null until then.
my_webllm['engine'] = null;

// Invoked repeatedly while the model is downloading / initializing.
my_webllm['initProgressCallback'] = (report) => {
console.log("WebLLM: init report: ", report);
};

// Invoked once the engine has finished loading the model.
my_webllm['initCompleteCallback'] = () => {
console.log("WebLLM: init complete");
};

// Invoked for every streamed chunk with the raw chunk, the concatenated
// message so far, and the newly generated delta text.
my_webllm['chunkCallback'] = (chunk, message_so_far, addition) => {
console.log("WebLLM: chunk callback: chunk,message_so_far,addition: ", chunk, message_so_far, addition);
};

// Invoked with the complete final message once generation finishes.
my_webllm['completeCallback'] = (message) => {
console.log("WebLLM: complete callback: message: ", message);
};

// Invoked with a runtime statistics string after generation finishes.
my_webllm['statsCallback'] = (stats) => {
console.log("WebLLM: stats callback: stats: ", stats);
};
/**
 * Load the given model into a WebWorker-backed WebLLM engine and store it on
 * my_webllm.engine. Fires initProgressCallback during loading and
 * initCompleteCallback once the engine is ready.
 *
 * @param {string} selectedModel - WebLLM model identifier, e.g. 'Llama-3-8B-Instruct-q4f32_1'.
 * @returns {Promise<boolean|undefined>} false when no valid model string was given.
 */
my_webllm['loadModel'] = async function(selectedModel) {
if(typeof selectedModel != 'string'){
console.error("WebLLM: no valid model string provided");
// Bug fix: previously execution fell through and still called
// CreateWebWorkerEngine with the invalid model value. Bail out instead.
return false;
}
// "b16cbe164a5b9742" is the bundler-generated module id of the worker script.
my_webllm['engine'] = await _webLlm.CreateWebWorkerEngine(new Worker(require("b16cbe164a5b9742")), selectedModel, {
initProgressCallback: my_webllm.initProgressCallback
});
my_webllm.initCompleteCallback();
};
/**
 * Build a setter that installs `callback` as my_webllm[callbackName] after
 * validating that it is actually a function. All five public setters shared
 * this exact copy-pasted logic, so it is generated once here instead.
 *
 * @param {string} callbackName - key on my_webllm to assign the callback to.
 * @returns {function(Function): Promise<void>} async setter (async kept for
 *     backward compatibility with the original copy-pasted setters).
 */
function makeCallbackSetter(callbackName) {
return async function(callback) {
if(typeof callback === 'function'){
my_webllm[callbackName] = callback;
}
else{
// Produces the same messages as the original setters, e.g.
// "WebLLM: no valid initProgressCallback provided".
console.error(`WebLLM: no valid ${callbackName} provided`);
}
};
}

my_webllm['setInitProgressCallback'] = makeCallbackSetter('initProgressCallback');
my_webllm['setInitCompleteCallback'] = makeCallbackSetter('initCompleteCallback');
my_webllm['setChunkCallback'] = makeCallbackSetter('chunkCallback');
my_webllm['setCompleteCallback'] = makeCallbackSetter('completeCallback');
my_webllm['setStatsCallback'] = makeCallbackSetter('statsCallback');
/**
 * Run a streaming chat completion against the loaded engine.
 *
 * Streams chunks through chunkCallback, then fires completeCallback with the
 * final concatenated message and statsCallback with the runtime stats string.
 *
 * @param {Object} request - OpenAI-style chat completion request; must carry
 *     a `messages` array and should set `stream: true`.
 * @returns {Promise<boolean|undefined>} false when the engine is not loaded
 *     or the request is invalid.
 */
my_webllm['doChat'] = async function(request) {
if(my_webllm.engine == null){
console.error("WebLLM: aborting, engine has not been started yet");
return false;
}
if(typeof request != 'undefined' && request != null && typeof request.messages != 'undefined'){
const asyncChunkGenerator = await my_webllm.engine.chat.completions.create(request);
let message = "";
for await (const chunk of asyncChunkGenerator){
// The final chunk arrives with undefined delta content; skip appending it.
if (chunk.choices[0].delta.content)
message += chunk.choices[0].delta.content;
my_webllm['chunkCallback'](chunk, message, chunk.choices[0].delta.content);
// setLabel comes from the original WebLLM demo page; guard it so this
// wrapper also works on pages without a "generate-label" element
// (previously this threw a ReferenceError there).
if (typeof setLabel === 'function') {
setLabel("generate-label", message);
}
// engine.interruptGenerate(); // works with interrupt as well
}
const final_message = await my_webllm.engine.getMessage();
my_webllm.completeCallback(final_message);
console.log("WebLLM: Final message:\n", final_message); // the concatenated message
let stats = await my_webllm.engine.runtimeStatsText();
my_webllm['statsCallback'](stats);
}
else{
console.error("WebLLM: no valid prompt message provided");
return false;
}
};
// Expose the wrapper globally so page scripts (e.g. getStarted.html) can
// reach it after the bundle has loaded.
window.my_webllm = my_webllm;
console.log("You can now use window.my_webllm: ", window.my_webllm);
// Original demo entry points, intentionally left disabled:
// mainNonStreaming();
//mainStreaming();
Then, in the getStarted.html file, replace the end with this:
<!-- Load the patched bundle first (defer), then drive it from an inline script. -->
<script src="/get_started.b7a05eb9.js" defer=""></script>
<script>
// OpenAI-style streaming chat completion request handed to my_webllm.doChat().
const request = {
stream: true,
messages: [
{
"role": "system",
"content": "You are a helpful, respectful and honest assistant. Be as happy as you can when speaking please. "
},
{
"role": "user",
"content": "Provide me three US states."
},
{
"role": "assistant",
"content": "California, New York, Pennsylvania."
},
{
"role": "user",
"content": "Two more please!"
}
],
temperature: 1.5,
// NOTE(review): newer WebLLM builds use OpenAI-style `max_tokens` instead of
// `max_gen_len` — confirm against the version of the bundle you patched.
max_gen_len: 256
};
// Wait for page load so the deferred bundle has installed window.my_webllm.
window.onload = init;
async function init() {
console.log("window.my_webllm: ", window.my_webllm);
if(window.my_webllm){
// Load the model first, then run the streaming chat with the request above.
await window.my_webllm.loadModel('Llama-3-8B-Instruct-q4f32_1');
await window.my_webllm.doChat(request);
}
}
</script>
</body></html>
from web-llm.
Solved by finally having a single JavaScript file that can simply be loaded in. Loving it.
from web-llm.
Related Issues (20)
- Sending raw text to the model HOT 4
- Deploy small LLM in a chrome extension HOT 2
- Runing LLM in a webworker fails due to loglevel dependency HOT 1
- support concurrent inference from multiple models HOT 4
- anyone tried to run web-llm in tauri?
- Request: Allow deletion of individual cached models. HOT 3
- LLama 3.1 Error: Device was lost during reload. This can happen due to insufficient memory or other GPU constraints. Detailed error: [object GPUDeviceLostInfo]. Please try to reload WebLLM with a less resource-intensive model. HOT 4
- Custom model outputs garbage in firefox nightly, works fine in chrome.
- Phi 3 Mini output near random (Phi-3-mini-4k-instruct-q4f16_1-MLC) HOT 9
- Support concurrent requests to a single model instance HOT 5
- Gemma 2 2B crashes on mobile phone HOT 16
- [Tracking][WebLLM] Function calling (beta) and Embeddings HOT 1
- Feature request: engine.preload() HOT 5
- I can't find a method to stop a conversation in progress. HOT 1
- TypeError: Cannot read properties of undefined (reading 'origin') HOT 1
- vercel/ai provider integration HOT 1
- Use subgroup operations when possible HOT 5
- DuckDB-NSQL-7B Model
- Usage Stats in Intermediate Steps HOT 3
- Engine Reuse Fails with Different JSON Schemas - "Module has already been disposed" Error HOT 2
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from web-llm.