Comments (2)
@morpheuslord Yes, of course! Feel free to use the code in your project! :-) Happy to be a contributor :)
from llama2_local.
Update: I got it to work
import os
import fire
from enum import Enum
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
from transformers import TextIteratorStreamer
from llama_chat_format import format_to_llama_chat_style
from flask import Flask, request, jsonify
class Model_Type(Enum):
    """Kinds of model checkpoints this script knows how to load."""

    # GPTQ-quantized checkpoint (loaded through AutoGPTQForCausalLM).
    gptq = 1
    # GGML-quantized checkpoint (loaded through llama_cpp.Llama).
    ggml = 2
    # Anything else: loaded through AutoModelForCausalLM.
    full_precision = 3
def get_model_type(model_name):
    """Infer the checkpoint kind from the model name.

    The match is a case-insensitive substring test: names containing
    "gptq" map to ``Model_Type.gptq``, names containing "ggml" map to
    ``Model_Type.ggml``, and everything else is treated as
    ``Model_Type.full_precision``. "gptq" is checked first.
    """
    lowered = model_name.lower()
    if "gptq" in lowered:
        return Model_Type.gptq
    if "ggml" in lowered:
        return Model_Type.ggml
    return Model_Type.full_precision
def create_folder_if_not_exists(folder_path):
    """Create ``folder_path`` (including missing parents) if it is absent.

    Args:
        folder_path: Directory path to ensure exists.

    Raises:
        FileExistsError: If ``folder_path`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of
    # ``if not os.path.exists(...): os.makedirs(...)`` when two callers
    # (e.g. concurrent requests) hit this at the same time.
    os.makedirs(folder_path, exist_ok=True)
def initialize_gpu_model_and_tokenizer(model_name, model_type):
    """Load a model and its tokenizer for the non-GGML code path.

    Args:
        model_name: Model identifier passed to the ``from_quantized`` /
            ``from_pretrained`` loaders.
        model_type: ``Model_Type.gptq`` selects the AutoGPTQ loader; any
            other value uses ``AutoModelForCausalLM``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    if model_type != Model_Type.gptq:
        # Non-GPTQ checkpoints: both loaders are called with token=True.
        full_model = AutoModelForCausalLM.from_pretrained(
            model_name, device_map="auto", token=True)
        full_tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
        return full_model, full_tokenizer

    # GPTQ checkpoints: safetensors weights, Triton kernels disabled.
    quantized_model = AutoGPTQForCausalLM.from_quantized(
        model_name,
        device_map="auto",
        use_safetensors=True,
        use_triton=False,
    )
    quantized_tokenizer = AutoTokenizer.from_pretrained(model_name)
    return quantized_model, quantized_tokenizer
def init_auto_model_and_tokenizer(model_name, model_type, file_name=None):
    """Load the model (and tokenizer, if any) for ``model_name``.

    Args:
        model_name: Model identifier; for GGML models it is used as the
            ``repo_id`` for ``hf_hub_download``.
        model_type: A ``Model_Type`` member. If anything else is passed
            (e.g. ``None``), the type is inferred from ``model_name`` via
            ``get_model_type``.
        file_name: File to download from the repo; required for GGML
            models, ignored otherwise.

    Returns:
        A ``(model, tokenizer)`` tuple. ``tokenizer`` is ``None`` for GGML
        models.

    Raises:
        ValueError: If the model is GGML and ``file_name`` is ``None``.
    """
    # Previously the argument was unconditionally overwritten, making the
    # parameter dead. Honour a valid explicit value; otherwise keep the old
    # name-based inference so existing callers behave the same.
    if not isinstance(model_type, Model_Type):
        model_type = get_model_type(model_name)

    if model_type == Model_Type.ggml:
        if file_name is None:
            raise ValueError(
                "file_name must be provided when loading a GGML quantized "
                "model.")
        models_folder = "./models"
        create_folder_if_not_exists(models_folder)
        file_path = hf_hub_download(
            repo_id=model_name, filename=file_name, local_dir=models_folder)
        model = Llama(file_path, n_ctx=4096)
        # This path returns no separate tokenizer object.
        tokenizer = None
    else:
        model, tokenizer = initialize_gpu_model_and_tokenizer(
            model_name, model_type=model_type)
    return model, tokenizer
# Flask application object; the route below is registered on it via
# @app.route and run_app() starts it with app.run().
app = Flask(__name__)
@app.route('/api/chatbot', methods=['POST'])
def chatbot_api():
    """Handle ``POST /api/chatbot``: answer one user message with a model.

    Expected JSON body:
        user_message (str): The prompt text. Required.
        model_name (str): Model identifier. Required. A name containing
            "chat" (case-insensitive) gets the Llama chat prompt format.
        file_name (str): File to download; required only when
            ``model_name`` resolves to a GGML model.

    Returns:
        ``{"bot_response": <text>}`` on success, or
        ``({"error": <message>}, 400)`` when the request body is invalid.
    """
    # silent=True returns None instead of raising when the body is missing
    # or is not valid JSON, so every bad request gets the same JSON 400.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    user_message = data.get('user_message')
    model_name = data.get('model_name')
    file_name = data.get('file_name')
    if not isinstance(user_message, str) or not isinstance(model_name, str):
        return jsonify({
            'error': "'user_message' and 'model_name' are required "
                     "string fields."}), 400

    is_chat_model = 'chat' in model_name.lower()
    model_type = get_model_type(model_name)

    # Explicit check instead of ``assert``: assertions are stripped when
    # Python runs with -O, and a client error should be a 400, not a 500.
    if model_type == Model_Type.ggml and file_name is None:
        return jsonify({
            'error': 'When model_name is provided for a GGML quantized '
                     'model, file_name argument must also be provided.'}), 400

    # NOTE: the model is loaded anew on every request.
    model, tokenizer = init_auto_model_and_tokenizer(
        model_name, model_type, file_name)

    if is_chat_model:
        instruction = format_to_llama_chat_style([[user_message, None]])
    else:
        instruction = user_message

    # Single-turn history; the reply slot is left as None.
    history = [[user_message, None]]
    response = generate_response(
        model, tokenizer, instruction, history, model_type)
    return jsonify({'bot_response': response})
def generate_response(model, tokenizer, instruction, history, model_type):
    """Generate the complete reply text for ``instruction``.

    Args:
        model: For ``Model_Type.ggml``, a callable accepting ``prompt``,
            ``stream`` and sampling keyword arguments and yielding chunks;
            otherwise an object exposing ``generate`` and ``device``.
        tokenizer: Tokenizer used on the non-GGML path; unused for GGML.
        instruction: Prompt string sent to the model.
        history: Accepted for interface compatibility; not used here.
        model_type: ``Model_Type`` member selecting the generation path.

    Returns:
        The concatenated generated text.
    """
    # NOTE(review): on the transformers path, temperature/top_p presumably
    # only take effect when sampling is enabled (do_sample=True) - confirm
    # whether sampling was intended.
    kwargs = dict(temperature=0.6, top_p=0.9)
    pieces = []

    if model_type == Model_Type.ggml:
        kwargs["max_tokens"] = 512
        for chunk in model(prompt=instruction, stream=True, **kwargs):
            pieces.append(chunk["choices"][0]["text"])
        return "".join(pieces)

    # The keyword is ``timeout`` (lowercase). The previous ``Timeout=5`` was
    # not the streamer's timeout parameter, so no queue timeout was applied
    # and a failure inside the generation thread could block this loop
    # forever.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, timeout=5)
    inputs = tokenizer(instruction, return_tensors="pt").to(model.device)
    kwargs["max_new_tokens"] = 512
    kwargs["input_ids"] = inputs["input_ids"]
    kwargs["streamer"] = streamer

    # generate() runs in a worker thread so this thread can consume the
    # streamer while tokens are being produced.
    thread = Thread(target=model.generate, kwargs=kwargs)
    thread.start()
    for token in streamer:
        pieces.append(token)
    # The stream is exhausted, so generation is done; reap the worker.
    thread.join()
    return "".join(pieces)
def run_app(port=5000):
    """Start the Flask server for ``app``.

    Args:
        port: TCP port to listen on. Defaults to 5000, the port the script
            previously hard-coded at start-up.
    """
    app.run(port=port)


if __name__ == '__main__':
    # Pass the function itself to Fire. The previous
    # ``fire.Fire(run_app(5000))`` called run_app immediately and handed its
    # return value (None) to Fire, so command-line arguments such as
    # ``--port`` were never parsed.
    fire.Fire(run_app)
I wanted to ask if I can use this code in my project mentioned above. I wanted to add you as a contributor for it.
from llama2_local.
Related Issues (12)
- Run after tunnelling HOT 3
- how can I use for cpu QA model from llama2
- ModuleNotFoundError: No module named 'gradio' HOT 1
- Problems installing llama2 (failure building wheel) HOT 1
- Always loads the same model HOT 2
- CUDA extension not installed HOT 8
- Necessary prerequisites to compiling llama HOT 2
- Responses become too large HOT 1
- safetensors does not contain metadata
- ValueError: Need either a `state_dict` or a `save_folder` containing offloaded weights. HOT 2
- How to run other models?
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Something interesting about the web. A new door to the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Something interesting about visualization; use data as art.
-
Game
Something interesting about games; make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from llama2_local.