I want to connect the Zhipu glm-3-turbo model. When I run the script below, the API reports that my request concurrency is too high. Zhipu limits my account to 5 concurrent requests, so I want to configure this project's number of concurrent AI requests (sometimes called the thread or worker count) to stay within that limit and avoid these errors.
09:01:20,43: sammo.runners.RetriableError: Server error: 429 {'error': {'code': '1302', 'message': '您当前使用该API的并发数过高,请降低并发,或联系客服增加限额。'}}
09:01:20,120: sammo.runners.RetriableError: Server error: 429 {'error': {'code': '1302', 'message': '您当前使用该API的并发数过高,请降低并发,或联系客服增加限额。'}}
09:01:21,172: sammo.runners.RetriableError: Server error: 429 {'error': {'code': '1302', 'message': '您当前使用该API的并发数过高,请降低并发,或联系客服增加限额。'}}
09:01:21,237: sammo.runners.RetriableError: Server error: 429 {'error': {'code': '1302', 'message': '您当前使用该API的并发数过高,请降低并发,或联系客服增加限额。'}}
09:01:22,66: sammo.runners.RetriableError: Server error: 429 {'error': {'code': '1302', 'message': '您当前使用该API的并发数过高,请降低并发,或联系客服增加限额。'}}
09:01:22,221: sammo.runners.RetriableError: Server error: 429 {'error': {'code': '1302', 'message': '您当前使用该API的并发数过高,请降低并发,或联系客服增加限额。'}}
09:01:23,251: sammo.runners.RetriableError: Server error: 429 {'error': {'code': '1302', 'message': '您当前使用该API的并发数过高,请降低并发,或联系客服增加限额。'}}
09:01:23,288: sammo.runners.RetriableError: Server error: 429 {'error': {'code': '1302', 'message': '您当前使用该API的并发数过高,请降低并发,或联系客服增加限额。'}}
# Standard library
import json
import logging
import os
import pathlib

# Third-party
import requests

# SAMMO
import sammo
from sammo import PROMPT_LOGGER_NAME
from sammo.base import Costs, EvaluationScore, LLMResult, Template
from sammo.components import ForEach, GenerateText, Output, Union
from sammo.data import DataTable
from sammo.dataformatters import PlainFormatter
from sammo.extractors import ExtractRegex
from sammo.instructions import InputData, MetaPrompt, Paragraph, Section
from sammo.mutators import BagOfMutators, InduceInstructions, Paraphrase
from sammo.runners import OpenAIChat
from sammo.search import BeamSearch
from sammo.search_op import one_of
from sammo.throttler import AtMost  # used to cap concurrent in-flight requests
from sammo.utils import serialize_json
# Dedicated logger on which every prompt/response pair is recorded at DEBUG level.
prompt_logger = logging.getLogger(PROMPT_LOGGER_NAME)
class ZhiPuAIChat(OpenAIChat):
    """Runner for ZhipuAI chat models (e.g. glm-3-turbo).

    Reuses OpenAIChat's request/caching machinery and only redirects it to
    the ZhipuAI endpoint, whose wire format is OpenAI-compatible.
    """

    # Full endpoint is BASE_URL + SUFFIX:
    # https://open.bigmodel.cn/api/paas/v4/chat/completions
    BASE_URL = "https://open.bigmodel.cn/api/paas/v4"
    SUFFIX = "/chat/completions"

    async def generate_text(
        self,
        prompt: str,
        max_tokens: int | None = None,
        randomness: float | None = 0.01,
        seed: int = 0,
        priority: int = 0,
        system_prompt: str | None = None,
        history: list[dict] | None = None,
        json_mode: bool = False,
    ) -> LLMResult:
        """Calls the chat-completions endpoint of the ZhipuAI model.

        Args:
            prompt: The user prompt.
            max_tokens: The maximum number of tokens to generate. If not set, corresponds to
                maximum available tokens.
            randomness: Sampling temperature to use when generating tokens.
            seed: When using randomness, use this seed for local reproducibility
                (achieved by caching).
            priority: The priority of the request (used for throttling).
            system_prompt: Optional system message prepended to the conversation.
            history: Optional prior conversation turns; any system messages in it are
                dropped when ``system_prompt`` is given.
            json_mode: If True, ask the API for a JSON-formatted response.

        Returns:
            LLMResult with the generated text, token costs, and full message history.
        """
        messages = []
        if system_prompt is not None:
            messages = [{"role": "system", "content": system_prompt}]
            # Avoid two competing system messages: ours wins over any in history.
            if history:
                history = [x for x in history if x["role"] != "system"]
        if history is not None:
            messages = messages + history
        # Hook for subclasses to rewrite the prompt (e.g. strip image payloads).
        revised_prompt = self._post_process_prompt(prompt)
        messages += [{"role": "user", "content": revised_prompt}]
        # NOTE(review): preferring _max_context_window over the caller's max_tokens
        # mirrors upstream SAMMO's OpenAIChat — kept as-is for compatibility.
        # FIX: pass the `randomness` argument through instead of a hard-coded 0.1,
        # which silently ignored the parameter.
        request = dict(
            messages=messages,
            max_tokens=self._max_context_window or max_tokens,
            temperature=randomness,
        )
        if json_mode:
            request["response_format"] = {"type": "json_object"}
        # Fingerprint identifies this exact request for the cache.
        fingerprint = serialize_json(
            {"seed": seed, "generative_model_id": self._equivalence_class, **request}
        )
        return await self._execute_request(request, fingerprint, priority)

    def _to_llm_result(self, request: dict, json_data: dict, fingerprint: str | bytes) -> LLMResult:
        """Converts the raw JSON API response into an LLMResult."""
        request_text = request["messages"][-1]["content"]
        response_message = json_data["choices"][0]["message"]
        prompt_logger.debug(f"\n\n\nAPI call:\n{request_text}\n->\n\n{response_message['content']}")
        return LLMResult(
            response_message["content"],
            history=request["messages"] + [response_message],
            costs=self._extract_costs(json_data),
            request_text=request_text,
        )

    def _post_process_prompt(self, prompt: str):
        """No-op hook; ZhipuAI prompts are passed through unchanged."""
        return prompt

    @staticmethod
    def _extract_costs(json_data: dict) -> dict:
        """Reads token usage (prompt/completion tokens) from the API response."""
        return Costs(
            input_costs=json_data["usage"].get("prompt_tokens", 0),
            output_costs=json_data["usage"].get("completion_tokens", 0),
        )
class InititialCandidates:
    """Generates the initial prompt candidates for the search, based on d_train."""

    def __init__(self, dtrain):
        # Training split; supplies the label set and the task's base instructions.
        self.dtrain = dtrain

    def __call__(self):
        formatter = PlainFormatter(all_labels=self.dtrain.outputs.unique(), orient="item")
        label_set = self.dtrain.outputs.unique()

        # Only enumerate the labels in the prompt when the label space is small.
        label_line = f"Output labels: {', '.join(label_set)}\n" if len(label_set) <= 10 else ""

        base_instructions = self.dtrain.constants["instructions"]
        meta = MetaPrompt(
            [
                Paragraph("Instructions: "),
                # Four starting variants of the instruction paragraph; the search
                # mutates the paragraph tagged id="instructions".
                Paragraph(
                    one_of(
                        [
                            base_instructions,
                            "",
                            "Find the best output label given the input.",
                            base_instructions * 2,
                        ]
                    ),
                    id="instructions",
                ),
                Paragraph("\n"),
                Paragraph(label_line),
                Paragraph(InputData()),
                Paragraph("Output: "),
            ],
            render_as="raw",
            data_formatter=formatter,
        )
        return Output(
            meta.with_extractor("raise"),
            minibatch_size=1,
            on_error="empty_result",
        )
# Path to a JSON file of the form {"api_key": "YOUR_KEY"}.
API_CONFIG_FILE = pathlib.Path().cwd() / "config" / "personal.openai"
API_CONFIG = ""
if API_CONFIG_FILE.exists():
    API_CONFIG = API_CONFIG_FILE
if not API_CONFIG:
    raise ValueError('Please set API_CONFIG to {"api_key": "YOUR_KEY"}')

_ = sammo.setup_logger("WARNING")  # we're only interested in warnings for now

# Zhipu caps this account at 5 concurrent requests. AtMost(5, "running") tells
# SAMMO's throttler to keep at most 5 requests in flight at any moment, which
# prevents the HTTP 429 / code 1302 "concurrency too high" errors seen above.
# Lower the number further (e.g. AtMost(2, "running")) if 429s still occur.
runner = ZhiPuAIChat(
    model_id="glm-3-turbo",
    api_config=API_CONFIG,
    cache=os.getenv("CACHE_FILE", "cache.tsv"),
    timeout=30,
    rate_limit=AtMost(5, "running"),
)
# %load -s load_data,accuracy _init.py
def load_data(
    url="https://raw.githubusercontent.com/SinMu-L/BIG-bench/main/bigbench/benchmark_tasks/implicatures/task.json",
):
    """Downloads a BIG-bench task file and wraps its examples in a DataTable.

    Args:
        url: Location of the task JSON (must contain "examples" and "task_prefix").

    Returns:
        DataTable with "input" as the input field and the task prefix stored
        under the "instructions" constant.

    Raises:
        requests.HTTPError: If the download fails.
    """
    # Explicit timeout and status check so a bad URL fails loudly instead of
    # hanging or silently parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    task = response.json()
    # NOTE(review): the stock BIG-bench task stores labels in "target_scores";
    # this conversion is disabled here, presumably because the forked URL above
    # already provides an "output" field — verify against the fetched data.
    # for x in task["examples"]:
    #     x["output"] = max(x["target_scores"], key=x["target_scores"].get)
    return DataTable.from_records(
        task["examples"],
        input_fields="input",
        constants={"instructions": task["task_prefix"]},
    )
def accuracy(y_True: DataTable, y_pred: DataTable) -> EvaluationScore:
    """Fraction of predictions that exactly match the gold labels.

    Args:
        y_True: Gold-label DataTable.
        y_pred: Prediction DataTable, aligned element-wise with y_True.

    Returns:
        EvaluationScore holding accuracy in [0, 1]; 0.0 for empty input
        (guards against ZeroDivisionError).
    """
    gold = y_True.outputs.normalized_values()
    pred = y_pred.outputs.normalized_values()
    if not gold:
        return EvaluationScore(0.0)
    n_correct = sum(y_p == y_t for y_p, y_t in zip(pred, gold))
    return EvaluationScore(n_correct / len(gold))
# --- Optimization driver -------------------------------------------------
mydata = load_data()
# Small fixed-seed sample keeps the number of API calls (and cost) low.
d_train = mydata.sample(8, seed=42)

# Candidate generators: fresh initial prompts, instructions induced from the
# training data, and paraphrases of the paragraph tagged id="instructions".
mutation_operators = BagOfMutators(
    InititialCandidates(d_train),
    InduceInstructions({"id": "instructions"}, d_train),
    Paraphrase({"id": "instructions"}),
    sample_for_init_candidates=False,
)

# Beam search over prompt mutations, scored by accuracy on d_train.
prompt_optimizer = BeamSearch(
    runner,
    mutation_operators,
    accuracy,
    maximize=True,
    depth=3,
    mutations_per_beam=2,
    n_initial_candidates=4,
    beam_width=4,
    add_previous=True,
)
prompt_optimizer.fit(d_train)
prompt_optimizer.show_report()
print(prompt_optimizer.best_prompt)