Model Gateway
Model Gateway
Concurrently generate and compare responses from different models in a single request.
Usage
from openai import OpenAI
# Point the OpenAI SDK at the Neutrino gateway rather than the default
# OpenAI endpoint; authentication uses a Neutrino API key.
client = OpenAI(
    base_url="https://router.neutrinoapp.com/api/engines",
    api_key="<Neutrino-API-key>",
)

response = client.chat.completions.create(
    # Instead of a specific model, set this to a list of models
    model=["mixtral-8x7b-instruct", "claude-instant-1", "gpt-3.5-turbo"],
    messages=[
        {"role": "system", "content": "You are a helpful AI asssistant. Your job is to be helpful and respond to user requests."},
        {"role": "user", "content": "Tell me a joke"},
    ],
)

# Iterate `response.choices`, not `response`: the completion object is a
# pydantic model, and iterating it directly yields (field_name, value)
# tuples, which have no `.model` / `.message` attributes.
# NOTE(review): assumes the gateway tags every choice with the `model` that
# produced it (one choice per requested model) -- confirm against the API.
for choice in response.choices:
    model = choice.model
    content = choice.message.content
    print(f"{model}:\n{content}\n\n")

# Output:
# gpt-3.5-turbo:
# Why don't scientists trust atoms?
# Because they make up everything!
# claude-instant-1:
# Here's a silly joke for you:
# What do you call a dog magician? A labracadabrador!
# mixtral-8x7b-instruct:
# Of course, I'd be happy to share a light-hearted, clean joke with you. Here it is:
# Why don't scientists trust atoms?
# Because they make up everything!
Streaming Responses
from openai import OpenAI
# Point the OpenAI SDK at the Neutrino gateway rather than the default
# OpenAI endpoint; authentication uses a Neutrino API key.
# NOTE(review): this example targets `/api/llm-router` while the
# non-streaming Usage example targets `/api/engines` -- confirm which
# endpoint is current and make the two examples agree.
client = OpenAI(
    base_url="https://router.neutrinoapp.com/api/llm-router",
    api_key="<Neutrino-API-key>",
)

response = client.chat.completions.create(
    # Instead of a specific model, set this to a list of models
    model=["mixtral-8x7b-instruct", "claude-instant-1", "gpt-3.5-turbo"],
    messages=[
        {"role": "system", "content": "You are a helpful AI asssistant. Your job is to be helpful and respond to user requests."},
        {"role": "user", "content": "Tell me a joke"},
    ],
    stream=True,
)

# Chunks from the different models arrive interleaved on one stream, so the
# partial text is accumulated per model, keyed by `chunk.model`.
running_model_responses = {}
for chunk in response:
    # Guard against chunks that carry no choices; indexing [0] on an empty
    # list would raise IndexError.
    if not chunk.choices:
        continue
    model = chunk.model
    # `delta.content` is optional and is None on chunks that carry no text
    # (e.g. role-only or finish chunks); treat that as an empty string so
    # the concatenation below never raises TypeError.
    delta_content = chunk.choices[0].delta.content or ""
    running_model_responses[model] = (
        running_model_responses.get(model, "") + delta_content
    )

# Print each model's fully assembled response once the stream is exhausted.
for model, content in running_model_responses.items():
    print(f"{model}:\n{content}\n-----")