Usage
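
The router exposes an OpenAI-compatible API: point the official OpenAI Python client at the router's base_url and authenticate with your Neutrino API key. Passing a list of model names sends the request to each of those models.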

from openai import OpenAI

client = OpenAI(
    base_url="https://router.neutrinoapp.com/api/llm-router",
    api_key="<Neutrino-API-key>"
)

response = client.chat.completions.create(
    # Instead of a specific model, set this to a list of models
    model=["mixtral-8x7b-instruct", "claude-instant-1", "gpt-3.5-turbo"],
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant. Your job is to be helpful and respond to user requests."},
        {"role": "user", "content": "Tell me a joke"},
    ]
)

for choice in response.choices:
    model = choice.model
    content = choice.message.content
    print(f"{model}:\n{content}\n\n")

# Output:
# gpt-3.5-turbo:
# Why don't scientists trust atoms?
# Because they make up everything!

# claude-instant-1:
# Here's a silly joke for you:
# What do you call a dog magician? A labracadabrador!

# mixtral-8x7b-instruct:
# Of course, I'd be happy to share a light-hearted, clean joke with you. Here it is:
# Why don't scientists trust atoms?
# Because they make up everything!
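
Since each choice carries the model that produced it, you can also index the replies by model name. A minimal sketch (the responses_by_model name is just for illustration):

# Map each model name to the text it returned
responses_by_model = {choice.model: choice.message.content for choice in response.choices}

# e.g. prefer GPT-3.5's answer, falling back to the first reply that came back
answer = responses_by_model.get("gpt-3.5-turbo", next(iter(responses_by_model.values())))
print(answer)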

Streaming Responses
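
With stream=True, chunks from all of the selected models arrive interleaved on a single stream; each chunk identifies its source via chunk.model, so you can accumulate text per model as shown below.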

from openai import OpenAI

client = OpenAI(
    base_url="https://router.neutrinoapp.com/api/llm-router",
    api_key="<Neutrino-API-key>"
)

response = client.chat.completions.create(
    # Instead of a specific model, set this to a list of models
    model=["mixtral-8x7b-instruct", "claude-instant-1", "gpt-3.5-turbo"],
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant. Your job is to be helpful and respond to user requests."},
        {"role": "user", "content": "Tell me a joke"},
    ],
    stream=True
)

running_model_responses = {}

for chunk in response:
    model = chunk.model
    delta_content = chunk.choices[0].delta.content
    # The delta content can be None (e.g. on a model's final chunk), so skip empty deltas
    if not delta_content:
        continue
    if model not in running_model_responses:
        running_model_responses[model] = ""
    running_model_responses[model] += delta_content

for model, content in running_model_responses.items():
    print(f"{model}:\n{content}\n-----")