Prerequisite: Please install Python (version 3.10 or higher) before proceeding.
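
You can verify the interpreter version from Python itself:

python
import sys

# Fail fast if the interpreter is older than 3.10
assert sys.version_info >= (3, 10), f"Python 3.10+ is required, found {sys.version}"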

Step 1. Install chatformers

pip install chatformers

Before writing any code, get your API keys. This guide assumes you are using a model from Groq (https://console.groq.com/docs/openai).

Step 2. Imports

from chatformers.chatbot import Chatbot
import os
from openai import OpenAI

You can also use any other OpenAI-compatible LLM client if required. For example, with the Groq SDK:

python
from groq import Groq

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
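
To confirm the key works before wiring everything together, you can send a quick test request with this client (a minimal sketch; the model name is just one example of a model available on Groq):

python
# Reuses the client created above; any model your account can access works
resp = client.chat.completions.create(
    model="llama-3.1-70b-versatile",
    messages=[{"role": "user", "content": "Reply with one word: pong"}],
    max_tokens=5,
)
print(resp.choices[0].message.content)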

Step 3. Set Up Variables

python
system_prompt = None  # use the default
metadata = None  # use the default metadata
user_id = "Sam-Julia"  # combination of user name and assistant name is recommended
chat_model_name = "llama-3.1-70b-versatile"
memory_model_name = "llama-3.1-70b-versatile"
max_tokens = 150  # maximum number of tokens to generate from the LLM
limit = 4  # maximum number of memories to add to the chat context
debug = True  # enable to print debug messages
os.environ["GROQ_API_KEY"] = ""  # paste your Groq API key here
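
If you do not want the defaults, system_prompt and metadata can be set explicitly. The values below are hypothetical illustrations; check the chatformers documentation for the supported formats:

python
# Hypothetical overrides (this guide keeps the None defaults above)
system_prompt = "You are Julia, a concise and friendly assistant."
metadata = {"app": "chatformers-demo"}  # assumed free-form dict; verify in the docs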

Step 4. Set Up the LLM Client

python
llm_client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key="")  # any OpenAI-compatible LLM client; using Groq here
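
Instead of hard-coding the key, you can read it from the GROQ_API_KEY environment variable set in Step 3:

python
import os
from openai import OpenAI

# Same client as above, but the key comes from the environment
llm_client = OpenAI(base_url="https://api.groq.com/openai/v1",
                    api_key=os.environ["GROQ_API_KEY"])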

Step 5. Set Config

This example uses ChromaDB as the vector store, Ollama as the embedding model, and Groq as the LLM. Check the documentation for other supported vector stores, embedding models, and LLMs.

python
config = {
    "vector_store": {
        "provider": "chroma",
        "config": {
            "collection_name": user_id,
            "path": "db",
        }
    },
    "embedder": {
        "provider": "ollama",
        "config": {
            "model": "nomic-embed-text:latest"
        }
    },
    "llm": {
        "provider": "groq",
        "config": {
            "model": memory_model_name,
            "temperature": 0.1,
            "max_tokens": 1000,
        }
    },
}
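
Note that the embedder above requires a local Ollama server with the nomic-embed-text model pulled (for example, via ollama pull nomic-embed-text:latest). The sketch below checks that the server is reachable, assuming Ollama's default address of http://localhost:11434:

python
import urllib.request

# Ollama's root endpoint returns "Ollama is running" when the server is up
try:
    with urllib.request.urlopen("http://localhost:11434", timeout=3) as resp:
        print(resp.read().decode())
except OSError as exc:
    print("Ollama does not appear to be running:", exc)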

Step 6. Initialize the Chatbot

python
chatbot = Chatbot(config=config, llm_client=llm_client, metadata=None, system_prompt=system_prompt,
                  chat_model_name=chat_model_name, memory_model_name=memory_model_name,
                  max_tokens=max_tokens, limit=limit, debug=debug)

Step 7. Add Memories

Seed the chatbot with long-term memories by passing past conversation messages along with the user_id:

python
# Example: add past conversation messages as long-term memories
memory_messages = [
    {"role": "user", "content": "My name is Sam, what about you?"},
    {"role": "assistant", "content": "Hello Sam! I'm Julia."},
    {"role": "user", "content": "What do you like to eat?"},
    {"role": "assistant", "content": "I like pizza"}
]
chatbot.add_memories(memory_messages, user_id=user_id)

Step 8. Add Buffer Window / Sliding Chat Memory

python
# Buffer window memory; this acts as a sliding window of recent context for the LLM
message_history = [{"role": "user", "content": "where r u from?"},
                    {"role": "assistant", "content": "I am from CA, USA"},
                    {"role": "user", "content": "ok"},
                    {"role": "assistant", "content": "hmm"},
                    {"role": "user", "content": "What are u doing on next Sunday?"},
                    {"role": "assistant", "content": "I am all available"}
                    ]
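
If your application accumulates a long transcript, you can cap what goes into the buffer window yourself with a small helper (hypothetical, not part of chatformers):

python
def sliding_window(history, k=6):
    """Keep only the k most recent messages as the buffer window."""
    return history[-k:]

# Trim the transcript before passing it to chatbot.chat()
message_history = sliding_window(message_history, k=6)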

Step 9. Chat

python
# Example: chat with the bot; send the latest / current query here
query = "Do you remember my name?"
response = chatbot.chat(query=query, message_history=message_history, user_id=user_id, print_stream=True)
print("Assistant: ", response)

Step 10. Optional: Inspect Memories

python
# Example: check memories stored for a user_id
memories = chatbot.get_memories(user_id=user_id)
for m in memories:
    print(m)
print("================================================================")
related_memories = chatbot.related_memory(user_id=user_id,
                                          query="yes i am sam? what is your name")
print(related_memories)
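
Presumably, get_memories returns everything stored for the user_id, while related_memory runs a similarity search over the vector store to find memories relevant to the query; check the chatformers documentation for the exact return types.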

Complete Code

python
from chatformers.chatbot import Chatbot
import os
from openai import OpenAI

system_prompt = None  # use the default
metadata = None  # use the default metadata
user_id = "Sam-Julia"
chat_model_name = "llama-3.1-70b-versatile"
memory_model_name = "llama-3.1-70b-versatile"
max_tokens = 150  # maximum number of tokens to generate from the LLM
limit = 4  # maximum number of memories to add to the chat context
debug = True  # enable to print debug messages

os.environ["GROQ_API_KEY"] = ""  # paste your Groq API key here
llm_client = OpenAI(base_url="https://api.groq.com/openai/v1",
                    api_key="",
                    )  # any OpenAI-compatible LLM client
config = {
    "vector_store": {
        "provider": "chroma",
        "config": {
            "collection_name": "test",
            "path": "db",
        }
    },
    "embedder": {
        "provider": "ollama",
        "config": {
            "model": "nomic-embed-text:latest"
        }
    },
    "llm": {
        "provider": "groq",
        "config": {
            "model": memory_model_name,
            "temperature": 0.1,
            "max_tokens": 1000,
        }
    },
}

chatbot = Chatbot(config=config, llm_client=llm_client, metadata=None, system_prompt=system_prompt,
                  chat_model_name=chat_model_name, memory_model_name=memory_model_name,
                  max_tokens=max_tokens, limit=limit, debug=debug)

# Example: add past conversation messages as long-term memories
memory_messages = [
    {"role": "user", "content": "My name is Sam, what about you?"},
    {"role": "assistant", "content": "Hello Sam! I'm Julia."},
    {"role": "user", "content": "What do you like to eat?"},
    {"role": "assistant", "content": "I like pizza"}
]
chatbot.add_memories(memory_messages, user_id=user_id)

# Buffer window memory; this acts as a sliding window of recent context for the LLM
message_history = [{"role": "user", "content": "where r u from?"},
                   {"role": "assistant", "content": "I am from CA, USA"},
                   {"role": "user", "content": "ok"},
                   {"role": "assistant", "content": "hmm"},
                   {"role": "user", "content": "What are u doing on next Sunday?"},
                   {"role": "assistant", "content": "I am all available"}
                   ]
# Example: chat with the bot; send the latest / current query here
query = "Could you remind me what do you like to eat?"
response = chatbot.chat(query=query, message_history=message_history, user_id=user_id, print_stream=True)
print("Assistant: ", response)

# Example: check memories stored for a user_id
# memories = chatbot.get_memories(user_id=user_id)
# for m in memories:
#     print(m)
# print("================================================================")
# related_memories = chatbot.related_memory(user_id=user_id,
#                                           query="yes i am sam? what is your name")
# print(related_memories)

The output should look like this:

INFO: USING BELOW GIVEN CONFIGS-
{'embedder': {'config': {'model': 'nomic-embed-text:latest'},
              'provider': 'ollama'},
 'llm': {'config': {'max_tokens': 1000,
                    'model': 'llama-3.1-70b-versatile',
                    'temperature': 0.1},
         'provider': 'groq'},
 'vector_store': {'config': {'collection_name': 'test', 'path': 'db'},
                  'provider': 'chroma'}}
INFO: END OF CONFIGS
INFO: SYSTEM PROMPT-
You are a helpful assistant.You have access of following memories from old conversation you had earlier. You can refer these if required-
Likes pizza
Name is Julia

Assistant: I like to eat pizza.

Process finished with exit code 0