Python SDK¶
The Python SDK (agent-memory-client) provides the easiest way to integrate memory into your AI applications. It includes high-level abstractions, tool integration for OpenAI and Anthropic, and automatic function call resolution.
Installation¶
Requirements: Python 3.10 or higher
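Install the client from PyPI (the package name, agent-memory-client, is as given above):

pip install agent-memory-client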
Quick Start¶
from agent_memory_client import MemoryAPIClient

# Connect to your memory server
client = MemoryAPIClient(
    base_url="http://localhost:8000",
    api_key="your-api-key"  # Optional if auth disabled
)

# Store a memory
await client.create_long_term_memories([{
    "text": "User prefers morning meetings and hates scheduling calls after 4 PM",
    "memory_type": "semantic",
    "topics": ["scheduling", "preferences"],
    "user_id": "alice"
}])

# Search memories
results = await client.search_long_term_memory(
    text="when does user prefer meetings",
    limit=5
)
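The snippets in this guide use top-level await for brevity. In a standalone script, wrap the calls in an async function and drive it with asyncio.run; a minimal sketch:

import asyncio

async def main():
    # "client" is the MemoryAPIClient constructed above
    results = await client.search_long_term_memory(
        text="when does user prefer meetings",
        limit=5
    )
    print(results)

asyncio.run(main())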
Client Configuration¶
Basic Setup¶
from agent_memory_client import MemoryAPIClient

# Minimal configuration (development)
client = MemoryAPIClient(base_url="http://localhost:8000")

# Production configuration
client = MemoryAPIClient(
    base_url="https://your-memory-server.com",
    api_key="your-api-token",
    timeout=30.0,
    session_id="user-session-123",
    user_id="user-456",
    namespace="production"
)
Authentication¶
# Token authentication
client = MemoryAPIClient(
    base_url="https://your-server.com",
    api_key="your-token-here"
)

# OAuth2/JWT authentication
client = MemoryAPIClient(
    base_url="https://your-server.com",
    bearer_token="your-jwt-token"
)

# Development (no auth)
client = MemoryAPIClient(base_url="http://localhost:8000")
Tool Integration¶
OpenAI Integration¶
The SDK provides automatic tool schemas and function call resolution for OpenAI:
import openai
from agent_memory_client import MemoryAPIClient

# Setup clients
memory_client = MemoryAPIClient(base_url="http://localhost:8000")
openai_client = openai.AsyncClient()

# Get tool schemas for OpenAI
memory_tools = MemoryAPIClient.get_all_memory_tool_schemas()

async def chat_with_memory(message: str, session_id: str):
    # Make request with memory tools
    response = await openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": message}],
        tools=memory_tools,
        tool_choice="auto"
    )

    # Process tool calls automatically
    if response.choices[0].message.tool_calls:
        # Resolve all tool calls
        results = []
        for tool_call in response.choices[0].message.tool_calls:
            result = await memory_client.resolve_tool_call(
                tool_call=tool_call,
                session_id=session_id
            )
            if result["success"]:
                results.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "name": tool_call.function.name,
                    "content": result["formatted_response"]
                })
            else:
                results.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "name": tool_call.function.name,
                    "content": f"Error: {result['error']}"
                })

        # Continue conversation with results
        messages = [
            {"role": "user", "content": message},
            response.choices[0].message,
            *results
        ]
        final_response = await openai_client.chat.completions.create(
            model="gpt-4o",
            messages=messages
        )
        return final_response.choices[0].message.content

    return response.choices[0].message.content
Anthropic Integration¶
Similar tool integration for Anthropic Claude:
import anthropic
from agent_memory_client import MemoryAPIClient

# Setup clients
memory_client = MemoryAPIClient(base_url="http://localhost:8000")
anthropic_client = anthropic.AsyncClient()

# Get tool schemas for Anthropic
memory_tools = MemoryAPIClient.get_all_memory_tool_schemas_anthropic()

async def chat_with_memory(message: str, session_id: str):
    response = await anthropic_client.messages.create(
        model="claude-3-5-sonnet-20241022",
        messages=[{"role": "user", "content": message}],
        tools=memory_tools,
        max_tokens=1000
    )

    # Process tool calls
    if response.stop_reason == "tool_use":
        results = []
        for content_block in response.content:
            if content_block.type == "tool_use":
                result = await memory_client.resolve_tool_call(
                    tool_call={
                        "type": "tool_use",
                        "id": content_block.id,
                        "name": content_block.name,
                        "input": content_block.input
                    },
                    session_id=session_id
                )
                if result["success"]:
                    results.append({
                        "type": "tool_result",
                        "tool_use_id": content_block.id,
                        "content": result["formatted_response"]
                    })
                else:
                    results.append({
                        "type": "tool_result",
                        "tool_use_id": content_block.id,
                        "content": f"Error: {result['error']}"
                    })

        # Continue the conversation. In the Anthropic API, tool_result blocks
        # must go in a *user* turn that follows the assistant turn containing
        # the tool_use blocks, and the tools parameter must be passed again.
        messages = [
            {"role": "user", "content": message},
            {"role": "assistant", "content": response.content},
            {"role": "user", "content": results}
        ]
        final_response = await anthropic_client.messages.create(
            model="claude-3-5-sonnet-20241022",
            messages=messages,
            tools=memory_tools,
            max_tokens=1000
        )
        return final_response.content[0].text

    return response.content[0].text
Available Tools¶
The SDK provides these tools for LLM integration:
- eagerly_create_long_term_memory - Eagerly create a long-term memory by making an API request
- lazily_create_long_term_memory - Lazily create a long-term memory by adding it to working memory (does not require an immediate network request; does require saving working memory afterward)
- search_long_term_memory - Search with semantic similarity
- edit_memory - Update existing memories
- delete_memory - Remove memories
- set_working_memory - Update or create a working memory session
- get_or_create_working_memory - Retrieve or create a working memory session

Note: The following tool names have been deprecated for clarity:

- create_long_term_memories (deprecated) → use eagerly_create_long_term_memory
- add_memory_to_working_memory (deprecated) → use lazily_create_long_term_memory
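If you only want to expose a subset of these tools to the model, a minimal sketch is to filter the schema list by tool name. This assumes the OpenAI-format schema shape, where each entry carries its name under a top-level "function" key:

# Expose only search and lazy creation to the LLM
# (assumes OpenAI-format schemas: {"type": "function", "function": {"name": ...}})
memory_tools = MemoryAPIClient.get_all_memory_tool_schemas()
allowed = {"search_long_term_memory", "lazily_create_long_term_memory"}
selected_tools = [t for t in memory_tools if t["function"]["name"] in allowed]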
Memory Operations¶
Creating Memories¶
# Create multiple memories
memories = [
    {
        "text": "User works as a software engineer at TechCorp",
        "memory_type": "semantic",
        "topics": ["career", "work", "company"],
        "entities": ["TechCorp", "software engineer"],
        "user_id": "alice"
    },
    {
        "text": "User prefers Python and TypeScript for development",
        "memory_type": "semantic",
        "topics": ["programming", "preferences", "languages"],
        "entities": ["Python", "TypeScript"],
        "user_id": "alice"
    }
]

result = await client.create_long_term_memories(memories)
print(f"Created {len(result.memories)} memories")
Searching Memories¶
# Basic semantic search
results = await client.search_long_term_memory(
    text="user programming experience",
    limit=10
)

# Advanced filtering
results = await client.search_long_term_memory(
    text="user preferences",
    user_id="alice",
    topics=["programming", "food"],
    limit=5,
    min_relevance_score=0.7
)

# Time-based filtering
from datetime import datetime, timedelta

week_ago = datetime.now() - timedelta(days=7)
results = await client.search_long_term_memory(
    text="recent updates",
    created_after=week_ago,
    limit=10
)

# Process results
for memory in results.memories:
    print(f"Relevance: {memory.relevance_score:.2f}")
    print(f"Text: {memory.text}")
    print(f"Topics: {', '.join(memory.topics or [])}")
Memory Editing¶
# Update a memory
await client.edit_memory(
    memory_id="memory-123",
    updates={
        "text": "User works as a senior software engineer at TechCorp",
        "topics": ["career", "work", "company", "senior"],
        "entities": ["TechCorp", "senior software engineer"]
    }
)

# Add context to an existing memory
await client.edit_memory(
    memory_id="memory-456",
    updates={
        "text": "User prefers Python and TypeScript for development. Recently started learning Rust.",
        "topics": ["programming", "preferences", "languages", "rust"],
        "entities": ["Python", "TypeScript", "Rust"]
    }
)
Working Memory¶
# Store conversation context
conversation = {
    "messages": [
        {"role": "user", "content": "I'm planning a trip to Italy"},
        {"role": "assistant", "content": "That sounds exciting! What cities are you thinking of visiting?"},
        {"role": "user", "content": "Rome and Florence, maybe Venice too"}
    ],
    "memories": [
        {
            "text": "User is planning a trip to Italy, considering Rome, Florence, and Venice",
            "memory_type": "semantic",
            "topics": ["travel", "italy", "vacation"],
            "entities": ["Italy", "Rome", "Florence", "Venice"]
        }
    ]
}

await client.set_working_memory("session-123", conversation)

# Retrieve or create working memory
created, memory = await client.get_or_create_working_memory("session-123")
if created:
    print("Created new session")
else:
    print("Found existing session")
print(f"Session has {len(memory.messages)} messages")
Memory-Enhanced Conversations¶
Context Injection¶
The SDK provides a powerful memory_prompt
method that automatically enriches your prompts with relevant context:
async def get_contextualized_response(user_message: str, session_id: str, user_id: str):
    # Get memory-enriched context
    context = await client.memory_prompt(
        query=user_message,
        session={
            "session_id": session_id,
            "user_id": user_id,
            "model_name": "gpt-4o"
        },
        long_term_search={
            "text": user_message,
            "limit": 5,
            "user_id": user_id
        }
    )

    # Send to LLM
    response = await openai_client.chat.completions.create(
        model="gpt-4o",
        messages=context.messages
    )
    return response.choices[0].message.content
Automatic Memory Storage¶
async def chat_with_auto_memory(message: str, session_id: str):
    # Get contextualized prompt
    context = await client.memory_prompt(
        query=message,
        session={"session_id": session_id, "model_name": "gpt-4o"}
    )

    # Generate response
    response = await openai_client.chat.completions.create(
        model="gpt-4o",
        messages=context.messages + [{"role": "user", "content": message}]
    )

    # Store the conversation
    conversation = {
        "messages": [
            {"role": "user", "content": message},
            {"role": "assistant", "content": response.choices[0].message.content}
        ]
    }
    await client.set_working_memory(session_id, conversation)

    return response.choices[0].message.content
Batch Operations¶
Bulk Memory Creation¶
# Process large datasets efficiently
async def import_user_data(user_data: list, user_id: str):
    batch_size = 50

    for i in range(0, len(user_data), batch_size):
        batch = user_data[i:i + batch_size]
        memories = [
            {
                "text": item["description"],
                "memory_type": "semantic",
                "topics": item.get("categories", []),
                "entities": item.get("entities", []),
                "user_id": user_id,
                "metadata": {"source": item["source"]}
            }
            for item in batch
        ]
        result = await client.create_long_term_memories(memories)
        print(f"Imported batch {i//batch_size + 1}, {len(result.memories)} memories")
Bulk Search Operations¶
import asyncio

# Search multiple queries efficiently
async def multi_search(queries: list[str], user_id: str):
    results = {}

    # Use asyncio.gather for concurrent searches
    search_tasks = [
        client.search_long_term_memory(
            text=query,
            user_id=user_id,
            limit=3
        )
        for query in queries
    ]
    search_results = await asyncio.gather(*search_tasks)

    for query, result in zip(queries, search_results):
        results[query] = [memory.text for memory in result.memories]

    return results
Error Handling¶
Robust Client Usage¶
from agent_memory_client import MemoryAPIClient, MemoryError
import asyncio
import logging

async def robust_memory_operation(client: MemoryAPIClient):
    try:
        # Attempt memory operation
        results = await client.search_long_term_memory(
            text="user preferences",
            limit=5
        )
        return results.memories
    except MemoryError as e:
        if e.status_code == 401:
            logging.error("Authentication failed - check API key")
        elif e.status_code == 429:
            logging.warning("Rate limited - waiting before retry")
            await asyncio.sleep(5)
            return await robust_memory_operation(client)
        else:
            logging.error(f"Memory API error: {e}")
        return []
    except Exception as e:
        logging.error(f"Unexpected error: {e}")
        return []
Connection Management¶
import httpx
from agent_memory_client import MemoryAPIClient

# Custom timeout and connection-pool configuration
async with httpx.AsyncClient(
    timeout=30.0,
    limits=httpx.Limits(max_keepalive_connections=10, max_connections=20)
) as http_client:
    client = MemoryAPIClient(
        base_url="http://localhost:8000",
        http_client=http_client
    )

    # Perform operations
    results = await client.search_long_term_memory(text="query")
Advanced Features¶
Custom Tool Workflows¶
class CustomMemoryAgent:
    def __init__(self, memory_client: MemoryAPIClient):
        self.memory = memory_client

    async def intelligent_search(self, query: str, user_id: str):
        # Multi-stage search with refinement
        initial_results = await self.memory.search_long_term_memory(
            text=query,
            user_id=user_id,
            limit=20
        )

        if not initial_results.memories:
            # Try a broader search without the user filter
            return await self.memory.search_long_term_memory(
                text=query,
                limit=10
            )

        # Filter by relevance threshold
        relevant_memories = [
            m for m in initial_results.memories
            if m.relevance_score > 0.7
        ]
        return relevant_memories[:5]

    async def contextual_store(self, text: str, context: dict, user_id: str):
        # Extract topics and entities from context
        topics = context.get("topics", [])
        entities = context.get("entities", [])

        # Search for similar existing memories
        similar = await self.memory.search_long_term_memory(
            text=text,
            user_id=user_id,
            limit=3,
            min_relevance_score=0.8
        )

        if similar.memories:
            # Update the existing memory instead of creating a duplicate
            # (guard with "or []" since topics/entities may be None)
            existing = similar.memories[0]
            await self.memory.edit_memory(
                memory_id=existing.id,
                updates={
                    "text": f"{existing.text}. {text}",
                    "topics": list(set((existing.topics or []) + topics)),
                    "entities": list(set((existing.entities or []) + entities))
                }
            )
        else:
            # Create a new memory
            await self.memory.create_long_term_memories([{
                "text": text,
                "memory_type": "semantic",
                "topics": topics,
                "entities": entities,
                "user_id": user_id
            }])
Performance Optimization¶
import asyncio

class OptimizedMemoryClient:
    def __init__(self, client: MemoryAPIClient):
        self.client = client
        self._search_cache = {}

    def _cache_key(self, text: str, user_id: str, limit: int) -> str:
        return f"{text}:{user_id}:{limit}"

    async def cached_search(self, text: str, user_id: str, limit: int = 5):
        cache_key = self._cache_key(text, user_id, limit)

        if cache_key in self._search_cache:
            return self._search_cache[cache_key]

        results = await self.client.search_long_term_memory(
            text=text,
            user_id=user_id,
            limit=limit
        )

        # Cache results for 5 minutes
        self._search_cache[cache_key] = results
        asyncio.create_task(self._expire_cache(cache_key, 300))

        return results

    async def _expire_cache(self, key: str, delay: int):
        await asyncio.sleep(delay)
        self._search_cache.pop(key, None)
Best Practices¶
1. Client Management¶
import os
from agent_memory_client import MemoryAPIClient

# Use a single client instance per application
class MemoryService:
    def __init__(self):
        self.client = MemoryAPIClient(
            base_url=os.getenv("MEMORY_SERVER_URL"),
            api_key=os.getenv("MEMORY_API_KEY")
        )

    async def close(self):
        await self.client.close()

# Singleton pattern
memory_service = MemoryService()
2. Memory Organization¶
# Use consistent naming patterns
async def create_user_memory(text: str, user_id: str, category: str):
    return await client.create_long_term_memories([{
        "text": text,
        "memory_type": "semantic",
        "topics": [category, "user-preference"],
        "user_id": user_id,
        "namespace": f"user:{user_id}:preferences"
    }])
3. Context Management¶
# Implement context-aware memory storage
async def store_conversation_memory(conversation: dict, session_id: str):
    # Extract key information (extract_facts is an application-specific
    # helper you supply; it should return a list of fact strings)
    important_facts = extract_facts(conversation)

    if important_facts:
        await client.create_long_term_memories([{
            "text": fact,
            "memory_type": "semantic",
            "session_id": session_id,
            "metadata": {"conversation_turn": i}
        } for i, fact in enumerate(important_facts)])
Configuration Reference¶
Environment Variables¶
# Client configuration
MEMORY_SERVER_URL=http://localhost:8000
MEMORY_API_KEY=your-api-token
# Connection settings
MEMORY_TIMEOUT=30
MEMORY_MAX_RETRIES=3
# Default user settings
DEFAULT_USER_ID=default-user
DEFAULT_NAMESPACE=production
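A minimal sketch of constructing a client from these variables (this assumes you wire them in yourself; the fallback defaults are illustrative):

import os
from agent_memory_client import MemoryAPIClient

client = MemoryAPIClient(
    base_url=os.getenv("MEMORY_SERVER_URL", "http://localhost:8000"),
    api_key=os.getenv("MEMORY_API_KEY"),
    timeout=float(os.getenv("MEMORY_TIMEOUT", "30")),
    user_id=os.getenv("DEFAULT_USER_ID", "default-user"),
    namespace=os.getenv("DEFAULT_NAMESPACE", "production"),
)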
Client Options¶
client = MemoryAPIClient(
    base_url="http://localhost:8000",
    api_key="optional-token",
    bearer_token="optional-jwt",
    timeout=30.0,
    max_retries=3,
    session_id="default-session",
    user_id="default-user",
    namespace="default",
    http_client=custom_httpx_client
)
The Python SDK makes it easy to add sophisticated memory capabilities to any AI application, with minimal setup and maximum flexibility. Use the tool integrations for LLM-driven memory, direct API calls for code-driven approaches, or combine both patterns for hybrid solutions.