Python SDK

The Python SDK (agent-memory-client) provides the easiest way to integrate memory into your AI applications. It includes high-level abstractions, tool integration for OpenAI and Anthropic, and automatic function call resolution.

Installation

Requirements: Python 3.10 or higher

pip install agent-memory-client

Quick Start

from agent_memory_client import MemoryAPIClient

# Connect to your memory server
client = MemoryAPIClient(
    base_url="http://localhost:8000",
    api_key="your-api-key"  # Optional if auth disabled
)

# Store a memory
await client.create_long_term_memories([{
    "text": "User prefers morning meetings and hates scheduling calls after 4 PM",
    "memory_type": "semantic",
    "topics": ["scheduling", "preferences"],
    "user_id": "alice"
}])

# Search memories
results = await client.search_long_term_memory(
    text="when does user prefer meetings",
    limit=5
)

Client Configuration

Basic Setup

from agent_memory_client import MemoryAPIClient

# Minimal configuration (development)
client = MemoryAPIClient(base_url="http://localhost:8000")

# Production configuration
client = MemoryAPIClient(
    base_url="https://your-memory-server.com",
    api_key="your-api-token",
    timeout=30.0,
    session_id="user-session-123",
    user_id="user-456",
    namespace="production"
)

Authentication

# Token authentication
client = MemoryAPIClient(
    base_url="https://your-server.com",
    api_key="your-token-here"
)

# OAuth2/JWT authentication
client = MemoryAPIClient(
    base_url="https://your-server.com",
    bearer_token="your-jwt-token"
)

# Development (no auth)
client = MemoryAPIClient(base_url="http://localhost:8000")

Tool Integration

OpenAI Integration

The SDK provides automatic tool schemas and function call resolution for OpenAI:

import openai
from agent_memory_client import MemoryAPIClient

# Setup clients
memory_client = MemoryAPIClient(base_url="http://localhost:8000")
openai_client = openai.AsyncOpenAI()

# Get tool schemas for OpenAI
memory_tools = MemoryAPIClient.get_all_memory_tool_schemas()

async def chat_with_memory(message: str, session_id: str):
    # Make request with memory tools
    response = await openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": message}],
        tools=memory_tools,
        tool_choice="auto"
    )

    # Process tool calls automatically
    if response.choices[0].message.tool_calls:
        # Resolve all tool calls
        results = []
        for tool_call in response.choices[0].message.tool_calls:
            result = await memory_client.resolve_tool_call(
                tool_call=tool_call,
                session_id=session_id
            )
            if result["success"]:
                results.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "name": tool_call.function.name,
                    "content": result["formatted_response"]
                })
            else:
                results.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "name": tool_call.function.name,
                    "content": f"Error: {result['error']}"
                })

        # Continue conversation with results
        messages = [
            {"role": "user", "content": message},
            response.choices[0].message,
            *results
        ]

        final_response = await openai_client.chat.completions.create(
            model="gpt-4o",
            messages=messages
        )

        return final_response.choices[0].message.content

    return response.choices[0].message.content

Anthropic Integration

Similar tool integration for Anthropic Claude:

import anthropic
from agent_memory_client import MemoryAPIClient

# Setup clients
memory_client = MemoryAPIClient(base_url="http://localhost:8000")
anthropic_client = anthropic.AsyncAnthropic()

# Get tool schemas for Anthropic
memory_tools = MemoryAPIClient.get_all_memory_tool_schemas_anthropic()

async def chat_with_memory(message: str, session_id: str):
    response = await anthropic_client.messages.create(
        model="claude-3-5-sonnet-20241022",
        messages=[{"role": "user", "content": message}],
        tools=memory_tools,
        max_tokens=1000
    )

    # Process tool calls
    if response.stop_reason == "tool_use":
        results = []
        for content_block in response.content:
            if content_block.type == "tool_use":
                result = await memory_client.resolve_tool_call(
                    tool_call={
                        "type": "tool_use",
                        "id": content_block.id,
                        "name": content_block.name,
                        "input": content_block.input
                    },
                    session_id=session_id
                )
                if result["success"]:
                    results.append({
                        "type": "tool_result",
                        "tool_use_id": content_block.id,
                        "content": result["formatted_response"]
                    })
                else:
                    results.append({
                        "type": "tool_result",
                        "tool_use_id": content_block.id,
                        "content": f"Error: {result['error']}"
                    })

        # Continue the conversation: tool results must go back in a user message
        messages = [
            {"role": "user", "content": message},
            {"role": "assistant", "content": response.content},
            {"role": "user", "content": results}
        ]

        final_response = await anthropic_client.messages.create(
            model="claude-3-5-sonnet-20241022",
            messages=messages,
            tools=memory_tools,  # required when messages contain tool_use/tool_result blocks
            max_tokens=1000
        )

        return final_response.content[0].text

    return response.content[0].text

Available Tools

The SDK provides these tools for LLM integration:

  1. eagerly_create_long_term_memory - Eagerly create a long-term memory by making an API request
  2. lazily_create_long_term_memory - Lazily create a long-term memory by adding it to working memory (does not require an immediate network request; does require saving working memory afterward)
  3. search_long_term_memory - Search with semantic similarity
  4. edit_memory - Update existing memories
  5. delete_memory - Remove memories
  6. set_working_memory - Update or create a working memory session
  7. get_or_create_working_memory - Retrieve or create a working memory session

Note: The following tool names have been deprecated for clarity:

  - create_long_term_memories (deprecated) → use eagerly_create_long_term_memory
  - add_memory_to_working_memory (deprecated) → use lazily_create_long_term_memory
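
To expose only a subset of these tools to the model, you can filter the schema list by function name before passing it to the LLM. A minimal sketch, assuming OpenAI-style schemas where each entry carries a nested function.name field:

from agent_memory_client import MemoryAPIClient

# Assumption: each schema looks like {"type": "function", "function": {"name": ...}}
all_tools = MemoryAPIClient.get_all_memory_tool_schemas()
allowed = {"search_long_term_memory", "lazily_create_long_term_memory"}
memory_tools = [t for t in all_tools if t["function"]["name"] in allowed]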

Memory Operations

Creating Memories

# Create multiple memories
memories = [
    {
        "text": "User works as a software engineer at TechCorp",
        "memory_type": "semantic",
        "topics": ["career", "work", "company"],
        "entities": ["TechCorp", "software engineer"],
        "user_id": "alice"
    },
    {
        "text": "User prefers Python and TypeScript for development",
        "memory_type": "semantic",
        "topics": ["programming", "preferences", "languages"],
        "entities": ["Python", "TypeScript"],
        "user_id": "alice"
    }
]

result = await client.create_long_term_memories(memories)
print(f"Created {len(result.memories)} memories")

Searching Memories

# Basic semantic search
results = await client.search_long_term_memory(
    text="user programming experience",
    limit=10
)

# Advanced filtering
results = await client.search_long_term_memory(
    text="user preferences",
    user_id="alice",
    topics=["programming", "food"],
    limit=5,
    min_relevance_score=0.7
)

# Time-based filtering
from datetime import datetime, timedelta

week_ago = datetime.now() - timedelta(days=7)
results = await client.search_long_term_memory(
    text="recent updates",
    created_after=week_ago,
    limit=10
)

# Process results
for memory in results.memories:
    print(f"Relevance: {memory.relevance_score:.2f}")
    print(f"Text: {memory.text}")
    print(f"Topics: {', '.join(memory.topics or [])}")

Memory Editing

# Update a memory
await client.edit_memory(
    memory_id="memory-123",
    updates={
        "text": "User works as a senior software engineer at TechCorp",
        "topics": ["career", "work", "company", "senior"],
        "entities": ["TechCorp", "senior software engineer"]
    }
)

# Add context to existing memory
await client.edit_memory(
    memory_id="memory-456",
    updates={
        "text": "User prefers Python and TypeScript for development. Recently started learning Rust.",
        "topics": ["programming", "preferences", "languages", "rust"],
        "entities": ["Python", "TypeScript", "Rust"]
    }
)

Working Memory

# Store conversation context
conversation = {
    "messages": [
        {"role": "user", "content": "I'm planning a trip to Italy"},
        {"role": "assistant", "content": "That sounds exciting! What cities are you thinking of visiting?"},
        {"role": "user", "content": "Rome and Florence, maybe Venice too"}
    ],
    "memories": [
        {
            "text": "User is planning a trip to Italy, considering Rome, Florence, and Venice",
            "memory_type": "semantic",
            "topics": ["travel", "italy", "vacation"],
            "entities": ["Italy", "Rome", "Florence", "Venice"]
        }
    ]
}

await client.set_working_memory("session-123", conversation)

# Retrieve or create working memory
created, memory = await client.get_or_create_working_memory("session-123")
if created:
    print("Created new session")
else:
    print("Found existing session")
print(f"Session has {len(memory.messages)} messages")

Memory-Enhanced Conversations

Context Injection

The SDK provides a powerful memory_prompt method that automatically enriches your prompts with relevant context:

async def get_contextualized_response(user_message: str, session_id: str, user_id: str):
    # Get memory-enriched context
    context = await client.memory_prompt(
        query=user_message,
        session={
            "session_id": session_id,
            "user_id": user_id,
            "model_name": "gpt-4o"
        },
        long_term_search={
            "text": user_message,
            "limit": 5,
            "user_id": user_id
        }
    )

    # Send to LLM
    response = await openai_client.chat.completions.create(
        model="gpt-4o",
        messages=context.messages
    )

    return response.choices[0].message.content

Automatic Memory Storage

async def chat_with_auto_memory(message: str, session_id: str):
    # Get contextualized prompt
    context = await client.memory_prompt(
        query=message,
        session={"session_id": session_id, "model_name": "gpt-4o"}
    )

    # Generate response
    response = await openai_client.chat.completions.create(
        model="gpt-4o",
        messages=context.messages  # memory_prompt already includes the query message
    )

    # Store the conversation
    conversation = {
        "messages": [
            {"role": "user", "content": message},
            {"role": "assistant", "content": response.choices[0].message.content}
        ]
    }

    await client.set_working_memory(session_id, conversation)

    return response.choices[0].message.content

Batch Operations

Bulk Memory Creation

# Process large datasets efficiently
async def import_user_data(user_data: list, user_id: str):
    batch_size = 50

    for i in range(0, len(user_data), batch_size):
        batch = user_data[i:i + batch_size]

        memories = [
            {
                "text": item["description"],
                "memory_type": "semantic",
                "topics": item.get("categories", []),
                "entities": item.get("entities", []),
                "user_id": user_id,
                "metadata": {"source": item["source"]}
            }
            for item in batch
        ]

        result = await client.create_long_term_memories(memories)
        print(f"Imported batch {i//batch_size + 1}, {len(result.memories)} memories")

Bulk Search Operations

# Search multiple queries efficiently
import asyncio

async def multi_search(queries: list[str], user_id: str):
    results = {}

    # Use asyncio.gather for concurrent searches
    search_tasks = [
        client.search_long_term_memory(
            text=query,
            user_id=user_id,
            limit=3
        )
        for query in queries
    ]

    search_results = await asyncio.gather(*search_tasks)

    for query, result in zip(queries, search_results):
        results[query] = [memory.text for memory in result.memories]

    return results

Error Handling

Robust Client Usage

from agent_memory_client import MemoryAPIClient, MemoryError
import asyncio
import logging

async def robust_memory_operation(client: MemoryAPIClient):
    try:
        # Attempt memory operation
        results = await client.search_long_term_memory(
            text="user preferences",
            limit=5
        )

        return results.memories

    except MemoryError as e:
        if e.status_code == 401:
            logging.error("Authentication failed - check API key")
        elif e.status_code == 429:
            logging.warning("Rate limited - waiting before retry")
            await asyncio.sleep(5)
            # Unbounded retry for brevity; cap attempts in production code
            return await robust_memory_operation(client)
        else:
            logging.error(f"Memory API error: {e}")
            return []

    except Exception as e:
        logging.error(f"Unexpected error: {e}")
        return []

Connection Management

import httpx
from agent_memory_client import MemoryAPIClient

# Custom timeout and retry configuration
async with httpx.AsyncClient(
    timeout=30.0,
    limits=httpx.Limits(max_keepalive_connections=10, max_connections=20)
) as http_client:

    client = MemoryAPIClient(
        base_url="http://localhost:8000",
        http_client=http_client
    )

    # Perform operations
    results = await client.search_long_term_memory(text="query")

Advanced Features

Custom Tool Workflows

class CustomMemoryAgent:
    def __init__(self, memory_client: MemoryAPIClient):
        self.memory = memory_client

    async def intelligent_search(self, query: str, user_id: str):
        # Multi-stage search with refinement
        initial_results = await self.memory.search_long_term_memory(
            text=query,
            user_id=user_id,
            limit=20
        )

        if not initial_results.memories:
            # Fall back to a broader search; return a plain list for a consistent type
            broader = await self.memory.search_long_term_memory(
                text=query,
                limit=10
            )
            return broader.memories[:5]

        # Filter by relevance threshold
        relevant_memories = [
            m for m in initial_results.memories
            if m.relevance_score > 0.7
        ]

        return relevant_memories[:5]

    async def contextual_store(self, text: str, context: dict, user_id: str):
        # Extract topics and entities from context
        topics = context.get("topics", [])
        entities = context.get("entities", [])

        # Search for similar existing memories
        similar = await self.memory.search_long_term_memory(
            text=text,
            user_id=user_id,
            limit=3,
            min_relevance_score=0.8
        )

        if similar.memories:
            # Update existing memory instead of creating duplicate
            await self.memory.edit_memory(
                memory_id=similar.memories[0].id,
                updates={
                    "text": f"{similar.memories[0].text}. {text}",
                    "topics": list(set(similar.memories[0].topics + topics)),
                    "entities": list(set(similar.memories[0].entities + entities))
                }
            )
        else:
            # Create new memory
            await self.memory.create_long_term_memories([{
                "text": text,
                "memory_type": "semantic",
                "topics": topics,
                "entities": entities,
                "user_id": user_id
            }])
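
Usage of the agent sketch above, reusing memory_client as configured earlier:

agent = CustomMemoryAgent(memory_client)

# Search with automatic broadening and a relevance cutoff
memories = await agent.intelligent_search("food preferences", user_id="alice")

# Store new information, merging with a near-duplicate when one exists
await agent.contextual_store(
    "User is vegetarian",
    context={"topics": ["food", "diet"], "entities": []},
    user_id="alice"
)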

Performance Optimization

import asyncio

class OptimizedMemoryClient:
    def __init__(self, client: MemoryAPIClient):
        self.client = client
        self._search_cache = {}

    @staticmethod
    def _cache_key(text: str, user_id: str, limit: int) -> str:
        # Cheap string formatting; no memoization needed
        return f"{text}:{user_id}:{limit}"

    async def cached_search(self, text: str, user_id: str, limit: int = 5):
        cache_key = self._cache_key(text, user_id, limit)

        if cache_key in self._search_cache:
            return self._search_cache[cache_key]

        results = await self.client.search_long_term_memory(
            text=text,
            user_id=user_id,
            limit=limit
        )

        # Cache results for 5 minutes
        self._search_cache[cache_key] = results
        asyncio.create_task(self._expire_cache(cache_key, 300))

        return results

    async def _expire_cache(self, key: str, delay: int):
        await asyncio.sleep(delay)
        self._search_cache.pop(key, None)
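
Repeated identical queries within the five-minute window are then served from the in-process cache:

optimized = OptimizedMemoryClient(client)

# First call hits the server; identical calls within 5 minutes hit the cache
results = await optimized.cached_search("user preferences", user_id="alice")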

Best Practices

1. Client Management

# Use a single client instance per application
class MemoryService:
    def __init__(self):
        self.client = MemoryAPIClient(
            base_url=os.getenv("MEMORY_SERVER_URL"),
            api_key=os.getenv("MEMORY_API_KEY")
        )

    async def close(self):
        await self.client.close()

# Singleton pattern
memory_service = MemoryService()

2. Memory Organization

# Use consistent naming patterns
async def create_user_memory(text: str, user_id: str, category: str):
    return await client.create_long_term_memories([{
        "text": text,
        "memory_type": "semantic",
        "topics": [category, "user-preference"],
        "user_id": user_id,
        "namespace": f"user:{user_id}:preferences"
    }])

3. Context Management

# Implement context-aware memory storage
async def store_conversation_memory(conversation: dict, session_id: str):
    # Extract key information
    important_facts = extract_facts(conversation)  # your own fact-extraction helper

    if important_facts:
        await client.create_long_term_memories([{
            "text": fact,
            "memory_type": "semantic",
            "session_id": session_id,
            "metadata": {"conversation_turn": i}
        } for i, fact in enumerate(important_facts)])

Configuration Reference

Environment Variables

# Client configuration
MEMORY_SERVER_URL=http://localhost:8000
MEMORY_API_KEY=your-api-token

# Connection settings
MEMORY_TIMEOUT=30
MEMORY_MAX_RETRIES=3

# Default user settings
DEFAULT_USER_ID=default-user
DEFAULT_NAMESPACE=production
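
One way to wire these variables into the client at startup (a sketch using the names listed above):

import os

from agent_memory_client import MemoryAPIClient

# Build a client from the environment; defaults mirror the values shown above
client = MemoryAPIClient(
    base_url=os.getenv("MEMORY_SERVER_URL", "http://localhost:8000"),
    api_key=os.getenv("MEMORY_API_KEY"),
    timeout=float(os.getenv("MEMORY_TIMEOUT", "30")),
    max_retries=int(os.getenv("MEMORY_MAX_RETRIES", "3")),
    user_id=os.getenv("DEFAULT_USER_ID", "default-user"),
    namespace=os.getenv("DEFAULT_NAMESPACE", "production")
)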

Client Options

client = MemoryAPIClient(
    base_url="http://localhost:8000",
    api_key="optional-token",
    bearer_token="optional-jwt",
    timeout=30.0,
    max_retries=3,
    session_id="default-session",
    user_id="default-user",
    namespace="default",
    http_client=custom_httpx_client
)

The Python SDK makes it easy to add sophisticated memory capabilities to any AI application, with minimal setup and maximum flexibility. Use the tool integrations for LLM-driven memory, direct API calls for code-driven approaches, or combine both patterns for hybrid solutions.
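
For example, a hybrid turn can let the model read memory through tools while the application writes conversation state directly. A sketch reusing memory_client, openai_client, and memory_tools from the tool-integration examples above:

async def hybrid_turn(message: str, session_id: str) -> str:
    # LLM-driven reads: the model may call memory tools for context
    response = await openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": message}],
        tools=memory_tools,
        tool_choice="auto"
    )

    # Code-driven write: persist the turn regardless of what the model did
    await memory_client.set_working_memory(session_id, {
        "messages": [{"role": "user", "content": message}]
    })

    return response.choices[0].message.content or ""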