···88- 3.10+ and complete typing (T | None preferred over Optional[T] and list[T] over typing.List[T])
99- prefer functional over OOP
1010- keep implementation details private and functions pure
1111+- never use `pytest.mark.asyncio`, it's unnecessary
11121213## Project Structure
1314
-7
evals/test_basic_responses.py
···11"""Test phi's basic response behavior."""
2233-import pytest
44-53from bot.agent import Response
647588-@pytest.mark.asyncio
96async def test_phi_responds_to_philosophical_question(phi_agent, evaluate_response):
107 """Test that phi engages meaningfully with philosophical questions."""
118 agent = phi_agent
···3936 )
403741384242-@pytest.mark.asyncio
4339async def test_phi_ignores_spam(phi_agent):
4440 """Test that phi appropriately ignores spam-like content."""
4541 agent = phi_agent
···5753 assert response.reason is not None
585459556060-@pytest.mark.asyncio
6156async def test_phi_maintains_thread_context(phi_agent, evaluate_response):
6257 """Test that phi uses thread context appropriately."""
6358 agent = phi_agent
···8984 )
908591869292-@pytest.mark.asyncio
9387async def test_phi_respects_character_limit(phi_agent):
9488 """Test that phi's responses fit Bluesky's 300 character limit."""
9589 agent = phi_agent
···108102 )
109103110104111111-@pytest.mark.asyncio
112105async def test_phi_handles_casual_greeting(phi_agent, evaluate_response):
113106 """Test that phi responds appropriately to casual greetings."""
114107 agent = phi_agent
+46-50
evals/test_memory_integration.py
···2233import pytest
4455-from bot.agent import PhiAgent
65from bot.config import Settings
76from bot.memory import MemoryType, NamespaceMemory
87981010-@pytest.mark.asyncio
1111-async def test_phi_retrieves_episodic_memory(settings):
1212- """Test that phi can retrieve and use episodic memories."""
99+@pytest.fixture
1010+def memory_settings():
1111+ """Check if memory keys are available."""
1212+ settings = Settings()
1313 if not all([settings.turbopuffer_api_key, settings.openai_api_key, settings.anthropic_api_key]):
1414- pytest.skip("Requires TurboPuffer, OpenAI, and Anthropic API keys in .env")
1414+ pytest.skip("Requires TURBOPUFFER_API_KEY, OPENAI_API_KEY, and ANTHROPIC_API_KEY")
1515+ return settings
15161616- # Create memory system
1717- memory = NamespaceMemory(api_key=settings.turbopuffer_api_key)
18171919- # Store a memory about a user
2020- await memory.store_user_memory(
2121- "alice.bsky",
2222- "Alice mentioned she's working on a PhD in neuroscience",
2323- MemoryType.USER_FACT,
1818+async def test_core_memory_integration(memory_settings, phi_agent, evaluate_response):
1919+ """Test that phi uses core memories in responses."""
2020+ memory = NamespaceMemory(api_key=memory_settings.turbopuffer_api_key)
2121+2222+ # Store a core memory
2323+ await memory.store_core_memory(
2424+ label="test_interaction_rule",
2525+ content="When users mention birds, always acknowledge the beauty of murmuration patterns",
2626+ memory_type=MemoryType.GUIDELINE,
2427 )
25282626- # Create agent
2727- agent = PhiAgent()
2828- agent.memory = memory
2929+ # Override agent's memory with our test memory
3030+ phi_agent.memory = memory
29313030- # Process a mention that should trigger memory retrieval
3131- response = await agent.process_mention(
3232- mention_text="what do you remember about me?",
3333- author_handle="alice.bsky",
3232+ # Ask about birds
3333+ response = await phi_agent.process_mention(
3434+ mention_text="I saw a huge flock of starlings today",
3535+ author_handle="test.user",
3436 thread_context="No previous messages in this thread.",
3535- thread_uri="at://test/thread/memory1",
3737+ thread_uri="at://test/thread/1",
3638 )
37393840 if response.action == "reply":
3939- assert response.text is not None
4040- # Should reference the neuroscience PhD in the response
4141- assert (
4242- "neuroscience" in response.text.lower()
4343- or "phd" in response.text.lower()
4444- or "working on" in response.text.lower()
4545- ), "Response should reference stored memory about Alice"
4141+ await evaluate_response(
4242+ "Does the response acknowledge or reference murmuration patterns?",
4343+ response.text,
4444+ )
464547464848-@pytest.mark.asyncio
4949-async def test_phi_stores_conversation_in_memory(settings):
5050- """Test that phi stores interactions in episodic memory."""
5151- if not all([settings.turbopuffer_api_key, settings.openai_api_key, settings.anthropic_api_key]):
5252- pytest.skip("Requires TurboPuffer, OpenAI, and Anthropic API keys in .env")
4747+async def test_user_memory_integration(memory_settings, phi_agent, evaluate_response):
4848+ """Test that phi uses user-specific memories in responses."""
4949+ memory = NamespaceMemory(api_key=memory_settings.turbopuffer_api_key)
53505454- memory = NamespaceMemory(api_key=settings.turbopuffer_api_key)
5151+ # Store a memory about a user
5252+ await memory.store_user_memory(
5353+ handle="alice.test",
5454+ content="Alice is researching swarm intelligence in biological systems",
5555+ memory_type=MemoryType.USER_FACT,
5656+ )
55575656- agent = PhiAgent()
5757- agent.memory = memory
5858+ # Override agent's memory
5959+ phi_agent.memory = memory
58605959- # Have a conversation
6060- response = await agent.process_mention(
6161- mention_text="I'm really interested in phenomenology",
6262- author_handle="bob.bsky",
6161+ # User asks a question
6262+ response = await phi_agent.process_mention(
6363+ mention_text="what do you remember about my research?",
6464+ author_handle="alice.test",
6365 thread_context="No previous messages in this thread.",
6464- thread_uri="at://test/thread/memory2",
6666+ thread_uri="at://test/thread/2",
6567 )
66686769 if response.action == "reply":
6868- # Verify memories were stored
6969- memories = await memory.get_user_memories("bob.bsky", limit=10)
7070-7171- assert len(memories) > 0, "Should have stored conversation in memory"
7272-7373- # Check that both user's message and bot's response were stored
7474- memory_texts = [m.content for m in memories]
7575- assert any(
7676- "phenomenology" in text.lower() for text in memory_texts
7777- ), "Should store user's message about phenomenology"
7070+ await evaluate_response(
7171+ "Does the response reference Alice's research on swarm intelligence or biological systems?",
7272+ response.text,
7373+ )