diff --git a/IN_MEMORY_STORAGE.md b/IN_MEMORY_STORAGE.md new file mode 100644 index 0000000..27c7810 --- /dev/null +++ b/IN_MEMORY_STORAGE.md @@ -0,0 +1,162 @@ +# In-Memory Heap Storage + +## Overview + +This repository now includes an in-memory heap storage implementation that stores V8 heap snapshots in RAM rather than persisting them to disk or cloud storage. This is useful for: + +- **Testing and Development**: Fast iteration without filesystem I/O overhead +- **Temporary Sessions**: Sessions that don't require persistence beyond process lifetime +- **Performance**: Faster read/write operations compared to disk or S3 +- **Simplicity**: No external dependencies or configuration required + +## Implementation Details + +### Architecture + +The `InMemoryHeapStorage` struct uses: +- `HashMap>` to store heap snapshots keyed by heap name +- `Arc>` for thread-safe concurrent access +- Tokio's async RwLock for non-blocking operations + +### Features + +- **Thread-safe**: Multiple concurrent readers with exclusive writer access +- **Clone-able**: Shares the same underlying storage across clones +- **Zero-configuration**: Works out of the box with no setup +- **Rich API**: Additional utility methods beyond the basic `put`/`get` interface + +### API + +Basic HeapStorage trait methods: +- `put(name: &str, data: &[u8]) -> Result<(), String>` - Store heap snapshot +- `get(name: &str) -> Result, String>` - Retrieve heap snapshot + +Additional utility methods: +- `len() -> usize` - Get number of stored items +- `is_empty() -> bool` - Check if storage is empty +- `clear()` - Remove all stored data +- `contains_key(name: &str) -> bool` - Check if key exists +- `remove(name: &str) -> Option>` - Remove and return specific item +- `keys() -> Vec` - Get all stored keys + +## Usage + +### Command Line + +Run the server with in-memory storage: + +```bash +# With stdio transport (default) +cargo run --bin server -- --in-memory + +# With HTTP transport +cargo run --bin server -- --in-memory --http-port 8080 + +# With SSE transport +cargo run --bin server -- --in-memory --sse-port 8080 +``` + +### Programmatic Usage + +```rust +use mcp::heap_storage::{InMemoryHeapStorage, HeapStorage}; + +// Create new in-memory storage +let storage = InMemoryHeapStorage::new(); + +// Store data +storage.put("heap1", b"snapshot data").await.unwrap(); + +// Retrieve data +let data = storage.get("heap1").await.unwrap(); + +// Check existence +if storage.contains_key("heap1").await { + println!("Found heap1"); +} + +// Get all keys +let keys = storage.keys().await; +println!("Stored heaps: {:?}", keys); + +// Clear all data +storage.clear().await; +``` + +### Integration with AnyHeapStorage + +```rust +use mcp::heap_storage::{AnyHeapStorage, InMemoryHeapStorage}; + +let storage = AnyHeapStorage::InMemory(InMemoryHeapStorage::new()); + +// Use through the HeapStorage trait +storage.put("key", b"value").await.unwrap(); +let value = storage.get("key").await.unwrap(); +``` + +## Comparison with Other Storage Backends + +| Feature | In-Memory | File | S3 | +|---------|-----------|------|-----| +| Persistence | Process lifetime only | Permanent | Permanent | +| Performance | Fastest | Fast | Slower (network) | +| Setup Required | None | Directory path | AWS credentials + bucket | +| Memory Usage | High (all in RAM) | Low | Low | +| Distributed Access | No | Shared filesystem only | Yes | +| Best For | Testing, dev, temp sessions | Single server production | Multi-server production | + +## Configuration Options + +The in-memory storage conflicts with other storage options: +- Cannot be used with `--s3-bucket` +- Cannot be used with `--directory-path` +- Cannot be used with `--stateless` + +## Limitations + +1. **No Persistence**: All data is lost when the process terminates +2. **Memory Bounded**: Limited by available RAM +3. **Single Process**: Cannot be shared across multiple server instances +4. **No Backup**: No automatic snapshot or recovery mechanism + +## Testing + +The implementation includes comprehensive unit tests: + +```bash +# Run all tests +cargo test -p server + +# Run only heap_storage tests +cargo test -p server heap_storage +``` + +Test coverage includes: +- Basic put/get operations +- Error handling for missing keys +- Utility method functionality +- Clone behavior (shared storage) +- Integration with AnyHeapStorage enum + +## When to Use + +**Use in-memory storage when:** +- Developing and testing locally +- Running ephemeral workloads +- Performance is critical and persistence is not +- You want zero configuration overhead + +**Don't use in-memory storage when:** +- You need data to survive process restarts +- Running in production with important data +- Memory is constrained +- Multiple servers need to share state + +## Future Enhancements + +Possible improvements: +- Memory usage limits with LRU eviction +- Periodic snapshots to disk +- Memory metrics and monitoring +- TTL (time-to-live) for entries diff --git a/server/src/main.rs b/server/src/main.rs index eef37f4..200a054 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -13,7 +13,7 @@ use tokio_util::sync::CancellationToken; mod mcp; use mcp::{StatelessService, StatefulService, initialize_v8}; -use mcp::heap_storage::{AnyHeapStorage, S3HeapStorage, FileHeapStorage}; +use mcp::heap_storage::{AnyHeapStorage, S3HeapStorage, FileHeapStorage, InMemoryHeapStorage}; /// Command line arguments for configuring heap storage #[derive(Parser, Debug)] @@ -21,15 +21,19 @@ use mcp::heap_storage::{AnyHeapStorage, S3HeapStorage, FileHeapStorage}; struct Cli { /// S3 bucket name (required if --use-s3) - #[arg(long, conflicts_with_all = ["directory_path", "stateless"])] + #[arg(long, conflicts_with_all = ["directory_path", "stateless", "in_memory"])] s3_bucket: Option, /// Directory path for filesystem storage (required if --use-filesystem) - #[arg(long, conflicts_with_all = ["s3_bucket", "stateless"])] + #[arg(long, conflicts_with_all = ["s3_bucket", "stateless", "in_memory"])] directory_path: Option, + /// Use in-memory storage (heap state persists only during process lifetime) + #[arg(long, conflicts_with_all = ["s3_bucket", "directory_path", "stateless"])] + in_memory: bool, + /// Run in stateless mode - no heap snapshots are saved or loaded - #[arg(long, conflicts_with_all = ["s3_bucket", "directory_path"])] + #[arg(long, conflicts_with_all = ["s3_bucket", "directory_path", "in_memory"])] stateless: bool, /// HTTP port to listen on (if not specified, uses stdio transport) @@ -77,12 +81,18 @@ async fn main() -> Result<()> { } } else { // Stateful mode - with heap persistence - let heap_storage = if let Some(bucket) = cli.s3_bucket { + let heap_storage = if cli.in_memory { + tracing::info!("Using in-memory heap storage"); + AnyHeapStorage::InMemory(InMemoryHeapStorage::new()) + } else if let Some(bucket) = cli.s3_bucket { + tracing::info!("Using S3 heap storage with bucket: {}", bucket); AnyHeapStorage::S3(S3HeapStorage::new(bucket).await) } else if let Some(dir) = cli.directory_path { + tracing::info!("Using file heap storage with directory: {}", dir); AnyHeapStorage::File(FileHeapStorage::new(dir)) } else { // default to file /tmp/mcp-v8-heaps + tracing::info!("Using default file heap storage: /tmp/mcp-v8-heaps"); AnyHeapStorage::File(FileHeapStorage::new("/tmp/mcp-v8-heaps")) }; diff --git a/server/src/mcp/heap_storage.rs b/server/src/mcp/heap_storage.rs index 26245e6..b73a47f 100644 --- a/server/src/mcp/heap_storage.rs +++ b/server/src/mcp/heap_storage.rs @@ -1,9 +1,10 @@ use std::path::PathBuf; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::RwLock; use aws_sdk_s3::Client as S3Client; use aws_sdk_s3::ByteStream; - use aws_config; -use std::sync::Arc; use async_trait::async_trait; #[async_trait] @@ -98,11 +99,80 @@ impl HeapStorage for S3HeapStorage { } } +/// In-memory heap storage using a HashMap protected by RwLock +/// This is useful for testing and development, or when persistence is not required +#[derive(Clone)] +pub struct InMemoryHeapStorage { + store: Arc>>>, +} + +impl InMemoryHeapStorage { + pub fn new() -> Self { + Self { + store: Arc::new(RwLock::new(HashMap::new())), + } + } + + /// Get the number of items currently stored + pub async fn len(&self) -> usize { + self.store.read().await.len() + } + + /// Check if the storage is empty + pub async fn is_empty(&self) -> bool { + self.store.read().await.is_empty() + } + + /// Clear all stored data + pub async fn clear(&self) { + self.store.write().await.clear(); + } + + /// Check if a key exists + pub async fn contains_key(&self, name: &str) -> bool { + self.store.read().await.contains_key(name) + } + + /// Remove a specific key + pub async fn remove(&self, name: &str) -> Option> { + self.store.write().await.remove(name) + } + + /// Get all keys currently stored + pub async fn keys(&self) -> Vec { + self.store.read().await.keys().cloned().collect() + } +} + +impl Default for InMemoryHeapStorage { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl HeapStorage for InMemoryHeapStorage { + async fn put(&self, name: &str, data: &[u8]) -> Result<(), String> { + let mut store = self.store.write().await; + store.insert(name.to_string(), data.to_vec()); + Ok(()) + } + + async fn get(&self, name: &str) -> Result, String> { + let store = self.store.read().await; + store + .get(name) + .cloned() + .ok_or_else(|| format!("Key '{}' not found in memory storage", name)) + } +} + #[derive(Clone)] pub enum AnyHeapStorage { #[allow(dead_code)] File(FileHeapStorage), S3(S3HeapStorage), + InMemory(InMemoryHeapStorage), } @@ -114,12 +184,96 @@ impl HeapStorage for AnyHeapStorage { match self { AnyHeapStorage::File(inner) => inner.put(name, data).await, AnyHeapStorage::S3(inner) => inner.put(name, data).await, + AnyHeapStorage::InMemory(inner) => inner.put(name, data).await, } } async fn get(&self, name: &str) -> Result, String> { match self { AnyHeapStorage::File(inner) => inner.get(name).await, AnyHeapStorage::S3(inner) => inner.get(name).await, + AnyHeapStorage::InMemory(inner) => inner.get(name).await, } } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_in_memory_storage_basic_operations() { + let storage = InMemoryHeapStorage::new(); + + // Test put and get + let data = b"hello world"; + storage.put("test_key", data).await.unwrap(); + let retrieved = storage.get("test_key").await.unwrap(); + assert_eq!(retrieved, data); + + // Test get non-existent key + let result = storage.get("non_existent").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_in_memory_storage_operations() { + let storage = InMemoryHeapStorage::new(); + + // Test empty + assert!(storage.is_empty().await); + assert_eq!(storage.len().await, 0); + + // Add data + storage.put("key1", b"value1").await.unwrap(); + storage.put("key2", b"value2").await.unwrap(); + + // Test len and is_empty + assert!(!storage.is_empty().await); + assert_eq!(storage.len().await, 2); + + // Test contains_key + assert!(storage.contains_key("key1").await); + assert!(!storage.contains_key("key3").await); + + // Test keys + let keys = storage.keys().await; + assert_eq!(keys.len(), 2); + assert!(keys.contains(&"key1".to_string())); + assert!(keys.contains(&"key2".to_string())); + + // Test remove + let removed = storage.remove("key1").await; + assert!(removed.is_some()); + assert_eq!(removed.unwrap(), b"value1"); + assert_eq!(storage.len().await, 1); + + // Test clear + storage.clear().await; + assert!(storage.is_empty().await); + } + + #[tokio::test] + async fn test_in_memory_storage_clone() { + let storage1 = InMemoryHeapStorage::new(); + storage1.put("key", b"value").await.unwrap(); + + // Clone should share the same underlying storage + let storage2 = storage1.clone(); + let retrieved = storage2.get("key").await.unwrap(); + assert_eq!(retrieved, b"value"); + + // Changes through clone should be visible + storage2.put("key2", b"value2").await.unwrap(); + assert!(storage1.contains_key("key2").await); + } + + #[tokio::test] + async fn test_any_heap_storage_in_memory() { + let storage = AnyHeapStorage::InMemory(InMemoryHeapStorage::new()); + + // Test through trait interface + storage.put("test", b"data").await.unwrap(); + let retrieved = storage.get("test").await.unwrap(); + assert_eq!(retrieved, b"data"); + } } \ No newline at end of file