|
import dev.langchain4j.community.store.embedding.jvector.JVectorEmbeddingStore;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.allminilml6v2q.AllMiniLmL6V2QuantizedEmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingMatch;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.EmbeddingStore;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Random;
import java.util.stream.Collectors;
import java.util.stream.Stream;

| 19 | +public class JVectorEmbeddingStoreExample { |
| 20 | + |
| 21 | + private final static String TEST_DOCUMENT = "test-document.txt"; |
| 22 | + public static final String TMP_JVECTOR_EMBEDDING_STORE = "/tmp/jvector-embedding-store"; |
| 23 | + |
| 24 | + public static void main(String[] args) { |
| 25 | + |
| 26 | + File tempPath = new File(TMP_JVECTOR_EMBEDDING_STORE); |
| 27 | + try { |
| 28 | + // Create the default embedding model |
| 29 | + EmbeddingModel embeddingModel = new AllMiniLmL6V2QuantizedEmbeddingModel(); |
| 30 | + |
| 31 | + // Read all lines from the text file once |
| 32 | + if (JVectorEmbeddingStoreExample.class.getClassLoader().getResource(TEST_DOCUMENT) == null) { |
| 33 | + throw new RuntimeException("Test document not found: " + TEST_DOCUMENT); |
| 34 | + } |
| 35 | + List<String> lines = readLinesFromResource(TEST_DOCUMENT); |
| 36 | + System.out.println("Read " + lines.size() + " lines from " + TEST_DOCUMENT); |
| 37 | + System.out.println(); |
| 38 | + |
| 39 | + if (!tempPath.exists()) { |
| 40 | + boolean created = tempPath.mkdirs(); |
| 41 | + if (!created) { |
| 42 | + throw new RuntimeException("Failed to create temporary directory: " + TMP_JVECTOR_EMBEDDING_STORE); |
| 43 | + } |
| 44 | + } |
| 45 | + File indexPath = new File(tempPath, "example-index"); |
| 46 | + |
| 47 | + // First store: JVector with maxDegree 16 |
| 48 | + EmbeddingStore<TextSegment> jvectorStore1 = JVectorEmbeddingStore.builder() |
| 49 | + .dimension(384) |
| 50 | + .maxDegree(16) |
| 51 | + .build(); |
| 52 | + |
| 53 | + runWithStore(jvectorStore1, embeddingModel, lines, "JVectorStore(maxDegree=16)"); |
| 54 | + |
| 55 | + // Second store: JVector with a different configuration (maxDegree 8) |
| 56 | + EmbeddingStore<TextSegment> jvectorStore2 = JVectorEmbeddingStore.builder() |
| 57 | + .dimension(384) |
| 58 | + .maxDegree(8) |
| 59 | + .persistencePath(indexPath.getAbsolutePath()) |
| 60 | + .build(); |
| 61 | + |
| 62 | + runWithStore(jvectorStore2, embeddingModel, lines, "JVectorStore(maxDegree=8)"); |
| 63 | + } catch (Exception e) { |
| 64 | + System.err.println("Failed to run the example due to: " + e.getMessage()); |
| 65 | + } finally { |
| 66 | + // Clean up the temporary index directory |
| 67 | + if (tempPath.exists() && tempPath.isDirectory()) { |
| 68 | + final File[] files = tempPath.listFiles(); |
| 69 | + if (files != null) { |
| 70 | + for (File file : files) { |
| 71 | + if (!file.delete()) { |
| 72 | + System.err.println("Failed to delete file: " + file.getAbsolutePath()); |
| 73 | + } |
| 74 | + } |
| 75 | + } |
| 76 | + } |
| 77 | + } |
| 78 | + } |
| 79 | + |
| 80 | + /** |
| 81 | + * Runs the common workflow against the provided embedding store. |
| 82 | + */ |
| 83 | + private static void runWithStore(EmbeddingStore<TextSegment> embeddingStore, |
| 84 | + EmbeddingModel embeddingModel, |
| 85 | + List<String> lines, |
| 86 | + String storeName) { |
| 87 | + |
| 88 | + long startTime = System.currentTimeMillis(); |
| 89 | + System.out.println("=== Running with store: " + storeName + " ==="); |
| 90 | + System.out.println("Adding embeddings to the store..."); |
| 91 | + int added = 0; |
| 92 | + for (String line : lines) { |
| 93 | + if (!line.trim().isEmpty()) { |
| 94 | + TextSegment segment = TextSegment.from(line); |
| 95 | + Embedding embedding = embeddingModel.embed(segment).content(); |
| 96 | + embeddingStore.add(embedding, segment); |
| 97 | + added++; |
| 98 | + } |
| 99 | + } |
| 100 | + System.out.println("Successfully added " + added + " embeddings to the store"); |
| 101 | + System.out.println(); |
| 102 | + |
| 103 | + // Query the store with random lines from the file |
| 104 | + Random random = new Random(); |
| 105 | + int numberOfQueries = 5; |
| 106 | + |
| 107 | + System.out.println("Querying the embedding store with " + numberOfQueries + " random lines:"); |
| 108 | + System.out.println("========================================="); |
| 109 | + |
| 110 | + for (int i = 0; i < numberOfQueries; i++) { |
| 111 | + String randomLine = lines.get(random.nextInt(lines.size())); |
| 112 | + System.out.println("\nQuery " + (i + 1) + ": " + randomLine); |
| 113 | + |
| 114 | + Embedding queryEmbedding = embeddingModel.embed(randomLine).content(); |
| 115 | + |
| 116 | + EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder() |
| 117 | + .queryEmbedding(queryEmbedding) |
| 118 | + .maxResults(3) |
| 119 | + .build(); |
| 120 | + |
| 121 | + EmbeddingSearchResult<TextSegment> result = embeddingStore.search(searchRequest); |
| 122 | + List<EmbeddingMatch<TextSegment>> matches = result.matches(); |
| 123 | + |
| 124 | + System.out.println("Top 3 matches:"); |
| 125 | + for (int j = 0; j < matches.size(); j++) { |
| 126 | + EmbeddingMatch<TextSegment> match = matches.get(j); |
| 127 | + System.out.printf(" %d. Score: %.4f - %s%n", |
| 128 | + j + 1, |
| 129 | + match.score(), |
| 130 | + match.embedded().text()); |
| 131 | + } |
| 132 | + } |
| 133 | + |
| 134 | + long endTime = System.currentTimeMillis(); |
| 135 | + System.out.printf("=== Finished running with store: %s in %d msec. ===%n", |
| 136 | + storeName, (endTime - startTime)); |
| 137 | + System.out.println(); |
| 138 | + } |
| 139 | + |
| 140 | + /** |
| 141 | + * Read all lines from a resource file |
| 142 | + */ |
| 143 | + private static List<String> readLinesFromResource(String resourceName) { |
| 144 | + List<String> lines = new ArrayList<>(); |
| 145 | + try (InputStream inputStream = JVectorEmbeddingStoreExample.class |
| 146 | + .getClassLoader() |
| 147 | + .getResourceAsStream(resourceName)) { |
| 148 | + if (inputStream == null) { |
| 149 | + throw new RuntimeException("Resource not found: " + resourceName); |
| 150 | + } |
| 151 | + try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) { |
| 152 | + String line; |
| 153 | + while ((line = reader.readLine()) != null) { |
| 154 | + lines.add(line); |
| 155 | + } |
| 156 | + } |
| 157 | + } catch (Exception e) { |
| 158 | + throw new RuntimeException("Failed to read resource file: " + resourceName, e); |
| 159 | + } |
| 160 | + return lines; |
| 161 | + } |
| 162 | +} |
0 commit comments