Skip to content

Commit 688826d

Browse files
Added an example for the jvector embedding store (#177)
Added an example for the jvector embedding store that was added to the langchain4j-community project.
1 parent e501914 commit 688826d

File tree

4 files changed

+232
-0
lines changed

4 files changed

+232
-0
lines changed

jvector-example/pom.xml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the standalone jvector-example module: Java 17, UTF-8 sources, two langchain4j dependencies. -->
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
7+
<groupId>dev.langchain4j</groupId>
8+
<artifactId>jvector-example</artifactId>
9+
<version>1.9.0-beta16-SNAPSHOT</version>
10+
11+
<properties>
12+
<maven.compiler.source>17</maven.compiler.source>
13+
<maven.compiler.target>17</maven.compiler.target>
14+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
15+
</properties>
16+
17+
<dependencies>
18+
<!-- JVector embedding store; resolved at this module's own version via ${project.version} (1.9.0-beta16-SNAPSHOT as declared above). -->
<dependency>
19+
<groupId>dev.langchain4j</groupId>
20+
<artifactId>langchain4j-community-jvector</artifactId>
21+
<version>${project.version}</version>
22+
</dependency>
23+
<!-- Embedding model artifact used by the example. -->
<!-- NOTE(review): pinned to 1.8.0-beta15 while this module is 1.9.0-beta16-SNAPSHOT; confirm the version mismatch is intentional. -->
<dependency>
24+
<groupId>dev.langchain4j</groupId>
25+
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
26+
<version>1.8.0-beta15</version>
27+
</dependency>
28+
</dependencies>
29+
</project>
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
import dev.langchain4j.community.store.embedding.jvector.JVectorEmbeddingStore;
2+
import dev.langchain4j.data.embedding.Embedding;
3+
import dev.langchain4j.data.segment.TextSegment;
4+
import dev.langchain4j.model.embedding.EmbeddingModel;
5+
import dev.langchain4j.model.embedding.onnx.allminilml6v2q.AllMiniLmL6V2QuantizedEmbeddingModel;
6+
import dev.langchain4j.store.embedding.EmbeddingMatch;
7+
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
8+
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
9+
import dev.langchain4j.store.embedding.EmbeddingStore;
10+
11+
import java.io.BufferedReader;
12+
import java.io.File;
13+
import java.io.InputStream;
14+
import java.io.InputStreamReader;
15+
import java.util.ArrayList;
16+
import java.util.List;
17+
import java.util.Random;
18+
19+
public class JVectorEmbeddingStoreExample {
20+
21+
private final static String TEST_DOCUMENT = "test-document.txt";
22+
public static final String TMP_JVECTOR_EMBEDDING_STORE = "/tmp/jvector-embedding-store";
23+
24+
public static void main(String[] args) {
25+
26+
File tempPath = new File(TMP_JVECTOR_EMBEDDING_STORE);
27+
try {
28+
// Create the default embedding model
29+
EmbeddingModel embeddingModel = new AllMiniLmL6V2QuantizedEmbeddingModel();
30+
31+
// Read all lines from the text file once
32+
if (JVectorEmbeddingStoreExample.class.getClassLoader().getResource(TEST_DOCUMENT) == null) {
33+
throw new RuntimeException("Test document not found: " + TEST_DOCUMENT);
34+
}
35+
List<String> lines = readLinesFromResource(TEST_DOCUMENT);
36+
System.out.println("Read " + lines.size() + " lines from " + TEST_DOCUMENT);
37+
System.out.println();
38+
39+
if (!tempPath.exists()) {
40+
boolean created = tempPath.mkdirs();
41+
if (!created) {
42+
throw new RuntimeException("Failed to create temporary directory: " + TMP_JVECTOR_EMBEDDING_STORE);
43+
}
44+
}
45+
File indexPath = new File(tempPath, "example-index");
46+
47+
// First store: JVector with maxDegree 16
48+
EmbeddingStore<TextSegment> jvectorStore1 = JVectorEmbeddingStore.builder()
49+
.dimension(384)
50+
.maxDegree(16)
51+
.build();
52+
53+
runWithStore(jvectorStore1, embeddingModel, lines, "JVectorStore(maxDegree=16)");
54+
55+
// Second store: JVector with a different configuration (maxDegree 8)
56+
EmbeddingStore<TextSegment> jvectorStore2 = JVectorEmbeddingStore.builder()
57+
.dimension(384)
58+
.maxDegree(8)
59+
.persistencePath(indexPath.getAbsolutePath())
60+
.build();
61+
62+
runWithStore(jvectorStore2, embeddingModel, lines, "JVectorStore(maxDegree=8)");
63+
} catch (Exception e) {
64+
System.err.println("Failed to run the example due to: " + e.getMessage());
65+
} finally {
66+
// Clean up the temporary index directory
67+
if (tempPath.exists() && tempPath.isDirectory()) {
68+
final File[] files = tempPath.listFiles();
69+
if (files != null) {
70+
for (File file : files) {
71+
if (!file.delete()) {
72+
System.err.println("Failed to delete file: " + file.getAbsolutePath());
73+
}
74+
}
75+
}
76+
}
77+
}
78+
}
79+
80+
/**
81+
* Runs the common workflow against the provided embedding store.
82+
*/
83+
private static void runWithStore(EmbeddingStore<TextSegment> embeddingStore,
84+
EmbeddingModel embeddingModel,
85+
List<String> lines,
86+
String storeName) {
87+
88+
long startTime = System.currentTimeMillis();
89+
System.out.println("=== Running with store: " + storeName + " ===");
90+
System.out.println("Adding embeddings to the store...");
91+
int added = 0;
92+
for (String line : lines) {
93+
if (!line.trim().isEmpty()) {
94+
TextSegment segment = TextSegment.from(line);
95+
Embedding embedding = embeddingModel.embed(segment).content();
96+
embeddingStore.add(embedding, segment);
97+
added++;
98+
}
99+
}
100+
System.out.println("Successfully added " + added + " embeddings to the store");
101+
System.out.println();
102+
103+
// Query the store with random lines from the file
104+
Random random = new Random();
105+
int numberOfQueries = 5;
106+
107+
System.out.println("Querying the embedding store with " + numberOfQueries + " random lines:");
108+
System.out.println("=========================================");
109+
110+
for (int i = 0; i < numberOfQueries; i++) {
111+
String randomLine = lines.get(random.nextInt(lines.size()));
112+
System.out.println("\nQuery " + (i + 1) + ": " + randomLine);
113+
114+
Embedding queryEmbedding = embeddingModel.embed(randomLine).content();
115+
116+
EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder()
117+
.queryEmbedding(queryEmbedding)
118+
.maxResults(3)
119+
.build();
120+
121+
EmbeddingSearchResult<TextSegment> result = embeddingStore.search(searchRequest);
122+
List<EmbeddingMatch<TextSegment>> matches = result.matches();
123+
124+
System.out.println("Top 3 matches:");
125+
for (int j = 0; j < matches.size(); j++) {
126+
EmbeddingMatch<TextSegment> match = matches.get(j);
127+
System.out.printf(" %d. Score: %.4f - %s%n",
128+
j + 1,
129+
match.score(),
130+
match.embedded().text());
131+
}
132+
}
133+
134+
long endTime = System.currentTimeMillis();
135+
System.out.printf("=== Finished running with store: %s in %d msec. ===%n",
136+
storeName, (endTime - startTime));
137+
System.out.println();
138+
}
139+
140+
/**
141+
* Read all lines from a resource file
142+
*/
143+
private static List<String> readLinesFromResource(String resourceName) {
144+
List<String> lines = new ArrayList<>();
145+
try (InputStream inputStream = JVectorEmbeddingStoreExample.class
146+
.getClassLoader()
147+
.getResourceAsStream(resourceName)) {
148+
if (inputStream == null) {
149+
throw new RuntimeException("Resource not found: " + resourceName);
150+
}
151+
try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
152+
String line;
153+
while ((line = reader.readLine()) != null) {
154+
lines.add(line);
155+
}
156+
}
157+
} catch (Exception e) {
158+
throw new RuntimeException("Failed to read resource file: " + resourceName, e);
159+
}
160+
return lines;
161+
}
162+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
The Earth completes one revolution around the Sun in approximately 365.25 days.
2+
Each year the Earth makes a full orbit around the Sun, taking about 365.25 days.
3+
Machine learning is a field of AI that builds models to learn patterns from data.
4+
ML focuses on creating algorithms that learn from examples and improve over time.
5+
The Pacific Ocean covers more area than any other ocean and is also the deepest.
6+
The Pacific is the biggest and deepest ocean on the planet, containing the deepest trenches.
7+
Photosynthesis enables plants to convert light energy into chemical energy and oxygen.
8+
Plants use photosynthesis to transform sunlight into glucose and release oxygen.
9+
The Great Wall of China stretches thousands of kilometers and is a famous historic structure.
10+
China's Great Wall is a renowned fortified structure built over centuries across the landscape.
11+
Python is a versatile high-level language commonly used for data analysis and scripting.
12+
Python's simplicity and libraries make it a preferred language for data science tasks.
13+
Mount Everest stands at about 8,848 meters and is the highest peak on Earth.
14+
At roughly 8,848 meters above sea level, Mount Everest is the planet's tallest mountain.
15+
Shakespeare authored plays such as Hamlet and many other notable works.
16+
William Shakespeare wrote famous plays including Hamlet, Othello, and Macbeth.
17+
DNA stores the genetic code that guides the growth and functioning of living organisms.
18+
The sequence of nucleotides in DNA encodes the hereditary information for organisms.
19+
The Eiffel Tower in Paris was finished in 1889 and measures around 330 meters tall.
20+
Completed in 1889, the Eiffel Tower rises to approximately 330 meters above Paris.
21+
Artificial neural networks are inspired by the structure and function of biological neurons.
22+
Neural network models imitate networks of neurons to process information and learn.
23+
The Amazon rainforest is estimated to produce about twenty percent of the world's oxygen.
24+
About one-fifth of global oxygen production is attributed to the vast Amazon rainforest.
25+
Albert Einstein introduced the theory of relativity in the early twentieth century.
26+
Einstein developed both special and general relativity during the early 1900s.
27+
James Naismith invented the game of basketball in 1891 while in Massachusetts.
28+
Basketball was created by James Naismith in 1891 at a Massachusetts training school.
29+
The adult human brain contains on the order of 86 billion neurons forming complex networks.
30+
Researchers estimate the human brain has roughly 86 billion nerve cells.
31+
Coffee ranks among the most widely consumed beverages worldwide each day.
32+
Globally, coffee is one of the leading beverages consumed by millions daily.
33+
The speed of light in a vacuum is approximately 299,792 kilometers per second.
34+
In vacuum, light travels at about 299,792 km/s, a fundamental physical constant.
35+
Ancient Egyptians constructed pyramids to serve as monumental tombs for their pharaohs.
36+
The pyramids were built by the ancient Egyptians as burial complexes for their rulers.
37+
Quantum computing harnesses quantum phenomena like superposition to perform computation.
38+
Quantum computers use entanglement and superposition to tackle certain computational problems.
39+
The Mona Lisa was painted by Leonardo da Vinci during the sixteenth century.
40+
Leonardo da Vinci created the Mona Lisa in the 1500s, one of the most famous paintings.

pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
<module>payara-micro-example</module>
5555
<module>gpullama3.java-example</module>
5656
<module>watsonx-ai-examples</module>
57+
<module>jvector-example</module>
5758
<module>yugabytedb-example</module>
5859
</modules>
5960

0 commit comments

Comments
 (0)