Skip to content

Commit 6070611

Browse files
committed
[chore]: add weaviate db instance creation and population scripts
1 parent f10c790 commit 6070611

File tree

2 files changed

+199
-284
lines changed

2 files changed

+199
-284
lines changed
Lines changed: 26 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,41 @@
11
from app.db.weaviate.weaviate_client import get_client
22
import weaviate.classes.config as wc
3+
34
def create_schema(client, name, properties):
    """Create a Weaviate collection and report it on stdout.

    Args:
        client: A connected Weaviate client exposing ``collections.create``.
        name: Name of the collection to create.
        properties: Property definitions passed through unchanged to
            ``client.collections.create``.

    Returns:
        Whatever ``client.collections.create`` returns (the handle of the
        newly created collection), so callers can keep working with it.
    """
    # Only name and properties are sent; no vectorizer or generative module
    # is configured by this helper.
    collection = client.collections.create(
        name=name,
        properties=properties,
    )
    print(f"Created: {name}")
    return collection
11-
def create_user_profile_schema(client):
12-
properties = [
13-
wc.Property(name="supabaseUserId", data_type=wc.DataType.TEXT),
14-
wc.Property(name="profileSummary", data_type=wc.DataType.TEXT),
15-
wc.Property(name="primaryLanguages", data_type=wc.DataType.TEXT_ARRAY),
16-
wc.Property(name="expertiseAreas", data_type=wc.DataType.TEXT_ARRAY),
17-
]
18-
create_schema(client, "weaviate_user_profile", properties)
19-
2010

21-
def create_code_chunk_schema(client):
22-
properties = [
23-
wc.Property(name="supabaseChunkId", data_type=wc.DataType.TEXT),
24-
wc.Property(name="codeContent", data_type=wc.DataType.TEXT),
25-
wc.Property(name="language", data_type=wc.DataType.TEXT),
26-
wc.Property(name="functionNames", data_type=wc.DataType.TEXT_ARRAY),
27-
]
28-
create_schema(client, "weaviate_code_chunk", properties)
29-
30-
def create_interaction_schema(client):
11+
def create_user_profile_schema(client):
    """Create the ``weaviate_user_profile`` collection.

    Builds the property list for the WeaviateUserProfile model and hands it
    to ``create_schema``.

    NOTE(review): the author's intent is that embeddings are based on the
    ``profile_text_for_embedding`` field; nothing in this function
    configures a vectorizer, so confirm where vectors are produced.
    """
    text = wc.DataType.TEXT
    text_array = wc.DataType.TEXT_ARRAY
    integer = wc.DataType.INT
    date = wc.DataType.DATE

    # (property name, data type) in the order they are registered.
    field_types = (
        ("user_id", text),
        ("github_username", text),
        ("display_name", text),
        ("bio", text),
        ("location", text),
        ("repositories", text),  # JSON string
        ("languages", text_array),
        ("topics", text_array),
        ("followers_count", integer),
        ("following_count", integer),
        ("total_stars_received", integer),
        ("total_forks", integer),
        ("profile_text_for_embedding", text),
        ("last_updated", date),
    )
    properties = [
        wc.Property(name=field, data_type=kind) for field, kind in field_types
    ]
    create_schema(client, "weaviate_user_profile", properties)
3833

3934
def create_all_schemas():
    """Create the user profile schema, the only one in the updated model.

    Obtains a client via ``get_client()``, creates the user profile
    collection, and always closes the client afterwards. Any exception
    raised during creation propagates to the caller after the client has
    been closed; the success message is printed only when creation
    succeeded.
    """
    client = get_client()
    try:
        create_user_profile_schema(client)
    finally:
        # Release the connection even when schema creation fails.
        client.close()
    print("✅ User profile schema created successfully.")

0 commit comments

Comments
 (0)