|
1 | 1 | from app.db.weaviate.weaviate_client import get_client |
2 | 2 | import weaviate.classes.config as wc |
| 3 | + |
def create_schema(client, name, properties):
    """Create a Weaviate collection and report it on stdout.

    No vectorizer or generative module is configured here; the collection
    is created with the server-side defaults for both.

    Args:
        client: Connected Weaviate client exposing ``collections.create``.
        name: Name of the collection to create.
        properties: Property definitions for the collection (e.g. a list of
            ``wc.Property`` objects).

    Returns:
        Whatever ``client.collections.create`` returns (the handle of the
        newly created collection), so callers can use it immediately.
    """
    collection = client.collections.create(
        name=name,
        properties=properties,
    )
    print(f"Created: {name}")
    return collection
def create_user_profile_schema(client) -> None:
    """Create the ``weaviate_user_profile`` collection.

    Declares the properties of the user-profile model and hands them to
    ``create_schema``.

    NOTE(review): ``profile_text_for_embedding`` is presumably the text the
    profile embedding is derived from, but this function does not configure
    a vectorizer itself - confirm where the vectors are produced.

    Args:
        client: Connected Weaviate client, forwarded to ``create_schema``.
    """
    properties = [
        # Identity and free-text profile fields.
        wc.Property(name="user_id", data_type=wc.DataType.TEXT),
        wc.Property(name="github_username", data_type=wc.DataType.TEXT),
        wc.Property(name="display_name", data_type=wc.DataType.TEXT),
        wc.Property(name="bio", data_type=wc.DataType.TEXT),
        wc.Property(name="location", data_type=wc.DataType.TEXT),
        # Repository details are stored as a single JSON-encoded string.
        wc.Property(name="repositories", data_type=wc.DataType.TEXT),
        wc.Property(name="languages", data_type=wc.DataType.TEXT_ARRAY),
        wc.Property(name="topics", data_type=wc.DataType.TEXT_ARRAY),
        # Integer counters.
        wc.Property(name="followers_count", data_type=wc.DataType.INT),
        wc.Property(name="following_count", data_type=wc.DataType.INT),
        wc.Property(name="total_stars_received", data_type=wc.DataType.INT),
        wc.Property(name="total_forks", data_type=wc.DataType.INT),
        wc.Property(name="profile_text_for_embedding", data_type=wc.DataType.TEXT),
        wc.Property(name="last_updated", data_type=wc.DataType.DATE),
    ]
    create_schema(client, "weaviate_user_profile", properties)
38 | 33 |
|
def create_all_schemas() -> None:
    """Ensure the user profile collection exists in Weaviate.

    Only the user profile schema is managed here. The collection is created
    when it is missing and left untouched when it is already present, so the
    function can be run repeatedly (for example on every application start).
    The client obtained from ``get_client`` is always closed, even when
    schema creation raises.
    """
    client = get_client()
    try:
        # Guard against re-creating an existing collection, which the
        # server is expected to reject with an error.
        if client.collections.exists("weaviate_user_profile"):
            print("✅ User profile schema already exists.")
        else:
            create_user_profile_schema(client)
            print("✅ User profile schema created successfully.")
    finally:
        # Release the connection regardless of the outcome above.
        client.close()
0 commit comments