Skip to content

Commit f10c790

Browse files
committed
[feat]: weaviate db models alignment to support user indexing and search
1 parent 60262c6 commit f10c790

File tree

1 file changed

+78
-25
lines changed

1 file changed

+78
-25
lines changed
Lines changed: 78 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,88 @@
11
from pydantic import BaseModel, Field
2-
from typing import List
2+
from typing import List, Optional
3+
from datetime import datetime
34

4-
5-
class WeaviateUserProfile(BaseModel):
5+
class WeaviateRepository(BaseModel):
    """
    Represents a single repository within a WeaviateUserProfile.

    Structures the repository-specific data that contributes to the user's
    overall profile.
    """
    name: str = Field(..., description="The name of the repository.")
    description: Optional[str] = Field(None, description="The repository's description.")
    url: str = Field(..., description="The URL of the repository.")
    languages: List[str] = Field(..., description="The languages used in the repository.")
    # Optional with an empty default so callers that never supplied topics keep
    # working; the schema example for user profiles carries per-repository topics.
    topics: List[str] = Field(default_factory=list, description="The topics associated with the repository.")
    stars: int = Field(0, description="The number of stars the repository has.")
    forks: int = Field(0, description="The number of forks the repository has.")
1416

1517

16-
class WeaviateCodeChunk(BaseModel):
18+
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    # Local import keeps this block self-contained; the module only imports `datetime`.
    from datetime import timezone

    return datetime.now(timezone.utc)


class WeaviateUserProfile(BaseModel):
    """
    Represents a user's profile data to be stored and indexed in Weaviate.

    Enables semantic search capabilities to find users based on their profile data.
    """
    # --- Identity ---
    user_id: str = Field(..., description="The unique identifier for the user, linking back to the Supabase 'users' table.")
    github_username: str = Field(..., description="The user's unique GitHub username.")
    display_name: Optional[str] = Field(None, description="User's display name.")
    bio: Optional[str] = Field(None, description="User's biography from their GitHub profile.")
    location: Optional[str] = Field(None, description="User's location.")

    # --- Repository data ---
    repositories: List[WeaviateRepository] = Field(
        default_factory=list, description="List of the user's repositories.")

    languages: List[str] = Field(default_factory=list,
                                 description="A unique, aggregated list of all programming languages from the user's repositories.")
    topics: List[str] = Field(default_factory=list,
                              description="A unique, aggregated list of all topics from the user's repositories.")

    # --- Aggregate GitHub statistics ---
    followers_count: int = Field(0, description="Number of followers the user has on GitHub.")
    following_count: int = Field(0, description="Number of other users this user is following on GitHub.")
    total_stars_received: int = Field(
        0, description="Total number of stars received across all of the user's owned repositories.")
    total_forks: int = Field(0, description="Total number of times the user's repositories have been forked.")

    # --- Text that gets vectorized ---
    profile_text_for_embedding: str = Field(
        ..., description="A synthesized text field combining bio, repository names, descriptions, languages, and topics for vectorization.")

    # Timezone-aware UTC default: a naive datetime.now() serializes without an
    # offset, which does not match the "Z"-suffixed timestamp in the example below.
    last_updated: datetime = Field(default_factory=_utc_now,
                                   description="The date and time the profile was last updated.")

    class Config:
        """
        Pydantic model configuration.
        """
        # NOTE(review): `orm_mode` / `schema_extra` are the Pydantic v1 option names
        # (v2 renames them to `from_attributes` / `json_schema_extra`) - confirm the
        # pinned Pydantic version before migrating.
        orm_mode = True
        schema_extra = {
            "example": {
                "user_id": "a1b2c3d4-e5f6-7890-1234-567890abcdef",
                "github_username": "jane-dev",
                "display_name": "Jane Developer",
                "bio": "Creator of innovative open-source tools. Full-stack developer with a passion for Rust and WebAssembly.",
                "location": "Berlin, Germany",
                "repositories": [
                    {
                        "name": "rust-web-framework",
                        "description": "A high-performance web framework for Rust.",
                        # `url` is a required repository field, so the example must carry it.
                        "url": "https://github.com/jane-dev/rust-web-framework",
                        "languages": ["Rust", "TOML"],
                        "topics": ["rust", "webdev", "performance", "framework"],
                        "stars": 2500,
                        "forks": 400
                    },
                    {
                        "name": "data-viz-lib",
                        "description": "A declarative data visualization library for JavaScript.",
                        "url": "https://github.com/jane-dev/data-viz-lib",
                        "languages": ["JavaScript", "TypeScript"],
                        "topics": ["data-visualization", "d3", "charts"],
                        "stars": 1200,
                        "forks": 150
                    }
                ],
                "languages": ["Rust", "JavaScript", "TypeScript", "TOML"],
                "topics": ["rust", "webdev", "performance", "framework", "data-visualization", "d3", "charts"],
                "followers_count": 1800,
                "following_count": 250,
                "total_stars_received": 3700,
                "total_forks": 550,
                "profile_text_for_embedding": "Jane Developer, Creator of innovative open-source tools. Full-stack developer with a passion for Rust and WebAssembly. Repositories: rust-web-framework, A high-performance web framework for Rust. data-viz-lib, A declarative data visualization library for JavaScript. Languages: Rust, JavaScript, TypeScript. Topics: rust, webdev, performance, data-visualization.",
                "last_updated": "2025-06-23T12:21:00Z"
            }
        }

0 commit comments

Comments
 (0)