|
1 | 1 | from pydantic import BaseModel, Field |
2 | | -from typing import List |
| 2 | +from typing import List, Optional |
| 3 | +from datetime import datetime |
3 | 4 |
|
4 | | - |
5 | | -class WeaviateUserProfile(BaseModel): |
| 5 | +class WeaviateRepository(BaseModel): |
6 | 6 | """ |
7 | | - Represents a vectorized user profile for semantic search in Weaviate. |
| 7 | + Represents a single repository within WeaviateUserProfile. |
| 8 | + Helps in structuring the repository-specific data that contributes to the user's overall profile. |
8 | 9 | """ |
9 | | - supabase_user_id: str = Field(..., alias="supabaseUserId") |
10 | | - profile_summary: str = Field(..., alias="profileSummary") |
11 | | - primary_languages: List[str] = Field(..., alias="primaryLanguages") |
12 | | - expertise_areas: List[str] = Field(..., alias="expertiseAreas") |
13 | | - embedding: List[float] = Field(..., description="384-dimensional vector") |
| 10 | + name: str = Field(..., description="The name of the repository.") |
| 11 | + description: Optional[str] = Field(None, description="The repository's description.") |
| 12 | + url: str = Field(..., description="The URL of the repository.") |
| 13 | + languages: List[str] = Field(..., description="The languages used in the repository.") |
| 14 | + stars: int = Field(0, description="The number of stars the repository has.") |
| 15 | + forks: int = Field(0, description="The number of forks the repository has.") |
14 | 16 |
|
15 | 17 |
|
16 | | -class WeaviateCodeChunk(BaseModel): |
| 18 | +class WeaviateUserProfile(BaseModel): |
17 | 19 | """ |
18 | | - Vectorized representation of code chunks stored in Weaviate. |
| 20 | + Represents a user's profile data to be stored and indexed in Weaviate. |
| 21 | + Enables semantic search capabilities to find users based on their profile data. |
19 | 22 | """ |
20 | | - supabase_chunk_id: str = Field(..., alias="supabaseChunkId") |
21 | | - code_content: str = Field(..., alias="codeContent") |
22 | | - language: str |
23 | | - function_names: List[str] = Field(..., alias="functionNames") |
24 | | - embedding: List[float] = Field(..., description="384-dimensional vector") |
| 23 | + user_id: str = Field(..., description="The unique identifier for the user, linking back to the Supabase 'users' table.") |
| 24 | + github_username: str = Field(..., description="The user's unique GitHub username.") |
| 25 | + display_name: Optional[str] = Field(None, description="User's display name.") |
| 26 | + bio: Optional[str] = Field(None, description="User's biography from their GitHub profile.") |
| 27 | + location: Optional[str] = Field(None, description="User's location.") |
25 | 28 |
|
| 29 | + repositories: List[WeaviateRepository] = Field( |
| 30 | + default_factory=list, description="List of the user's repositories.") |
26 | 31 |
|
27 | | -class WeaviateInteraction(BaseModel): |
28 | | - """ |
29 | | - Vectorized interaction representation stored in Weaviate. |
30 | | - """ |
31 | | - supabase_interaction_id: str = Field(..., alias="supabaseInteractionId") |
32 | | - conversation_summary: str = Field(..., alias="conversationSummary") |
33 | | - platform: str |
34 | | - topics: List[str] |
35 | | - embedding: List[float] = Field(..., description="384-dimensional vector") |
| 32 | + languages: List[str] = Field(default_factory=list, |
| 33 | + description="A unique, aggregated list of all programming languages from the user's repositories.") |
| 34 | + topics: List[str] = Field(default_factory=list, |
| 35 | + description="A unique, aggregated list of all topics from the user's repositories.") |
| 36 | + |
| 37 | + followers_count: int = Field(0, description="Number of followers the user has on GitHub.") |
| 38 | + following_count: int = Field(0, description="Number of other users this user is following on GitHub.") |
| 39 | + total_stars_received: int = Field( |
| 40 | + 0, description="Total number of stars received across all of the user's owned repositories.") |
| 41 | + total_forks: int = Field(0, description="Total number of times the user's repositories have been forked.") |
| 42 | + |
| 43 | + profile_text_for_embedding: str = Field( |
| 44 | + ..., description="A synthesized text field combining bio, repository names, descriptions, languages, and topics for vectorization.") |
| 45 | + |
| 46 | + last_updated: datetime = Field(default_factory=datetime.now, |
| 47 | + description="The date and time the profile was last updated.") |
| 48 | + |
| 49 | + class Config: |
| 50 | + """ |
| 51 | + Pydantic model configuration. |
| 52 | + """ |
| 53 | + orm_mode = True |
| 54 | + schema_extra = { |
| 55 | + "example": { |
| 56 | + "user_id": "a1b2c3d4-e5f6-7890-1234-567890abcdef", |
| 57 | + "github_username": "jane-dev", |
| 58 | + "display_name": "Jane Developer", |
| 59 | + "bio": "Creator of innovative open-source tools. Full-stack developer with a passion for Rust and WebAssembly.", |
| 60 | + "location": "Berlin, Germany", |
| 61 | + "repositories": [ |
| 62 | + { |
| 63 | + "name": "rust-web-framework", |
| 64 | + "description": "A high-performance web framework for Rust.", |
| 65 | + "languages": ["Rust", "TOML"], |
| 66 | + "topics": ["rust", "webdev", "performance", "framework"], |
| 67 | + "stars": 2500, |
| 68 | + "forks": 400 |
| 69 | + }, |
| 70 | + { |
| 71 | + "name": "data-viz-lib", |
| 72 | + "description": "A declarative data visualization library for JavaScript.", |
| 73 | + "languages": ["JavaScript", "TypeScript"], |
| 74 | + "topics": ["data-visualization", "d3", "charts"], |
| 75 | + "stars": 1200, |
| 76 | + "forks": 150 |
| 77 | + } |
| 78 | + ], |
| 79 | + "languages": ["Rust", "JavaScript", "TypeScript", "TOML"], |
| 80 | + "topics": ["rust", "webdev", "performance", "framework", "data-visualization", "d3", "charts"], |
| 81 | + "followers_count": 1800, |
| 82 | + "following_count": 250, |
| 83 | + "total_stars_received": 3700, |
| 84 | + "total_forks": 550, |
| 85 | + "profile_text_for_embedding": "Jane Developer, Creator of innovative open-source tools. Full-stack developer with a passion for Rust and WebAssembly. Repositories: rust-web-framework, A high-performance web framework for Rust. data-viz-lib, A declarative data visualization library for JavaScript. Languages: Rust, JavaScript, TypeScript. Topics: rust, webdev, performance, data-visualization.", |
| 86 | + "last_updated": "2025-06-23T12:21:00Z" |
| 87 | + } |
| 88 | + } |
0 commit comments