diff --git a/Project.toml b/Project.toml
index 6b08173..f3e6fc3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -3,13 +3,29 @@ uuid = "94e1309d-ccf4-42de-905f-515f1d7b1cae"
 authors = ["Dilum Aluthge", "contributors"]
 version = "2.0.0"
 
+[deps]
+FeatureTransforms = "8fd68953-04b8-4117-ac19-158bf6de9782"
+InlineStrings = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
+OMOPCommonDataModel = "ba65db9e-6590-4054-ab8a-101ed9124986"
+PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
+Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+
 [weakdeps]
+DBInterface = "a10d1c49-ce27-4219-8d33-6db1a4562965"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
+DuckDB = "d2f5444f-75bc-4fdf-ac35-56f514c445e1"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 
 [extensions]
 HealthBaseDrWatsonExt = "DrWatson"
+HealthBaseOMOPCDMExt = ["DataFrames", "OMOPCommonDataModel", "InlineStrings", "Serialization", "Dates", "FeatureTransforms", "DBInterface", "DuckDB"]
 
 [compat]
+Dates = "1.10"
+PrettyTables = "2.4.0"
+Tables = "1.12.1"
 julia = "1.10"
 
 [extras]
diff --git a/assets/version_info b/assets/version_info
new file mode 100644
index 0000000..62b6504
Binary files /dev/null and b/assets/version_info differ
diff --git a/docs/Project.toml b/docs/Project.toml
index 2b39e2a..75bbdbb 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,5 +1,21 @@
 [deps]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8"
+DuckDB = "d2f5444f-75bc-4fdf-ac35-56f514c445e1"
+FeatureTransforms = "8fd68953-04b8-4117-ac19-158bf6de9782"
 HealthBase = "94e1309d-ccf4-42de-905f-515f1d7b1cae"
 LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589"
+OMOPCommonDataModel = "ba65db9e-6590-4054-ab8a-101ed9124986"
+Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+
+[compat]
+Documenter = "1"
+DocumenterTools = "0.1.10"
+HealthBase = "1, 2"
+LiveServer = "1"
+julia = "1.10"
+DuckDB = "1"
+FeatureTransforms = "0.4.0"
+OMOPCommonDataModel = "0.1"
+Tables = "1.12.1"
diff --git a/docs/make.jl b/docs/make.jl
index b98964a..c398233 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,11 +1,21 @@
 using HealthBase
 using Documenter
+using Tables
+using DataFrames
+using OMOPCommonDataModel
+using FeatureTransforms
+using DuckDB
 
-DocMeta.setdocmeta!(HealthBase, :DocTestSetup, :(using HealthBase); recursive = true)
+DocMeta.setdocmeta!(HealthBase, :DocTestSetup, :(using HealthBase, Tables); recursive = true)
 
 makedocs(;
-    modules = [HealthBase],
-    authors = "Jacob S. Zelko, Dilum Aluthge and contributors",
+modules = [
+    HealthBase,
+    isdefined(Base, :get_extension) ?
+        Base.get_extension(HealthBase, :HealthBaseOMOPCDMExt) : HealthBase.HealthBaseOMOPCDMExt
+    ],
+    checkdocs = :none,
+    authors = "Jacob S. Zelko, Dilum Aluthge and contributors", 
     repo = "https://github.com/JuliaHealth/HealthBase.jl/blob/{commit}{path}#{line}",
     sitename = "HealthBase.jl",
     format = Documenter.HTML(;
@@ -15,7 +25,18 @@ makedocs(;
     ),
     pages = [
         "Home" => "index.md",
-        "Workflow Guides" => ["observational_template_workflow.md"],
+        "Quickstart" => "quickstart.md",
+
+        "Workflow Guides" => [
+            "Observational Template Workflow" => "observational_template_workflow.md",
+            "OMOP CDM Workflow" => "OMOPCDMWorkflow.md",
+        ],
+
+        "HealthTable System" => [
+            "HealthTable: General Tables.jl Interface" => "HealthTableGeneral.md",
+            "HealthTable: OMOP CDM Support" => "HealthTableOMOPCDM.md",
+            "HealthTable: Preprocessing Functions" => "HealthTablePreprocessing.md",
+        ],
         "API" => "api.md",
     ],
     # TODO: Update and configure doctests before next release
diff --git a/docs/src/HealthTableGeneral.md b/docs/src/HealthTableGeneral.md
new file mode 100644
index 0000000..5ff3c3f
--- /dev/null
+++ b/docs/src/HealthTableGeneral.md
@@ -0,0 +1,27 @@
+# HealthTable: Tables.jl Interface (General)
+
+## The `HealthTable` Struct
+
+The core of the interface is the `HealthTable` struct.
+
+```@docs
+HealthBase.HealthTable
+```
+
+## `Tables.jl` API Implementation
+
+`HealthTable` implements the key `Tables.jl` methods, which ensures compatibility with the Julia data ecosystem.
+
+The implemented methods are:
+
+```@docs
+Tables.istable(::Type{<:HealthBase.HealthTable})
+Tables.rowaccess(::Type{<:HealthBase.HealthTable})
+Tables.rows(::HealthBase.HealthTable)
+Tables.columnaccess(::Type{<:HealthBase.HealthTable})
+Tables.columns(::HealthBase.HealthTable)
+Tables.schema(::HealthBase.HealthTable)
+Tables.materializer(::Type{<:HealthBase.HealthTable})
+```
+
+Source: https://tables.juliadata.org/stable/implementing-the-interface/
diff --git a/docs/src/HealthTableOMOPCDM.md b/docs/src/HealthTableOMOPCDM.md
new file mode 100644
index 0000000..57d144c
--- /dev/null
+++ b/docs/src/HealthTableOMOPCDM.md
@@ -0,0 +1,35 @@
+# OMOP CDM Support for HealthTable
+
+## Core Goals & Features
+
+The `HealthTable` interface in `HealthBase.jl` is designed to make working with OMOP CDM data in Julia easy, robust, and compatible with the `Tables.jl` ecosystem. The key features include:
+
+- **Schema-Aware Validation**: Instead of just wrapping your data, `HealthTable` actively validates it against the official OMOP CDM specification using `OMOPCommonDataModel.jl`. This includes:
+    - **Column Type Enforcement**: Verifies that column types in the input `DataFrame` match the official OMOP schema (e.g., `person_id` is `Int64`, `condition_start_date` is `Date`).
+    - **Clear Error Reporting**: If mismatches exist, the constructor throws an error that lists all invalid columns, or emits a warning instead when type enforcement is disabled.
+    - **Metadata Attachment**: Attaches OMOP metadata (like `cdmDatatype`, `standardConcept`, etc.) directly to each validated column.
+    
+- **Preprocessing Utilities**: Built-in tools for data preparation include:
+    - `one_hot_encode`: One-hot encodes categorical variables using `FeatureTransforms.jl`.
+    - `apply_vocabulary_compression`: Groups rare categorical values under a shared `"Other"` label.
+    - `map_concepts`: Maps concept IDs to human-readable concept names using a DuckDB-backed `concept` table.
+    - `map_concepts!`: An in-place variant of concept mapping that modifies the existing table.
+
+- **Tables.jl Compatibility**: The `HealthTable` type implements the full `Tables.jl` interface so it can be used with any downstream package in the Julia data ecosystem.
+
+- **JuliaHealth Integration**: Designed to interoperate seamlessly with current and future JuliaHealth tools and projects.
+
+- **Extensible Foundation**: The core architecture is extensible; future support could include streaming, direct DuckDB views, or remote OMOP datasets.
+
+
+## `Tables.jl` Interface Sketch
+
+The `HealthTable` type is the main interface for working with OMOP CDM tables. You construct it by passing in a `DataFrame` and optionally specifying a CDM version. The constructor will validate the schema and attach metadata. The resulting object:
+
+- Is a wrapper over the validated DataFrame (`ht.source`),
+- Provides schema-aware access to data,
+- Can be used anywhere a `Tables.jl`-compatible table is expected.
+
+This eliminates the need for a separate wrapping step: the constructor itself ensures conformance and returns a ready-to-use tabular object.
+
+In future extensions, similar wrappers could be created for other data sources, such as database queries or streaming sources. These types would implement the same `Tables.jl` interface to support composable workflows.
\ No newline at end of file
diff --git a/docs/src/HealthTablePreprocessing.md b/docs/src/HealthTablePreprocessing.md
new file mode 100644
index 0000000..ca631f4
--- /dev/null
+++ b/docs/src/HealthTablePreprocessing.md
@@ -0,0 +1,37 @@
+# HealthTable: Preprocessing Functions
+
+This page documents the preprocessing and transformation functions available for `HealthTable` objects when working with OMOP CDM data. These functions are provided by the OMOP CDM extension and enable data preparation workflows for machine learning and analysis.
+
+## One-Hot Encoding
+
+Transform categorical variables into binary indicator columns suitable for machine learning algorithms.
+
+```@docs
+HealthBase.one_hot_encode
+```
+
+## Vocabulary Compression
+
+Reduce the dimensionality of categorical variables by grouping infrequent levels under a common label.
+
+```@docs
+HealthBase.apply_vocabulary_compression
+```
+
+## Concept Translation
+
+### Concept Mapping (Immutable)
+
+Map OMOP concept IDs to human-readable concept names using the OMOP vocabulary tables, returning a new `HealthTable`.
+
+```@docs
+HealthBase.map_concepts
+```
+
+### Concept Mapping (In-Place)
+
+In-place version of concept mapping that modifies the original `HealthTable` directly for memory efficiency.
+
+```@docs
+HealthBase.map_concepts!
+```
diff --git a/docs/src/OMOPCDMWorkflow.md b/docs/src/OMOPCDMWorkflow.md
new file mode 100644
index 0000000..be13ae2
--- /dev/null
+++ b/docs/src/OMOPCDMWorkflow.md
@@ -0,0 +1,76 @@
+# OMOP CDM Workflow with HealthTable
+
+## Typical Workflow
+
+The envisioned process for working with OMOP CDM data using the `HealthBase.jl` components typically follows these steps:
+
+1. **Data Loading**  
+   Raw data is loaded into a suitable tabular structure, most commonly a `DataFrame`.
+
+2. **Validation and Wrapping with `HealthTable`**  
+   The raw `DataFrame` is then wrapped using `HealthBase.HealthTable`. This function takes the `DataFrame` and uses the attached OMOP CDM version (e.g., "v5.4.1") to validate its structure and column types against the OMOP CDM schema.
+
+   - It checks if the column types are compatible with the expected OMOP CDM types (from `OMOPCommonDataModel.jl`).
+   - If `disable_type_enforcement = false` (the default), it throws an error on mismatches; if `true`, it emits a warning instead and still returns the table.
+   - It attaches metadata to columns indicating their OMOP CDM types.
+   - The result is a `HealthTable` instance that wraps the validated `DataFrame` and exposes the `Tables.jl` interface.
+
+3. **Interacting via `Tables.jl`**  
+   Once wrapped, the `HealthTable` instance can be seamlessly used with any `Tables.jl`-compatible tools and standard `Tables.jl` functions.
+
+4. **Applying Preprocessing Utilities**  
+   After wrapping, you can apply preprocessing steps essential for analysis or modeling. These include:
+
+   - One-hot encoding
+   - Handling of high-cardinality categorical variables
+   - Concept mapping utilities
+
+   These utilities usually return a modified `HealthTable` or a materialized `DataFrame` ready for downstream use.
+
+## Example Usage
+
+```julia
+using DataFrames, OMOPCommonDataModel, InlineStrings, Serialization, Dates, FeatureTransforms, DBInterface, DuckDB
+using HealthBase
+
+# Assume 'condition_occurrence_df' is a DataFrame loaded from a CSV/database
+condition_occurrence_df = DataFrame(
+    condition_occurrence_id = [1, 2, 3],
+    person_id = [101, 102, 101],
+    condition_concept_id = [201826, 433736, 317009],
+    condition_start_date = [Date(2010,1,1), Date(2012,5,10), Date(2011,3,15)]
+    # ... other fields
+)
+
+# Validate and wrap the DataFrame with HealthTable
+ht_conditions = HealthTable(condition_occurrence_df; omop_cdm_version="v5.4.1")
+
+# 1. Schema Inspection
+sch = Tables.schema(ht_conditions)
+println("Schema Names: ", sch.names)
+println("Schema Types: ", sch.types)
+# This should output the names and types from the validated DataFrame
+
+# 2. Iteration (Rows)
+for row in Tables.rows(ht_conditions)
+    # 'row' is a Tables.Row, with fields matching the OMOP schema
+    println("Person ID: $(row.person_id), Condition: $(row.condition_concept_id)")
+end
+
+# 3. Integration with other packages (example: MLJ.jl)
+# 4. Materialization
+# DataFrame(ht_conditions)
+```
+
+## Preprocessing and Utilities
+
+Preprocessing utilities can operate on `HealthTable` objects (or their materialized versions), leveraging the `Tables.jl` interface and schema awareness derived via `Tables.schema`.
+
+Examples include:
+
+- `one_hot_encode(ht::HealthTable; cols, drop_original=true, return_features_only=false)`
+- `apply_vocabulary_compression(ht::HealthTable; cols, min_freq=10, other_label="Other", drop_original=false)`
+- `map_concepts(ht::HealthTable, cols, conn; new_cols=nothing, suffix="_mapped", drop_original=false, concept_table="concept", schema="main")`
+- `map_concepts!(ht::HealthTable, cols, conn; ...)` *(in-place version, same keyword arguments)*
+
+These functions follow the principle of user-triggered, optional transformations configurable via keyword arguments.
diff --git a/docs/src/api.md b/docs/src/api.md
index df5138f..b5f4b98 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -9,4 +9,12 @@ CurrentModule = HealthBase
 
 ```@autodocs
 Modules = [HealthBase]
+Filter = t -> !(t in [HealthBase.HealthTable,
+                     Base.getproperty(Tables, :columns),
+                     Base.getproperty(Tables, :rows),
+                     Base.getproperty(Tables, :schema),
+                     Base.getproperty(Tables, :istable),
+                     Base.getproperty(Tables, :rowaccess),
+                     Base.getproperty(Tables, :columnaccess),
+                     Base.getproperty(Tables, :materializer)])
 ```
diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md
new file mode 100644
index 0000000..ebe547d
--- /dev/null
+++ b/docs/src/quickstart.md
@@ -0,0 +1,158 @@
+# Quickstart
+
+Welcome to the **Quickstart** guide for [`HealthBase.jl`](https://github.com/JuliaHealth/HealthBase.jl)!  
+This guide walks you through setting up your Julia environment, creating example OMOP CDM data, validating it, and applying preprocessing steps using the `HealthTable` system.
+
+## Getting Started
+
+### Launch Julia and Enter Your Project Environment
+
+To get started:
+
+1. Open your terminal or Julia REPL.
+2. Navigate to your project folder (where `Project.toml` is located):
+
+```sh
+cd path/to/your/project
+```
+
+3. Activate the project:
+
+```sh
+julia --project=.
+```
+
+4. (Optional for docs) For working on documentation:
+
+```sh
+julia --project=docs
+```
+
+## 1. Load Packages
+
+Before loading `HealthBase`, you must first load some **trigger packages**.  
+These packages enable HealthBase's extensions, which power important features like type validation and concept mapping.
+
+> ⚠️ **Important:** Load the following packages **before** `using HealthBase`.  
+> Otherwise, some functions may not be available due to missing extensions.
+
+```julia
+# First, load the trigger packages
+using DataFrames, OMOPCommonDataModel, InlineStrings, Serialization, Dates, FeatureTransforms, DBInterface, DuckDB
+
+# Then, load HealthBase
+using HealthBase
+```
+
+## 2. Create Example DataFrames
+
+We'll create two `DataFrame`s:
+
+- `good_df` - a minimal, valid slice of the OMOP _person_ table.
+- `wrong_df` - intentionally invalid (wrong types & extra column) so you can see the constructor’s validation in action.
+
+```julia
+good_df = DataFrame(
+    person_id = 1:6,
+    gender_concept_id = [8507, 8507, 8532, 8532, 8507, 8532],
+    year_of_birth = [1980, 1995, 1990, 1975, 1988, 2001],
+    race_concept_id = [8527, 8515, 8527, 8516, 8527, 8516]
+)
+
+# Invalid DataFrame to test validation
+wrong_df = DataFrame(
+    person_id = ["1", "2"],            # Should be Int64
+    gender_concept_id = [8507, 8532],
+    year_of_birth = [1990, 1985],
+    race_concept_id = [8527, 8516],
+    extra_col = [true, false],         # Extra column not in the OMOP schema
+)
+
+ht = HealthTable(good_df; omop_cdm_version="v5.4.1")
+
+# OMOP CDM version metadata
+metadata(ht.source, "omop_cdm_version")
+
+# Will give column-specific metadata
+colmetadata(ht.source, :gender_concept_id)
+
+# This will throw an error (strict enforcement)
+ht = HealthTable(wrong_df; omop_cdm_version="v5.4.1", disable_type_enforcement = false)
+
+# If you want to *load anyway* and just receive warnings, disable type enforcement:
+ht_relaxed = HealthTable(wrong_df; omop_cdm_version="v5.4.1", disable_type_enforcement = true)
+```
+
+## 3. Preprocessing Pipeline
+
+Now, we'll apply a series of transformations to clean and prepare the data.
+
+### Mapping Concepts
+
+Convert concept codes (e.g., gender ID) into human-readable concept names by looking them up through a DuckDB connection.
+
+```julia
+conn = DBInterface.connect(DuckDB.DB, "synthea_1M_3YR.duckdb")
+
+# Single column, auto-suffixed column name (gender_concept_id_mapped)
+ht_mapped = map_concepts(ht, :gender_concept_id, conn; schema = "dbt_synthea_dev")
+
+# Multiple columns, custom new column names
+ht_mapped2 = map_concepts(ht, [:gender_concept_id, :race_concept_id], conn; new_cols = ["gender", "race"], schema = "dbt_synthea_dev", drop_original=true)
+
+# In-place variant
+map_concepts!(ht, [:gender_concept_id], conn; schema = "dbt_synthea_dev")
+```
+
+### Manual Concept Mapping (Without DB)
+
+Sometimes, you may want to map concept IDs using a custom dictionary instead of querying the database.
+
+```julia
+# Define custom mapping manually
+custom_map = Dict(8507 => "Male", 8532 => "Female")
+
+# Option 1: Add a new column using `Base.map`
+ht.source.gender_label = map(x -> get(custom_map, x, "Unknown"), ht.source.gender_concept_id)
+
+# Option 2: Use `Base.map!` with a new destination vector
+gender_labels = Vector{String}(undef, length(ht.source.gender_concept_id))
+map!(x -> get(custom_map, x, "Unknown"), gender_labels, ht.source.gender_concept_id)
+ht.source.gender_label = gender_labels
+```
+
+### Compress sparse categories
+
+Group rare values into an "Other" category so they don’t overwhelm your model.
+
+```julia
+ht_compressed = apply_vocabulary_compression(ht_mapped; cols = [:race_concept_id], min_freq = 2, other_label = "Other")
+```
+
+### One-hot encode categorical columns
+
+Convert categorical codes into binary indicator columns (true/false).
+
+```julia
+ht_ohe = one_hot_encode(ht_compressed; cols=[:gender_concept_id, :race_concept_id])
+```
+
+### For Developers: Interactive Use in the REPL
+
+When working interactively in the REPL during development:
+
+- Always load the **trigger packages first**
+- Then load `HealthBase`
+- Only after that, use extension functions like `one_hot_encode`, `map_concepts`, etc.
+
+```julia
+# Correct load order for extensions to work:
+using DataFrames, OMOPCommonDataModel, InlineStrings, Serialization, Dates, FeatureTransforms, DBInterface, DuckDB
+using HealthBase
+
+# Now this will work:
+# ht_ohe = one_hot_encode(ht; cols=[:gender_concept_id])
+```
+
+Happy experimenting with `HealthBase.jl`! 🎉  
+Feel free to explore more advanced workflows in the other guide sections.
diff --git a/ext/HealthBaseOMOPCDMExt.jl b/ext/HealthBaseOMOPCDMExt.jl
new file mode 100644
index 0000000..c961302
--- /dev/null
+++ b/ext/HealthBaseOMOPCDMExt.jl
@@ -0,0 +1,452 @@
+module HealthBaseOMOPCDMExt
+
+using HealthBase
+using DataFrames
+using OMOPCommonDataModel
+using Serialization
+using InlineStrings
+using Dates
+import FeatureTransforms: 
+    OneHotEncoding, apply_append
+using DuckDB               
+using DBInterface: execute
+
+# NOTE: In the future, replace this with OMOP CDM version info directly from OMOPCommonDataModel.jl dependencies.
+# Until then the version table is deserialized from `assets/version_info`, located relative to this file.
+const OMOPCDM_VERSIONS = deserialize(joinpath(@__DIR__, "..", "assets", "version_info"))
+
+# Mapping OMOP CDM datatypes to Julia types
+const DATATYPE_MAP = Dict{String, DataType}(
+    "integer" => Int64,
+    "Integer" => Int64,
+    "bigint" => Int64,
+    "float" => Float64,
+    "date" => Date,
+    "datetime" => DateTime,
+    # All listed varchar widths map to `String`.
+    (("varchar($(width))" => String) for width in (1, 2, 3, 9, 10, 20, 25, 50, 80, 250, 255, 1000, 2000, "MAX"))...,
+)
+
+# Announce that the extension has been loaded.
+__init__() = @info "OMOP CDM extension for HealthBase has been loaded!"
+
+"""
+    HealthTable(df::DataFrame; omop_cdm_version="v5.4.0", disable_type_enforcement=false, collect_errors=true)
+
+Constructs a `HealthTable` for an OMOP CDM dataset by validating the given `DataFrame`.
+
+This constructor validates the `DataFrame` against the OMOP CDM schema for the specified version (if not provided, takes default version "v5.4.0").
+It ensures that:
+- all column names exist in the OMOP schema,
+- each column's data type matches the expected type from the schema,
+- appropriate metadata is attached to each column based on the OMOP CDM field definition.
+
+If any mismatches are found, a comprehensive error (or warning, depending on settings) will be 
+raised to help users correct their data. Once validated, the table is wrapped into a schema-aware 
+`HealthTable`, suitable for downstream use. Metadata is attached to `df` in place.
+
+## Arguments
+- `df::DataFrame`: The `DataFrame` to wrap. It should contain columns corresponding to an OMOP CDM table.
+
+## Keyword Arguments
+- `omop_cdm_version::String="v5.4.0"`: The OMOP CDM version to validate against. An `ArgumentError` is thrown if the version is not supported.
+- `disable_type_enforcement::Bool=false`: If `true`, validation problems (type mismatches and columns that are not in the schema) emit a single comprehensive warning instead of throwing an error.
+- `collect_errors::Bool=true`: If `false`, the constructor will throw an error immediately upon finding the first column with a type mismatch. If `true` (the default), it will collect all errors and report them in a single message.
+
+## Returns
+- `HealthTable`: A new `HealthTable` instance with validated data and attached metadata.
+
+## Examples
+
+1. Loading a DataFrame from scratch:
+```julia
+using DataFrames, OMOPCommonDataModel, InlineStrings, Serialization, Dates, FeatureTransforms, DBInterface, DuckDB
+using HealthBase
+
+person_df = DataFrame(
+    person_id = 1:6,
+    gender_concept_id = [8507, 8507, 8532, 8532, 8507, 8532],
+    year_of_birth = [1980, 1995, 1990, 1975, 1988, 2001],
+    race_concept_id = [8527, 8515, 8527, 8516, 8527, 8516]
+)
+ht = HealthTable(person_df; omop_cdm_version="v5.4.0")
+```
+
+2. Loading a DataFrame from a database query:
+```julia
+using DBInterface, DuckDB, DataFrames, HealthBase
+# db = DuckDB.DB("synthea.duckdb") # Example database file
+# person_df = DBInterface.execute(db, "SELECT * FROM person") |> DataFrame
+# ht = HealthTable(person_df; omop_cdm_version="v5.4.0")
+```
+
+3. Accessing column metadata:
+```julia
+# After constructing ht as above:
+colnames = names(ht.source)
+coltypes = eltype.(eachcol(ht.source))
+# OMOP metadata can be accessed from ht or its source columns if attached
+```
+
+4. Quick-fail/warning for bad data:
+You can control how strictly HealthTable enforces schema validation:
+
+```julia
+# Fail immediately on first mismatch
+ht = HealthTable(df; collect_errors = false)
+
+# Collect all mismatches and throw a combined error
+ht = HealthTable(df; collect_errors = true)
+
+# Only warn on mismatches; allows proceeding (use with caution)
+ht = HealthTable(df; disable_type_enforcement = true)
+```
+Use `disable_type_enforcement=true` if you're exploring or cleaning data; for modeling or analysis, validated types are strongly recommended.
+"""
+function HealthBase.HealthTable(
+    df::DataFrame;
+    omop_cdm_version::String="v5.4.0",
+    disable_type_enforcement=false,
+    collect_errors=true
+)
+    if !haskey(OMOPCDM_VERSIONS, omop_cdm_version)
+        throw(ArgumentError("OMOP CDM version '$(omop_cdm_version)' is not supported. Available versions: $(keys(OMOPCDM_VERSIONS))"))
+    end
+
+    omop_fields = OMOPCDM_VERSIONS[omop_cdm_version][:fields]
+    @assert !isempty(omop_fields) "OMOP CDM version $(omop_cdm_version) has no registered fields."
+    failed_columns = Vector{NamedTuple{(:colname, :type, :expected), Tuple{String, Any, Any}}}()
+    extra_columns = String[]
+
+    for col in names(df)
+        col_symbol = Symbol(col)
+        if !haskey(omop_fields, col_symbol)
+            # Not an OMOP field: remember it so it is reported together with the type mismatches.
+            push!(extra_columns, col)
+            continue
+        end
+
+        fieldinfo = omop_fields[col_symbol]
+        actual_type = eltype(df[!, col_symbol])
+
+        if !haskey(fieldinfo, :cdmDatatype)
+            if !collect_errors
+                throw(ArgumentError("Column '$(col)' is missing :cdmDatatype information in the schema."))
+            end
+            push!(failed_columns, (colname=col, type=actual_type, expected="<missing from schema>"))
+        else
+            expected_string = fieldinfo[:cdmDatatype]
+            if !haskey(DATATYPE_MAP, expected_string)
+                push!(failed_columns, (colname=col, type=actual_type, expected="Unrecognized OMOP datatype: $(expected_string)"))
+            else
+                expected_type = DATATYPE_MAP[expected_string]
+                # Element types of the form `Union{T, Missing}` are accepted as well.
+                if !(actual_type <: Union{expected_type, Missing})
+                    if !collect_errors
+                        throw(ArgumentError("Column '$(col)' has type $(actual_type), but expected a subtype of $(expected_type)."))
+                    end
+                    push!(failed_columns, (colname=col, type=actual_type, expected=expected_type))
+                end
+            end
+
+            for (key, val) in fieldinfo
+                if !ismissing(val)
+                    colmetadata!(df, col_symbol, String(key), string(val))
+                end
+            end
+        end
+    end
+
+    validation_msgs = String[]
+    if !isempty(failed_columns)
+        error_details = join(["Column '$(err.colname)': has type $(err.type), expected $(err.expected)" for err in failed_columns], "\n")
+        push!(validation_msgs, "OMOP CDM type validation failed for the following columns:\n" * error_details)
+    end
+    if !isempty(extra_columns)
+        push!(validation_msgs, "The following columns are not part of the OMOP CDM $(omop_cdm_version) schema: " * join(extra_columns, ", "))
+    end
+
+    if !isempty(validation_msgs)
+        full_message = join(validation_msgs, "\n\n") * "\n"
+        if disable_type_enforcement
+            @warn full_message * "\nType enforcement is disabled. Unexpected behavior may occur."
+        else
+            throw(ArgumentError(full_message))
+        end
+    end
+
+    DataFrames.metadata!(df, "omop_cdm_version", omop_cdm_version)
+    return HealthBase.HealthTable{typeof(df)}(df)
+end
+
+"""
+    one_hot_encode(ht::HealthTable; cols, drop_original=true, return_features_only=false)
+
+One-hot encode the categorical columns in `ht` using **FeatureTransforms.jl**.
+
+For every requested column the function appends Boolean indicator columns — one per
+unique (non-missing) level. New columns are named `col_value`, e.g. `gender_concept_id_8507`.
+
+Boolean source columns are skipped with a warning and are kept as they are (never dropped).
+
+# Arguments
+- `ht::HealthTable`: Table to transform; its source is copied, not modified.
+
+# Keyword Arguments
+- `cols::Vector{Symbol}`: Categorical columns to encode.
+- `drop_original::Bool=true`: Drop the source columns that were encoded.
+- `return_features_only::Bool=false`: If `true` return the transformed **DataFrame** itself;
+  if `false` wrap it in a `HealthTable` without re-running OMOP CDM validation
+  (because the output is no longer standard OMOP CDM).
+
+# Returns
+- `DataFrame` or `HealthTable` depending on `return_features_only`.
+
+# Example
+```julia
+ht_ohe = one_hot_encode(ht; cols = [:gender_concept_id, :race_concept_id])
+X = one_hot_encode(ht; cols = [:gender_concept_id], return_features_only = true) # ML features
+```
+"""
+function HealthBase.one_hot_encode(
+    ht::HealthTable;
+    cols::Vector{Symbol},
+    drop_original::Bool = true,
+    return_features_only::Bool = false
+)
+    df = copy(ht.source)
+    missing_cols = setdiff(cols, Symbol.(names(df)))
+    @assert isempty(missing_cols) "Columns $(missing_cols) not found."
+
+    encoded = Symbol[]  # columns that really were expanded; only these may be dropped
+    for col in cols
+        if eltype(df[!, col]) <: Bool
+            @warn "Column $col is already Boolean; skipping one-hot."
+            continue
+        end
+        cats = unique(skipmissing(df[!, col]))
+        enc = OneHotEncoding(cats)
+        header = Symbol.(string(col, "_", c) for c in cats)
+        df = apply_append(df, enc; cols=[col], header=header)
+        push!(encoded, col)
+    end
+
+    drop_original && select!(df, Not(encoded))
+    return return_features_only ? df : HealthBase.HealthTable{typeof(df)}(df)
+end
+
+"""
+    map_concepts(ht::HealthTable, cols, conn::DuckDB.DB; new_cols=nothing, suffix="_mapped", drop_original=false, concept_table="concept", schema="main")
+
+Map concept IDs in one or more columns to their corresponding concept names using the OMOP `concept` table. Only direct mappings using concept IDs are supported.
+
+
+# Arguments
+- `ht::HealthTable`: Input OMOP data table.
+- `cols::Union{Symbol, Vector{Symbol}}`: Column(s) containing concept IDs.
+- `conn::DuckDB.DB`: Database connection for concept lookup.
+
+# Keyword Arguments
+- `new_cols`: Name(s) for output columns (`String` or `Vector{String}`). If not provided, uses `col * suffix`.
+- `suffix::String="_mapped"`: Suffix for default new column names.
+- `drop_original::Bool=false`: Drop source column(s) after mapping.
+- `concept_table::String="concept"`: Table name for concepts.
+- `schema::String="main"`: Schema containing the concept table.
+
+# Returns
+- A new `HealthTable`, built from a copy of `ht.source`, with the concept names added in `new_cols`.
+
+# Example
+```julia
+conn = DBInterface.connect(DuckDB.DB, "path/to/db/.duckdb")
+
+# Map gender_concept_id to concept_name
+ht_mapped = map_concepts(ht, :gender_concept_id, conn; new_cols = "gender_name", schema = "dbt_synthea_dev")
+```
+"""
+function HealthBase.map_concepts(
+    ht::HealthTable,
+    cols::Union{Symbol, Vector{Symbol}},
+    conn::DuckDB.DB;
+    new_cols::Union{Nothing, String, Vector{String}} = nothing,
+    drop_original::Bool = false,
+    suffix::String = "_mapped",
+    concept_table::String = "concept",
+    schema::String = "main"
+)
+    df = copy(ht.source)  # work on a copy so that `ht` is left untouched
+    _map_concepts!(df, cols, conn; new_cols, drop_original, suffix, concept_table, schema)
+
+    return HealthBase.HealthTable{typeof(df)}(df)
+end
+
+"""
+    map_concepts!(ht::HealthTable, cols, conn::DuckDB.DB; new_cols=nothing, suffix="_mapped", drop_original=false, concept_table="concept", schema="main")
+
+Mutating counterpart of `map_concepts`: look up the concept names for the IDs stored in
+`cols` and write them into `ht.source` directly, without copying it first.
+
+# Arguments
+- `ht::HealthTable`: Table whose source `DataFrame` is modified.
+- `cols::Union{Symbol, Vector{Symbol}}`: One column, or a vector of columns, holding concept IDs.
+- `conn::DuckDB.DB`: Database used for the concept lookup.
+
+# Keyword Arguments
+- `new_cols::Union{Nothing, String, Vector{String}}=nothing`: Names of the columns that
+  receive the concept names. With `nothing`, each name is the source column name
+  followed by `suffix`.
+- `suffix::String="_mapped"`: Suffix for the automatically generated names.
+- `drop_original::Bool=false`: Remove each source column once it has been mapped.
+- `concept_table::String="concept"`: Name of the table that is queried.
+- `schema::String="main"`: Schema that contains `concept_table`.
+
+All keyword arguments are forwarded unchanged to the internal `_map_concepts!` helper.
+
+# Returns
+- `ht` itself, after its source has been updated.
+
+# Example
+```julia
+conn = DBInterface.connect(DuckDB.DB, "path/to/db/.duckdb")
+
+# Map gender_concept_id to concept_name in-place
+map_concepts!(ht, :gender_concept_id, conn; new_cols="gender_name", schema="dbt_synthea_dev")
+```
+"""
+function HealthBase.map_concepts!(
+    ht::HealthTable,
+    cols::Union{Symbol, Vector{Symbol}},
+    conn::DuckDB.DB;
+    new_cols::Union{Nothing, String, Vector{String}} = nothing,
+    drop_original::Bool = false,
+    suffix::String = "_mapped",
+    concept_table::String = "concept",
+    schema::String = "main"
+)
+    # Operate on the wrapped DataFrame itself so the caller's table is updated.
+    target = ht.source
+    _map_concepts!(target, cols, conn; new_cols, drop_original, suffix, concept_table, schema)
+
+    return ht
+end
+
+"""
+    _map_concepts!(df, cols, conn; new_cols=nothing, drop_original=false, suffix="_mapped", concept_table="concept", schema="main")
+
+Internal workhorse shared by `map_concepts` and `map_concepts!`: add concept-name columns to `df` in place.
+
+# Arguments
+- `df::DataFrame`: DataFrame that receives the new columns.
+- `cols`: A `Symbol` or a `Vector{Symbol}` naming the columns that hold concept IDs.
+- `conn::DuckDB.DB`: Database that is queried for `concept_id` / `concept_name` pairs.
+
+# Keyword Arguments
+- `new_cols`: Output column name(s); `nothing` means source column name followed by `suffix`.
+- `drop_original`: Remove each source column after it has been mapped.
+- `suffix`: Appended to the source column name when `new_cols` is `nothing`.
+- `concept_table`, `schema`: Queried as `schema.concept_table`.
+
+# Notes
+- IDs without a match map to `missing`; a column is skipped with a warning when it has no IDs or the query returns no rows.
+"""
+function _map_concepts!(
+    df::DataFrame,
+    cols::Union{Symbol, Vector{Symbol}},
+    conn::DuckDB.DB;
+    new_cols::Union{Nothing, String, Vector{String}} = nothing,
+    drop_original::Bool = false,
+    suffix::String = "_mapped",
+    concept_table::String = "concept",
+    schema::String = "main"
+)
+    # Normalise both arguments to vectors so that they can be walked in lockstep.
+    source_cols = cols isa Symbol ? [cols] : cols
+    target_cols = if new_cols === nothing
+        [string(name, suffix) for name in source_cols]
+    elseif new_cols isa String
+        [new_cols]
+    else
+        new_cols
+    end
+    @assert length(source_cols) == length(target_cols) "Length of `cols` and `new_cols` must match."
+
+    for (col, new_col) in zip(source_cols, target_cols)
+        @assert col in propertynames(df) "Column '$col' not found in table."
+        ids = unique(skipmissing(df[!, col]))
+        if isempty(ids)
+            @warn "No concept_ids found in column $col; skipping."
+            continue
+        end
+        # NOTE(review): `schema`, `concept_table` and the ids are spliced into the SQL text
+        # verbatim, so they must come from a trusted source.
+        id_list_str = join(string.(ids), ", ")
+        query = """
+            SELECT concept_id, concept_name
+            FROM $schema.$concept_table
+            WHERE concept_id IN ($id_list_str)
+        """
+        result_df = DataFrame(execute(conn, query))
+        if isempty(result_df)
+            @warn "Concept mapping for $col returned empty result. Check table, schema, and values."
+            continue
+        end
+
+        names_by_id = Dict((id => name) for (id, name) in zip(result_df.concept_id, result_df.concept_name))
+        df[!, new_col] = map(df[!, col]) do id
+            get(names_by_id, id, missing)
+        end
+        drop_original && select!(df, Not(col))
+    end
+    return nothing
+end
+
+"""
+    apply_vocabulary_compression(ht::HealthTable; cols, min_freq=10, other_label="Other", drop_original=false)
+
+Group infrequent categorical levels under a single *other* label.
+
+# Arguments
+- `ht::HealthTable`: Input data table; its source is copied, not modified.
+
+# Keyword Arguments
+- `cols::Vector{Symbol}`: Columns to compress. Each one that has rare levels gets a new `<col>_compressed` column; columns without rare levels are left as they are.
+- `min_freq::Int=10`: Levels occurring fewer than `min_freq` times are replaced; all other values are converted with `string`, and `missing` stays `missing`.
+- `other_label::String="Other"`: Label used to replace infrequent values.
+- `drop_original::Bool=false`: Whether to drop the source columns that received a `_compressed` column.
+
+# Returns
+- `HealthTable`: Table with compressed categorical levels.
+
+# Examples
+```julia
+ht_small = apply_vocabulary_compression(ht; cols=[:condition_source_value], min_freq=5)
+```
+"""
+function HealthBase.apply_vocabulary_compression(
+    ht::HealthTable;
+    cols::Vector{Symbol},
+    min_freq::Integer = 10,
+    other_label::AbstractString = "Other",
+    drop_original::Bool = false,
+)
+    df = copy(ht.source)
+    compressed = Symbol[]  # source columns that actually received a `_compressed` twin
+
+    for col in cols
+        @assert col in propertynames(df) "Column '$(col)' not found in table."
+        counts = combine(groupby(df, col), nrow => :freq)
+        # A `Set` gives a plain `Bool` from `in` even when `missing` is involved, unlike a vector.
+        rare = Set(counts[counts.freq .< min_freq, col])
+        isempty(rare) && continue
+        df[!, Symbol(col, "_compressed")] = map(df[!, col]) do x
+            ismissing(x) ? missing : (x in rare ? other_label : string(x))
+        end
+        push!(compressed, col)
+    end
+
+    drop_original && select!(df, Not(compressed))
+    return HealthBase.HealthTable{typeof(df)}(df)
+end
+
+end
+
diff --git a/src/HealthBase.jl b/src/HealthBase.jl
index 7c1a74b..aad28a1 100644
--- a/src/HealthBase.jl
+++ b/src/HealthBase.jl
@@ -1,8 +1,9 @@
 module HealthBase
 
-using Base: get_extension
-
+using Base: get_extension, @kwdef
 using Base.Experimental: register_error_hint
+using Tables
+using DataFrames
 
 include("exceptions.jl")
 
@@ -12,6 +13,10 @@ function __init__()
             if isnothing(get_extension(HealthBase, :HealthBaseDrWatsonExt))
                 _extension_message("DrWatson", cohortsdir, io)
             end
+        elseif exc.f == HealthTable
+            if isnothing(get_extension(HealthBase, :HealthBaseOMOPCDMExt))
+                _extension_message("OMOPCommonDataModel, DataFrames", HealthTable, io)
+            end
         elseif exc.f == corpusdir
             if isnothing(get_extension(HealthBase, :HealthBaseDrWatsonExt))
                 _extension_message("DrWatson", corpusdir, io)
@@ -36,6 +41,9 @@ function __init__()
     end
 end
 
+include("healthtable_interface.jl")
 include("drwatson_stub.jl")
+include("omopcdm_stub.jl")
+include("show.jl")
 
 end
diff --git a/src/healthtable_interface.jl b/src/healthtable_interface.jl
new file mode 100644
index 0000000..cd6ad33
--- /dev/null
+++ b/src/healthtable_interface.jl
@@ -0,0 +1,122 @@
+"""
+    HealthTable{T}
+
+A lightweight wrapper that exposes a wrapped tabular data source through the Tables.jl interface.
+
+The struct stores only `source`; this definition performs no validation of its own. The example below
+passes `omop_cdm_version`, a keyword this struct does not define. NOTE(review): it appears to come from
+the `HealthBaseOMOPCDMExt` extension, so the example presumably needs that extension loaded - confirm.
+
+# Fields
+- `source::T`: The underlying data source (typically a `DataFrame`) containing the OMOP CDM table data.
+
+# Examples
+```julia
+person_df = DataFrame(
+    person_id=1:3,
+    gender_concept_id=[8507, 8532, 8507],
+    year_of_birth=[1990, 1985, 2000]
+)
+ht = HealthTable(person_df; omop_cdm_version="v5.4.1")
+Tables.schema(ht) # Get the schema
+DataFrame(ht)     # Materialize as DataFrame
+```
+"""
+@kwdef struct HealthTable{T}
+    source::T
+end
+
+"""
+    Tables.istable(::Type{<:HealthTable})
+
+Declare every `HealthTable` type to be a table for the purposes of the Tables.jl interface.
+
+## Returns
+- `Bool`: `true`, unconditionally.
+"""
+function Tables.istable(::Type{<:HealthTable})
+    return true
+end
+
+"""
+    Tables.rowaccess(::Type{<:HealthTable})
+
+Advertise row-wise access: `Tables.rows` may be called on any `HealthTable` instance.
+
+## Returns
+- `Bool`: `true`, unconditionally.
+"""
+function Tables.rowaccess(::Type{<:HealthTable})
+    return true
+end
+
+"""
+    Tables.rows(ht::HealthTable)
+
+Return a row iterator for `ht`, obtained by forwarding to `Tables.rows` on the wrapped `source`.
+
+## Arguments
+- `ht::HealthTable`: The `HealthTable` instance.
+
+## Returns
+- Whatever `Tables.rows(ht.source)` returns.
+"""
+function Tables.rows(ht::HealthTable)
+    return Tables.rows(ht.source)
+end
+
+"""
+    Tables.columnaccess(::Type{<:HealthTable})
+
+Advertise column-wise access: `Tables.columns` may be called on any `HealthTable` instance.
+
+## Returns
+- `Bool`: `true`, unconditionally.
+"""
+function Tables.columnaccess(::Type{<:HealthTable})
+    return true
+end
+
+"""
+    Tables.columns(ht::HealthTable)
+
+Return the data of `ht` in column form by forwarding to `Tables.columns` on the wrapped `source`.
+
+## Arguments
+- `ht::HealthTable`: The `HealthTable` instance.
+
+## Returns
+- Whatever `Tables.columns(ht.source)` returns.
+"""
+function Tables.columns(ht::HealthTable)
+    return Tables.columns(ht.source)
+end
+
+"""
+    Tables.schema(ht::HealthTable)
+
+Report the schema of `ht` by forwarding to `Tables.schema` on the wrapped `source`.
+
+## Arguments
+- `ht::HealthTable`: The `HealthTable` instance.
+
+## Returns
+- Whatever `Tables.schema(ht.source)` returns, describing column names and their Julia types.
+"""
+function Tables.schema(ht::HealthTable)
+    return Tables.schema(ht.source)
+end
+
+"""
+    Tables.materializer(::Type{<:HealthTable})
+
+Name `DataFrame` as the sink that Tables.jl consumers use when materializing a `HealthTable`.
+
+## Returns
+- `Type`: Always `DataFrame`.
+"""
+function Tables.materializer(::Type{<:HealthTable})
+    return DataFrame
+end
+
+export HealthTable
\ No newline at end of file
diff --git a/src/omopcdm_stub.jl b/src/omopcdm_stub.jl
new file mode 100644
index 0000000..d6fbed2
--- /dev/null
+++ b/src/omopcdm_stub.jl
@@ -0,0 +1,9 @@
+function one_hot_encode end
+function apply_vocabulary_compression end
+function map_concepts end
+function map_concepts! end
+
+export one_hot_encode
+export apply_vocabulary_compression
+export map_concepts
+export map_concepts!
diff --git a/src/show.jl b/src/show.jl
new file mode 100644
index 0000000..6356a46
--- /dev/null
+++ b/src/show.jl
@@ -0,0 +1,29 @@
+using PrettyTables
+using DataFrames
+
+"""
+    Base.show(io::IO, ht::HealthTable)
+
+Pretty-print a `HealthTable` to any IO stream (REPL, file, etc.).
+
+- If the wrapped table has no rows, prints a "HealthTable is empty" placeholder.
+- Otherwise prints the table using **PrettyTables.jl** with left-aligned columns.
+- When the source is a data frame with `"omop_cdm_version"` metadata, prints it beneath the table.
+
+This method is purely for display; it returns `nothing`.
+"""
+function Base.show(io::IO, ht::HealthTable)
+    df = ht.source
+
+    # `Tables.rows` works for any wrapped table, whereas `nrow` is only defined for data frames.
+    if isempty(Tables.rows(df))
+        pretty_table(io, ["HealthTable is empty"]; header = [""])
+    else
+        pretty_table(io, df; alignment = :l)
+    end
+    # `metadata` is only queried for data frames; other sources may not support it and would throw.
+    if df isa AbstractDataFrame && haskey(metadata(df), "omop_cdm_version")
+        println(io, "\nOMOP CDM version: ", metadata(df, "omop_cdm_version"))
+    end
+    return nothing
+end
diff --git a/test/Project.toml b/test/Project.toml
index cb94583..179b3b6 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,4 +1,25 @@
 [deps]
+DBInterface = "a10d1c49-ce27-4219-8d33-6db1a4562965"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
+DuckDB = "d2f5444f-75bc-4fdf-ac35-56f514c445e1"
+FeatureTransforms = "8fd68953-04b8-4117-ac19-158bf6de9782"
+InlineStrings = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
+OMOPCommonDataModel = "ba65db9e-6590-4054-ab8a-101ed9124986"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[compat]
+DBInterface = "2.2"
+DataFrames = "1"
+Dates = "1.10"
+DrWatson = "2"
+DuckDB = "1"
+FeatureTransforms = "0.4.0"
+InlineStrings = "1"
+OMOPCommonDataModel = "0.1"
+Tables = "1.12.1"
+julia = "1.10"
diff --git a/test/healthtable_interface.jl b/test/healthtable_interface.jl
new file mode 100644
index 0000000..0fcafb1
--- /dev/null
+++ b/test/healthtable_interface.jl
@@ -0,0 +1,75 @@
+@testset "HealthTable Interface" begin
+    df = DataFrame(
+        person_id = 1:5,
+        gender_concept_id = [8507, 8532, 8507, 8532, 8507],
+        year_of_birth = [1990, 1985, 2000, 1975, 1988]
+    )
+    
+    @testset "Constructor" begin
+        # Test basic constructor
+        ht = HealthBase.HealthTable(df)
+        @test ht isa HealthBase.HealthTable
+        @test ht.source === df
+        
+        # Test keyword constructor
+        ht_kw = HealthBase.HealthTable(source=df)
+        @test ht_kw isa HealthBase.HealthTable
+        @test ht_kw.source === df
+    end
+    
+    @testset "Tables.jl Interface" begin
+        ht = HealthBase.HealthTable(df)
+        
+        # Test istable
+        @test Tables.istable(HealthBase.HealthTable) == true
+        @test Tables.istable(typeof(ht)) == true
+        
+        # Test rowaccess
+        @test Tables.rowaccess(HealthBase.HealthTable) == true
+        @test Tables.rowaccess(typeof(ht)) == true
+        
+        # Test columnaccess
+        @test Tables.columnaccess(HealthBase.HealthTable) == true
+        @test Tables.columnaccess(typeof(ht)) == true
+        
+        # Test schema
+        schema_ht = Tables.schema(ht)
+        schema_df = Tables.schema(df)
+        @test schema_ht.names == schema_df.names
+        @test schema_ht.types == schema_df.types
+        
+        # Test rows
+        rows_ht = collect(Tables.rows(ht))
+        rows_df = collect(Tables.rows(df))
+        @test length(rows_ht) == length(rows_df)
+        @test rows_ht[1].person_id == rows_df[1].person_id
+        
+        # Test columns
+        cols_ht = Tables.columns(ht)
+        cols_df = Tables.columns(df)
+        @test Tables.columnnames(cols_ht) == Tables.columnnames(cols_df)
+        
+        # Test materializer
+        @test Tables.materializer(HealthBase.HealthTable) == DataFrame
+        
+        # Test DataFrame materialization
+        df_materialized = DataFrame(ht)
+        @test df_materialized == df
+    end
+    
+    @testset "Different data types" begin
+        # Test with different Tables.jl compatible types
+        
+        # Test with named tuple
+        nt = [(person_id=1, name="Alice"), (person_id=2, name="Bob")]
+        ht_nt = HealthBase.HealthTable(nt)
+        @test Tables.istable(typeof(ht_nt))
+        @test length(collect(Tables.rows(ht_nt))) == 2
+        
+        # Test with empty DataFrame
+        empty_df = DataFrame(person_id = Int[], name = String[])
+        ht_empty = HealthBase.HealthTable(empty_df)
+        @test Tables.istable(typeof(ht_empty))
+        @test length(collect(Tables.rows(ht_empty))) == 0
+    end
+end
diff --git a/test/omopcdmext.jl b/test/omopcdmext.jl
new file mode 100644
index 0000000..9044bfa
--- /dev/null
+++ b/test/omopcdmext.jl
@@ -0,0 +1,282 @@
+@testset "HealthBaseOMOPCDMExt" begin
+    # Check if extension is loaded properly
+    ext = Base.get_extension(HealthBase, :HealthBaseOMOPCDMExt)
+    if isnothing(ext)
+        @warn "HealthBaseOMOPCDMExt extension is not loaded. Skipping tests."
+        return
+    end
+    
+    # This DataFrame is compliant with the OMOP CDM v5.4.1 PERSON table schema.
+    person_df_good = DataFrame(
+        person_id=1,
+        gender_concept_id=8507,
+        year_of_birth=1990,
+        month_of_birth=1,
+        day_of_birth=1,
+        birth_datetime=DateTime(1990, 1, 1),
+        race_concept_id=0,
+        ethnicity_concept_id=0
+    )
+
+    # This DataFrame has an incorrect type for the `year_of_birth` column.
+    person_df_bad = DataFrame(
+        person_id=1,
+        gender_concept_id=8507,
+        year_of_birth="1990", # Incorrect: Should be an Int
+        month_of_birth=1,
+        day_of_birth=1,
+        birth_datetime=DateTime(1990, 1, 1),
+        race_concept_id=0,
+        ethnicity_concept_id=0
+    )
+
+    ht = HealthBase.HealthTable(person_df_good; omop_cdm_version="v5.4.1")
+
+    @testset "Constructor and Type Validation" begin
+        @testset "Valid DataFrame" begin
+            @test ht isa HealthBase.HealthTable
+            @test metadata(ht.source, "omop_cdm_version") == "v5.4.1"
+            
+            # Test with default version
+            ht_default = HealthBase.HealthTable(person_df_good)
+            @test metadata(ht_default.source, "omop_cdm_version") == "v5.4.0" 
+        end
+
+        @testset "Invalid DataFrame Type Check" begin
+            @test_throws ArgumentError HealthBase.HealthTable(person_df_bad; omop_cdm_version="v5.4.1")
+        end
+        
+        @testset "Unsupported OMOP CDM Version" begin
+            @test_throws ArgumentError HealthBase.HealthTable(person_df_good; omop_cdm_version="v999.0")
+        end
+        
+        @testset "Type Enforcement Options" begin
+            # Test with type enforcement disabled (should warn, not error)
+            @test_logs (:warn, r"Type enforcement is disabled") HealthBase.HealthTable(person_df_bad; omop_cdm_version="v5.4.1", disable_type_enforcement=true)
+            
+            # Test with collect_errors=false (should fail on first error)
+            @test_throws ArgumentError HealthBase.HealthTable(person_df_bad; omop_cdm_version="v5.4.1", collect_errors=false)
+        end
+        
+        @testset "Extra Columns" begin
+            df_extra = copy(person_df_good)
+            df_extra[!, :extra_column] = ["extra_value"]
+
+            ht_extra = HealthBase.HealthTable(df_extra; omop_cdm_version="v5.4.1")
+            @test "extra_column" in names(ht_extra.source)
+        end
+        
+        @testset "Schema Validation Edge Cases" begin
+            # Test multiple validation errors collected
+            df_multiple_errors = DataFrame(
+                person_id = "invalid_string",  # Wrong type
+                gender_concept_id = "another_string"  # Wrong type  
+            )
+            @test_throws ArgumentError HealthBase.HealthTable(df_multiple_errors; omop_cdm_version="v5.4.1", collect_errors=true)
+        end
+    end
+
+    @testset "Version detection from metadata" begin
+        df_meta = DataFrame(person_id=1:3,
+                            gender_concept_id=[8507,8532,8507],
+                            year_of_birth=[1990,1985,2000],
+                            race_concept_id=[8527,8516,8527])
+        ht_meta = HealthBase.HealthTable(df_meta; omop_cdm_version="v5.4.1") 
+        @test metadata(ht_meta.source, "omop_cdm_version") == "v5.4.1"
+    end
+
+    @testset "Preprocessing Functions" begin
+        df = DataFrame(
+            person_id = 1:4,
+            gender_concept_id = [8507, 8507, 8532, 8532], 
+            condition_source_value = ["Diabetes", "Hypertension", "Diabetes", "RareCondition"],
+            bool_column = [true, false, true, false]
+        )
+        ht = HealthBase.HealthTable(df; omop_cdm_version="v5.4.1")
+
+        @testset "one_hot_encode function" begin
+            # Test basic functionality
+            result = HealthBase.one_hot_encode(ht; cols=[:gender_concept_id], return_features_only=true)
+            expected_cols = ["gender_concept_id_8507", "gender_concept_id_8532"]
+            @test all(col in string.(names(result)) for col in expected_cols)
+            
+            # Test with HealthTable return
+            result_ht = HealthBase.one_hot_encode(ht; cols=[:gender_concept_id], return_features_only=false)
+            @test result_ht isa HealthBase.HealthTable
+            
+            # Test with Boolean column (should warn and skip)
+            @test_logs (:warn, r"Column bool_column is already Boolean") HealthBase.one_hot_encode(ht; cols=[:bool_column], return_features_only=true)
+            
+            # Test with missing column
+            @test_throws AssertionError HealthBase.one_hot_encode(ht; cols=[:nonexistent_column], return_features_only=true)
+        end
+
+        @testset "apply_vocabulary_compression function" begin
+            # Test basic functionality
+            compressed = HealthBase.apply_vocabulary_compression(ht; cols=[:condition_source_value], min_freq=2)
+            @test "condition_source_value_compressed" in names(compressed.source)
+            compressed_vals = unique(compressed.source.condition_source_value_compressed)
+            @test "Other" in compressed_vals
+            
+            # Test with custom other_label
+            compressed_custom = HealthBase.apply_vocabulary_compression(ht; cols=[:condition_source_value], min_freq=2, other_label="RARE")
+            @test "RARE" in unique(compressed_custom.source.condition_source_value_compressed)
+            
+            # Test with missing column
+            @test_throws AssertionError HealthBase.apply_vocabulary_compression(ht; cols=[:nonexistent_column], min_freq=2)
+        end
+
+        @testset "map_concepts function (mocked)" begin
+            # Create a simple in-memory DuckDB for testing; `finally` closes it even on error
+            db = DuckDB.DB()
+            try
+                # Create a mock concept table
+                DBInterface.execute(db, """
+                    CREATE TABLE concept (
+                        concept_id INTEGER,
+                        concept_name VARCHAR
+                    )
+                """)
+                DBInterface.execute(db, """
+                    INSERT INTO concept VALUES 
+                    (8507, 'Male'),
+                    (8532, 'Female')
+                """)
+
+                # Test map_concepts (returns new HealthTable)
+                ht_mapped = HealthBase.map_concepts(ht, :gender_concept_id, db; new_cols="gender_name")
+                @test "gender_name" in names(ht_mapped.source)
+                @test ht_mapped.source.gender_name[1] == "Male"
+
+                # Test map_concepts! (modifies in place)
+                ht_copy = HealthBase.HealthTable(copy(df); omop_cdm_version="v5.4.1")
+                HealthBase.map_concepts!(ht_copy, :gender_concept_id, db; new_cols="gender_name_inplace")
+                @test "gender_name_inplace" in names(ht_copy.source)
+
+                # Test error cases
+                @test_throws AssertionError HealthBase.map_concepts(ht, :nonexistent_column, db)
+            finally
+                # Close the database
+                DuckDB.close(db)
+            end
+        end
+    end
+    
+    @testset "Edge Cases and Error Handling" begin
+        @testset "HealthTable Constructor Error Paths" begin
+            @test_throws ArgumentError HealthBase.HealthTable(person_df_good; omop_cdm_version="v999.0")
+            
+            # Test with disable_type_enforcement=true for warning path
+            @test_logs (:warn, r"Type enforcement is disabled") HealthBase.HealthTable(person_df_bad; disable_type_enforcement=true)
+        end
+        
+        @testset "Internal Schema Validation Coverage" begin
+            # Get the extension to access internal constants
+            ext = Base.get_extension(HealthBase, :HealthBaseOMOPCDMExt)
+
+            if !isnothing(ext)
+                # Access the OMOPCDM_VERSIONS constant from the extension
+                omop_versions = getfield(ext, :OMOPCDM_VERSIONS)
+
+                # The schema table is shared state: each scenario swaps in a corrupted copy and
+                # puts the original entry back in `finally`, so an error cannot leave it corrupted.
+                if haskey(omop_versions, "v5.4.1")
+                    # Keep the entry itself so the restore is verbatim, not a rebuilt Dict
+                    original_version = omop_versions["v5.4.1"]
+                    original_fields = original_version[:fields]
+
+                    # Scenario 1: person_id lacks cdmDatatype
+                    corrupted_fields = copy(original_fields)
+                    if haskey(corrupted_fields, :person_id)
+                        # Rebuild the person_id field without its cdmDatatype key
+                        original_person_field = corrupted_fields[:person_id]
+                        corrupted_person_field = Dict{Symbol, Any}()
+                        for (k, v) in original_person_field
+                            if k != :cdmDatatype  # Skip cdmDatatype to trigger the error
+                                corrupted_person_field[k] = v
+                            end
+                        end
+                        corrupted_fields[:person_id] = corrupted_person_field
+
+                        try
+                            omop_versions["v5.4.1"] = Dict{Symbol, Any}(:fields => corrupted_fields)
+                            # Test the missing cdmDatatype error path
+                            df_test = DataFrame(person_id=1)
+                            @test_throws ArgumentError HealthBase.HealthTable(df_test; omop_cdm_version="v5.4.1", collect_errors=false)
+                            # Test the missing cdmDatatype with collect_errors=true
+                            @test_throws ArgumentError HealthBase.HealthTable(df_test; omop_cdm_version="v5.4.1", collect_errors=true)
+                        finally
+                            # Restore original schema
+                            omop_versions["v5.4.1"] = original_version
+                        end
+                    end
+
+                    # Scenario 2: person_id has an unrecognized cdmDatatype
+                    corrupted_fields_2 = copy(original_fields)
+                    if haskey(corrupted_fields_2, :person_id)
+                        corrupted_person_field_2 = copy(corrupted_fields_2[:person_id])
+                        corrupted_person_field_2[:cdmDatatype] = "INVALID_DATATYPE_XYZ"
+                        corrupted_fields_2[:person_id] = corrupted_person_field_2
+
+                        try
+                            omop_versions["v5.4.1"] = Dict{Symbol, Any}(:fields => corrupted_fields_2)
+
+                            # Test the unrecognized datatype error path
+                            df_test2 = DataFrame(person_id=1)
+                            @test_throws ArgumentError HealthBase.HealthTable(df_test2; omop_cdm_version="v5.4.1", collect_errors=true)
+                        finally
+                            # Restore original schema
+                            omop_versions["v5.4.1"] = original_version
+                        end
+                    end
+                end
+            end
+        end
+        
+        @testset "map_concepts Edge Cases" begin
+            # Set up test database with concept table; `finally` closes it even if a call below throws
+            db = DuckDB.DB()
+            try
+                DBInterface.execute(db, "CREATE TABLE concept (concept_id INTEGER, concept_name VARCHAR)")
+                DBInterface.execute(db, "INSERT INTO concept VALUES (8507, 'Male')")
+                df_empty = DataFrame(empty_col=[missing, missing])
+                ht_empty = HealthBase.HealthTable(df_empty; omop_cdm_version="v5.4.1")
+                @test_logs (:warn, r"No concept_ids found") HealthBase.map_concepts!(ht_empty, :empty_col, db; new_cols="mapped_empty")
+
+                df_nonexistent = DataFrame(nonexistent_ids=[99999])
+                ht_nonexistent = HealthBase.HealthTable(df_nonexistent; omop_cdm_version="v5.4.1")
+
+                # When mapping fails, the column is NOT added (the function continues/skips)
+                HealthBase.map_concepts!(ht_nonexistent, :nonexistent_ids, db; new_cols="mapped_nonexistent")
+                @test !("mapped_nonexistent" in names(ht_nonexistent.source))  # Column should NOT be added when mapping fails
+
+                # Test drop_original=true for map_concepts!
+                df_drop = DataFrame(concept_col=[8507])
+                ht_drop = HealthBase.HealthTable(df_drop; omop_cdm_version="v5.4.1")
+                HealthBase.map_concepts!(ht_drop, :concept_col, db; new_cols="mapped_col", drop_original=true)
+                @test !("concept_col" in names(ht_drop.source))  # Original column should be dropped
+                @test "mapped_col" in names(ht_drop.source)
+            finally
+                DuckDB.close(db)
+            end
+        end
+        
+        @testset "apply_vocabulary_compression drop_original" begin
+            df_compress = DataFrame(
+                col1=["A", "A", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"],
+                col2=["X", "X", "X", "Y", "Z", "Z", "Z", "Z", "Z", "Z", "Z", "Z", "Z"]
+            )
+            ht_compress = HealthBase.HealthTable(df_compress; omop_cdm_version="v5.4.1")
+            
+            # Apply compression with drop_original=true
+            ht_result = HealthBase.apply_vocabulary_compression(ht_compress; cols=[:col1, :col2], min_freq=3, drop_original=true)
+            
+            # Original columns should be dropped
+            @test !("col1" in names(ht_result.source))
+            @test !("col2" in names(ht_result.source))
+            # Compressed columns should exist
+            @test "col1_compressed" in names(ht_result.source)
+            @test "col2_compressed" in names(ht_result.source)
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index ceef466..04e5928 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,12 +1,32 @@
 using DrWatson
-using HealthBase
-using Pkg
 using Test
+using InlineStrings
+using FeatureTransforms
+using Serialization
+using DataFrames
+using OMOPCommonDataModel
+using Dates
+using DBInterface
+using DuckDB
+using Tables
+using HealthBase
 
 @testset "Exceptions" begin
     include("exceptions.jl")
 end
 
+@testset "HealthTable Interface" begin
+    include("healthtable_interface.jl")
+end
+
+@testset "HealthTable Show Method" begin
+    include("show.jl")
+end
+
 @testset "HealthBaseDrWatsonExt" begin
     include("drwatsonext.jl")
 end
+
+@testset "HealthBaseOMOPCDMExt" begin
+    include("omopcdmext.jl")
+end
diff --git a/test/show.jl b/test/show.jl
new file mode 100644
index 0000000..5b58024
--- /dev/null
+++ b/test/show.jl
@@ -0,0 +1,59 @@
+@testset "HealthTable Show Methods" begin
+    # Test with basic HealthTable
+    df = DataFrame(
+        person_id = 1:3,
+        gender_concept_id = [8507, 8532, 8507],
+        year_of_birth = [1990, 1985, 2000]
+    )
+    ht = HealthBase.HealthTable(df)
+    
+    @testset "Basic show functionality" begin
+        # Test that show returns nothing
+        output = show(IOBuffer(), ht)
+        @test output === nothing
+        
+        # Test show output contains table data
+        io = IOBuffer()
+        show(io, ht)
+        output_str = String(take!(io))
+        @test contains(output_str, "person_id")
+        @test contains(output_str, "gender_concept_id")
+        @test contains(output_str, "year_of_birth")
+    end
+    
+    @testset "Empty HealthTable show" begin
+        empty_df = DataFrame(person_id = Int[], gender_concept_id = Int[])
+        empty_ht = HealthBase.HealthTable(empty_df)
+        
+        io = IOBuffer()
+        show(io, empty_ht)
+        output_str = String(take!(io))
+        @test contains(output_str, "HealthTable is empty")
+    end
+    
+    @testset "Show with OMOP CDM metadata" begin
+        # Check if OMOP extension is available for metadata test
+        ext = Base.get_extension(HealthBase, :HealthBaseOMOPCDMExt)
+        if !isnothing(ext)
+            ht_omop = HealthBase.HealthTable(df; omop_cdm_version="v5.4.1")
+            
+            io = IOBuffer()
+            show(io, ht_omop)
+            output_str = String(take!(io))
+            @test contains(output_str, "OMOP CDM version: v5.4.1")
+        else
+            @warn "HealthBaseOMOPCDMExt not available, skipping OMOP metadata test"
+        end
+    end
+    
+    @testset "Show with regular metadata" begin
+        df_with_meta = copy(df)
+        DataFrames.metadata!(df_with_meta, "omop_cdm_version", "v5.4.0")
+        ht_meta = HealthBase.HealthTable(df_with_meta)
+        
+        io = IOBuffer()
+        show(io, ht_meta)
+        output_str = String(take!(io))
+        @test contains(output_str, "OMOP CDM version: v5.4.0")
+    end
+end