
Commit 161e326

emmaling27 (Convex, Inc.) authored and committed
Support S3 storage for self-hosted backend (#34680)
Add S3 support for the self-hosted backend. Includes a refactor to reduce code duplication when initializing storage: the file, search, export, and import storages move into an `ApplicationStorage` struct that is passed around instead of re-creating each storage at different layers.

GitOrigin-RevId: f24bfb0af21abeecc9d6ee580bf72e98882911a3
1 parent 8255dc1 commit 161e326
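
As background for the refactor described in the commit message, here is a minimal, hypothetical sketch (not code from this commit) of the pattern it adopts: the five `Arc<dyn Storage>` handles that were previously threaded through constructors one by one are grouped into a single, cheaply clonable bundle. The `Storage` trait below is a stand-in for the real trait in the `storage` crate.

use std::sync::Arc;

// Stand-in for the real `storage::Storage` trait.
trait Storage: Send + Sync {}

// Mirrors the struct added in this commit: one clonable bundle of handles.
#[derive(Clone)]
struct ApplicationStorage {
    files_storage: Arc<dyn Storage>,
    modules_storage: Arc<dyn Storage>,
    search_storage: Arc<dyn Storage>,
    exports_storage: Arc<dyn Storage>,
    snapshot_imports_storage: Arc<dyn Storage>,
}

// A worker that used to take `exports_storage` and `files_storage` as two
// separate parameters now takes the bundle and clones only what it needs.
fn export_worker_handles(storage: &ApplicationStorage) -> (Arc<dyn Storage>, Arc<dyn Storage>) {
    (
        storage.exports_storage.clone(),
        storage.files_storage.clone(),
    )
}

Because every field is an Arc, cloning the bundle is just a handful of reference-count bumps, which is what lets the `Clone` impl for `Application` in the diff below clone the whole struct in one line.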

File tree

8 files changed: +202 -128 lines changed


Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default.

crates/application/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ async_lru = { path = "../async_lru" }
 async_zip = { workspace = true }
 async_zip_reader = { version = "0.1.0", path = "../async_zip_reader" }
 authentication = { path = "../../crates/authentication" }
+aws_s3 = { path = "../aws_s3" }
 bytes = { workspace = true }
 chrono = { workspace = true }
 cmd_util = { path = "../cmd_util" }

crates/application/src/lib.rs

Lines changed: 121 additions & 41 deletions
@@ -31,6 +31,7 @@ use authentication::{
     validate_id_token,
     Auth0IdToken,
 };
+use aws_s3::storage::S3Storage;
 use bytes::Bytes;
 use chrono::{
     DateTime,
@@ -208,6 +209,10 @@ use model::{
         },
         ConfigModel,
     },
+    database_globals::{
+        types::StorageTagInitializer,
+        DatabaseGlobalsModel,
+    },
     deployment_audit_log::{
         types::DeploymentAuditLogEvent,
         DeploymentAuditLogModel,
@@ -287,9 +292,11 @@ use storage::{
     BufferedUpload,
     ClientDrivenUploadPartToken,
     ClientDrivenUploadToken,
+    LocalDirStorage,
     Storage,
     StorageExt,
     StorageGetStream,
+    StorageUseCase,
     Upload,
 };
 use sync_types::{
@@ -503,17 +510,22 @@ pub enum EnvVarChange {
     Set(EnvironmentVariable),
 }
 
+#[derive(Clone)]
+pub struct ApplicationStorage {
+    pub files_storage: Arc<dyn Storage>,
+    pub modules_storage: Arc<dyn Storage>,
+    search_storage: Arc<dyn Storage>,
+    pub exports_storage: Arc<dyn Storage>,
+    snapshot_imports_storage: Arc<dyn Storage>,
+}
+
 pub struct Application<RT: Runtime> {
     runtime: RT,
     database: Database<RT>,
     runner: Arc<ApplicationFunctionRunner<RT>>,
     function_log: FunctionExecutionLog<RT>,
     file_storage: FileStorage<RT>,
-    files_storage: Arc<dyn Storage>,
-    modules_storage: Arc<dyn Storage>,
-    search_storage: Arc<dyn Storage>,
-    exports_storage: Arc<dyn Storage>,
-    snapshot_imports_storage: Arc<dyn Storage>,
+    application_storage: ApplicationStorage,
     usage_tracking: UsageCounter,
     key_broker: KeyBroker,
     instance_name: String,
@@ -544,11 +556,7 @@ impl<RT: Runtime> Clone for Application<RT> {
             runner: self.runner.clone(),
             function_log: self.function_log.clone(),
             file_storage: self.file_storage.clone(),
-            files_storage: self.files_storage.clone(),
-            modules_storage: self.modules_storage.clone(),
-            search_storage: self.search_storage.clone(),
-            exports_storage: self.exports_storage.clone(),
-            snapshot_imports_storage: self.snapshot_imports_storage.clone(),
+            application_storage: self.application_storage.clone(),
             usage_tracking: self.usage_tracking.clone(),
             key_broker: self.key_broker.clone(),
             instance_name: self.instance_name.clone(),
@@ -573,16 +581,79 @@ impl<RT: Runtime> Clone for Application<RT> {
     }
 }
 
+/// Create storage based on the storage type configuration
+pub async fn create_storage<RT: Runtime>(
+    runtime: RT,
+    storage_type: &model::database_globals::types::StorageType,
+    use_case: StorageUseCase,
+) -> anyhow::Result<Arc<dyn Storage>> {
+    Ok(match storage_type {
+        model::database_globals::types::StorageType::S3 { s3_prefix } => {
+            Arc::new(S3Storage::for_use_case(use_case, s3_prefix.clone(), runtime).await?)
+        },
+        model::database_globals::types::StorageType::Local { dir } => {
+            let storage = LocalDirStorage::for_use_case(runtime, dir, use_case)?;
+            tracing::info!("{use_case} storage path: {:?}", storage.path());
+            Arc::new(storage)
+        },
+    })
+}
+
 impl<RT: Runtime> Application<RT> {
+    pub async fn initialize_storage(
+        runtime: RT,
+        database: &Database<RT>,
+        storage_tag_initializer: StorageTagInitializer,
+        instance_name: String,
+    ) -> anyhow::Result<ApplicationStorage> {
+        let storage_type = {
+            let mut tx = database.begin_system().await?;
+            let storage_type = DatabaseGlobalsModel::new(&mut tx)
+                .initialize_storage_tag(storage_tag_initializer, instance_name)
+                .await?;
+            database
+                .commit_with_write_source(tx, "init_storage")
+                .await?;
+            storage_type
+        };
+
+        let files_storage =
+            create_storage(runtime.clone(), &storage_type, StorageUseCase::Files).await?;
+        let modules_storage =
+            create_storage(runtime.clone(), &storage_type, StorageUseCase::Modules).await?;
+        let search_storage = create_storage(
+            runtime.clone(),
+            &storage_type,
+            StorageUseCase::SearchIndexes,
+        )
+        .await?;
+        let exports_storage =
+            create_storage(runtime.clone(), &storage_type, StorageUseCase::Exports).await?;
+        let snapshot_imports_storage = create_storage(
+            runtime.clone(),
+            &storage_type,
+            StorageUseCase::SnapshotImports,
+        )
+        .await?;
+
+        // Search storage needs to be set for Database to be fully initialized
+        database.set_search_storage(search_storage.clone());
+        tracing::info!("{:?} storage is configured.", storage_type);
+
+        Ok(ApplicationStorage {
+            files_storage,
+            modules_storage,
+            search_storage,
+            exports_storage,
+            snapshot_imports_storage,
+        })
+    }
+
     pub async fn new(
         runtime: RT,
         database: Database<RT>,
         file_storage: FileStorage<RT>,
-        files_storage: Arc<dyn Storage>,
-        modules_storage: Arc<dyn Storage>,
-        search_storage: Arc<dyn Storage>,
-        exports_storage: Arc<dyn Storage>,
-        snapshot_imports_storage: Arc<dyn Storage>,
+        application_storage: ApplicationStorage,
         usage_tracking: UsageCounter,
         key_broker: KeyBroker,
         instance_name: String,
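
Not shown in this diff: the `StorageType` matched on in `create_storage` is defined in `model::database_globals::types`. Based only on the match arms above, it presumably looks roughly like the following sketch; the field types and any additional variants or derives are assumptions.

// Hypothetical reconstruction of model::database_globals::types::StorageType,
// inferred from the match arms in `create_storage`; field types are guesses.
pub enum StorageType {
    S3 { s3_prefix: String },
    Local { dir: String },
}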
@@ -598,7 +669,8 @@ impl<RT: Runtime> Application<RT> {
         app_auth: Arc<ApplicationAuth>,
         cache: QueryCache,
     ) -> anyhow::Result<Self> {
-        let module_cache = ModuleCache::new(runtime.clone(), modules_storage.clone()).await;
+        let module_cache =
+            ModuleCache::new(runtime.clone(), application_storage.modules_storage.clone()).await;
         let module_loader = Arc::new(module_cache.clone());
 
         let system_env_vars = btreemap! {
@@ -622,7 +694,7 @@ impl<RT: Runtime> Application<RT> {
             runtime.clone(),
             database.clone(),
             persistence.reader(),
-            search_storage.clone(),
+            application_storage.search_storage.clone(),
             searcher,
             segment_term_metadata_fetcher,
         );
@@ -639,7 +711,7 @@ impl<RT: Runtime> Application<RT> {
         let system_table_cleanup_worker = SystemTableCleanupWorker::new(
             runtime.clone(),
             database.clone(),
-            exports_storage.clone(),
+            application_storage.exports_storage.clone(),
         );
         let system_table_cleanup_worker = Arc::new(Mutex::new(
             runtime.spawn("system_table_cleanup_worker", system_table_cleanup_worker),
@@ -657,7 +729,7 @@ impl<RT: Runtime> Application<RT> {
             function_runner.clone(),
             node_actions,
             file_storage.transactional_file_storage.clone(),
-            modules_storage.clone(),
+            application_storage.modules_storage.clone(),
             module_loader,
             function_log.clone(),
             system_env_vars.clone(),
@@ -687,8 +759,8 @@ impl<RT: Runtime> Application<RT> {
         let export_worker = ExportWorker::new(
             runtime.clone(),
             database.clone(),
-            exports_storage.clone(),
-            files_storage.clone(),
+            application_storage.exports_storage.clone(),
+            application_storage.files_storage.clone(),
             database.usage_counter().clone(),
             instance_name.clone(),
         );
@@ -697,7 +769,7 @@ impl<RT: Runtime> Application<RT> {
         let snapshot_import_worker = SnapshotImportWorker::start(
             runtime.clone(),
             database.clone(),
-            snapshot_imports_storage.clone(),
+            application_storage.snapshot_imports_storage.clone(),
             file_storage.clone(),
             database.usage_counter().clone(),
         );
@@ -709,7 +781,7 @@ impl<RT: Runtime> Application<RT> {
             runtime.clone(),
             persistence.clone(),
             database.clone(),
-            modules_storage.clone(),
+            application_storage.modules_storage.clone(),
         );
         let migration_worker = Arc::new(Mutex::new(Some(
             runtime.spawn("migration_worker", migration_worker.go()),
@@ -721,11 +793,7 @@ impl<RT: Runtime> Application<RT> {
             runner,
             function_log,
             file_storage,
-            files_storage,
-            modules_storage,
-            search_storage,
-            exports_storage,
-            snapshot_imports_storage,
+            application_storage,
             usage_tracking,
             key_broker,
             scheduled_job_runner,
@@ -754,7 +822,7 @@ impl<RT: Runtime> Application<RT> {
     }
 
     pub fn modules_storage(&self) -> &Arc<dyn Storage> {
-        &self.modules_storage
+        &self.application_storage.modules_storage
     }
 
     pub fn modules_cache(&self) -> &ModuleCache<RT> {
@@ -1442,14 +1510,15 @@ impl<RT: Runtime> Application<RT> {
                 },
             }
         };
-        let storage_get_stream =
-            self.exports_storage
-                .get(&object_key)
-                .await?
-                .context(ErrorMetadata::not_found(
-                    "ExportNotFound",
-                    format!("The requested export {snapshot_ts}/{object_key:?} was not found"),
-                ))?;
+        let storage_get_stream = self
+            .application_storage
+            .exports_storage
+            .get(&object_key)
+            .await?
+            .context(ErrorMetadata::not_found(
+                "ExportNotFound",
+                format!("The requested export {snapshot_ts}/{object_key:?} was not found"),
+            ))?;
 
         let filename = format!(
             // This should match the format in SnapshotExport.tsx.
@@ -1461,7 +1530,9 @@ impl<RT: Runtime> Application<RT> {
 
     /// Returns the cloud export key - fully qualified to the instance.
     pub fn cloud_export_key(&self, zip_export_key: ObjectKey) -> FullyQualifiedObjectKey {
-        self.exports_storage.fully_qualified_key(&zip_export_key)
+        self.application_storage
+            .exports_storage
+            .fully_qualified_key(&zip_export_key)
     }
 
     pub async fn update_environment_variables(
@@ -2035,6 +2106,7 @@ impl<RT: Runtime> Application<RT> {
             ));
         }
         let upload = self
+            .application_storage
             .snapshot_imports_storage
             .start_client_driven_upload()
             .await?;
@@ -2055,6 +2127,7 @@ impl<RT: Runtime> Application<RT> {
             ));
         }
         let part_token = self
+            .application_storage
             .snapshot_imports_storage
             .upload_part(upload_token, part_number, part)
             .await?;
@@ -2077,10 +2150,12 @@ impl<RT: Runtime> Application<RT> {
             ));
         }
         let object_key = self
+            .application_storage
             .snapshot_imports_storage
             .finish_client_driven_upload(upload_token, part_tokens)
             .await?;
         let fq_key = self
+            .application_storage
             .snapshot_imports_storage
             .fully_qualified_key(&object_key);
         start_stored_import(
@@ -2099,13 +2174,18 @@ impl<RT: Runtime> Application<RT> {
         &self,
         body_stream: BoxStream<'_, anyhow::Result<Bytes>>,
     ) -> anyhow::Result<FullyQualifiedObjectKey> {
-        let mut upload: Box<BufferedUpload> = self.snapshot_imports_storage.start_upload().await?;
+        let mut upload: Box<BufferedUpload> = self
+            .application_storage
+            .snapshot_imports_storage
+            .start_upload()
+            .await?;
         // unclear why this reassignment is necessary
         let mut body_stream = body_stream;
         upload.try_write_parallel(&mut body_stream).await?;
         drop(body_stream);
         let object_key = upload.complete().await?;
         Ok(self
+            .application_storage
             .snapshot_imports_storage
             .fully_qualified_key(&object_key))
     }
@@ -2148,7 +2228,7 @@ impl<RT: Runtime> Application<RT> {
         };
         let (storage_key, sha256, package_size) = upload_package(
             package,
-            self.modules_storage.clone(),
+            self.application_storage.modules_storage.clone(),
             external_deps_pkg.map(|pkg| pkg.storage_key),
         )
         .await?;
@@ -3016,7 +3096,7 @@ impl<RT: Runtime> Application<RT> {
     }
 
     pub fn files_storage(&self) -> Arc<dyn Storage> {
-        self.files_storage.clone()
+        self.application_storage.files_storage.clone()
     }
 
     /// Add hidden primary key indexes for the given tables. Developers do not
