File tree Expand file tree Collapse file tree 1 file changed +65
-0
lines changed Expand file tree Collapse file tree 1 file changed +65
-0
lines changed Original file line number Diff line number Diff line change @@ -57,6 +57,9 @@ locals {
5757 # EFA configuration
5858 efa_config = {
5959 enabled = true
60+ device_name = " efa0"
61+ mtu_size = 9000
62+ enable_gpudirect = false
6063 instance_types = local.instance_types.spot_types
6164 security_group_rules = {
6265 efa_ports = {
@@ -444,6 +447,68 @@ locals {
444447 }
445448 }
446449
# Storage configuration.
#
# storage.s3 describes the versioned data-repository bucket:
#   data_repository_bucket  bucket name, built from local.environment and local.region
#   versioning_enabled      whether object versioning is turned on
#   encryption_algorithm    server-side encryption algorithm identifier
#   lifecycle_rules         list of lifecycle rules applied to noncurrent object versions
#   intelligent_tiering     flag for S3 Intelligent-Tiering (off here)
#
# All string values are written without padding: S3 bucket names may not
# contain spaces, and the algorithm / storage-class values must match the
# AWS enum strings exactly.
storage = {
  s3 = {
    data_repository_bucket = "hpc-${local.environment}-${local.region}-data-repository"
    versioning_enabled     = true
    encryption_algorithm   = "AES256"

    lifecycle_rules = [
      {
        id      = "archive-old-versions"
        enabled = true

        # Noncurrent versions move to cheaper storage classes in two steps
        # (30 days, then 60 days after becoming noncurrent) ...
        noncurrent_version_transitions = [
          {
            days          = 30
            storage_class = "STANDARD_IA"
          },
          {
            days          = 60
            storage_class = "GLACIER"
          },
        ]

        # ... and are deleted after 365 days.
        noncurrent_version_expiration = {
          days = 365
        }
      }
    ]

    intelligent_tiering = false
  }
}
478+
# Monitoring configuration.
#
# monitoring.cloudwatch holds:
#   log_retention_days   default retention for CloudWatch logs, in days
#   detailed_monitoring  detailed-monitoring flag (off here)
#   enable_grafana       Grafana flag (off here)
#   log_groups           per-log-group name and retention
#   alarms               CloudWatch metric alarm definitions, keyed by alarm id
#
# Retention values reference local.cloudwatch_log_retention_days (defined
# with the dev-specific overrides in this module, currently 7) so the
# retention period is set in one place instead of being repeated.
#
# String values are written without padding; log group names, alarm names
# and the comparison_operator / namespace / treat_missing_data values must
# match exactly what CloudWatch expects.
monitoring = {
  cloudwatch = {
    log_retention_days  = local.cloudwatch_log_retention_days
    detailed_monitoring = false
    enable_grafana      = false

    log_groups = {
      parallel_cluster = {
        name              = "/aws/parallelcluster/${local.cluster_name}"
        retention_in_days = local.cloudwatch_log_retention_days
      }
      vpc_flow_logs = {
        name              = "/aws/vpc/flow-logs/${local.cluster_name}"
        retention_in_days = local.cloudwatch_log_retention_days
      }
    }

    alarms = {
      high_queue_depth = {
        alarm_name          = "${local.cluster_name}-HighQueueDepth"
        comparison_operator = "GreaterThanThreshold"
        evaluation_periods  = 2
        # The AWS/SQS namespace has no "QueueDepth" metric; queue depth is
        # reported as ApproximateNumberOfMessagesVisible. With the old name
        # the alarm would only ever see missing data and, because
        # treat_missing_data is "notBreaching", would never fire.
        metric_name = "ApproximateNumberOfMessagesVisible"
        namespace   = "AWS/SQS"
        period      = 300
        statistic   = "Average"
        threshold   = 100
        alarm_description  = "Alarm when SQS queue depth is high"
        treat_missing_data = "notBreaching"
        # NOTE(review): AWS/SQS metrics are published per queue; the resource
        # consuming this map presumably adds a QueueName dimension - confirm.
      }
    }
  }
}
511+
447512 # Dev-specific overrides
448513 cluster_capacity = 100
449514 cloudwatch_log_retention_days = 7
You can’t perform that action at this time.
0 commit comments