mirror of
https://github.com/karpathy/nanochat.git
synced 2026-05-09 17:30:14 +00:00
Adds tooling and documentation for Day 2 cluster operations:

- scripts/rotate-nodes.sh: interactive node-rotation driver that applies terraform to pick up the latest SSM-resolved EKS AMI and watches the rolling replacement.
- scripts/demo-schema-change.sh: end-to-end demo of the zero-downtime is_favorited column migration via helm upgrade + migration hook.
- scripts/verify-deployment.sh: post-deploy health check across pods, per-service HTTP health endpoints, rollout status, and PDBs.
- docs/chaos-runbook.md: failure-mode playbook with simulate / Grafana / Loki / recovery steps for six scenarios (pod kill, node failure, DB pool exhaustion, inference OOM, high latency, SSL issues), plus a Loki quick-reference.
- terraform/modules/eks: expose the current_node_ami_id output and add update_config.max_unavailable_percentage (configurable, default 33) so node-group rolls are controlled.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
40 lines
1.1 KiB
HCL
40 lines
1.1 KiB
HCL
# Re-exports the cluster name reported by the nested `eks` module.
output "cluster_name" {
  description = "EKS cluster name."
  value       = module.eks.cluster_name
}
|
|
|
|
# Re-exports the API server endpoint reported by the nested `eks` module.
output "cluster_endpoint" {
  description = "EKS API server endpoint."
  value       = module.eks.cluster_endpoint
}
|
|
|
|
# Re-exports the cluster CA certificate data from the nested `eks` module.
# Per the description, the value is base64-encoded.
output "cluster_certificate_authority_data" {
  description = "Base64-encoded cluster CA certificate."
  value       = module.eks.cluster_certificate_authority_data
}
|
|
|
|
# Re-exports the control-plane security group ID from the nested `eks` module.
output "cluster_security_group_id" {
  description = "Cluster control-plane security group."
  value       = module.eks.cluster_security_group_id
}
|
|
|
|
# Re-exports the node security group ID from the nested `eks` module.
# Per the description, consumers such as RDS / EFS reference this ID to
# allow inbound traffic from the worker nodes.
output "node_security_group_id" {
  description = "Security group attached to managed node group ENIs (used by RDS / EFS to allow inbound traffic from nodes)."
  value       = module.eks.node_security_group_id
}
|
|
|
|
# Re-exports the OIDC provider ARN from the nested `eks` module (used for IRSA
# per the description).
output "oidc_provider_arn" {
  description = "IRSA OIDC provider ARN."
  value       = module.eks.oidc_provider_arn
}
|
|
|
|
# Exposes the nested module's `oidc_provider` output under the name
# `oidc_provider_url`. Per the description, the value is the issuer URL with
# the `https://` scheme already stripped.
output "oidc_provider_url" {
  description = "IRSA OIDC issuer URL (without https://)."
  value       = module.eks.oidc_provider
}
|
|
|
|
# Exposes the value read from the `eks_ami_id` SSM parameter data source.
# NOTE(review): the AWS provider marks `aws_ssm_parameter.value` as sensitive;
# confirm whether this output needs `sensitive = true` (or the data source's
# `insecure_value` / a `nonsensitive()` wrapper) for the Terraform version and
# call site in use.
output "current_node_ami_id" {
  description = "The current EKS-optimized AMI ID used by the node group."
  value       = data.aws_ssm_parameter.eks_ami_id.value
}
|