Terraform 自动化
概述
HashiCorp Terraform 是开源的基础设施即代码(Infrastructure as Code, IaC)工具,通过声明式配置文件管理云资源。支持 AWS、Azure、GCP、阿里云等 100+ Provider,以 HCL(HashiCorp Configuration Language)为配置语言,支持状态管理、计划预览、并行执行、资源图谱等特性。
核心特性:
- 声明式配置 — 描述"要什么"而非"如何做"
- 执行计划 — terraform plan 预览变更,防止误操作
- 状态管理 — 跟踪资源实际状态,支持远程状态存储(S3、DynamoDB 锁)
- 模块化 — Module 复用基础设施模式
- 多云支持 — 统一工具管理多云资源
- 团队协作 — 工作区(Workspace)、锁机制防止并发冲突
安装与配置
Linux/macOS 安装
# 直接下载二进制(推荐)
curl -fsSL https://releases.hashicorp.com/terraform/1.6.6/terraform_1.6.6_linux_amd64.zip -o /tmp/terraform.zip
sudo unzip /tmp/terraform.zip -d /usr/local/bin/
terraform version
# 或使用 tfenv 管理多版本
brew install tfenv
tfenv install 1.6.6
tfenv use 1.6.6
Docker 使用
# 运行 Terraform 容器(别名)
alias terraform='docker run --rm -it -v $(pwd):/workspace -w /workspace hashicorp/terraform:1.6.6'
# 持久化配置和插件
docker run --rm -it \
-v ~/.aws:/root/.aws:ro \
-v $(pwd):/workspace \
-w /workspace \
-e AWS_PROFILE=prod \
hashicorp/terraform:1.6.6 init
AWS Provider 配置
# versions.tf
terraform {
required_version = ">= 1.6.0"
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
}
# 远程状态存储(S3 + DynamoDB 锁)
backend "s3" {
bucket = "prod-terraform-state"
key = "network/terraform.tfstate"
region = "us-east-1"
encrypt = true
dynamodb_table = "terraform-locks"
}
}
provider "aws" {
region = "us-east-1"
# 多账号配置
# alias = "prod"  # 多账号/多区域时启用别名;注意:启用后该 Provider 不再是默认 Provider,资源需显式指定 provider = aws.prod
default_tags {
tags = {
Environment = "production"
ManagedBy = "terraform"
Project = "opsdocs"
}
}
}
核心语法
资源定义
# VPC 创建
resource "aws_vpc" "prod" {
cidr_block = "10.0.0.0/16"
enable_dns_hostnames = true
enable_dns_support = true
tags = {
Name = "prod-vpc"
}
}
# 子网
resource "aws_subnet" "prod_private_1" {
vpc_id = aws_vpc.prod.id
cidr_block = "10.0.1.0/24"
availability_zone = "us-east-1a"
map_public_ip_on_launch = false
tags = {
Name = "prod-private-subnet-az1"
Tier = "private"
}
}
resource "aws_subnet" "prod_public_1" {
vpc_id = aws_vpc.prod.id
cidr_block = "10.0.2.0/24"
availability_zone = "us-east-1a"
map_public_ip_on_launch = true
tags = {
Name = "prod-public-subnet-az1"
Tier = "public"
}
}
# 互联网网关
resource "aws_internet_gateway" "prod" {
vpc_id = aws_vpc.prod.id
tags = {
Name = "prod-igw"
}
}
# 路由表
resource "aws_route_table" "prod_public" {
vpc_id = aws_vpc.prod.id
route {
cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.prod.id
}
tags = {
Name = "prod-public-rt"
}
}
resource "aws_route_table_association" "prod_public_1" {
subnet_id = aws_subnet.prod_public_1.id
route_table_id = aws_route_table.prod_public.id
}
数据源(Data Source)
# 查询现有 AMI
data "aws_ami" "ubuntu" {
most_recent = true
owners = ["099720109477"] # Canonical
filter {
name = "name"
values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"]
}
filter {
name = "virtualization-type"
values = ["hvm"]
}
}
# 查询可用区
data "aws_availability_zones" "available" {
state = "available"
}
# 查询 IAM 角色
data "aws_iam_role" "ecs_task_role" {
name = "ecsTaskExecutionRole"
}
变量与输出
# variables.tf
variable "environment" {
type = string
description = "环境名称"
default = "prod"
}
variable "vpc_cidr" {
type = string
description = "VPC CIDR 段"
default = "10.0.0.0/16"
validation {
# can() 本身返回 bool;写成 `can(...) != null` 恒为 true,校验永远不会失败
condition = can(cidrhost(var.vpc_cidr, 0))
error_message = "无效的 CIDR 格式。"
}
}
variable "availability_zones" {
type = list(string)
description = "可用区列表"
default = ["us-east-1a", "us-east-1b", "us-east-1c"]
}
variable "tags" {
type = map(string)
description = "全局标签"
default = {}
}
# outputs.tf
output "vpc_id" {
description = "VPC ID"
value = aws_vpc.prod.id
}
# 注意:只输出前文实际定义过的子网(prod_private_2 / prod_public_2 未定义,引用会报错)
output "private_subnet_ids" {
description = "私有子网 ID 列表"
value = [aws_subnet.prod_private_1.id]
}
output "public_subnet_ids" {
description = "公有子网 ID 列表"
value = [aws_subnet.prod_public_1.id]
}
output "vpc_cidr" {
description = "VPC CIDR"
value = aws_vpc.prod.cidr_block
}
循环与条件
# Count 循环(创建多个资源)
resource "aws_subnet" "private" {
count = 3
vpc_id = aws_vpc.prod.id
cidr_block = cidrsubnet(var.vpc_cidr, 8, count.index + 1)
availability_zone = data.aws_availability_zones.available.names[count.index]
map_public_ip_on_launch = false
tags = {
Name = "private-subnet-${count.index + 1}"
}
}
# for_each 循环(创建 Map 资源,更精细控制)
resource "aws_security_group" "app" {
for_each = toset(["web", "api", "worker"])
name = "sg-${each.value}"
description = "Security group for ${each.value}"
vpc_id = aws_vpc.prod.id
ingress {
from_port = each.value == "web" ? 80 : (each.value == "api" ? 8080 : 9000)
to_port = each.value == "web" ? 80 : (each.value == "api" ? 8080 : 9000)
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
tags = {
Name = "sg-${each.value}"
}
}
# 条件表达式
resource "aws_db_instance" "prod" {
# ...
multi_az = var.enable_multi_az ? true : false
backup_retention_period = var.environment == "prod" ? 14 : 1
}
本地资源(Local Values)
locals {
common_tags = {
Environment = var.environment
Project = "opsdocs"
ManagedBy = "terraform"
}
# 合并标签
all_tags = merge(local.common_tags, var.tags)
# 常用 CIDR 计算
private_subnets = {
az1 = cidrsubnet(var.vpc_cidr, 8, 1)
az2 = cidrsubnet(var.vpc_cidr, 8, 2)
az3 = cidrsubnet(var.vpc_cidr, 8, 3)
}
}
resource "aws_vpc" "prod" {
# ...
tags = local.all_tags
}
模块(Module)
# modules/vpc/main.tf
variable "vpc_cidr" { type = string }
variable "environment" { type = string }
resource "aws_vpc" "main" {
cidr_block = var.vpc_cidr
tags = { Name = "${var.environment}-vpc" }
}
output "vpc_id" {
value = aws_vpc.main.id
}
# 调用模块
module "vpc" {
source = "./modules/vpc"
vpc_cidr = "10.0.0.0/16"
environment = "prod"
}
状态管理
本地 vs 远程状态
# 本地状态(不推荐生产使用)
terraform {
backend "local" {
path = "terraform.tfstate"
}
}
# 远程状态(S3 + DynamoDB 锁)
terraform {
backend "s3" {
bucket = "prod-terraform-state"
key = "network/terraform.tfstate"
region = "us-east-1"
encrypt = true
dynamodb_table = "terraform-locks"
}
}
State 锁定表
# 创建 DynamoDB 锁表
aws dynamodb create-table \
--table-name terraform-locks \
--attribute-definitions AttributeName=LockID,AttributeType=S \
--key-schema AttributeName=LockID,KeyType=HASH \
--billing-mode PAY_PER_REQUEST
State 操作
# 查看当前 state
terraform state list
# 查看特定资源
terraform state show aws_vpc.prod
# 移动资源(资源重构时使用)
terraform state mv aws_vpc.prod aws_vpc.prod_v2
# 移除丢失的资源(实际已删除)
terraform state rm aws_instance.missing
# 拉取远程 state 到本地
terraform state pull > terraform.tfstate.backup
工作流命令
完整工作流
# 1. 初始化(下载 Provider、Module、初始化 Backend)
terraform init
# 2. 格式化配置文件
terraform fmt
# 3. 验证配置语法
terraform validate
# 4. 预览变更(必看!生产前必须 review)
terraform plan -var-file="prod.tfvars"
# 5. 应用变更
terraform apply -var-file="prod.tfvars"
# 6. 确认销毁(测试环境清理)
terraform destroy -var-file="test.tfvars"
变量传递
# 命令行变量(优先级最高)
terraform apply -var="environment=prod" -var="enable_multi_az=true"
# 文件变量
terraform apply -var-file="prod.tfvars"
# 自动加载(按以下顺序加载,后加载的值会覆盖先加载的)
# terraform.tfvars → terraform.tfvars.json → *.auto.tfvars / *.auto.tfvars.json(按文件名字典序)
# prod.tfvars
environment = "prod"
vpc_cidr = "10.0.0.0/16"
enable_multi_az = true
instance_type = "t3.medium"
disk_size = 100
availability_zones = ["us-east-1a", "us-east-1b", "us-east-1c"]
tags = {
CostCenter = "engineering"
Owner = "ops-team"
}
Import 现有资源
# 将已存在的 AWS 资源导入到 Terraform 管理
# 1. 先写好配置
resource "aws_vpc" "existing" {
cidr_block = "10.1.0.0/16"
}
# 2. 执行 import
terraform import aws_vpc.existing vpc-0abcd1234efgh5678
# 自动生成配置(Terraform 1.5+):需先在 .tf 中声明 import 块
# import {
#   to = aws_vpc.existing
#   id = "vpc-0abcd1234efgh5678"
# }
# 然后执行:
terraform plan -generate-config-out=generated.tf
模块生态
常用 Module
# VPC Module(官方)
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "~> 5.0"
name = "prod-vpc"
cidr = "10.0.0.0/16"
azs = ["us-east-1a", "us-east-1b", "us-east-1c"]
private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
public_subnets = ["10.0.10.0/24", "10.0.11.0/24", "10.0.12.0/24"]
enable_nat_gateway = true
single_nat_gateway = false
enable_dns_hostnames = true
enable_dns_support = true
tags = {
Environment = "prod"
}
}
# RDS Module
module "rds" {
source = "terraform-aws-modules/rds/aws"
version = "~> 6.0"
identifier = "prod-mysql"
engine = "mysql"
engine_version = "8.0"
family = "mysql8.0"
major_engine_version = "8.0"
instance_class = "db.r6g.large"
allocated_storage = 100
storage_encrypted = true
multi_az = true
backup_retention_period = 7
skip_final_snapshot = false
final_snapshot_identifier = "prod-mysql-final-snap"
db_name = "appdb"
username = "admin"
password = "YourSecurePassword123!" # 生产使用 secretsmanager
vpc_security_group_ids = [module.vpc.default_security_group_id]
tags = {
Environment = "prod"
}
}
# EKS Module
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 20.0" # enable_cluster_creator_admin_permissions 需要 EKS 模块 v20+
cluster_name = "prod-eks"
cluster_version = "1.28"
vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets
eks_managed_node_groups = {
system = {
min_size = 2
max_size = 10
desired_size = 3
instance_types = ["t3.medium"]
capacity_type = "SPOT"
}
}
enable_cluster_creator_admin_permissions = true
}
Terragrunt(DRY 复用)
# prod/terraform.tfvars 目录
# terragrunt.hcl
generate "provider" {
path = "provider.tf"
if_exists = "overwrite"
contents = <<EOF
provider "aws" {
region = "us-east-1"
default_tags {
tags = {
Environment = "prod"
ManagedBy = "terragrunt"
}
}
}
EOF
}
# 远程模块调用
generate "vpc" {
path = "vpc.tf"
if_exists = "overwrite"
contents = <<EOF
module "vpc" {
source = "git::https://github.com/terraform-aws-modules/terraform-aws-vpc.git?ref=v5.0.0"
# ... 具体配置
}
EOF
}
inputs = {
environment = "prod"
# ...
}
CI/CD 集成
GitHub Actions
# .github/workflows/terraform.yml
name: Terraform
on:
push:
branches: [main]
paths: ['terraform/**']
pull_request:
branches: [main]
env:
TF_VERSION: '1.6.6'
AWS_REGION: 'us-east-1'
jobs:
terraform:
name: Terraform Plan
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Terraform
uses: hashicorp/setup-terraform@v2
with:
terraform_version: ${{ env.TF_VERSION }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Terraform Init
id: init
working-directory: terraform
run: |
terraform init -upgrade
- name: Terraform Format
id: fmt
working-directory: terraform
run: terraform fmt -check -recursive
- name: Terraform Validate
id: validate
working-directory: terraform
run: terraform validate
- name: Terraform Plan
id: plan
working-directory: terraform
run: terraform plan -no-color
env:
TF_VAR_environment: prod
- name: Update PR
if: github.event_name == 'pull_request'
uses: actions/github-script@v7
with:
script: |
const output = `#### Terraform Plan 输出\n\`\`\`\n${{ steps.plan.outputs.stdout }}\n\`\`\``;
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: output
});
- name: Terraform Apply
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
working-directory: terraform
run: terraform apply -auto-approve -var-file="prod.tfvars"
## 完整实战项目
### 项目一:生产级 VPC 自动创建
以下配置创建完整的三层网络(公有子网 + 私有子网 + RDS 子网),包含 NAT网关、安全组和路由的完整生产级模板:
─── versions.tf ───────────────────────────────────────────────
terraform {
required_version = ">= 1.6.0"
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
}
}
─── main.tf ──────────────────────────────────────────────────
VPC
resource "aws_vpc" "prod" {
cidr_block = var.vpc_cidr
enable_dns_hostnames = true
enable_dns_support = true
tags = merge(local.common_tags, {
Name = "${var.environment}-vpc"
})
}
公有子网(面向 Internet,用于 ALB/NAT Gateway)
resource "aws_subnet" "public" {
count = length(var.availability_zones)
vpc_id = aws_vpc.prod.id
cidr_block = cidrsubnet(var.vpc_cidr, 8, count.index)
availability_zone = var.availability_zones[count.index]
map_public_ip_on_launch = false # 公有子网不放实例,通过 NAT 出去
tags = merge(local.common_tags, {
Name = "${var.environment}-public-${var.availability_zones[count.index]}"
Tier = "public"
})
}
私有子网(应用层,EC2/ECS 所在)
resource "aws_subnet" "private_app" {
count = length(var.availability_zones)
vpc_id = aws_vpc.prod.id
cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index + 1)
availability_zone = var.availability_zones[count.index]
map_public_ip_on_launch = false
tags = merge(local.common_tags, {
Name = "${var.environment}-private-app-${var.availability_zones[count.index]}"
Tier = "private_app"
})
}
数据子网(RDS/ElastiCache 所在,无外部路由)
resource "aws_subnet" "private_data" {
count = length(var.availability_zones)
vpc_id = aws_vpc.prod.id
cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index + 10)
availability_zone = var.availability_zones[count.index]
map_public_ip_on_launch = false
tags = merge(local.common_tags, {
Name = "${var.environment}-private-data-${var.availability_zones[count.index]}"
Tier = "private_data"
})
}
─── 网络层 ───────────────────────────────────────────────────
NAT Gateway(放在公有子网,让私有子网可以上外网)
resource "aws_eip" "nat" {
count = 1 # 单 NAT 节省成本;如需每 AZ 一个 NAT 可改为 length(var.availability_zones)
domain = "vpc"
tags = local.common_tags
}
resource "aws_nat_gateway" "main" {
count = 1 # 与 aws_eip.nat 数量保持一致
subnet_id = aws_subnet.public[0].id
allocation_id = aws_eip.nat[0].id
tags = merge(local.common_tags, {
Name = "${var.environment}-nat"
})
}
私有子网路由表(通过 NAT Gateway 出公网)
resource "aws_route_table" "private_app" {
vpc_id = aws_vpc.prod.id
route {
cidr_block = "0.0.0.0/0"
nat_gateway_id = aws_nat_gateway.main[0].id
}
tags = merge(local.common_tags, {
Name = "${var.environment}-private-app-rt"
})
}
数据子网路由表(完全内网,无公网出口)
resource "aws_route_table" "private_data" {
vpc_id = aws_vpc.prod.id
# 无 0.0.0.0/0 路由,RDS/Redis 只能内网访问
tags = merge(local.common_tags, {
Name = "${var.environment}-private-data-rt"
})
}
路由表关联
resource "aws_route_table_association" "private_app" {
count = length(var.availability_zones)
subnet_id = aws_subnet.private_app[count.index].id
route_table_id = aws_route_table.private_app.id
}
resource "aws_route_table_association" "private_data" {
count = length(var.availability_zones)
subnet_id = aws_subnet.private_data[count.index].id
route_table_id = aws_route_table.private_data.id
}
─── 安全组 ──────────────────────────────────────────────────
resource "aws_security_group" "alb" {
name = "${var.environment}-alb-sg"
description = "Load Balancer 安全组"
vpc_id = aws_vpc.prod.id
ingress {
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = merge(local.common_tags, { Name = "${var.environment}-alb-sg" })
}
resource "aws_security_group" "app" {
name = "${var.environment}-app-sg"
description = "应用层安全组(接受 ALB 流量)"
vpc_id = aws_vpc.prod.id
ingress {
from_port = 8080
to_port = 8080
protocol = "tcp"
security_groups = [aws_security_group.alb.id]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = merge(local.common_tags, { Name = "${var.environment}-app-sg" })
}
resource "aws_security_group" "rds" {
name = "${var.environment}-rds-sg"
description = "RDS 安全组(只允许 App 层访问)"
vpc_id = aws_vpc.prod.id
ingress {
from_port = 3306
to_port = 3306
protocol = "tcp"
security_groups = [aws_security_group.app.id]
}
tags = merge(local.common_tags, { Name = "${var.environment}-rds-sg" })
}
─── RDS MySQL ───────────────────────────────────────────────
resource "aws_db_subnet_group" "prod" {
name = "${var.environment}-db-subnet"
subnet_ids = aws_subnet.private_data[*].id
tags = merge(local.common_tags, { Name = "${var.environment}-db-subnet" })
}
resource "aws_db_instance" "prod" {
identifier = "${var.environment}-mysql"
engine = "mysql"
engine_version = "8.0"
instance_class = var.db_instance_class
allocated_storage = var.db_allocated_storage
storage_encrypted = true
storage_type = "gp3"
db_name = replace(var.environment, "-", "_")
username = "admin"
password = var.db_password # 生产从 Secrets Manager 读取
db_subnet_group_name = aws_db_subnet_group.prod.name
vpc_security_group_ids = [aws_security_group.rds.id]
multi_az = var.environment == "prod" ? true : false
backup_retention_period = var.environment == "prod" ? 14 : 1
backup_window = "03:00-04:00"
maintenance_window = "mon:04:00-mon:05:00"
final_snapshot_identifier = "${var.environment}-mysql-final-snap"
skip_final_snapshot = false
enabled_cloudwatch_logs_exports = ["error", "general", "slowquery"]
tags = local.common_tags
}
─── variables.tf ────────────────────────────────────────────
variable "environment" {
type = string
default = "prod"
}
variable "vpc_cidr" {
type = string
default = "10.0.0.0/16"
}
variable "availability_zones" {
type = list(string)
default = ["us-east-1a", "us-east-1b", "us-east-1c"]
}
variable "db_instance_class" {
type = string
default = "db.r6g.large"
}
variable "db_allocated_storage" {
type = number
default = 100
}
variable "db_password" {
type = string
sensitive = true
default = "" # 生产必须通过 -var 传入或使用 secretsmanager
}
─── locals.tf ────────────────────────────────────────────────
locals {
common_tags = {
Environment = var.environment
Project = "opsdocs"
ManagedBy = "terraform"
Owner = "ops-team"
}
}
─── outputs.tf ───────────────────────────────────────────────
output "vpc_id" { value = aws_vpc.prod.id }
output "private_app_subnets" { value = aws_subnet.private_app[*].id }
output "private_data_subnets" { value = aws_subnet.private_data[*].id }
output "rds_endpoint" { value = aws_db_instance.prod.endpoint }
output "rds_arn" { value = aws_db_instance.prod.arn }
output "security_group_app_id" { value = aws_security_group.app.id }
output "security_group_rds_id" { value = aws_security_group.rds.id }
执行顺序:
terraform init
terraform validate
terraform plan -var="db_password=YourStrongPass123!" -var-file="prod.tfvars"
terraform apply -var="db_password=YourStrongPass123!" -var-file="prod.tfvars"
---
### 项目二:EKS 集群自动创建
─── EKS Cluster ────────────────────────────────────────────
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 19.0"
cluster_name = "${var.environment}-eks"
cluster_version = "1.28"
vpc_id = var.vpc_id
subnet_ids = var.private_subnet_ids
# EKS 托管节点组(系统 Pod 用)
eks_managed_node_groups = {
system = {
min_size = 2
max_size = 5
desired_size = 2
instance_types = ["t3.medium"]
capacity_type = "ON_DEMAND"
labels = { node-group = "system" }
taints = [
{
key = "node-role"
value = "system"
effect = "NO_SCHEDULE"
}
]
}
app = {
min_size = 2
max_size = 10
desired_size = 3
instance_types = ["t3.medium"]
capacity_type = "SPOT" # 生产推荐混用 SPOT 降低成本
labels = { node-group = "app" }
}
}
# kubeconfig 生成
enable_irsa = true # IAM Roles for Service Accounts(Pod 级 IAM 权限)
create_cluster_security_group = false
cluster_security_group_id = var.eks_cluster_sg_id
tags = {
Environment = var.environment
}
}
─── Karpenter 自动扩缩容 ────────────────────────────────────
# 注意:AWS Provider 中不存在 aws_karpenter_node_pool 资源。
# Karpenter 的 NodePool 是 Kubernetes CRD,需先通过 Helm 安装 Karpenter,
# 再用 kubernetes_manifest(或 kubectl)在集群内创建:
resource "kubernetes_manifest" "karpenter_node_pool" {
manifest = yamldecode(<<-EOF
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
  name: ${var.environment}-default
spec:
  weight: 100
  template:
    spec:
      requirements:
        - key: karpenter.sh/capacity-type
          operator: In
          values: ["on-demand", "spot"]
        - key: node.kubernetes.io/instance-type
          operator: In
          values: ["t3.medium", "t3.large", "m6i.large"]
        - key: topology.kubernetes.io/zone
          operator: In
          values: ["us-east-1a", "us-east-1b", "us-east-1c"]
        - key: kubernetes.io/os
          operator: In
          values: ["linux"]
  disruption:
    consolidationPolicy: WhenEmpty
    expireAfter: 72h
EOF
)
}
─── AWS Load Balancer Controller ─────────────────────────────
EKS 集群创建后部署 ALB Ingress Controller
resource "helm_release" "lb_controller" {
name = "aws-load-balancer-controller"
repository = "https://aws.github.io/eks-charts"
chart = "aws-load-balancer-controller"
namespace = "kube-system"
version = "1.6.0"
set {
name = "clusterName"
value = module.eks.cluster_name
}
depends_on = [module.eks]
}
---
## Workspace 多环境管理
### 概念与适用场景
Terraform Workspace 通过状态隔离实现同一套代码管理多个环境(dev/stag/prod)。每个 Workspace 有独立的 State 文件。
创建 workspace
terraform workspace new prod
terraform workspace new stag
terraform workspace new dev
切换 workspace
terraform workspace select prod
列出所有 workspace
terraform workspace list
### 多环境 Backend 隔离
每个 Workspace 使用独立的 S3 Key 路径,State 完全隔离:
versions.tf
terraform {
backend "s3" {
bucket = "prod-terraform-state"
region = "us-east-1"
encrypt = true
dynamodb_table = "terraform-locks"
# 注意:backend 块不支持插值,key 必须是字面量。
# 非 default workspace 的 state 会自动存放在 env:/<workspace>/<key> 路径下
# (前缀可通过 workspace_key_prefix 自定义)
key = "network/terraform.tfstate"
}
}
通过 -var-file 传递环境差异配置
prod.tfvars / stag.tfvars / dev.tfvars
### dev/stag/prod 变量示例
dev.tfvars
environment = "dev"
db_instance_class = "db.t3.micro"
db_allocated_storage = 20
enable_multi_az = false
instance_type = "t3.micro"
stag.tfvars
environment = "stag"
db_instance_class = "db.r6g.large"
db_allocated_storage = 50
enable_multi_az = false
instance_type = "t3.medium"
prod.tfvars
environment = "prod"
db_instance_class = "db.r6g.2xlarge"
db_allocated_storage = 200
enable_multi_az = true
instance_type = "m6i.2xlarge"
### Workspace 感知资源
根据 workspace 动态调整资源配置
resource "aws_db_instance" "prod" {
instance_class = terraform.workspace == "prod" ? "db.r6g.2xlarge" : "db.t3.micro"
multi_az = terraform.workspace == "prod" ? true : false
tags = {
Workspace = terraform.workspace
}
}
---
## Terraform Cloud / Atlantis 远程运行
### Terraform Cloud 远程执行
Terraform Cloud 提供免费的远程状态存储、团队执行、策略检查(OPA/Sentinel)。
versions.tf
terraform {
cloud {
organization = "your-org"
workspaces {
name = "prod-infra"
# 或通过 tags 管理:
# tags = ["production", "infrastructure"]
}
}
required_version = ">= 1.6.0"
required_providers {
aws = { source = "hashicorp/aws", version = "~> 5.0" }
}
}
配置完成后,`terraform login` 获取 Token,`terraform init` 自动连接到 Cloud。
.terraformrc 或 ~/.config/terraform/credentials.tfrc.json
credentials "app.terraform.io" {
token = "xxxx.atlasv1.xxxxx"
}
### Atlantis 本地远程运行
适合不想用 Terraform Cloud 的团队,Atlantis 在 Git webhooks 触发 `terraform plan/apply`,通过 PR 评论返回结果:
atlantis.yaml
version: 3 # atlantis.yaml 仓库级配置当前要求 version 3
automerge: true
parallel_apply: false
projects:
- name: prod-vpc
dir: terraform/vpc
workspace: prod
terraform_version: "1.6.6"
delete_source_branch_on_merge: true
apply_requirements: ["approved", "mergeable"]
- name: prod-eks
dir: terraform/eks
workspace: prod
terraform_version: "1.6.6"
apply_requirements: ["approved", "mergeable"]
autoplan:
when_modified: ["*.tf", "../modules/**/*.tf"]
enabled: true
Atlantis 部署(Docker,注意镜像末尾需要 server 子命令):
docker run --name atlantis -e ATLANTIS_GH_TOKEN=your_gh_token -e ATLANTIS_GH_USER=atlantis-bot -e ATLANTIS_GH_WEBHOOK_SECRET=your_webhook_secret -e ATLANTIS_REPO_ALLOWLIST="github.com/your-org/*" -v /root/atlantis.yaml:/atlantis.yaml -v /root/.aws:/root/.aws:ro -p 4141:4141 ghcr.io/runatlantis/atlantis:v0.26.0 server
---
## GitLab CI/CD 集成
### 完整 .gitlab-ci.yml
.gitlab-ci.yml
image:
name: hashicorp/terraform:1.6.6
entrypoint:
- /usr/bin/env
variables:
AWS_REGION: us-east-1
TF_STATE_BUCKET: prod-terraform-state
TF_DYNAMODB_TABLE: terraform-locks
GIT_DEPTH: 1
stages:
- validate
- plan
- apply
- destroy
.before_template: &before_template
- apk add --no-cache aws-cli curl
- export AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID
- export AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY
- export AWS_DEFAULT_REGION=$AWS_REGION
- terraform --version
- terraform init -upgrade -backend-config="bucket=$TF_STATE_BUCKET" -backend-config="key=${CI_PROJECT_PATH}/${CI_COMMIT_REF_NAME}/terraform.tfstate" -backend-config="dynamodb_table=$TF_DYNAMODB_TABLE" -backend-config="region=$AWS_REGION"
terraform_validate:
stage: validate
before_script:
- *before_template
script:
- terraform validate
- terraform fmt -check -recursive
only:
- merge_requests
- main
- develop
terraform_plan:
stage: plan
before_script:
- *before_template
script:
- terraform plan -var-file="${CI_COMMIT_REF_NAME}.tfvars" -out=tfplan
- echo "Plan complete"
artifacts:
name: tfplan
paths:
- tfplan
expire_in: 1 day
only:
- merge_requests
- main
- develop
dependencies:
- terraform_validate
terraform_apply:
stage: apply
before_script:
- *before_template
script:
- terraform apply -input=false tfplan # 应用已保存的 plan 文件时不能再传 -var-file
environment:
name: $CI_COMMIT_REF_NAME
only:
- main
when: manual
dependencies:
- terraform_plan
after_script:
- terraform output -json > terraform_output.json
artifacts:
name: tf_output
paths:
- terraform_output.json
expire_in: 30 days
terraform_destroy:
stage: destroy
before_script:
- *before_template
script:
- terraform destroy -var-file="${CI_COMMIT_REF_NAME}.tfvars" -auto-approve
environment:
name: $CI_COMMIT_REF_NAME
action: destroy
only:
- develop
when: manual
---
## 生产最佳实践
### 目录结构
terraform/
├── .terraform-version # 指定 Terraform 版本
├── .terraform.lock.hcl # 依赖锁定文件(提交到 Git)
├── versions.tf # Provider 和版本约束
├── provider.tf # Provider 配置
├── variables.tf # 变量定义
├── outputs.tf # 输出定义
├── locals.tf # 本地变量
├── main.tf # 根模块入口
├── terraform.tfvars # 本地测试变量
├── prod.tfvars # 生产变量
├── modules/ # 自定义模块
│ ├── vpc/
│ │ ├── main.tf
│ │ ├── variables.tf
│ │ └── outputs.tf
│ └── eks/
│ └── ...
└── env/
├── prod/
│ ├── main.tf
│ ├── variables.tf
│ └── outputs.tf
└── dev/
└── ...
### 安全实践
使用 KMS 加密 S3 状态
terraform {
backend "s3" {
bucket = "prod-terraform-state"
key = "network/terraform.tfstate"
region = "us-east-1"
encrypt = true
kms_key_id = "arn:aws:kms:us-east-1:123456789012:key/xxxxx"
dynamodb_table = "terraform-locks"
}
}
敏感变量不提交到 Git
.gitignore
*.tfvars
*.tfstate
.terraform/