Commit f7adab10 authored by Aleksander Cynarski
Browse files

Merge branch 'dev-63' into 'master'

SST-63: Terraform module for CW alert notifications

See merge request !1
parents 4d327d3c fd797ef5
# Looks up every EC2 instance listed in var.ec2_ids so the alarm resources
# can read its ID, tags and IP addresses. The resulting map is keyed by
# instance ID.
data "aws_instance" "monitored" {
  for_each = toset(var.ec2_ids)

  # For a set, each.key and each.value are the same element.
  instance_id = each.value
}
# CPU alarm, one per monitored instance: fires when the average AWS/EC2
# CPUUtilization is >= 95 for two consecutive 60-second periods and
# publishes to the qw_alarms SNS topic.
resource "aws_cloudwatch_metric_alarm" "cpu" {
  for_each = data.aws_instance.monitored

  # Fall back to the instance ID (the for_each key) when the instance has no
  # "Name" tag; a direct tags.Name reference fails the plan in that case.
  alarm_name        = "CPU Usage:${lookup(each.value.tags, "Name", each.key)}"
  alarm_description = "CPU alarm on ${lookup(each.value.tags, "Name", each.key)} IPs: ${each.value.public_ip} (${each.value.private_ip})"

  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"
  statistic   = "Average"
  dimensions = {
    InstanceId = each.value.id
  }

  # Numeric arguments are written as numbers rather than quoted strings.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 95
  period              = 60
  evaluation_periods  = 2

  alarm_actions = [aws_sns_topic.qw_alarms.arn]
}
# Status-check alarm, one per monitored instance: fires when the average
# AWS/EC2 StatusCheckFailed value is >= 1 over a single 60-second period and
# publishes to the qw_alarms SNS topic.
resource "aws_cloudwatch_metric_alarm" "health" {
  for_each = data.aws_instance.monitored

  # Fall back to the instance ID (the for_each key) when the instance has no
  # "Name" tag; a direct tags.Name reference fails the plan in that case.
  alarm_name        = "HealthCheck:${lookup(each.value.tags, "Name", each.key)}"
  alarm_description = "EC2 Health on ${lookup(each.value.tags, "Name", each.key)} IPs: ${each.value.public_ip} (${each.value.private_ip})"

  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed"
  statistic   = "Average"
  dimensions = {
    InstanceId = each.value.id
  }

  # Numeric arguments are written as numbers rather than quoted strings.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1
  period              = 60
  evaluation_periods  = 1

  alarm_actions = [aws_sns_topic.qw_alarms.arn]
}
# Memory alarm, one per monitored instance: fires when the average
# CWAgent mem_used_percent is >= 90 over a single 60-second period and
# publishes to the qw_alarms SNS topic.
resource "aws_cloudwatch_metric_alarm" "mem_usage" {
  for_each = data.aws_instance.monitored

  # Fall back to the instance ID (the for_each key) when the instance has no
  # "Name" tag; a direct tags.Name reference fails the plan in that case.
  alarm_name        = "MemUsage:${lookup(each.value.tags, "Name", each.key)}"
  alarm_description = "Memory alarm on ${lookup(each.value.tags, "Name", each.key)} IPs: ${each.value.public_ip} (${each.value.private_ip})"

  # Metric comes from the CloudWatch agent, not from EC2 itself.
  namespace   = "CWAgent"
  metric_name = "mem_used_percent"
  statistic   = "Average"
  dimensions = {
    InstanceId = each.value.id
  }

  # Numeric arguments are written as numbers rather than quoted strings.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 90
  period              = 60
  evaluation_periods  = 1

  alarm_actions = [aws_sns_topic.qw_alarms.arn]
}
# Zombie-process alarm, one per monitored instance: fires when the average
# CWAgent processes_zombies value is >= 1 over a single 60-second period and
# publishes to the qw_alarms SNS topic.
resource "aws_cloudwatch_metric_alarm" "processes_zombies" {
  for_each = data.aws_instance.monitored

  # Fall back to the instance ID (the for_each key) when the instance has no
  # "Name" tag; a direct tags.Name reference fails the plan in that case.
  alarm_name        = "ZombieProcess:${lookup(each.value.tags, "Name", each.key)}"
  alarm_description = "Zombie processes on ${lookup(each.value.tags, "Name", each.key)} IPs: ${each.value.public_ip} (${each.value.private_ip})"

  # Metric comes from the CloudWatch agent, not from EC2 itself.
  namespace   = "CWAgent"
  metric_name = "processes_zombies"
  statistic   = "Average"
  dimensions = {
    InstanceId = each.value.id
  }

  # Numeric arguments are written as numbers rather than quoted strings.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1
  period              = 60
  evaluation_periods  = 1

  alarm_actions = [aws_sns_topic.qw_alarms.arn]
}
# Root-filesystem alarm, one per monitored instance: fires when the average
# CWAgent disk_used_percent is >= 90 over a single 60-second period and
# publishes to the qw_alarms SNS topic.
resource "aws_cloudwatch_metric_alarm" "disk_usage" {
  for_each = data.aws_instance.monitored

  # Fall back to the instance ID (the for_each key) when the instance has no
  # "Name" tag; a direct tags.Name reference fails the plan in that case.
  alarm_name        = "DiskUsage:${lookup(each.value.tags, "Name", each.key)}"
  alarm_description = "Disk alarm on ${lookup(each.value.tags, "Name", each.key)} IPs: ${each.value.public_ip} (${each.value.private_ip})"

  # Metric comes from the CloudWatch agent, not from EC2 itself.
  namespace   = "CWAgent"
  metric_name = "disk_used_percent"
  statistic   = "Average"

  # NOTE(review): fstype and path are hard-coded, so this alarm presumably
  # only sees data on instances whose "/" filesystem is xfs - confirm for
  # every instance passed in var.ec2_ids.
  dimensions = {
    InstanceId = each.value.id
    fstype     = "xfs"
    path       = "/"
  }

  # Numeric arguments are written as numbers rather than quoted strings.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 90
  period              = 60
  evaluation_periods  = 1

  alarm_actions = [aws_sns_topic.qw_alarms.arn]
}
{
  "agent": {
    "metrics_collection_interval": 60,
    "run_as_user": "root"
  },
  "metrics": {
    "append_dimensions": {
      "InstanceId": "${aws:InstanceId}"
    },
    "metrics_collected": {
      "cpu": {
        "measurement": [
          "cpu_usage_idle",
          "cpu_usage_iowait",
          "cpu_usage_user",
          "cpu_usage_system"
        ],
        "metrics_collection_interval": 60,
        "totalcpu": true
      },
      "disk": {
        "drop_device": true,
        "measurement": [
          "used_percent",
          "inodes_free"
        ],
        "metrics_collection_interval": 60,
        "resources": [
          "*"
        ]
      },
      "diskio": {
        "measurement": [
          "io_time"
        ],
        "metrics_collection_interval": 60,
        "resources": [
          "*"
        ]
      },
      "mem": {
        "measurement": [
          "mem_used_percent"
        ],
        "metrics_collection_interval": 60
      },
      "swap": {
        "measurement": [
          "swap_used_percent"
        ],
        "metrics_collection_interval": 60
      },
      "processes": {
        "measurement": [
          "zombies"
        ],
        "metrics_collection_interval": 60
      }
    }
  }
}
\ No newline at end of file
......@@ -111,10 +111,9 @@ function prepareCWAlarmPayload(record) {
exports.handler = async function (event, context) {
try {
var channel = process.env.SLACK_CHANNEL
var username = process.env.SLACK_USERNAME
var webhookUrl = process.env.SLACK_WEBHOOK
var channel = process.env.NOTIFICATION_CHANNEL
var username = process.env.NOTIFICATION_USERNAME
var webhookUrl = process.env.NOTIFICATION_WEBHOOK
var urlsList = webhookUrl.split(',')
var channelsList = channel.split(',')
......
No preview for this file type
......@@ -4,26 +4,25 @@ data "archive_file" "notify_js" {
output_path = "${path.module}/lambda/notification.zip"
}
# Lambda function packaged from data.archive_file.notify_js; per its
# description it sends notifications to Mattermost. Channel, username and
# webhook are passed to the handler through environment variables.
# NOTE(review): this span is a rendered diff hunk - function_name, role and
# the environment entries each appear twice, presumably the removed line
# followed by its replacement. Confirm against the merged file.
# NOTE(review): "nodejs12.x" is, as far as I know, no longer a supported
# Lambda runtime - confirm and bump if so.
resource "aws_lambda_function" "notify" {
depends_on = [data.archive_file.notify_js]
function_name = "notification"
function_name = "${var.env}-notification"
description = "Send notifications to Mattermost"
runtime = "nodejs12.x"
handler = "notification.handler"
role = aws_iam_role.slack_notify.arn
role = aws_iam_role.notify.arn
filename = data.archive_file.notify_js.output_path
# Hash of the generated zip, so a change in the archive triggers a redeploy.
source_code_hash = filebase64sha256(data.archive_file.notify_js.output_path)
environment {
variables = {
SLACK_CHANNEL = var.slack_notification_channel
SLACK_USERNAME = var.slack_notification_user
SLACK_WEBHOOK = var.slack_notification_url
NOTIFICATION_CHANNEL = var.alert_notify_channel
NOTIFICATION_USERNAME = var.alert_notify_user
NOTIFICATION_WEBHOOK = var.alert_notify_url
}
}
}
......@@ -42,11 +41,11 @@ data "aws_iam_policy_document" "assume_role" {
}
# IAM role referenced by the notify Lambda; its trust policy comes from
# data.aws_iam_policy_document.assume_role.
# NOTE(review): the two name lines look like the removed and added sides of
# the diff hunk - confirm against the merged file.
resource "aws_iam_role" "notify" {
name = "SlackNotifications"
name = "${var.env}-alert-notify"
assume_role_policy = data.aws_iam_policy_document.assume_role.json
}
data "aws_iam_policy_document" "slack_notify" {
data "aws_iam_policy_document" "notify" {
statement {
sid = "CloudwatchLogs"
effect = "Allow"
......@@ -60,8 +59,8 @@ data "aws_iam_policy_document" "slack_notify" {
}
}
resource "aws_iam_role_policy" "slack_notify" {
name = "SlackNotifications"
resource "aws_iam_role_policy" "notify" {
name = "${var.env}-alert-notify"
role = aws_iam_role.notify.id
policy = data.aws_iam_policy_document.notify.json
}
......
# Stores the contents of this module's cloudwatch_config.json in an SSM
# String parameter named "CloudWatchConfiguration".
resource "aws_ssm_parameter" "cloudwatch_agent_configuration" {
  name      = "CloudWatchConfiguration"
  type      = "String"
  overwrite = true

  # Read relative to the module so the path works from any root module.
  value = file("${path.module}/cloudwatch_config.json")
}
\ No newline at end of file
# SNS topic the CloudWatch metric alarms in this module publish to
# (it is the alarm_actions target of each alarm).
# NOTE(review): name and display_name each appear twice, presumably the
# removed and added sides of the diff hunk - confirm against the merged file.
resource "aws_sns_topic" "qw_alarms" {
name = "cloudwatch-alarms"
display_name = "Cloudwatch Alarms"
name = "${var.env}-cloudwatch-alarms"
display_name = "Cloudwatch Alarms (${var.env})"
}
resource "aws_lambda_permission" "perm_alarms" {
statement_id = "AllowExecutionFromCloudWatchSNS"
statement_id = "${var.env}-AllowExecutionFromCloudWatchSNSAlarms"
action = "lambda:InvokeFunction"
function_name = aws_lambda_function.notify.arn
principal = "sns.amazonaws.com"
......@@ -12,19 +12,19 @@ resource "aws_lambda_permission" "perm_alarms" {
}
# Subscribes the notify Lambda to the qw_alarms topic.
# NOTE(review): the two depends_on lines look like the removed and added
# sides of the diff hunk - confirm against the merged file.
resource "aws_sns_topic_subscription" "sub_alarms" {
depends_on = [aws_lambda_permission.with_sns]
depends_on = [aws_lambda_permission.perm_alarms]
topic_arn = aws_sns_topic.qw_alarms.arn
protocol = "lambda"
endpoint = aws_lambda_function.notify.arn
}
# SNS topic for CloudWatch warnings. No alarm in the visible part of this
# module publishes to it.
# NOTE(review): name and display_name each appear twice, presumably the
# removed and added sides of the diff hunk - confirm against the merged file.
resource "aws_sns_topic" "qw_warnings" {
name = "cloudwatch-warnings"
display_name = "Cloudwatch Warnings"
name = "${var.env}-cloudwatch-warnings"
display_name = "Cloudwatch Warnings (${var.env})"
}
resource "aws_lambda_permission" "perm_warnings" {
statement_id = "AllowExecutionFromCloudWatchSNS"
statement_id = "${var.env}-AllowExecutionFromCloudWatchSNSWarnings"
action = "lambda:InvokeFunction"
function_name = aws_lambda_function.notify.arn
principal = "sns.amazonaws.com"
......@@ -32,7 +32,7 @@ resource "aws_lambda_permission" "perm_warnings" {
}
# Subscribes the notify Lambda to the qw_warnings topic.
resource "aws_sns_topic_subscription" "sub_warnings" {
depends_on = [aws_lambda_permission.with_sns]
depends_on = [aws_lambda_permission.perm_warnings]
# Must reference the warnings topic: pointing at qw_alarms (as before)
# duplicates sub_alarms and leaves qw_warnings without any subscriber.
topic_arn = aws_sns_topic.qw_warnings.arn
protocol = "lambda"
endpoint = aws_lambda_function.notify.arn
......
variable "notification_channel" {
# Environment name; the module interpolates it into resource names
# (for example "${var.env}-notification").
variable "env" {
  description = "Deployment environment."
  type        = string
}
# Passed to the notify Lambda as NOTIFICATION_CHANNEL.
variable "alert_notify_channel" {
  description = "Mattermost notification channel"
  type        = string
}
variable "notification_user" {
# Passed to the notify Lambda as NOTIFICATION_USERNAME; defaults to "AWS".
variable "alert_notify_user" {
  description = "Mattermost notification user"
  type        = string
  default     = "AWS"
}
variable "notification_url" {
# Passed to the notify Lambda as NOTIFICATION_WEBHOOK.
# NOTE(review): a webhook URL is usually a secret - consider marking this
# variable sensitive if the Terraform version in use supports it.
variable "alert_notify_url" {
  description = "Mattermost notification url"
  type        = string
}
# EC2 instance IDs to attach alarms to. The values are converted with
# toset() and used as for_each keys / instance_id lookups, so they must be
# strings; list(string) enforces that instead of the looser list(any).
# An empty list (the default) creates no alarms.
variable "ec2_ids" {
  type        = list(string)
  default     = []
  description = "List of EC2 instances to which add monitoring"
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment