Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(aws): aws check and metadata fixes #4251

Merged
merged 7 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
"Data Protection"
],
"ServiceName": "ec2",
"SubServiceName": "snapshot",
"SubServiceName": "volume",
"ResourceIdTemplate": "arn:partition:service:region:account-id:resource-id",
"Severity": "medium",
"ResourceType": "AwsEc2Snapshot",
"ResourceType": "AwsEc2Volume",
"Description": "Check if EBS snapshots exists.",
"Risk": "Ensure that your EBS volumes (available or in-use) have recent snapshots (taken weekly) available for point-in-time recovery for a better, more reliable data backup strategy.",
"RelatedUrl": "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSSnapshots.html",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,23 @@ class ec2_instance_managed_by_ssm(Check):
def execute(self):
findings = []
for instance in ec2_client.instances:
if instance.state != "terminated":
report = Check_Report_AWS(self.metadata())
report.region = instance.region
report.resource_arn = instance.arn
report.resource_tags = instance.tags
report.status = "PASS"
report = Check_Report_AWS(self.metadata())
report.region = instance.region
report.resource_arn = instance.arn
report.resource_tags = instance.tags
report.status = "PASS"
report.status_extended = (
f"EC2 Instance {instance.id} is managed by Systems Manager."
)
report.resource_id = instance.id
# instances not running should pass the check
if instance.state in ["pending", "terminated", "stopped"]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add some test to handle this new behaviour? One for each new state. Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New tests added for "running", "stopped", and "terminated". Moto doesn't appear to support mocking the temporary statuses like "pending" in a way that the prowler services can read.

report.status_extended = f"EC2 Instance {instance.id} is unmanaged by Systems Manager because it is {instance.state}."
elif not ssm_client.managed_instances.get(instance.id):
report.status = "FAIL"
report.status_extended = (
f"EC2 Instance {instance.id} is managed by Systems Manager."
f"EC2 Instance {instance.id} is not managed by Systems Manager."
)
report.resource_id = instance.id
if not ssm_client.managed_instances.get(instance.id):
report.status = "FAIL"
report.status_extended = (
f"EC2 Instance {instance.id} is not managed by Systems Manager."
)
findings.append(report)
findings.append(report)

return findings
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"SubServiceName": "",
"ResourceIdTemplate": "arn:aws:sns:region:account-id:topic",
"Severity": "high",
"ResourceType": "AwsSNSTopic",
"ResourceType": "AwsSnsTopic",
"Description": "Ensure there are no SNS Topics unencrypted",
"Risk": "If not enabled sensitive information at rest is not protected.",
"RelatedUrl": "https://docs.aws.amazon.com/sns/latest/dg/sns-server-side-encryption.html",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"SubServiceName": "",
"ResourceIdTemplate": "arn:aws:sns:region:account-id:topic",
"Severity": "high",
"ResourceType": "AwsSNSTopic",
"ResourceType": "AwsSnsTopic",
"Description": "Check if SNS topics have policy set as Public",
"Risk": "Publicly accessible services could expose sensitive data to bad actors.",
"RelatedUrl": "https://docs.aws.amazon.com/config/latest/developerguide/sns-topic-policy.html",
Expand Down
5 changes: 5 additions & 0 deletions prowler/providers/aws/services/ssm/ssm_service.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import time
from enum import Enum
from typing import Optional

Expand Down Expand Up @@ -145,6 +146,10 @@ def __describe_instance_information__(self, regional_client):
id=resource_id,
region=regional_client.region,
)
# boto3 does not properly handle throttling exceptions for
# ssm:DescribeInstanceInformation when there are large numbers of instances
# AWS support recommends manually reducing frequency of requests
time.sleep(0.1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure about including this here, it doesn't seem as something deterministic. If there are rate limiting errors we should handle that explicitly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The boto3 paginator should be handling the throttling exceptions and retrying, but it doesn't work consistently for ssm:DescribeInstanceInformation and exhausts retries. We escalated this to AWS support and their recommendation was "Reduce the frequency of the API calls", which is why I've been using a short sleep between pages like this which has been working. The rate limits for this action are not documented and seem to behave differently from other AWS actions, and the problem is difficult to test for because it only shows up when you have large numbers of instances running, in our case ephemeral Spot processing jobs. When the problem occurs it generates large numbers of false positives because the ssm instance metadata is not captured and ec2_instance_managed_by_ssm fails for most of the instances, and we have not seen that happen after adding the sleep.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add this message as a comment above the time sleep? It'd be great to have this.

Thanks for your analysis 👏

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, done.


except Exception as error:
logger.error(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,177 @@ def test_ec2_instance_managed_by_ssm_compliance_instance(self):
== f"EC2 Instance {instance.id} is managed by Systems Manager."
)
assert result[0].resource_id == instance.id

@mock_aws
def test_ec2_instance_managed_by_ssm_running(self):
ec2 = resource("ec2", region_name=AWS_REGION_US_EAST_1)
instances_pending = ec2.create_instances(
ImageId=EXAMPLE_AMI_ID,
MinCount=2,
MaxCount=2,
UserData="This is some user_data",
)
instance_managed = ec2.Instance(instances_pending[0].id)
instance_unmanaged = ec2.Instance(instances_pending[1].id)
assert instance_managed.state["Name"] == "running"
assert instance_unmanaged.state["Name"] == "running"

ssm_client = mock.MagicMock
ssm_client.managed_instances = {
instance_managed.id: ManagedInstance(
arn=f"arn:aws:ec2:{AWS_REGION_US_EAST_1}:{AWS_ACCOUNT_NUMBER}:instance/{instance_managed.id}",
id=instance_managed.id,
region=AWS_REGION_US_EAST_1,
)
}

from prowler.providers.aws.services.ec2.ec2_service import EC2

aws_provider = set_mocked_aws_provider(
[AWS_REGION_EU_WEST_1, AWS_REGION_US_EAST_1]
)

with mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
return_value=aws_provider,
), mock.patch(
"prowler.providers.aws.services.ssm.ssm_service.SSM",
new=ssm_client,
), mock.patch(
"prowler.providers.aws.services.ssm.ssm_client.ssm_client",
new=ssm_client,
), mock.patch(
"prowler.providers.aws.services.ec2.ec2_instance_managed_by_ssm.ec2_instance_managed_by_ssm.ec2_client",
new=EC2(aws_provider),
):
# Test Check
from prowler.providers.aws.services.ec2.ec2_instance_managed_by_ssm.ec2_instance_managed_by_ssm import (
ec2_instance_managed_by_ssm,
)

check = ec2_instance_managed_by_ssm()
results = check.execute()

assert len(results) == 2
for result in results:
if result.resource_id == instance_managed.id:
assert result.status == "PASS"
assert result.region == AWS_REGION_US_EAST_1
assert result.resource_tags is None
assert (
result.status_extended
== f"EC2 Instance {instance_managed.id} is managed by Systems Manager."
)

if result.resource_id == instance_unmanaged.id:
assert result.status == "FAIL"
assert result.region == AWS_REGION_US_EAST_1
assert result.resource_tags is None
assert (
result.status_extended
== f"EC2 Instance {instance_unmanaged.id} is not managed by Systems Manager."
)

@mock_aws
def test_ec2_instance_managed_by_ssm_stopped(self):
ec2 = resource("ec2", region_name=AWS_REGION_US_EAST_1)
instances_pending = ec2.create_instances(
ImageId=EXAMPLE_AMI_ID,
MinCount=1,
MaxCount=1,
UserData="This is some user_data",
)
instances_pending[0].stop()
instance = ec2.Instance(instances_pending[0].id)
assert instance.state["Name"] == "stopped"

ssm_client = mock.MagicMock
ssm_client.managed_instances = {}

from prowler.providers.aws.services.ec2.ec2_service import EC2

aws_provider = set_mocked_aws_provider(
[AWS_REGION_EU_WEST_1, AWS_REGION_US_EAST_1]
)

with mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
return_value=aws_provider,
), mock.patch(
"prowler.providers.aws.services.ssm.ssm_service.SSM",
new=ssm_client,
), mock.patch(
"prowler.providers.aws.services.ssm.ssm_client.ssm_client",
new=ssm_client,
), mock.patch(
"prowler.providers.aws.services.ec2.ec2_instance_managed_by_ssm.ec2_instance_managed_by_ssm.ec2_client",
new=EC2(aws_provider),
):
# Test Check
from prowler.providers.aws.services.ec2.ec2_instance_managed_by_ssm.ec2_instance_managed_by_ssm import (
ec2_instance_managed_by_ssm,
)

check = ec2_instance_managed_by_ssm()
result = check.execute()

assert len(result) == 1
assert result[0].status == "PASS"
assert result[0].region == AWS_REGION_US_EAST_1
assert result[0].resource_tags is None
assert (
result[0].status_extended
== f"EC2 Instance {instance.id} is unmanaged by Systems Manager because it is stopped."
)

@mock_aws
def test_ec2_instance_managed_by_ssm_terminated(self):
ec2 = resource("ec2", region_name=AWS_REGION_US_EAST_1)
instances_pending = ec2.create_instances(
ImageId=EXAMPLE_AMI_ID,
MinCount=1,
MaxCount=1,
UserData="This is some user_data",
)
instances_pending[0].terminate()
instance = ec2.Instance(instances_pending[0].id)
assert instance.state["Name"] == "terminated"

ssm_client = mock.MagicMock
ssm_client.managed_instances = {}

from prowler.providers.aws.services.ec2.ec2_service import EC2

aws_provider = set_mocked_aws_provider(
[AWS_REGION_EU_WEST_1, AWS_REGION_US_EAST_1]
)

with mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
return_value=aws_provider,
), mock.patch(
"prowler.providers.aws.services.ssm.ssm_service.SSM",
new=ssm_client,
), mock.patch(
"prowler.providers.aws.services.ssm.ssm_client.ssm_client",
new=ssm_client,
), mock.patch(
"prowler.providers.aws.services.ec2.ec2_instance_managed_by_ssm.ec2_instance_managed_by_ssm.ec2_client",
new=EC2(aws_provider),
):
# Test Check
from prowler.providers.aws.services.ec2.ec2_instance_managed_by_ssm.ec2_instance_managed_by_ssm import (
ec2_instance_managed_by_ssm,
)

check = ec2_instance_managed_by_ssm()
result = check.execute()

assert len(result) == 1
assert result[0].status == "PASS"
assert result[0].region == AWS_REGION_US_EAST_1
assert result[0].resource_tags is None
assert (
result[0].status_extended
== f"EC2 Instance {instance.id} is unmanaged by Systems Manager because it is terminated."
)