This repository has been archived by the owner on Jun 20, 2023. It is now read-only.

Changes to scan_bucket.py to introduce --profile #153

Open · wants to merge 15 commits into master
31 changes: 31 additions & 0 deletions .github/workflows/deploy_lambda.yml
@@ -0,0 +1,31 @@
on:
  schedule:
    - cron: '0 0 1 * *'  # 00:00 UTC on the first day of every month

jobs:
  # This workflow contains a single job called "deploy"
  deploy:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@master

      - name: Build lambda function
        run: make archive

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-1

      - name: Deploy to lambda
        run: |
          aws lambda update-function-code --function-name bucket-antivirus-update --zip-file fileb://build/lambda.zip
          aws lambda update-function-code --function-name bucket-antivirus-function --zip-file fileb://build/lambda.zip

      - name: Notify of update
        run: aws sns publish --topic-arn arn:aws:sns:eu-west-1:224019267248:TechnicalAlerts-Group --message "bucket-antivirus-function/update has been updated" --subject "eu-west-1 Antivirus Update"
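
For what it's worth, the deploy step can be exercised locally with boto3 before wiring up the schedule. This is a sketch, not part of the PR; it reuses the function names, region, and zip path from the workflow above:

```python
import boto3


def deploy(zip_path="build/lambda.zip", region="eu-west-1"):
    """Push the built archive to both functions, mirroring the workflow's deploy step."""
    client = boto3.client("lambda", region_name=region)
    with open(zip_path, "rb") as f:
        code = f.read()
    for name in ("bucket-antivirus-update", "bucket-antivirus-function"):
        resp = client.update_function_code(FunctionName=name, ZipFile=code)
        print(f"updated {name}: {resp['FunctionArn']}")


if __name__ == "__main__":
    deploy()
```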
3 changes: 3 additions & 0 deletions .gitignore
@@ -117,3 +117,6 @@ tmp/

# EICAR Files
*eicar*

# Rebase from upstream fork
rebase-upstream.sh
7 changes: 6 additions & 1 deletion Dockerfile
@@ -21,12 +21,17 @@ RUN rm -rf /root/.cache/pip

# Download libraries we need to run in lambda
WORKDIR /tmp
-RUN yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update json-c pcre2
+RUN yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update json-c pcre2 libprelude gnutls libtasn1 lib64nettle nettle libtool-ltdl
RUN rpm2cpio clamav-0*.rpm | cpio -idmv
RUN rpm2cpio clamav-lib*.rpm | cpio -idmv
RUN rpm2cpio clamav-update*.rpm | cpio -idmv
RUN rpm2cpio json-c*.rpm | cpio -idmv
RUN rpm2cpio pcre*.rpm | cpio -idmv
+RUN rpm2cpio gnutls*.rpm | cpio -idmv
+RUN rpm2cpio nettle*.rpm | cpio -idmv
+RUN rpm2cpio libprelude*.rpm | cpio -idmv
+RUN rpm2cpio libtasn1*.rpm | cpio -idmv
+RUN rpm2cpio libtool-ltdl*.rpm | cpio -idmv

# Copy over the binaries and libraries
RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/
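The extra packages here (gnutls, nettle, libprelude, libtasn1, libtool-ltdl) appear to be runtime dependencies of the newer ClamAV build. A quick way to confirm nothing is still missing after extraction is to run `ldd` against the copied binary; a sketch, assuming the `/opt/app/bin` layout from this Dockerfile:

```python
import subprocess


def unresolved_libs(binary="/opt/app/bin/clamscan"):
    """Return ldd output lines for shared libraries that fail to resolve."""
    out = subprocess.run(["ldd", binary], capture_output=True, text=True).stdout
    return [line.strip() for line in out.splitlines() if "not found" in line]


if __name__ == "__main__":
    missing = unresolved_libs()
    print("\n".join(missing) if missing else "all shared libraries resolved")
```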
8 changes: 6 additions & 2 deletions README.md
@@ -333,17 +333,21 @@ It should be in the format provided below:
## Manually Scanning Buckets

You may want to scan all the objects in a bucket that have not previously been scanned or were created
-prior to setting up your lambda functions. To do this you can use the `scan_bucket.py` utility.
+prior to setting up your lambda functions. To do this you can use the `scan_bucket.py` utility. If the
+function and bucket live in a separate account, you can provide a profile name to use (please ensure
+the region is configured correctly for the profile).

```sh
pip install boto3
-scan_bucket.py --lambda-function-name=<lambda_function_name> --s3-bucket-name=<s3-bucket-to-scan>
+scan_bucket.py --lambda-function-name=<lambda_function_name> --s3-bucket-name=<s3-bucket-to-scan> --profile=<aws-profile-name> --limit=<number-of-files-to-scan>
```

This tool will scan all objects that have not been previously scanned in the bucket and invoke the lambda function
asynchronously. As such you'll have to go to your CloudWatch logs to see the scan results or failures. Additionally,
the script uses the same environment variables you'd use in your lambda, so you can configure them similarly.

+Providing `--limit` is useful for testing; it limits the number of files scanned before the Lambda function is invoked.
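
If you're unsure whether a named profile has a region configured, boto3 will tell you directly; a minimal sketch (the profile name `scanner` is illustrative):

```python
import boto3

# Prints None if the profile has no region configured (set one in ~/.aws/config).
sess = boto3.session.Session(profile_name="scanner")
print(sess.region_name)
```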

## Testing

There are two types of tests in this repository: pre-commit tests and Python tests. All of
21 changes: 15 additions & 6 deletions scan_bucket.py
@@ -26,19 +26,23 @@


# Get all objects in an S3 bucket that have not been previously scanned
-def get_objects(s3_client, s3_bucket_name):
+def get_objects(s3_client, s3_bucket_name, limit):

    s3_object_list = []

    s3_list_objects_result = {"IsTruncated": True}
    while s3_list_objects_result["IsTruncated"]:
+        print(f"Update: Objects to be scanned = {len(s3_object_list)}")
        s3_list_objects_config = {"Bucket": s3_bucket_name}
        continuation_token = s3_list_objects_result.get("NextContinuationToken")
        if continuation_token:
            s3_list_objects_config["ContinuationToken"] = continuation_token
        s3_list_objects_result = s3_client.list_objects_v2(**s3_list_objects_config)
        if "Contents" not in s3_list_objects_result:
            break
+        if limit:
+            if len(s3_object_list) >= limit:
+                break
        for key in s3_list_objects_result["Contents"]:
            key_name = key["Key"]
            # Don't include objects that have been scanned
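As an aside, the continuation-token loop above can also be expressed with boto3's built-in paginator. This sketch is an alternative for comparison, not part of the PR, and it omits the already-scanned filter that the original applies per key:

```python
def get_objects_paginated(s3_client, s3_bucket_name, limit=None):
    """List keys using boto3's paginator instead of a manual token loop."""
    keys = []
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=s3_bucket_name):
        for obj in page.get("Contents", []):
            keys.append(obj["Key"])
            if limit and len(keys) >= limit:
                return keys
    return keys
```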
@@ -85,27 +85,29 @@ def format_s3_event(s3_bucket_name, key_name):
    return s3_event


-def main(lambda_function_name, s3_bucket_name, limit):
+def main(lambda_function_name, s3_bucket_name, profile, limit):
    # Verify the lambda exists
-    lambda_client = boto3.client("lambda")
+    sess = boto3.session.Session(profile_name=profile)
+    lambda_client = sess.client("lambda")
    try:
        lambda_client.get_function(FunctionName=lambda_function_name)
    except Exception:
        print("Lambda Function '{}' does not exist".format(lambda_function_name))
        sys.exit(1)

    # Verify the S3 bucket exists
-    s3_client = boto3.client("s3")
+    s3_client = sess.client("s3")
    try:
        s3_client.head_bucket(Bucket=s3_bucket_name)
    except Exception:
        print("S3 Bucket '{}' does not exist".format(s3_bucket_name))
        sys.exit(1)

    # Scan the objects in the bucket
-    s3_object_list = get_objects(s3_client, s3_bucket_name)
+    s3_object_list = get_objects(s3_client, s3_bucket_name, limit)
    if limit:
        s3_object_list = s3_object_list[: min(limit, len(s3_object_list))]
+    print(f"Final: Objects to be scanned: {len(s3_object_list)}")
    for key_name in s3_object_list:
        scan_object(lambda_client, lambda_function_name, s3_bucket_name, key_name)
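
One behavioral note on the session change, worth verifying before merge (this is a reviewer observation, not from the PR): because `--profile` defaults to `"default"`, the script now always resolves credentials from the shared config/credentials files, and explicitly naming a profile may bypass environment-variable credentials. A small sketch of the before/after:

```python
import boto3

# After this PR: credentials come from the named profile in ~/.aws/credentials.
sess = boto3.session.Session(profile_name="default")
lambda_client = sess.client("lambda")

# Before this PR: the full default resolver chain was used (env vars, profile,
# instance role), so AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY alone sufficed.
legacy_client = boto3.client("lambda")
```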

@@ -120,7 +126,10 @@ def main(lambda_function_name, s3_bucket_name, limit):
    parser.add_argument(
        "--s3-bucket-name", required=True, help="The name of the S3 bucket to scan"
    )
+    parser.add_argument(
+        "--profile", help="AWS profile to use", default="default"
+    )
    parser.add_argument("--limit", type=int, help="The number of records to limit to")
    args = parser.parse_args()

-    main(args.lambda_function_name, args.s3_bucket_name, args.limit)
+    main(args.lambda_function_name, args.s3_bucket_name, args.profile, args.limit)
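
Called from Python rather than the shell, the new signature lines up like this (a sketch; the function, bucket, and profile names are illustrative):

```python
# Equivalent to:
#   scan_bucket.py --lambda-function-name=my-av-fn --s3-bucket-name=my-bucket \
#       --profile=scanner --limit=10
main("my-av-fn", "my-bucket", "scanner", 10)
```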