This repository has been archived by the owner on Jun 20, 2023. It is now read-only.

Changes to scan_bucket.py to introduce --profile #153

Open · wants to merge 15 commits into master
31 changes: 31 additions & 0 deletions .github/workflows/deploy_lambda.yml
@@ -0,0 +1,31 @@
on:
  schedule:
    - cron: '0 0 1 * *'  # 00:00 UTC on the first day of every month

jobs:
  # This workflow contains a single job called "deploy"
  deploy:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@master

      - name: Build lambda function
        run: make archive

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-1

      - name: Deploy to lambda
        run: |
          aws lambda update-function-code --function-name bucket-antivirus-update --zip-file fileb://build/lambda.zip
          aws lambda update-function-code --function-name bucket-antivirus-function --zip-file fileb://build/lambda.zip

      - name: Notify of update
        run: aws sns publish --topic-arn arn:aws:sns:eu-west-1:224019267248:TechnicalAlerts-Group --message "bucket-antivirus-function/update has been updated" --subject "eu-west-1 Antivirus Update"
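
For what it's worth, the deploy step can be exercised locally with boto3 before wiring up the schedule. This is a sketch, not part of the PR; it reuses the function names, region, and zip path from the workflow above:

```python
import boto3


def deploy(zip_path="build/lambda.zip", region="eu-west-1"):
    """Push the built archive to both functions, mirroring the workflow's deploy step."""
    client = boto3.client("lambda", region_name=region)
    with open(zip_path, "rb") as f:
        code = f.read()
    for name in ("bucket-antivirus-update", "bucket-antivirus-function"):
        resp = client.update_function_code(FunctionName=name, ZipFile=code)
        print(f"updated {name}: {resp['FunctionArn']}")


if __name__ == "__main__":
    deploy()
```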
3 changes: 3 additions & 0 deletions .gitignore
@@ -117,3 +117,6 @@ tmp/

# EICAR Files
*eicar*

# Rebase from upstream fork
rebase-upstream.sh
7 changes: 6 additions & 1 deletion Dockerfile
@@ -21,12 +21,17 @@ RUN rm -rf /root/.cache/pip

# Download libraries we need to run in lambda
WORKDIR /tmp
-RUN yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update json-c pcre2
+RUN yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update json-c pcre2 libprelude gnutls libtasn1 lib64nettle nettle libtool-ltdl
RUN rpm2cpio clamav-0*.rpm | cpio -idmv
RUN rpm2cpio clamav-lib*.rpm | cpio -idmv
RUN rpm2cpio clamav-update*.rpm | cpio -idmv
RUN rpm2cpio json-c*.rpm | cpio -idmv
RUN rpm2cpio pcre*.rpm | cpio -idmv
+RUN rpm2cpio gnutls*.rpm | cpio -idmv
+RUN rpm2cpio nettle*.rpm | cpio -idmv
+RUN rpm2cpio libprelude*.rpm | cpio -idmv
+RUN rpm2cpio libtasn1*.rpm | cpio -idmv
+RUN rpm2cpio libtool-ltdl*.rpm | cpio -idmv

# Copy over the binaries and libraries
RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/
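The extra packages here (gnutls, nettle, libprelude, libtasn1, libtool-ltdl) appear to be runtime dependencies of the newer ClamAV build. A quick way to confirm nothing is still missing after extraction is to run `ldd` against the copied binary; a sketch, assuming the `/opt/app/bin` layout from this Dockerfile:

```python
import subprocess


def unresolved_libs(binary="/opt/app/bin/clamscan"):
    """Return ldd output lines for shared libraries that fail to resolve."""
    out = subprocess.run(["ldd", binary], capture_output=True, text=True).stdout
    return [line.strip() for line in out.splitlines() if "not found" in line]


if __name__ == "__main__":
    missing = unresolved_libs()
    print("\n".join(missing) if missing else "all shared libraries resolved")
```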
8 changes: 6 additions & 2 deletions README.md
@@ -333,17 +333,21 @@ It should be in the format provided below:
## Manually Scanning Buckets

You may want to scan all the objects in a bucket that have not previously been scanned or were created
-prior to setting up your lambda functions. To do this you can use the `scan_bucket.py` utility.
+prior to setting up your lambda functions. To do this you can use the `scan_bucket.py` utility. If the
+function and bucket live in a separate account, you can provide a profile name to use (please ensure
+the region is configured correctly for the profile).

```sh
pip install boto3
-scan_bucket.py --lambda-function-name=<lambda_function_name> --s3-bucket-name=<s3-bucket-to-scan>
+scan_bucket.py --lambda-function-name=<lambda_function_name> --s3-bucket-name=<s3-bucket-to-scan> --profile=<aws-profile-name> --limit=<number-of-files-to-scan>
```

This tool will scan all objects that have not been previously scanned in the bucket and invoke the lambda function
asynchronously. As such you'll have to go to your CloudWatch logs to see the scan results or failures. Additionally,
the script uses the same environment variables you'd use in your lambda, so you can configure them similarly.

+Providing `--limit` is useful for testing; it limits the number of files scanned before the Lambda function is invoked.
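
If you're unsure whether a named profile has a region configured, boto3 will tell you directly; a minimal sketch (the profile name `scanner` is illustrative):

```python
import boto3

# Prints None if the profile has no region configured (set one in ~/.aws/config).
sess = boto3.session.Session(profile_name="scanner")
print(sess.region_name)
```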

## Testing

There are two types of tests in this repository: pre-commit tests and Python tests. All of
21 changes: 15 additions & 6 deletions scan_bucket.py
@@ -26,19 +26,23 @@


# Get all objects in an S3 bucket that have not been previously scanned
-def get_objects(s3_client, s3_bucket_name):
+def get_objects(s3_client, s3_bucket_name, limit):

    s3_object_list = []

    s3_list_objects_result = {"IsTruncated": True}
    while s3_list_objects_result["IsTruncated"]:
+        print(f"Update: Objects to be scanned = {len(s3_object_list)}")
        s3_list_objects_config = {"Bucket": s3_bucket_name}
        continuation_token = s3_list_objects_result.get("NextContinuationToken")
        if continuation_token:
            s3_list_objects_config["ContinuationToken"] = continuation_token
        s3_list_objects_result = s3_client.list_objects_v2(**s3_list_objects_config)
        if "Contents" not in s3_list_objects_result:
            break
+        if limit:
+            if len(s3_object_list) >= limit:
+                break
        for key in s3_list_objects_result["Contents"]:
            key_name = key["Key"]
            # Don't include objects that have been scanned
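As an aside, the continuation-token loop above can also be expressed with boto3's built-in paginator. This sketch is an alternative for comparison, not part of the PR, and it omits the already-scanned filter that the original applies per key:

```python
def get_objects_paginated(s3_client, s3_bucket_name, limit=None):
    """List keys using boto3's paginator instead of a manual token loop."""
    keys = []
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=s3_bucket_name):
        for obj in page.get("Contents", []):
            keys.append(obj["Key"])
            if limit and len(keys) >= limit:
                return keys
    return keys
```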
@@ -85,27 +85,29 @@ def format_s3_event(s3_bucket_name, key_name):
    return s3_event


-def main(lambda_function_name, s3_bucket_name, limit):
+def main(lambda_function_name, s3_bucket_name, profile, limit):
    # Verify the lambda exists
-    lambda_client = boto3.client("lambda")
+    sess = boto3.session.Session(profile_name=profile)
+    lambda_client = sess.client("lambda")
    try:
        lambda_client.get_function(FunctionName=lambda_function_name)
    except Exception:
        print("Lambda Function '{}' does not exist".format(lambda_function_name))
        sys.exit(1)

    # Verify the S3 bucket exists
-    s3_client = boto3.client("s3")
+    s3_client = sess.client("s3")
    try:
        s3_client.head_bucket(Bucket=s3_bucket_name)
    except Exception:
        print("S3 Bucket '{}' does not exist".format(s3_bucket_name))
        sys.exit(1)

    # Scan the objects in the bucket
-    s3_object_list = get_objects(s3_client, s3_bucket_name)
+    s3_object_list = get_objects(s3_client, s3_bucket_name, limit)
    if limit:
        s3_object_list = s3_object_list[: min(limit, len(s3_object_list))]
+    print(f"Final: Objects to be scanned: {len(s3_object_list)}")
    for key_name in s3_object_list:
        scan_object(lambda_client, lambda_function_name, s3_bucket_name, key_name)
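
One behavioral note on the session change, worth verifying before merge (this is a reviewer observation, not from the PR): because `--profile` defaults to `"default"`, the script now always resolves credentials from the shared config/credentials files, and explicitly naming a profile may bypass environment-variable credentials. A small sketch of the before/after:

```python
import boto3

# After this PR: credentials come from the named profile in ~/.aws/credentials.
sess = boto3.session.Session(profile_name="default")
lambda_client = sess.client("lambda")

# Before this PR: the full default resolver chain was used (env vars, profile,
# instance role), so AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY alone sufficed.
legacy_client = boto3.client("lambda")
```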

@@ -120,7 +126,10 @@ def main(lambda_function_name, s3_bucket_name, limit):
    parser.add_argument(
        "--s3-bucket-name", required=True, help="The name of the S3 bucket to scan"
    )
+    parser.add_argument(
+        "--profile", help="AWS profile to use", default="default"
+    )
    parser.add_argument("--limit", type=int, help="The number of records to limit to")
    args = parser.parse_args()

-    main(args.lambda_function_name, args.s3_bucket_name, args.limit)
+    main(args.lambda_function_name, args.s3_bucket_name, args.profile, args.limit)
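
Called from Python rather than the shell, the new signature lines up like this (a sketch; the function, bucket, and profile names are illustrative):

```python
# Equivalent to:
#   scan_bucket.py --lambda-function-name=my-av-fn --s3-bucket-name=my-bucket \
#       --profile=scanner --limit=10
main("my-av-fn", "my-bucket", "scanner", 10)
```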