Skip to content


clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
mrahbar committed Oct 7, 2024
1 parent a592078 commit 75839dc
Showing 1 changed file with 215 additions and 0 deletions.
215 changes: 215 additions & 0 deletions scripts/
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
import subprocess
import argparse

# Command-line interface: which platforms to scan, optional scoping of each
# platform to specific accounts/projects/organizations, and logging switches.
parser = argparse.ArgumentParser(description='GCP Footprint Scanner')

# Platform selection. The flag may be repeated; values accumulate in a list
# that defaults to empty.
parser.add_argument(
    '-c', '--cloud',
    dest='clouds',
    action='append',
    default=[],
    required=False,
    help='Specify which clouds to scan. Can be GCP, Github or AWS',
)

# Repeatable scoping options, one per platform.
parser.add_argument(
    '-a', '--aws-account',
    dest='aws_accounts',
    action='append',
    required=False,
    help='Specify AWS account',
)
parser.add_argument(
    '-p', '--gcp-project',
    dest='gcp_projects',
    action='append',
    required=False,
    help='Specify GCP project',
)
parser.add_argument(
    '-o', '--github-org',
    dest='github_orgs',
    action='append',
    required=False,
    help='Specify GitHub organizations',
)

# Logging switches: debug echoes each command, verbose echoes each result.
parser.add_argument(
    '-d', '--debug',
    dest='debug_logging',
    action="store_true",
    required=False,
    help='Enable debug logging',
)
parser.add_argument(
    '-v', '--verbose',
    dest='verbose_logging',
    action="store_true",
    required=False,
    help='Enable verbose logging',
)

# Parsed once at import time; the functions below read this module-level value.
args = parser.parse_args()

# Running totals of each GCP resource type, summed over every scanned project.
# The keys are the same labels used in the per-project counts, so totals can
# be accumulated key by key; insertion order is the order totals are printed.
gcp_totals = {
    "Cloud Functions": 0,
    "Cloud Run Services": 0,
    "App Engine Services": 0,
    "Compute Instances": 0,
    "GKE Clusters": 0,
    "Notebook Instances": 0,
    "Redis Instances": 0,
    "AlloyDB Instances": 0,
    "CloudSQL Instances": 0,
    "BigTable Instances": 0,
    "Dataflow Jobs": 0,
    "BigQuery Datasets": 0,
    "BigTable Tables": 0,
    "GCS Buckets": 0,
}

# Running totals of each GitHub resource type, summed over every scanned
# organization.
github_totals = {
    "Repositories": 0,
}

def filter_line(line):
    """Return True when *line* should be left out of resource counts.

    A line is excluded when it contains ``DeprecationWarning:`` or
    ``import pipes`` (presumably Python warning noise printed by the CLI
    tools alongside their real output -- confirm).
    """
    noise_markers = ("DeprecationWarning:", "import pipes")
    return any(marker in line for marker in noise_markers)

def count_lines(output):
    """Count the lines of *output* that ``filter_line`` does not exclude.

    Every remaining line counts, including blank ones; an empty string
    yields 0.
    """
    kept = 0
    for row in output.splitlines():
        if filter_line(row):
            continue
        kept += 1
    return kept

# Function to safely execute gcloud commands and ignore some errors
def safe_call(command):
    """Run *command* through the shell and return its output, stripped.

    stderr is merged into stdout, so the returned text may also contain
    warnings printed by the tool.

    Args:
        command: Complete shell command line as a single string. It is
            executed with ``shell=True``, so it must only be built from
            trusted values.

    Returns:
        The command's combined output with surrounding whitespace removed,
        or ``''`` when the command fails and its output mentions
        ``SERVICE_DISABLED`` or ``PERMISSION_DENIED``.

    Raises:
        subprocess.CalledProcessError: The command failed for any other
            reason. Re-raising keeps the real cause visible instead of
            handing ``None`` to callers that expect a string.
    """
    try:
        output = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT, text=True)
    except subprocess.CalledProcessError as e:
        # A disabled API or missing permission just means "nothing to count".
        if "SERVICE_DISABLED" in e.output or "PERMISSION_DENIED" in e.output:
            return ''
        raise
    return output.strip()

# Run a command through safe_call and count the lines of its output
def count_call(command):
    """Execute *command* and return how many countable lines it printed.

    The command runs via ``safe_call`` and its output is measured with
    ``count_lines``. With debug logging enabled the command is echoed before
    it runs; with verbose logging enabled the raw output and the resulting
    count are echoed afterwards.
    """
    if args.debug_logging:
        print(f"Executing command: {command}")

    raw_output = safe_call(command)
    resource_count = count_lines(raw_output)

    if args.verbose_logging:
        print(f"Got result: {raw_output} with count {resource_count}")

    return resource_count

def count_gcp_regional_resources(regions, project):
    """Count the resources of *project* that are listed one region at a time.

    Args:
        regions: Iterable of region names; each one is queried in turn.
        project: GCP project ID passed to every ``gcloud`` command.

    Returns:
        Dict with the summed counts under the keys ``"Notebook Instances"``,
        ``"Redis Instances"`` and ``"Dataflow Jobs"``. All counts are 0 when
        *regions* is empty.
    """
    notebook_instances = 0
    redis_instances = 0
    dataflow_jobs = 0

    for region in regions:
        # NOTE(review): notebooks takes --location while the others take
        # --region; confirm that a region name is a valid location here.
        notebook_instances += count_call(
            f"gcloud --quiet notebooks instances list --project={project} --location={region} --format=\"value(name)\"")
        redis_instances += count_call(
            f"gcloud --quiet redis instances list --project={project} --region={region} --format=\"value(name)\"")
        dataflow_jobs += count_call(
            f"gcloud --quiet dataflow jobs list --project={project} --region={region} --format=\"value(id)\"")

    return {
        "Notebook Instances": notebook_instances,
        "Redis Instances": redis_instances,
        "Dataflow Jobs": dataflow_jobs,
    }

def report_counts(aca, counts):
    """Print a header naming *aca*, then one ``label: count`` line per entry.

    Args:
        aca: Name of what was scanned (callers pass a GCP project or a
            GitHub organization).
        counts: Mapping of resource label to count, printed in iteration
            order.
    """
    report = [f"Scanned resources for: {aca}"]
    report.extend(f"{label}: {amount}" for label, amount in counts.items())
    print("\n".join(report))

def count_gcp_resources(regions, project):
    """Count the supported resource types in one GCP project and report them.

    Each resource type is counted by running a ``gcloud``/``bq``/``gsutil``
    list command and counting its output lines.

    Args:
        regions: Region names used for the per-region resource types.
        project: GCP project ID to scan.

    Returns:
        Dict mapping each resource label to its count. The same dict is
        printed through ``report_counts`` before being returned.
    """
    # Count resources
    cloud_functions = count_call(f"gcloud --quiet functions list --project={project} --format=\"value(name)\"")
    # Cloud Run services expose their name under metadata.name.
    cloud_run_services = count_call(
        f"gcloud --quiet run services list --project={project} --platform managed --format=\"value(metadata.name)\"")
    app_engine_services = count_call(f"gcloud --quiet app services list --project={project} --format=\"value(id)\"")
    compute_instances = count_call(
        f"gcloud --quiet compute instances list --project={project} --format=\"value(name)\"")
    gke_clusters = count_call(f"gcloud --quiet container clusters list --project={project} --format=\"value(name)\"")

    # regional resources
    regional_counts = count_gcp_regional_resources(regions, project)
    notebook_instances = regional_counts["Notebook Instances"]
    redis_instances = regional_counts["Redis Instances"]
    dataflow_jobs = regional_counts["Dataflow Jobs"]

    alloydb_instances = count_call(
        f"gcloud --quiet alloydb instances list --project={project} --format=\"value(name)\"")
    cloudsql_instances = count_call(f"gcloud --quiet sql instances list --project={project} --format=\"value(name)\"")
    # NOTE(review): if `bq ls --format=csv` prints a header row it is counted
    # as a dataset -- confirm against real output.
    bigquery_datasets = count_call(f"bq ls -q --project_id={project} --format=csv")
    gcs_buckets = count_call(f"gsutil -q ls -p {project}")

    # Bigtable tables are listed per instance, so fetch the instance names
    # first. Warning lines are dropped so they are neither counted nor passed
    # to gcloud as instance names.
    bigtable_instances_output = safe_call(
        f"gcloud --quiet bigtable instances list --project={project} --format=\"value(name)\"")
    bigtable_instance_names = [
        line for line in bigtable_instances_output.splitlines() if not filter_line(line)
    ]
    bigtable_instances = len(bigtable_instance_names)
    bigtable_tables = 0
    for instance in bigtable_instance_names:
        bigtable_tables += count_call(
            f"gcloud --quiet bigtable tables list --project={project} --instance={instance} --format=\"value(name)\"")

    counts = {
        "Cloud Functions": cloud_functions,
        "Cloud Run Services": cloud_run_services,
        "App Engine Services": app_engine_services,
        "Compute Instances": compute_instances,
        "GKE Clusters": gke_clusters,
        "Notebook Instances": notebook_instances,
        "Redis Instances": redis_instances,
        "Dataflow Jobs": dataflow_jobs,
        "AlloyDB Instances": alloydb_instances,
        "CloudSQL Instances": cloudsql_instances,
        "BigTable Instances": bigtable_instances,
        "BigTable Tables": bigtable_tables,
        "BigQuery Datasets": bigquery_datasets,
        "GCS Buckets": gcs_buckets,
    }

    report_counts(project, counts)
    return counts

def count_github_resources(org):
    """Count the non-archived repositories of GitHub organization *org*.

    The count is the number of lines printed by ``gh repo list``, which is
    asked for at most 10000 repositories.

    Returns:
        Dict with the single key ``"Repositories"``. The same dict is printed
        through ``report_counts`` before being returned.
    """
    repos = count_call(f"gh repo list {org} -L 10000 --no-archived")

    counts = {
        "Repositories": repos,
    }
    report_counts(org, counts)
    return counts

def scan_gcp():
    """Scan GCP projects, add their counts to ``gcp_totals`` and print totals.

    Projects come from ``--gcp-project``; when none were given, every project
    returned by ``gcloud projects list`` is scanned. The region list is
    fetched once, using the first project, and reused for all projects.
    Returns early, after printing a message, when there is nothing to scan.
    """
    projects = args.gcp_projects
    if not projects:
        projects_output = safe_call("gcloud --quiet projects list --format=\"value(projectId)\"")
        # Drop warning lines so they are not mistaken for project IDs.
        projects = [line for line in projects_output.splitlines() if not filter_line(line)]

    if not projects:
        print("No projects found.")
        # Nothing to scan; continuing would index into an empty list below.
        return

    regions_output = safe_call(
        f"gcloud --quiet compute regions list --format=\"value(name)\" --project={projects[0]}")
    regions = [line for line in regions_output.splitlines() if not filter_line(line)]

    print(f"Starting GCP resource scanning for {len(projects)} projects")
    # Aggregate resources for each project
    for project in projects:
        counts = count_gcp_resources(regions, project)
        for resource, count in counts.items():
            gcp_totals[resource] += count

    # Print totals
    print(f"Total GCP resource count in {len(projects)} projects")
    for resource, total in gcp_totals.items():
        print(f"Total {resource}: {total}")

def scan_github():
    """Scan GitHub organizations, add counts to ``github_totals``, print totals.

    Organizations come from ``--github-org``; when none were given, the
    output lines of ``gh org list`` are used. Returns early, after printing
    a message, when there is nothing to scan.
    """
    orgs = args.github_orgs
    if not orgs:
        orgs_output = safe_call("gh org list")
        orgs = orgs_output.splitlines()

    if not orgs:
        print("No organizations found.")
        # Nothing to scan, so skip the scan banner and the all-zero totals.
        return

    print(f"Starting GitHub resource scanning for {len(orgs)} organizations")
    # Aggregate resources for each organization
    for org in orgs:
        counts = count_github_resources(org)
        for resource, count in counts.items():
            github_totals[resource] += count

    # Print totals
    print(f"Total Github resource count in {len(orgs)} organizations")
    for resource, total in github_totals.items():
        print(f"Total {resource}: {total}")

def run_cloud_module(cloud):
    """Return True when *cloud* was selected with ``-c/--cloud``.

    The comparison against each selected value ignores letter case.
    """
    for selected in args.clouds:
        if selected.lower() == cloud.lower():
            return True
    return False

# Entry point: run each scanner whose platform was selected with -c/--cloud.
if run_cloud_module("GCP"):
    scan_gcp()
if run_cloud_module("GitHub"):
    scan_github()

0 comments on commit 75839dc

Please sign in to comment.