From 3a26d19652d44cc5958a492efb5bddf818d42da3 Mon Sep 17 00:00:00 2001 From: Allen Lee Date: Tue, 10 Dec 2024 23:39:07 -0700 Subject: [PATCH] refactor: clean up code - remove dead imports - delete_ror_affiliations seems unnecessary, added a --force option to update_ror_affiliations instead - return dict object for update instead of tuple values in update_ror_affiliations.lookup_ror_id - fix invalid test_metrics method call ref --- .../commands/delete_ror_affiliations.py | 31 --------- .../commands/update_ror_affiliations.py | 68 +++++++++---------- .../home/management/commands/cache_metrics.py | 3 +- django/home/metrics.py | 63 ++++++++--------- django/home/tests/test_metrics.py | 2 +- 5 files changed, 66 insertions(+), 101 deletions(-) delete mode 100644 django/core/management/commands/delete_ror_affiliations.py diff --git a/django/core/management/commands/delete_ror_affiliations.py b/django/core/management/commands/delete_ror_affiliations.py deleted file mode 100644 index 804168f32..000000000 --- a/django/core/management/commands/delete_ror_affiliations.py +++ /dev/null @@ -1,31 +0,0 @@ -from django.core.management.base import BaseCommand -from core.models import MemberProfile -import logging - -logger = logging.getLogger(__name__) - -class Command(BaseCommand): - help = """Remove coordinates from each affiliation in all member profiles""" - - def handle(self, *args, **options): - all_member_profiles = MemberProfile.objects.all() - - for profile in all_member_profiles: - updated = False - affiliations = profile.affiliations - - - for affiliation in affiliations: - - if "coordinates" in affiliation: - del affiliation["coordinates"] - #del affiliation["links"] - #del affiliation["types"] - updated = True - - if updated: - profile.affiliations = affiliations - profile.save() - - - diff --git a/django/core/management/commands/update_ror_affiliations.py b/django/core/management/commands/update_ror_affiliations.py index a2fcdd59a..6a036d232 100644 --- a/django/core/management/commands/update_ror_affiliations.py +++ b/django/core/management/commands/update_ror_affiliations.py @@ -1,23 +1,17 @@ -from django.core.management.base import BaseCommand -from core.models import MemberProfile -import requests -import time -import logging -from pathlib import Path -import concurrent.futures -import json -import http.server -import socketserver import argparse +import logging +import requests +from django.core.management.base import BaseCommand from core.models import MemberProfile + logger = logging.getLogger(__name__) class Command(BaseCommand): - help = """something""" + help = """Update all MemberProfile affiliations with lat/lon locations pulled from the ROR API""" def __init__(self): super().__init__() @@ -30,43 +24,49 @@ def lookup_ror_id(self, ror_id): response.raise_for_status() data = response.json() print(".", end="", flush=True) - return data["name"], data["addresses"][0]["lat"], data["addresses"][0]["lng"], data["links"][0], data["types"][0] - except requests.RequestException as e: + return { + "name": data["name"], + "coordinates": { + "lat": data["addresses"][0]["lat"], + "lon": data["addresses"][0]["lng"], + }, + "link": data["links"][0], + "type": data["types"][0], + } + except requests.RequestException: print("E", end="", flush=True) - return None, None, None, None, None - + return {} + def add_arguments(self, parser): + parser.add_argument( + "--force", + action=argparse.BooleanOptionalAction, + default=False, + dest="force", + help="Force update of all affiliations with geo lat/lon data, name, link, and type", + ) def handle(self, *args, **options): # TODO: look up every memberprofile.affiliations that has a ror_id and add the lat, lon to each json record - #add sessions to make faster - #get links and types as well - #make metrics.py function to get list of institution data + # add sessions to make faster + # get links and types as well + # make metrics.py function to get list of institution data + force = options["force"] all_member_profiles = MemberProfile.objects.all() for profile in all_member_profiles: updated = False - affiliations = profile.affiliations - - - for affiliation in affiliations: + for affiliation in profile.affiliations: if "ror_id" not in affiliation: continue - - if "coordinates" not in affiliation: - name, lat, lon, links, types = self.lookup_ror_id(affiliation["ror_id"]) - if lat is not None and lon is not None: - affiliation["name"] = name - affiliation["coordinates"] = {"lat": lat, "lon": lon} - affiliation["link"] = links - affiliation["type"] = types + if "coordinates" not in affiliation or force: + updated_affiliation_data = self.lookup_ror_id(affiliation["ror_id"]) + if updated_affiliation_data: + affiliation.update(**updated_affiliation_data) updated = True if updated: - profile.affiliations = affiliations profile.save() - - self.session.close() - + self.session.close() diff --git a/django/home/management/commands/cache_metrics.py b/django/home/management/commands/cache_metrics.py index 865ea9668..eb6db252b 100644 --- a/django/home/management/commands/cache_metrics.py +++ b/django/home/management/commands/cache_metrics.py @@ -9,8 +9,7 @@ class Command(BaseCommand): - help = """Dump active user emails for mailchimp import with filtered by is_active=True - and optional date_joined --after=yyyy-mm-dd""" + help = """generate metrics and cache in redis""" def add_arguments(self, parser): """ diff --git a/django/home/metrics.py b/django/home/metrics.py index 1b62ac670..bc09a9ee2 100644 --- a/django/home/metrics.py +++ b/django/home/metrics.py @@ -12,15 +12,15 @@ class Metrics: DEFAULT_METRICS_CACHE_TIMEOUT = 60 * 60 * 24 * 7 # 1 week MINIMUM_CATEGORY_COUNT = 10 # the threshold at which we group all other nominal values into an "other" category - def get_all_data(self): #update this function + def get_all_data(self, force=False): data = cache.get(Metrics.REDIS_METRICS_KEY) - if not data: + if not data or force: return self.cache_all() return data def cache_all(self): """ - caches metrics data in redis + recompute and cache metrics data in redis """ all_data = self.generate_metrics_data() cache.set( @@ -72,11 +72,13 @@ def generate_metrics_data(self): release_metrics, release_start_year = self.get_release_metrics_timeseries() institution_metrics = self.get_member_affiliation_data() min_start_year = min(members_start_year, model_start_year, release_start_year) - all_metrics = dict( - startYear=min_start_year, **member_metrics, **model_metrics, **release_metrics, - institution_metrics=institution_metrics + return dict( + startYear=min_start_year, + institutionData=institution_metrics, + **member_metrics, + **model_metrics, + **release_metrics, ) - return all_metrics def get_members_by_year_timeseries(self): """ @@ -157,7 +159,6 @@ def get_model_metrics_timeseries(self): .order_by("year") ) - release_downloads = list( CodebaseReleaseDownload.objects.values(year=F("date_created__year")) .annotate(total=Count("year")) @@ -203,15 +204,13 @@ def get_model_metrics_timeseries(self): ], }, "releasesByOs": self.get_release_os_timeseries(min_start_year), - "releasesByPlatform": self.get_release_platform_timeseries( - min_start_year - ), + "releasesByPlatform": self.get_release_platform_timeseries(min_start_year), "releasesByLanguage": self.get_release_programming_language_timeseries( min_start_year ), } return model_metrics, min_start_year - + def get_release_metrics_timeseries(self): total_releases_by_year = list( @@ -255,12 +254,11 @@ def get_release_metrics_timeseries(self): ), } ], - } + }, } - + return release_metrics, min_start_year - def get_member_affiliation_data(self): sql_query = """ SELECT @@ -274,7 +272,7 @@ def get_member_affiliation_data(self): AND affiliation->'coordinates' ?& array['lat', 'lon'] GROUP BY institution_name, latitude, longitude ORDER BY total DESC; - """ + """ with connection.cursor() as cursor: cursor.execute(sql_query) @@ -356,27 +354,28 @@ def get_release_programming_language_timeseries(self, start_year): .order_by("year") ) - #temporary fix to combine netlogo and logo + # temporary fix to combine netlogo and logo combined_metrics = defaultdict(lambda: defaultdict(int)) for metric in programming_language_metrics: - language = metric['programming_language_names'] - if language in ['NetLogo', 'Logo']: - language = 'NetLogo' - - year = metric['year'] - combined_metrics[year][language] += metric['count'] - + language = metric["programming_language_names"] + if language in ["NetLogo", "Logo"]: + language = "NetLogo" + + year = metric["year"] + combined_metrics[year][language] += metric["count"] + flattened_metrics = [] for year, languages in combined_metrics.items(): for language, count in languages.items(): - flattened_metrics.append({ - 'programming_language_names': language, - 'year': year, - 'count': count - }) - + flattened_metrics.append( + { + "programming_language_names": language, + "year": year, + "count": count, + } + ) return { "title": "Models by Language", @@ -400,9 +399,7 @@ def to_timeseries(self, queryset_data, start_year): data.append(queryset_dict[year] if year in queryset_dict else 0) return data - def convert_release_metrics_to_timeseries( - self, metrics, start_year, category=None - ): + def convert_release_metrics_to_timeseries(self, metrics, start_year, category=None): """ Converts Django queryset metrics to a list of timeseries dicts e.g., [{"name": "OS Counts", "data": [0, 37, 43, 14, 95, ...]}, ...] diff --git a/django/home/tests/test_metrics.py b/django/home/tests/test_metrics.py index 3529069b9..75d7297a3 100644 --- a/django/home/tests/test_metrics.py +++ b/django/home/tests/test_metrics.py @@ -34,7 +34,7 @@ def test_convert_timeseries(self): m = Metrics() min_start_year = 2012 OS_NAMES = ("linux", "other", "platform_independent", "windows", "macos") - highcharts_timeseries = m.convert_codebase_metrics_to_timeseries( + highcharts_timeseries = m.convert_release_metrics_to_timeseries( self.os_metrics, start_year=min_start_year ) for chart_data in highcharts_timeseries: