refactor: clean up code
- remove dead imports
- delete_ror_affiliations seems unnecessary; added a --force option to
  update_ror_affiliations instead (see the usage sketch below)
- return a dict from update_ror_affiliations.lookup_ror_id instead of a
  tuple of values, so results can be merged into the affiliation directly
- fix an invalid method call reference in test_metrics
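
A minimal usage sketch for the new flag, assuming Django's standard call_command interface; the command name is taken from the file path below, and the force keyword maps to the new --force option:

from django.core.management import call_command

# Default run: only affiliations that lack "coordinates" are looked up against the ROR API.
call_command("update_ror_affiliations")

# Forced run: re-fetch name, coordinates, link, and type for every affiliation with a ror_id,
# equivalent to `python manage.py update_ror_affiliations --force` on the CLI.
call_command("update_ror_affiliations", force=True)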
alee committed Dec 11, 2024
1 parent 9fd7100 commit 3a26d19
Showing 5 changed files with 66 additions and 101 deletions.
31 changes: 0 additions & 31 deletions django/core/management/commands/delete_ror_affiliations.py

This file was deleted.

68 changes: 34 additions & 34 deletions django/core/management/commands/update_ror_affiliations.py
@@ -1,23 +1,17 @@
from django.core.management.base import BaseCommand
from core.models import MemberProfile
import requests
import time
import logging
from pathlib import Path
import concurrent.futures
import json
import http.server
import socketserver
import argparse
import logging
import requests

from django.core.management.base import BaseCommand

from core.models import MemberProfile


logger = logging.getLogger(__name__)


class Command(BaseCommand):
help = """something"""
help = """Update all MemberProfile affiliations with lat/lon locations pulled from the ROR API"""

def __init__(self):
super().__init__()
@@ -30,43 +24,49 @@ def lookup_ror_id(self, ror_id):
response.raise_for_status()
data = response.json()
print(".", end="", flush=True)
return data["name"], data["addresses"][0]["lat"], data["addresses"][0]["lng"], data["links"][0], data["types"][0]
except requests.RequestException as e:
return {
"name": data["name"],
"coordinates": {
"lat": data["addresses"][0]["lat"],
"lon": data["addresses"][0]["lng"],
},
"link": data["links"][0],
"type": data["types"][0],
}
except requests.RequestException:
print("E", end="", flush=True)
return None, None, None, None, None

return {}

def add_arguments(self, parser):
parser.add_argument(
"--force",
action=argparse.BooleanOptionalAction,
default=False,
dest="force",
help="Force update of all affiliations with geo lat/lon data, name, link, and type",
)

def handle(self, *args, **options):
# TODO: look up every memberprofile.affiliations that has a ror_id and add the lat, lon to each json record
#add sessions to make faster
#get links and types as well
#make metrics.py function to get list of institution data
# add sessions to make faster
# get links and types as well
# make metrics.py function to get list of institution data
force = options["force"]
all_member_profiles = MemberProfile.objects.all()

for profile in all_member_profiles:

updated = False
affiliations = profile.affiliations


for affiliation in affiliations:
for affiliation in profile.affiliations:
if "ror_id" not in affiliation:
continue

if "coordinates" not in affiliation:
name, lat, lon, links, types = self.lookup_ror_id(affiliation["ror_id"])
if lat is not None and lon is not None:
affiliation["name"] = name
affiliation["coordinates"] = {"lat": lat, "lon": lon}
affiliation["link"] = links
affiliation["type"] = types
if "coordinates" not in affiliation or force:
updated_affiliation_data = self.lookup_ror_id(affiliation["ror_id"])
if updated_affiliation_data:
affiliation.update(**updated_affiliation_data)
updated = True

if updated:
profile.affiliations = affiliations
profile.save()

self.session.close()


self.session.close()
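
For illustration, a hypothetical affiliation entry after the update: lookup_ror_id now returns a single dict that handle() merges into the existing record via affiliation.update(**updated_affiliation_data). All values below are placeholders, not real ROR data.

affiliation = {
    "ror_id": "https://ror.org/0xxxxxxxx",  # placeholder; assumed to already exist on the record
    "name": "Example University",
    "coordinates": {"lat": 12.34, "lon": -56.78},
    "link": "https://www.example.edu",
    "type": "Education",
}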
3 changes: 1 addition & 2 deletions django/home/management/commands/cache_metrics.py
@@ -9,8 +9,7 @@


class Command(BaseCommand):
help = """Dump active user emails for mailchimp import with filtered by is_active=True
and optional date_joined --after=yyyy-mm-dd"""
help = """generate metrics and cache in redis"""

def add_arguments(self, parser):
"""
63 changes: 30 additions & 33 deletions django/home/metrics.py
@@ -12,15 +12,15 @@ class Metrics:
DEFAULT_METRICS_CACHE_TIMEOUT = 60 * 60 * 24 * 7 # 1 week
MINIMUM_CATEGORY_COUNT = 10 # the threshold at which we group all other nominal values into an "other" category

def get_all_data(self): #update this function
def get_all_data(self, force=False):
data = cache.get(Metrics.REDIS_METRICS_KEY)
if not data:
if not data or force:
return self.cache_all()
return data

def cache_all(self):
"""
caches metrics data in redis
recompute and cache metrics data in redis
"""
all_data = self.generate_metrics_data()
cache.set(
@@ -72,11 +72,13 @@ def generate_metrics_data(self):
release_metrics, release_start_year = self.get_release_metrics_timeseries()
institution_metrics = self.get_member_affiliation_data()
min_start_year = min(members_start_year, model_start_year, release_start_year)
all_metrics = dict(
startYear=min_start_year, **member_metrics, **model_metrics, **release_metrics,
institution_metrics=institution_metrics
return dict(
startYear=min_start_year,
institutionData=institution_metrics,
**member_metrics,
**model_metrics,
**release_metrics,
)
return all_metrics

def get_members_by_year_timeseries(self):
"""
@@ -157,7 +159,6 @@ def get_model_metrics_timeseries(self):
.order_by("year")
)


release_downloads = list(
CodebaseReleaseDownload.objects.values(year=F("date_created__year"))
.annotate(total=Count("year"))
@@ -203,15 +204,13 @@ def get_model_metrics_timeseries(self):
],
},
"releasesByOs": self.get_release_os_timeseries(min_start_year),
"releasesByPlatform": self.get_release_platform_timeseries(
min_start_year
),
"releasesByPlatform": self.get_release_platform_timeseries(min_start_year),
"releasesByLanguage": self.get_release_programming_language_timeseries(
min_start_year
),
}
return model_metrics, min_start_year

def get_release_metrics_timeseries(self):

total_releases_by_year = list(
@@ -255,12 +254,11 @@ def get_release_metrics_timeseries(self):
),
}
],
}
},
}

return release_metrics, min_start_year


def get_member_affiliation_data(self):
sql_query = """
SELECT
@@ -274,7 +272,7 @@ def get_member_affiliation_data(self):
AND affiliation->'coordinates' ?& array['lat', 'lon']
GROUP BY institution_name, latitude, longitude
ORDER BY total DESC;
"""
"""

with connection.cursor() as cursor:
cursor.execute(sql_query)
@@ -356,27 +354,28 @@ def get_release_programming_language_timeseries(self, start_year):
.order_by("year")
)

#temporary fix to combine netlogo and logo
# temporary fix to combine netlogo and logo
combined_metrics = defaultdict(lambda: defaultdict(int))

for metric in programming_language_metrics:
language = metric['programming_language_names']
if language in ['NetLogo', 'Logo']:
language = 'NetLogo'
year = metric['year']
combined_metrics[year][language] += metric['count']
language = metric["programming_language_names"]
if language in ["NetLogo", "Logo"]:
language = "NetLogo"

year = metric["year"]
combined_metrics[year][language] += metric["count"]

flattened_metrics = []

for year, languages in combined_metrics.items():
for language, count in languages.items():
flattened_metrics.append({
'programming_language_names': language,
'year': year,
'count': count
})

flattened_metrics.append(
{
"programming_language_names": language,
"year": year,
"count": count,
}
)

return {
"title": "Models by Language",
Expand All @@ -400,9 +399,7 @@ def to_timeseries(self, queryset_data, start_year):
data.append(queryset_dict[year] if year in queryset_dict else 0)
return data

def convert_release_metrics_to_timeseries(
self, metrics, start_year, category=None
):
def convert_release_metrics_to_timeseries(self, metrics, start_year, category=None):
"""
Converts Django queryset metrics to a list of timeseries dicts e.g.,
[{"name": "OS Counts", "data": [0, 37, 43, 14, 95, ...]}, ...]
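A hedged sketch of how the new force parameter on get_all_data (added above) might be used; the import path is assumed from django/home/metrics.py:

from home.metrics import Metrics

metrics = Metrics()
cached = metrics.get_all_data()            # returns the cached payload from Redis when present
fresh = metrics.get_all_data(force=True)   # skips the cache and recomputes via cache_all()
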
2 changes: 1 addition & 1 deletion django/home/tests/test_metrics.py
@@ -34,7 +34,7 @@ def test_convert_timeseries(self):
m = Metrics()
min_start_year = 2012
OS_NAMES = ("linux", "other", "platform_independent", "windows", "macos")
highcharts_timeseries = m.convert_codebase_metrics_to_timeseries(
highcharts_timeseries = m.convert_release_metrics_to_timeseries(
self.os_metrics, start_year=min_start_year
)
for chart_data in highcharts_timeseries:
