diff --git a/django/core/mixins.py b/django/core/mixins.py index 19ed9a49b..d362bd658 100644 --- a/django/core/mixins.py +++ b/django/core/mixins.py @@ -262,7 +262,6 @@ def _validate_content_object(self, instance): "spam_moderation", "is_marked_spam", "get_absolute_url", - "title", ] for field in required_fields: if not hasattr(instance, field): @@ -270,6 +269,12 @@ def _validate_content_object(self, instance): f"instance {instance} does not have a {field} attribute" ) + # Ensure either 'title' () or 'username' (for MemberProfile) is present + if not (hasattr(instance, "title") or hasattr(instance, "username")): + raise ValueError( + f"instance {instance} must have either a 'title' or a 'username' attribute" + ) + @action(detail=True, methods=["post"], permission_classes=[ModeratorPermissions]) def mark_spam(self, request, **kwargs): instance = self.get_object() diff --git a/django/core/models.py b/django/core/models.py index ff9ab76f4..7b0e5ca12 100644 --- a/django/core/models.py +++ b/django/core/models.py @@ -1,7 +1,7 @@ -from datetime import timedelta -from enum import Enum import logging import pathlib +from datetime import timedelta +from enum import Enum from allauth.account.models import EmailAddress from django import forms @@ -358,7 +358,7 @@ def find_users_with_email(self, candidate_email, exclude_user=None): @add_to_comses_permission_whitelist @register_snippet -class MemberProfile(index.Indexed, ClusterableModel): +class MemberProfile(index.Indexed, ModeratedContent, ClusterableModel): """ Contains additional comses.net information, possibly linked to a CoMSES Member / site account """ diff --git a/django/core/views.py b/django/core/views.py index 0695269c4..50739d196 100644 --- a/django/core/views.py +++ b/django/core/views.py @@ -8,8 +8,8 @@ from django.contrib.auth.decorators import login_required from django.contrib.auth.mixins import LoginRequiredMixin from django.contrib.auth.models import User -from django.core.files.images import ImageFile from django.core.exceptions import PermissionDenied +from django.core.files.images import ImageFile from django.http import ( Http404, HttpResponseBadRequest, @@ -17,38 +17,25 @@ HttpResponseServerError, ) from django.shortcuts import get_object_or_404, redirect, render -from django.views.generic import DetailView, TemplateView, RedirectView from django.urls import reverse -from rest_framework import ( - viewsets, - generics, - parsers, - mixins, - filters, -) +from django.views.generic import DetailView, RedirectView, TemplateView +from rest_framework import filters, generics, mixins, parsers, viewsets +from rest_framework.decorators import action from rest_framework.exceptions import ( - PermissionDenied as DrfPermissionDenied, + APIException, NotAuthenticated, NotFound, - APIException, + PermissionDenied as DrfPermissionDenied, ) -from rest_framework.decorators import action from rest_framework.filters import OrderingFilter -from rest_framework.permissions import IsAuthenticated, AllowAny +from rest_framework.permissions import AllowAny, IsAuthenticated from rest_framework.response import Response from rest_framework.views import APIView, exception_handler from taggit.models import Tag from wagtail.images.models import Image from library.models import Codebase -from .models import Event, FollowUser, Job, MemberProfile -from .serializers import ( - EventSerializer, - JobSerializer, - MemberProfileSerializer, - RelatedMemberProfileSerializer, - TagSerializer, -) +from .discourse import build_discourse_url from .mixins import ( CommonViewSetMixin, HtmlListModelMixin, @@ -56,16 +43,22 @@ PermissionRequiredByHttpMethodMixin, SpamCatcherViewSetMixin, ) +from .models import Event, FollowUser, Job, MemberProfile from .pagination import SmallResultSetPagination from .permissions import ObjectPermissions, ViewRestrictedObjectPermissions -from .discourse import build_discourse_url +from .serializers import ( + EventSerializer, + JobSerializer, + MemberProfileSerializer, + RelatedMemberProfileSerializer, + TagSerializer, +) +from .utils import parse_date, parse_datetime from .view_helpers import ( add_user_retrieve_perms, get_search_queryset, retrieve_with_perms, ) -from .utils import parse_date, parse_datetime - logger = logging.getLogger(__name__) @@ -316,7 +309,9 @@ def filter_queryset(self, request, queryset, view): return get_search_queryset(qs, queryset, tags=tags) -class MemberProfileViewSet(CommonViewSetMixin, HtmlNoDeleteViewSet): +class MemberProfileViewSet( + SpamCatcherViewSetMixin, CommonViewSetMixin, HtmlNoDeleteViewSet +): lookup_field = "user__pk" lookup_url_kwarg = "pk" queryset = MemberProfile.objects.public().with_tags() diff --git a/django/curator/serializers.py b/django/curator/serializers.py index 778fd3cf8..2a0278947 100644 --- a/django/curator/serializers.py +++ b/django/curator/serializers.py @@ -1,6 +1,6 @@ from rest_framework import serializers -from core.models import Event, Job, SpamModeration +from core.models import Event, Job, MemberProfile, SpamModeration from library.models import Codebase @@ -56,6 +56,23 @@ class Meta: ] +class MinimalMemberProfileSerializer(serializers.ModelSerializer): + class Meta: + model = MemberProfile + fields = [ + "id", + "username", + "name", + "email", + "bio", + "research_interests", + "affiliations_string", + "degrees", + "personal_url", + "professional_url", + ] + + class SpamUpdateSerializer(serializers.Serializer): id = serializers.IntegerField() is_spam = serializers.BooleanField() diff --git a/django/curator/tests/test_llm_spam_moderation.py b/django/curator/tests/test_llm_spam_moderation_api_endpoints.py similarity index 78% rename from django/curator/tests/test_llm_spam_moderation.py rename to django/curator/tests/test_llm_spam_moderation_api_endpoints.py index f385d2003..b8552ba24 100644 --- a/django/curator/tests/test_llm_spam_moderation.py +++ b/django/curator/tests/test_llm_spam_moderation_api_endpoints.py @@ -5,8 +5,9 @@ from rest_framework import status from rest_framework.test import APIClient -from core.models import Event, Job, SpamModeration -from core.tests.base import BaseModelTestCase, EventFactory, JobFactory +from core.models import Event, Job, MemberProfile, SpamModeration +from core.tests.base import BaseModelTestCase, EventFactory, JobFactory, \ + UserFactory from library.models import Codebase from library.tests.base import CodebaseFactory @@ -42,14 +43,17 @@ def setUp(self): title="Test Codebase", description="Codebase Description" ) - # Create SpamModeration objects - self.job_spam = SpamModeration.objects.create( + self.user_factory = UserFactory() + self.spammy_user = self.user_factory.create(username="scamlikely") + + # Create SpamModeration objects (for MemberProfile the SpamModeration will be created automatically when user is created) + self.job_spam_moderation = SpamModeration.objects.create( content_object=self.job, status=SpamModeration.Status.SCHEDULED_FOR_CHECK ) - self.event_spam = SpamModeration.objects.create( + self.event_spam_moderation = SpamModeration.objects.create( content_object=self.event, status=SpamModeration.Status.SCHEDULED_FOR_CHECK ) - self.codebase_spam = SpamModeration.objects.create( + self.codebase_spam_moderation = SpamModeration.objects.create( content_object=self.codebase, status=SpamModeration.Status.SCHEDULED_FOR_CHECK, ) @@ -101,13 +105,16 @@ def test_get_latest_spam_batch(self): self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() - self.assertEqual(len(data), 3) # We expect 3 items in the batch + self.assertEqual( + len(data), 5 + ) # We expect 5 items in the batch (Event, Job, Codebase, MemberProfile) + MemberProfile of the test_user from super().setUp() # Check if all content types are present content_types = [item["contentType"] for item in data] self.assertIn("job", content_types) self.assertIn("event", content_types) self.assertIn("codebase", content_types) + self.assertIn("memberprofile", content_types) # Check structure of a job item job_item = next(item for item in data if item["contentType"] == "job") @@ -163,6 +170,40 @@ def test_update_spam_moderation_success(self): # Check if related content object was updated self.assertTrue(job.is_marked_spam) + def test_update_spam_moderation_success_memberprofile(self): + self.client.credentials(HTTP_X_API_KEY=self.api_key) + + mp = MemberProfile.objects.get(id=self.spammy_user.member_profile.id) + + data = { + "id": mp.spam_moderation.id, + "is_spam": True, + "spam_indicators": ["indicator1", "indicator2"], + "reasoning": "Test reasoning", + "confidence": 0.9, + } + + response = self.client.post("/api/spam/update/", data, format="json") + self.assertEqual(response.status_code, status.HTTP_200_OK) + + # Check if SpamModeration object was updated + mp.refresh_from_db() + self.assertIsNotNone(mp.spam_moderation) + self.assertEqual(mp.spam_moderation.status, SpamModeration.Status.SPAM_LIKELY) + self.assertTrue(mp.is_marked_spam) + self.assertEqual(mp.spam_moderation.detection_method, "LLM") + self.assertEqual( + mp.spam_moderation.detection_details["spam_indicators"], + ["indicator1", "indicator2"], + ) + self.assertEqual( + mp.spam_moderation.detection_details["reasoning"], "Test reasoning" + ) + self.assertEqual(mp.spam_moderation.detection_details["confidence"], 0.9) + + # Check if related content object was updated + self.assertTrue(mp.is_marked_spam) + def test_update_spam_moderation_not_spam(self): self.client.credentials(HTTP_X_API_KEY=self.api_key) @@ -194,7 +235,7 @@ def test_update_spam_moderation_invalid_data(self): self.client.credentials(HTTP_X_API_KEY=self.api_key) data = { - "id": self.codebase_spam.id, + "id": self.codebase_spam_moderation.id, # Missing required 'is_spam' field } @@ -205,7 +246,7 @@ def test_update_spam_moderation_partial_update(self): self.client.credentials(HTTP_X_API_KEY=self.api_key) data = { - "id": self.codebase_spam.id, + "id": self.codebase_spam_moderation.id, "is_spam": True, # Only providing partial data } diff --git a/django/curator/views.py b/django/curator/views.py index e3dbb650a..787bed922 100644 --- a/django/curator/views.py +++ b/django/curator/views.py @@ -25,6 +25,7 @@ MinimalCodebaseSerializer, MinimalEventSerializer, MinimalJobSerializer, + MinimalMemberProfileSerializer, SpamModerationSerializer, SpamUpdateSerializer, ) @@ -105,6 +106,8 @@ def get_latest_spam_batch(request): content_serializer = MinimalEventSerializer(content_object) elif content_type == "codebase": content_serializer = MinimalCodebaseSerializer(content_object) + elif content_type == "memberprofile": + content_serializer = MinimalMemberProfileSerializer(content_object) else: continue diff --git a/django/home/signals.py b/django/home/signals.py index 8868e6826..62ea643e0 100644 --- a/django/home/signals.py +++ b/django/home/signals.py @@ -1,15 +1,16 @@ import logging -import shortuuid +import shortuuid from django.conf import settings from django.contrib.auth.models import User +from django.contrib.contenttypes.models import ContentType from django.contrib.sites.models import Site from django.db.models.signals import post_save from django.dispatch import receiver from wagtail.models import Site as WagtailSite from core.discourse import create_discourse_user -from core.models import MemberProfile, EXCLUDED_USERNAMES +from core.models import EXCLUDED_USERNAMES, MemberProfile, SpamModeration logger = logging.getLogger(__name__) @@ -34,6 +35,27 @@ def sync_discourse_user(user: User): return success +def create_spam_moderation(mp: MemberProfile): + content_type = ContentType.objects.get_for_model(type(mp)) + default_status = SpamModeration.Status.SCHEDULED_FOR_CHECK + + default_spam_moderation = { + "status": default_status, + "detection_method": "", + "detection_details": "", + } + + sm, created = SpamModeration.objects.update_or_create( + content_type=content_type, + object_id=mp.id, + defaults=default_spam_moderation, + ) + + # update the related object + mp.spam_moderation = sm + mp.save() + + @receiver(post_save, sender=User, dispatch_uid="member_profile_sync") def on_user_save(sender, instance: User, created, **kwargs): """ @@ -42,7 +64,9 @@ def on_user_save(sender, instance: User, created, **kwargs): if instance.username in EXCLUDED_USERNAMES: return if created: - sync_member_profile(instance) + mp = sync_member_profile(instance) + if mp: + create_spam_moderation(mp) if instance.email: # sync with discourse # to test discourse synchronization locally eliminate the DEPLOY_ENVIRONMENT check