diff --git a/bookwyrm/forms/forms.py b/bookwyrm/forms/forms.py index ea60937500..3d555f308d 100644 --- a/bookwyrm/forms/forms.py +++ b/bookwyrm/forms/forms.py @@ -25,6 +25,10 @@ class ImportForm(forms.Form): csv_file = forms.FileField() +class ImportUserForm(forms.Form): + archive_file = forms.FileField() + + class ShelfForm(CustomForm): class Meta: model = models.Shelf diff --git a/bookwyrm/importers/__init__.py b/bookwyrm/importers/__init__.py index 6ce50f160b..8e92872f25 100644 --- a/bookwyrm/importers/__init__.py +++ b/bookwyrm/importers/__init__.py @@ -1,6 +1,7 @@ """ import classes """ from .importer import Importer +from .bookwyrm_import import BookwyrmImporter from .calibre_import import CalibreImporter from .goodreads_import import GoodreadsImporter from .librarything_import import LibrarythingImporter diff --git a/bookwyrm/importers/bookwyrm_import.py b/bookwyrm/importers/bookwyrm_import.py new file mode 100644 index 0000000000..070b6a5db2 --- /dev/null +++ b/bookwyrm/importers/bookwyrm_import.py @@ -0,0 +1,18 @@ +"""Import data from Bookwyrm export files""" +from bookwyrm import settings +from bookwyrm.models.bookwyrm_import_job import BookwyrmImportJob + +class BookwyrmImporter: + """Import a Bookwyrm User export JSON file. + This is kind of a combination of an importer and a connector. 
+ """ + + def process_import(self, user, archive_file, settings): + """import user data from a Bookwyrm export file""" + + required = [k for k in settings if settings.get(k) == "on"] + + job = BookwyrmImportJob.objects.create( + user=user, archive_file=archive_file, required=required + ) + return job diff --git a/bookwyrm/migrations/0179_bookwyrmexportjob_bookwyrmimportjob_childjob_parentjob.py b/bookwyrm/migrations/0179_bookwyrmexportjob_bookwyrmimportjob_childjob_parentjob.py new file mode 100644 index 0000000000..6d2ce6635d --- /dev/null +++ b/bookwyrm/migrations/0179_bookwyrmexportjob_bookwyrmimportjob_childjob_parentjob.py @@ -0,0 +1,71 @@ +# Generated by Django 3.2.19 on 2023-08-31 22:57 + +from django.conf import settings +import django.contrib.postgres.fields +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookwyrm', '0178_auto_20230328_2132'), + ] + + operations = [ + migrations.CreateModel( + name='ParentJob', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('task_id', models.UUIDField(blank=True, null=True, unique=True)), + ('created_date', models.DateTimeField(default=django.utils.timezone.now)), + ('updated_date', models.DateTimeField(default=django.utils.timezone.now)), + ('complete', models.BooleanField(default=False)), + ('status', models.CharField(choices=[('pending', 'Pending'), ('active', 'Active'), ('complete', 'Complete'), ('stopped', 'Stopped')], default='pending', max_length=50, null=True)), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='BookwyrmExportJob', + fields=[ + ('parentjob_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, 
serialize=False, to='bookwyrm.parentjob')), + ('export_data', models.FileField(null=True, upload_to='')), + ], + options={ + 'abstract': False, + }, + bases=('bookwyrm.parentjob',), + ), + migrations.CreateModel( + name='BookwyrmImportJob', + fields=[ + ('parentjob_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='bookwyrm.parentjob')), + ('archive_file', models.FileField(blank=True, null=True, upload_to='')), + ('import_data', models.JSONField(null=True)), + ('required', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(blank=True, max_length=50), blank=True, size=None)), + ], + options={ + 'abstract': False, + }, + bases=('bookwyrm.parentjob',), + ), + migrations.CreateModel( + name='ChildJob', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('task_id', models.UUIDField(blank=True, null=True, unique=True)), + ('created_date', models.DateTimeField(default=django.utils.timezone.now)), + ('updated_date', models.DateTimeField(default=django.utils.timezone.now)), + ('complete', models.BooleanField(default=False)), + ('status', models.CharField(choices=[('pending', 'Pending'), ('active', 'Active'), ('complete', 'Complete'), ('stopped', 'Stopped')], default='pending', max_length=50, null=True)), + ('parent_job', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='child_jobs', to='bookwyrm.parentjob')), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/bookwyrm/migrations/0182_merge_20230905_2240.py b/bookwyrm/migrations/0182_merge_20230905_2240.py new file mode 100644 index 0000000000..abcf54863e --- /dev/null +++ b/bookwyrm/migrations/0182_merge_20230905_2240.py @@ -0,0 +1,14 @@ +# Generated by Django 3.2.19 on 2023-09-05 22:40 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookwyrm', 
'0179_bookwyrmexportjob_bookwyrmimportjob_childjob_parentjob'), + ('bookwyrm', '0181_merge_20230806_2302'), + ] + + operations = [ + ] diff --git a/bookwyrm/models/__init__.py b/bookwyrm/models/__init__.py index 7b779190b6..c2e5308cca 100644 --- a/bookwyrm/models/__init__.py +++ b/bookwyrm/models/__init__.py @@ -26,6 +26,7 @@ from .group import Group, GroupMember, GroupMemberInvitation from .import_job import ImportJob, ImportItem +from .bookwyrm_import_job import BookwyrmImportJob from .site import SiteSettings, Theme, SiteInvite from .site import PasswordReset, InviteRequest diff --git a/bookwyrm/models/bookwyrm_export_job.py b/bookwyrm/models/bookwyrm_export_job.py new file mode 100644 index 0000000000..c69294fb4c --- /dev/null +++ b/bookwyrm/models/bookwyrm_export_job.py @@ -0,0 +1,222 @@ +import logging + +from django.db.models import FileField +from django.db.models import Q +from django.core.serializers.json import DjangoJSONEncoder +from django.core.files.base import ContentFile + +from bookwyrm import models +from bookwyrm.settings import DOMAIN +from bookwyrm.tasks import app, IMPORTS +from bookwyrm.models.job import ParentJob, ParentTask, SubTask, create_child_job +from uuid import uuid4 +from bookwyrm.utils.tar import BookwyrmTarFile + +logger = logging.getLogger(__name__) + +class BookwyrmExportJob(ParentJob): + """entry for a specific request to export a bookwyrm user""" + export_data = FileField(null=True) + + def start_job(self): + """Start the job""" + start_export_task.delay(job_id=self.id, no_children=True) + + return self + +@app.task(queue=IMPORTS, base=ParentTask) +def start_export_task(**kwargs): + """trigger the child tasks for each row""" + job = BookwyrmExportJob.objects.get(id=kwargs["job_id"]) + + # don't start the job if it was stopped from the UI + if job.complete: + return + + # This is where ChildJobs get made + job.export_data = ContentFile(b"", str(uuid4())) + + json_data = json_export(job.user) + tar_export(json_data, job.user, 
job.export_data) + + job.save(update_fields=["export_data"]) + +def tar_export(json_data: str, user, f): + f.open("wb") + with BookwyrmTarFile.open(mode="w:gz", fileobj=f) as tar: + tar.write_bytes(json_data.encode("utf-8")) + + # Add avatar image if present + if getattr(user, "avatar", False): + tar.add_image(user.avatar, filename="avatar") + + editions, books = get_books_for_user(user) + for book in editions: + tar.add_image(book.cover) + + f.close() + +def json_export(user): + """Generate an export for a user""" + # user + exported_user = {} + vals = [ + "username", + "name", + "summary", + "manually_approves_followers", + "hide_follows", + "show_goal", + "show_suggested_users", + "discoverable", + "preferred_timezone", + "default_post_privacy", + ] + for k in vals: + exported_user[k] = getattr(user, k) + + if getattr(user, "avatar", False): + exported_user["avatar"] = f'https://{DOMAIN}{getattr(user, "avatar").url}' + + # reading goals + reading_goals = models.AnnualGoal.objects.filter(user=user).distinct() + goals_list = [] + try: + for goal in reading_goals: + goals_list.append( + {"goal": goal.goal, "year": goal.year, "privacy": goal.privacy} + ) + except Exception: + pass + + try: + readthroughs = models.ReadThrough.objects.filter( + user=user + ).distinct().values() + readthroughs = list(readthroughs) + except Exception as e: + readthroughs = [] + + # books + editions, books = get_books_for_user(user) + final_books = [] + + for book in books.values(): + edition = editions.filter(id=book["id"]) + book["edition"] = edition.values()[0] + # authors + book["authors"] = list(edition.first().authors.all().values()) + # readthroughs + book_readthroughs = models.ReadThrough.objects.filter( + user=user, book=book["id"] + ).distinct().values() + book["readthroughs"] = list(book_readthroughs) + # shelves + shelf_books = models.ShelfBook.objects.filter( + user=user, book=book['id'] + ).distinct() + shelves_from_books = models.Shelf.objects.filter( + 
shelfbook__in=shelf_books, user=user + ) + + book["shelves"] = list(shelves_from_books.values()) + book["shelf_books"] = {} + + for shelf in shelves_from_books: + shelf_contents = models.ShelfBook.objects.filter( + user=user, shelf=shelf + ).distinct() + + book["shelf_books"][shelf.identifier] = list(shelf_contents.values()) + + # book lists + book_lists = models.List.objects.filter( + books__in=[book['id']], user=user + ).distinct() + book["lists"] = list(book_lists.values()) + book["list_items"] = {} + for blist in book_lists: + list_items = models.ListItem.objects.filter(book_list=blist).distinct() + book["list_items"][blist.name] = list(list_items.values()) + + # reviews + reviews = models.Review.objects.filter( + user=user, book=book["id"] + ).distinct() + + book["reviews"] = list(reviews.values()) + + # comments + comments = models.Comment.objects.filter( + user=user, book=book["id"] + ).distinct() + + book["comments"] = list(comments.values()) + logger.error("FINAL COMMENTS") + logger.error(book["comments"]) + + # quotes + quotes = models.Quotation.objects.filter( + user=user, book=book["id"] + ).distinct() + # quote_statuses = models.Status.objects.filter( + # id__in=quotes, user=kwargs["user"] + # ).distinct() + + book["quotes"] = list(quotes.values()) + + logger.error("FINAL QUOTES") + logger.error(book["quotes"]) + + # append everything + final_books.append(book) + + # saved book lists + saved_lists = models.List.objects.filter( + id__in=user.saved_lists.all() + ).distinct() + saved_lists = [l.remote_id for l in saved_lists] + + # follows + follows = models.UserFollows.objects.filter( + user_subject=user + ).distinct() + following = models.User.objects.filter( + userfollows_user_object__in=follows + ).distinct() + follows = [f.remote_id for f in following] + + # blocks + blocks = models.UserBlocks.objects.filter(user_subject=user).distinct() + blocking = models.User.objects.filter( + userblocks_user_object__in=blocks + ).distinct() + + blocks = 
[b.remote_id for b in blocking] + + data = { + "user": exported_user, + "goals": goals_list, + "books": final_books, + "saved_lists": saved_lists, + "follows": follows, + "blocked_users": blocks, + } + + return DjangoJSONEncoder().encode(data) + +def get_books_for_user(user): + """Get all the books and editions related to a user + :returns: tuple of editions, books + """ + all_books = models.Edition.viewer_aware_objects(user) + editions = all_books.filter( + Q(shelves__user=user) + | Q(readthrough__user=user) + | Q(review__user=user) + | Q(list__user=user) + | Q(comment__user=user) + | Q(quotation__user=user) + ).distinct() + books = models.Book.objects.filter(id__in=editions).distinct() + return editions, books diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py new file mode 100644 index 0000000000..68d05a1eb5 --- /dev/null +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -0,0 +1,507 @@ +from functools import reduce +import json +import operator + +from django.db.models import FileField, JSONField, CharField +from django.db.models import Q +from django.utils.dateparse import parse_datetime + +from bookwyrm import activitypub +from bookwyrm import models +from django.contrib.postgres.fields import ArrayField as DjangoArrayField +from bookwyrm.tasks import app, IMPORTS +from bookwyrm.models.job import ParentJob, ParentTask, ChildJob, SubTask, create_child_job +from bookwyrm.utils.tar import BookwyrmTarFile +import json + +class BookwyrmImportJob(ParentJob): + """entry for a specific request for importing a bookwyrm user backup""" + archive_file = FileField(null=True, blank=True) + import_data = JSONField(null=True) + required = DjangoArrayField(CharField(max_length=50, blank=True), blank=True) + + def start_job(self): + """Start the job""" + start_import_task.delay(job_id=self.id, no_children=True) + +@app.task(queue=IMPORTS, base=ParentTask) +def start_import_task(**kwargs): + """trigger the child import tasks for each user 
data""" + job = BookwyrmImportJob.objects.get(id=kwargs["job_id"]) + archive_file = job.archive_file + + # don't start the job if it was stopped from the UI + if job.complete: + return + + archive_file.open("rb") + with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar: + job.import_data = json.loads(tar.read("archive.json").decode("utf-8")) + + if "include_user_profile" in job.required: + update_user_profile(job.user, tar, job.import_data.get("user")) + if "include_user_settings" in job.required: + update_user_settings(job.user, job.import_data.get("user")) + if "include_goals" in job.required: + update_goals(job.user, job.import_data.get("goals")) + if "include_saved_lists" in job.required: + upsert_saved_lists(job.user, job.import_data.get("saved_lists")) + if "include_follows" in job.required: + upsert_follows(job.user, job.import_data.get("follows")) + if "include_blocks" in job.required: + upsert_user_blocks(job.user, job.import_data.get("blocked_users")) + + process_books(job, tar) + + job.save() + archive_file.close() + + +def process_books(job, tar): + """process user import data related to books""" + + # create the books. 
We need to merge Book and Edition instances + # and also check whether these books already exist in the DB + books = job.import_data.get("books") + + for data in books: + book = get_or_create_edition(data, tar) + + if "include_shelves" in job.required: + upsert_shelves(book, job.user, data) + + if "include_readthroughs" in job.required: + upsert_readthroughs( + data.get("readthroughs"), job.user, book.id + ) + + if "include_reviews" in job.required: + get_or_create_statuses( + job.user, models.Review, data.get("reviews"), book.id + ) + + if "include_comments" in job.required: + get_or_create_statuses( + job.user, models.Comment, data.get("comments"), book.id + ) + + if "include_quotes" in job.required: + get_or_create_statuses( + job.user, models.Quotation, data.get("quotes"), book.id + ) + if "include_lists" in job.required: + upsert_lists( + job.user, data.get("lists"), data.get("list_items"), book.id + ) + + +def get_or_create_edition(book_data, tar): + """Take a JSON string of book and edition data, + find or create the edition in the database and + return an edition instance""" + + cover_path = book_data.get("cover", None) # we use this further down but need to assign a var before cleaning + + clean_book = clean_values(book_data) + book = clean_book.copy() # don't mutate the original book data + + # prefer edition values only if they are not null + edition = clean_values(book["edition"]) + for key in edition.keys(): + if key not in book.keys() or ( + key in book.keys() and (edition[key] not in [None, ""]) + ): + book[key] = edition[key] + + existing = find_existing(models.Edition, book, None) + if existing: + return existing + + # the book is not in the local database, so we have to do this the hard way + local_authors = get_or_create_authors(book["authors"]) + + # get rid of everything that's not strictly in a Book + # or is many-to-many so can't be set directly + associated_values = [ + "edition", + "authors", + "readthroughs", + "shelves", + "shelf_books", 
+        "lists",
+        "list_items",
+        "reviews",
+        "comments",
+        "quotes",
+    ]
+
+    for val in associated_values:
+        del book[val]
+
+    # now we can save the book as an Edition
+    new_book = models.Edition.objects.create(**book)
+    new_book.authors.set(local_authors)  # now we can add authors with set()
+
+    # get cover from original book_data because we lost it in clean_values
+    if cover_path:
+        tar.write_image_to_file(cover_path, new_book.cover)
+
+    # NOTE: clean_values removes "last_edited_by" because it's a user ID from the old database
+    # if this is required, bookwyrm_export_job will need to bring in the user who edited it.
+
+    # create parent
+    work = models.Work.objects.create(title=book["title"])
+    work.authors.set(local_authors)
+    new_book.parent_work = work
+
+    new_book.save(broadcast=False)
+    return new_book
+
+
+def clean_values(data):
+    """Return a copy of ``data`` without keys that must not be reused locally.
+
+    The removed keys are database ids, remote ids, image paths and
+    references to rows of the exporting instance.
+
+    The input mapping is left untouched: the dicts passed in come from
+    ``job.import_data``, which is saved again after the import, so deleting
+    keys in place would silently rewrite the stored import data.
+
+    :param data: dict of field names to values from the export file
+    :returns: a new dict with the unwanted keys removed
+    """
+
+    values = (
+        "id",
+        "pk",
+        "remote_id",
+        "cover",
+        "preview_image",
+        "last_edited_by",
+        "last_edited_by_id",
+        "user",
+        "book_list",
+        "shelf_book",
+        "parent_work_id",
+    )
+
+    return {key: value for key, value in data.items() if key not in values}
+
+
+def find_existing(cls, data, user):
+    """Given a book or author, find any existing model instances
+
+    :param cls: model class to search (queried through ``cls.objects``)
+    :param data: dict of field values; only the identifier fields are used
+    :param user: unused, kept so existing callers keep working
+    :returns: the first instance matching ANY non-empty identifier, or None
+    """
+
+    identifiers = (
+        "openlibrary_key",
+        "inventaire_id",
+        "librarything_key",
+        "goodreads_key",
+        "asin",
+        "isfdb",
+        "isbn_10",
+        "isbn_13",
+        "oclc_number",
+        "origin_id",
+        "viaf",
+        "wikipedia_link",
+        "isni",
+        "gutenberg_id",
+    )
+
+    # one Q object per identifier that actually has a value in the data
+    queries = [
+        Q(**{field: data[field]})
+        for field in identifiers
+        if data.get(field) not in (None, "")
+    ]
+
+    if not queries:
+        return None
+
+    # OR the lookups together: a match on any single identifier is enough
+    return cls.objects.filter(reduce(operator.or_, queries)).first()
+
+def get_or_create_authors(data):
+    """Take a JSON string of authors find or create the authors
+    in the database and return a list of author instances"""
+
+    authors = 
[] + for author in data: + clean = clean_values(author) + existing = find_existing(models.Author, clean, None) + if existing: + authors.append(existing) + else: + new = models.Author.objects.create(**clean) + authors.append(new) + return authors + + +def upsert_readthroughs(data, user, book_id): + """Take a JSON string of readthroughs, find or create the + instances in the database and return a list of saved instances""" + + for rt in data: + start_date = ( + parse_datetime(rt["start_date"]) + if rt["start_date"] is not None + else None + ) + finish_date = ( + parse_datetime(rt["finish_date"]) + if rt["finish_date"] is not None + else None + ) + stopped_date = ( + parse_datetime(rt["stopped_date"]) + if rt["stopped_date"] is not None + else None + ) + readthrough = { + "user": user, + "book": models.Edition.objects.get(id=book_id), + "progress": rt["progress"], + "progress_mode": rt["progress_mode"], + "start_date": start_date, + "finish_date": finish_date, + "stopped_date": stopped_date, + "is_active": rt["is_active"], + } + + existing = models.ReadThrough.objects.filter(**readthrough).exists() + if not existing: + models.ReadThrough.objects.create(**readthrough) + + +def get_or_create_statuses(user, cls, data, book_id): + """Take a JSON string of a status and + find or create the instances in the database""" + + for book_status in data: + + keys = [ + "content", + "raw_content", + "content_warning", + "privacy", + "sensitive", + "published_date", + "reading_status", + "name", + "rating", + "quote", + "raw_quote", + "progress", + "progress_mode", + "position", + "position_mode" + ] + common = book_status.keys() & keys + status = {k: book_status[k] for k in common} + status["published_date"] = parse_datetime(book_status["published_date"]) + if "rating" in common: + status["rating"] = float(book_status["rating"]) + book = models.Edition.objects.get(id=book_id) + exists = cls.objects.filter(**status, book=book, user=user).exists() + if not exists: + 
cls.objects.create(**status, book=book, user=user) + + +def upsert_lists(user, lists, items, book_id): + """Take a list and ListItems as JSON and create DB entries if they don't already exist""" + + book = models.Edition.objects.get(id=book_id) + + for lst in lists: + book_list = models.List.objects.filter( + name=lst["name"], user=user + ).first() + if not book_list: + book_list = models.List.objects.create( + user=user, + name=lst["name"], + description=lst["description"], + curation=lst["curation"], + privacy=lst["privacy"], + ) + + # If the list exists but the ListItem doesn't don't try to add it + # with the same order as an existing item + count = models.ListItem.objects.filter(book_list=book_list).count() + + for i in items[lst["name"]]: + if not models.ListItem.objects.filter( + book=book, book_list=book_list, user=user + ).exists(): + models.ListItem.objects.create( + book=book, + book_list=book_list, + user=user, + notes=i["notes"], + order=i["order"] + count, + ) + +def upsert_shelves(book, user, book_data): + """Take shelf and ShelfBooks JSON objects and create + DB entries if they don't already exist""" + + shelves = book_data["shelves"] + + for shelf in shelves: + book_shelf = models.Shelf.objects.filter( + name=shelf["name"], user=user + ).first() + if not book_shelf: + book_shelf = models.Shelf.objects.create( + name=shelf["name"], + user=user, + identifier=shelf["identifier"], + description=shelf["description"], + editable=shelf["editable"], + privacy=shelf["privacy"], + ) + + for shelfbook in book_data["shelf_books"][book_shelf.identifier]: + + shelved_date = parse_datetime(shelfbook["shelved_date"]) + + if not models.ShelfBook.objects.filter( + book=book, shelf=book_shelf, user=user + ).exists(): + models.ShelfBook.objects.create( + book=book, + shelf=book_shelf, + user=user, + shelved_date=shelved_date, + ) + +def update_user_profile(user, tar, data): + """update the user's profile from import data""" + name = data.get("name") + username = 
data.get("username").split("@")[0]
+    user.name = name if name else username
+    user.summary = data.get("summary")
+    user.save(update_fields=["name", "summary"])
+
+    if data.get("avatar") is not None:
+        avatar_filename = next(filter(lambda n: n.startswith("avatar"), tar.getnames()))
+        tar.write_image_to_file(avatar_filename, user.avatar)
+
+def update_user_settings(user, data):
+    """update the user's settings from import data
+
+    :param user: the local user whose settings are overwritten
+    :param data: the "user" dict of the export file
+    :raises KeyError: if ``data`` lacks one of the settings fields
+    """
+
+    update_fields = [
+        "manually_approves_followers",
+        "hide_follows",
+        "show_goal",
+        "show_suggested_users",
+        "discoverable",
+        "preferred_timezone",
+        "default_post_privacy",
+    ]
+
+    for field in update_fields:
+        setattr(user, field, data[field])
+    # only write the columns we touched
+    user.save(update_fields=update_fields)
+
+
+@app.task(queue=IMPORTS, base=SubTask)
+def update_user_settings_task(job_id, child_id):
+    """wrapper task for user's settings import"""
+    parent_job = BookwyrmImportJob.objects.get(id=job_id)
+
+    return update_user_settings(parent_job.user, parent_job.import_data.get("user"))
+
+
+def update_goals(user, data):
+    """update the user's goals from import data
+
+    A goal that already exists for the same year is updated in place,
+    otherwise a new one is created.
+
+    :param user: the local user who owns the goals
+    :param data: list of goal dicts from the export file
+    """
+
+    for goal in data:
+        # edit the existing goal if there is one instead of making a new one
+        existing = models.AnnualGoal.objects.filter(
+            year=goal["year"], user=user
+        ).first()
+        if existing:
+            for key, value in goal.items():
+                setattr(existing, key, value)
+            existing.save()
+        else:
+            # Build the kwargs separately instead of writing "user" into
+            # `goal`: the goal dicts belong to job.import_data (a JSONField
+            # that is saved after the import) and a User instance is not
+            # JSON serializable.
+            models.AnnualGoal.objects.create(**{**goal, "user": user})
+
+
+@app.task(queue=IMPORTS, base=SubTask)
+def update_goals_task(job_id, child_id):
+    """wrapper task for user's goals import"""
+    parent_job = BookwyrmImportJob.objects.get(id=job_id)
+
+    return update_goals(parent_job.user, parent_job.import_data.get("goals"))
+
+
+def upsert_saved_lists(user, values):
+    """Take a list of remote ids and add as saved lists
+
+    Remote ids that cannot be resolved to a list are skipped.
+    """
+
+    for remote_id in values:
+        book_list = activitypub.resolve_remote_id(remote_id, models.List)
+        if book_list:
+            user.saved_lists.add(book_list)
+
+
+@app.task(queue=IMPORTS, base=SubTask)
+def 
upsert_saved_lists_task(job_id, child_id): + """wrapper task for user's saved lists import""" + parent_job = BookwyrmImportJob.objects.get(id=job_id) + + return upsert_saved_lists( + parent_job.user, parent_job.import_data.get("saved_lists") + ) + + +def upsert_follows(user, values): + """Take a list of remote ids and add as follows""" + + for remote_id in values: + followee = activitypub.resolve_remote_id(remote_id, models.User) + if followee: + ( + follow_request, + created, + ) = models.UserFollowRequest.objects.get_or_create( + user_subject=user, + user_object=followee, + ) + + if not created: + # this request probably failed to connect with the remote + # that means we should save to trigger a re-broadcast + follow_request.save() + + +@app.task(queue=IMPORTS, base=SubTask) +def upsert_follows_task(job_id, child_id): + """wrapper task for user's follows import""" + parent_job = BookwyrmImportJob.objects.get(id=job_id) + + return upsert_follows(parent_job.user, parent_job.import_data.get("follows")) + + +def upsert_user_blocks(user, user_ids): + """block users""" + + for user_id in user_ids: + user_object = activitypub.resolve_remote_id(user_id, models.User) + if user_object: + exists = models.UserBlocks.objects.filter( + user_subject=user, user_object=user_object + ).exists() + if not exists: + models.UserBlocks.objects.create( + user_subject=user, user_object=user_object + ) + # remove the blocked users's lists from the groups + models.List.remove_from_group(user, user_object) + # remove the blocked user from all blocker's owned groups + models.GroupMember.remove(user, user_object) + + +@app.task(queue=IMPORTS, base=SubTask) +def upsert_user_blocks_task(job_id, child_id): + """wrapper task for user's blocks import""" + parent_job = BookwyrmImportJob.objects.get(id=job_id) + + return upsert_user_blocks( + parent_job.user, parent_job.import_data.get("blocked_users") + ) diff --git a/bookwyrm/models/job.py b/bookwyrm/models/job.py new file mode 100644 index 
0000000000..9c97b57b48 --- /dev/null +++ b/bookwyrm/models/job.py @@ -0,0 +1,277 @@ +"""Everything needed for Celery to multi-thread complex tasks.""" + +from django.db import models +from django.db import transaction +from django.utils.translation import gettext_lazy as _ +from django.utils import timezone +from bookwyrm.models.user import User + +from bookwyrm.tasks import app + +class Job(models.Model): + """Abstract model to store the state of a Task.""" + + class Status(models.TextChoices): + """Possible job states.""" + + PENDING = "pending", _("Pending") + ACTIVE = "active", _("Active") + COMPLETE = "complete", _("Complete") + STOPPED = "stopped", _("Stopped") + + task_id = models.UUIDField(unique=True, null=True, blank=True) + + created_date = models.DateTimeField(default=timezone.now) + updated_date = models.DateTimeField(default=timezone.now) + complete = models.BooleanField(default=False) + status = models.CharField( + max_length=50, choices=Status.choices, default=Status.PENDING, null=True + ) + + class Meta: + abstract = True + + def complete_job(self): + """Report that the job has completed""" + if self.complete: + return + + self.status = self.Status.COMPLETE + self.complete = True + self.updated_date = timezone.now() + + self.save(update_fields=["status", "complete", "updated_date"]) + + def stop_job(self): + """Stop the job""" + if self.complete: + return + + self.__terminate_job() + + self.status = self.Status.STOPPED + self.complete = True + self.updated_date = timezone.now() + + self.save(update_fields=["status", "complete", "updated_date"]) + + def set_status(self, status): + """Set job status""" + if self.complete: + return + + if self.status == status: + return + + if status == self.Status.COMPLETE: + self.complete_job() + return + + if status == self.Status.STOPPED: + self.stop_job() + return + + self.updated_date = timezone.now() + self.status = status + + self.save(update_fields=["status", "updated_date"]) + + def __terminate_job(self): + 
"""Tell workers to ignore and not execute this task."""
+        app.control.revoke(self.task_id, terminate=True)
+
+class ParentJob(Job):
+    """Store the state of a Task which can spawn many :model:`ChildJob`s to spread
+    resource load.
+
+    Intended to be sub-classed if necessary via proxy or
+    multi-table inheritance.
+    Extends :model:`Job`.
+    """
+
+    user = models.ForeignKey(User, on_delete=models.CASCADE)
+
+    def complete_job(self):
+        """Report that the job has completed and stop pending
+        children. Extend.
+        """
+        super().complete_job()
+        self.__terminate_pending_child_jobs()
+
+    def stop_job(self):
+        """Stop the job and any pending child jobs. Extend.
+
+        This is an override of the public ``stop_job`` rather than of
+        ``__terminate_job``: double-underscore names are mangled per class,
+        so ``Job.stop_job`` would never call a ``ParentJob.__terminate_job``
+        and ``super().__terminate_job()`` cannot be resolved from here.
+        """
+        if self.complete:
+            return
+
+        # revokes this job's own task and marks it as stopped
+        super().stop_job()
+        self.__terminate_pending_child_jobs()
+
+    def notify_child_job_complete(self):
+        """let the job know when the items get work done"""
+        if self.complete:
+            return
+
+        self.updated_date = timezone.now()
+        self.save(update_fields=["updated_date"])
+
+        # complete the parent once no unfinished child jobs are left
+        if not self.complete and self.has_completed:
+            self.complete_job()
+
+    def __terminate_pending_child_jobs(self):
+        """Tell workers to ignore and not execute any pending child tasks."""
+        pending = self.pending_child_jobs
+        task_ids = pending.filter(task_id__isnull=False).values_list(
+            "task_id", flat=True
+        )
+        # task_id is stored as a UUIDField; pass plain string ids to Celery
+        app.control.revoke([str(task_id) for task_id in task_ids])
+
+        # model instances have no update(); update the rows through the queryset
+        pending.update(status=self.Status.STOPPED)
+
+    @property
+    def has_completed(self):
+        """has this job finished"""
+        return not self.pending_child_jobs.exists()
+
+    @property
+    def pending_child_jobs(self):
+        """items that haven't been processed yet"""
+        return self.child_jobs.filter(complete=False)
+
+class ChildJob(Job):
+    """ Stores the state of a Task for the related :model:`ParentJob`.
+
+    Intended to be sub-classed if necessary via proxy or
+    multi-table inheritance.
+    Extends :model:`Job`. 
+ """ + + parent_job = models.ForeignKey( + ParentJob, on_delete=models.CASCADE, related_name="child_jobs" + ) + + def set_status(self, status): + """Set job and parent_job status. Extend.""" + super().set_status(status) + + if status == self.Status.ACTIVE and self.parent_job.status == self.Status.PENDING: + self.parent_job.set_status(self.Status.ACTIVE) + + def complete_job(self): + """Report to parent_job that the job has completed. Extend.""" + super().complete_job() + self.parent_job.notify_child_job_complete() + +class ParentTask(app.Task): + """Used with ParentJob, Abstract Tasks execute code at specific points in + a Task's lifecycle, applying to all Tasks with the same 'base'. + + All status & ParentJob.task_id assignment is managed here for you. + Usage e.g. @app.task(base=ParentTask) + """ + + def before_start(self, task_id, args, kwargs): + """Handler called before the task starts. Override. + + Prepare ParentJob before the task starts. + + Arguments: + task_id (str): Unique id of the task to execute. + args (Tuple): Original arguments for the task to execute. + kwargs (Dict): Original keyword arguments for the task to execute. + + Keyword Arguments: + job_id (int): Unique 'id' of the ParentJob. + no_children (bool): If 'True' this is the only Task expected to run + for the given ParentJob. + + Returns: + None: The return value of this handler is ignored. + """ + job = ParentJob.objects.get(id=kwargs["job_id"]) + job.task_id = task_id + job.save(update_fields=["task_id"]) + + if kwargs["no_children"]: + job.set_status(ChildJob.Status.ACTIVE) + + def on_success(self, retval, task_id, args, kwargs): + """Run by the worker if the task executes successfully. Override. + + Update ParentJob on Task complete. + + Arguments: + retval (Any): The return value of the task. + task_id (str): Unique id of the executed task. + args (Tuple): Original arguments for the executed task. + kwargs (Dict): Original keyword arguments for the executed task. 
+ + Keyword Arguments: + job_id (int): Unique 'id' of the ParentJob. + no_children (bool): If 'True' this is the only Task expected to run + for the given ParentJob. + + Returns: + None: The return value of this handler is ignored. + """ + + if kwargs["no_children"]: + job = ParentJob.objects.get(id=kwargs["job_id"]) + job.complete_job() + +class SubTask(app.Task): + """Used with ChildJob, Abstract Tasks execute code at specific points in + a Task's lifecycle, applying to all Tasks with the same 'base'. + + All status & ChildJob.task_id assignment is managed here for you. + Usage e.g. @app.task(base=SubTask) + """ + + def before_start(self, task_id, args, kwargs): + """Handler called before the task starts. Override. + + Prepare ChildJob before the task starts. + + Arguments: + task_id (str): Unique id of the task to execute. + args (Tuple): Original arguments for the task to execute. + kwargs (Dict): Original keyword arguments for the task to execute. + + Keyword Arguments: + job_id (int): Unique 'id' of the ParentJob. + child_id (int): Unique 'id' of the ChildJob. + + Returns: + None: The return value of this handler is ignored. + """ + child_job = ChildJob.objects.get(id=kwargs["child_id"]) + child_job.task_id = task_id + child_job.save(update_fields=["task_id"]) + child_job.set_status(ChildJob.Status.ACTIVE) + + def on_success(self, retval, task_id, args, kwargs): + """Run by the worker if the task executes successfully. Override. + + Notify ChildJob of task completion. + + Arguments: + retval (Any): The return value of the task. + task_id (str): Unique id of the executed task. + args (Tuple): Original arguments for the executed task. + kwargs (Dict): Original keyword arguments for the executed task. + + Keyword Arguments: + job_id (int): Unique 'id' of the ParentJob. + child_id (int): Unique 'id' of the ChildJob. + + Returns: + None: The return value of this handler is ignored. 
+ """ + subtask = ChildJob.objects.get(id=kwargs["child_id"]) + subtask.complete_job() + +@transaction.atomic +def create_child_job(parent_job, task_callback): + """Utility method for creating a ChildJob and running a task to avoid DB race conditions""" + child_job = ChildJob.objects.create(parent_job=parent_job) + transaction.on_commit(lambda: task_callback.delay(job_id=parent_job.id, child_id=child_job.id)) + + return child_job diff --git a/bookwyrm/templates/import/import_user.html b/bookwyrm/templates/import/import_user.html new file mode 100644 index 0000000000..86e99f6578 --- /dev/null +++ b/bookwyrm/templates/import/import_user.html @@ -0,0 +1,163 @@ +{% extends 'layout.html' %} +{% load i18n %} +{% load humanize %} + +{% block title %}{% trans "Import User" %}{% endblock %} + +{% block content %} +
{% blocktrans %}Currently you are allowed to import one user every {{ user_import_limit_reset }} days.{% endblocktrans %}
+{% blocktrans %}You have {{ allowed_imports }} left.{% endblocktrans %}
++ {% if recent_avg_hours %} + {% blocktrans trimmed with hours=recent_avg_hours|floatformat:0|intcomma %} + On average, recent imports have taken {{ hours }} hours. + {% endblocktrans %} + {% else %} + {% blocktrans trimmed with minutes=recent_avg_minutes|floatformat:0|intcomma %} + On average, recent imports have taken {{ minutes }} minutes. + {% endblocktrans %} + {% endif %} +
++ {% trans "Date Created" %} + | ++ {% trans "Last Updated" %} + | ++ {% trans "Status" %} + | +|
---|---|---|---|
+ {% trans "No recent imports" %} + | +|||
+ {{ job.created_date }} + |
+ {{ job.updated_date }} | ++ + {% if job.status %} + {{ job.status }} + {{ job.status_display }} + {% elif job.complete %} + {% trans "Complete" %} + {% else %} + {% trans "Active" %} + {% endif %} + + | +
+ {% trans "Your exported archive file will include all user data for import into another Bookwyrm server" %} +
++
+ ++ {% trans "User export files will show 'complete' once ready. This may take a little while. Click the link to download your file." %} +
++ {% trans "Date Created" %} + | ++ {% trans "Last Updated" %} + | ++ {% trans "Status" %} + | +|
---|---|---|---|
+ {% trans "No recent exports" %} + | +|||
+ {% if job.complete %}
+
+ {% else %}
+ {{ job.created_date }} + {% endif %} + |
+ {{ job.updated_date }} | ++ + {% if job.status %} + {{ job.status }} + {{ job.status_display }} + {% elif job.complete %} + {% trans "Complete" %} + {% else %} + {% trans "Active" %} + {% endif %} + + | +
- {% trans "Your export will include all the books on your shelves, books you have reviewed, and books with reading activity." %}
+ {% trans "Your CSV export file will include all the books on your shelves, books you have reviewed, and books with reading activity.
Use this to import into a service like Goodreads." %}