-
-
Notifications
You must be signed in to change notification settings - Fork 267
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement self-contained archives to import and export entire users b…
…etween instances (#38) Co-authored-by: Daniel Burgess <developerdannymate@gmail.com> Co-authored-by: Hugh Rundle <hugh@hughrundle.net> Co-authored-by: dannymate <dannymate@noreply.codeberg.org> Co-authored-by: hughrun <hughrun@noreply.codeberg.org> Reviewed-on: https://codeberg.org/GuildAlpha/bookwyrm/pulls/38 Co-authored-by: CSDUMMI <csdummi.misquality@simplelogin.co> Co-committed-by: CSDUMMI <csdummi.misquality@simplelogin.co>
- Loading branch information
1 parent
bc870a3
commit 26ecea0
Showing
24 changed files
with
2,614 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
"""Import data from Bookwyrm export files""" | ||
from bookwyrm import settings | ||
from bookwyrm.models.bookwyrm_import_job import BookwyrmImportJob | ||
|
||
class BookwyrmImporter:
    """Create import jobs from a Bookwyrm user export archive.

    This is kind of a combination of an importer and a connector.
    """

    def process_import(self, user, archive_file, settings):
        """Create a BookwyrmImportJob for an export archive.

        :param user: the user the job is created for
        :param archive_file: the export archive stored on the job
        :param settings: mapping of option name to value; only options whose
            value is exactly ``"on"`` are recorded on the job as ``required``.
            Note that this parameter shadows the module-level ``settings``
            import for the duration of the method.
        :returns: the newly created job (this method does not start it)
        """
        # keep only the options that were switched on
        enabled_options = [
            option for option in settings if settings.get(option) == "on"
        ]

        return BookwyrmImportJob.objects.create(
            user=user, archive_file=archive_file, required=enabled_options
        )
71 changes: 71 additions & 0 deletions
71
bookwyrm/migrations/0179_bookwyrmexportjob_bookwyrmimportjob_childjob_parentjob.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# Generated by Django 3.2.19 on 2023-08-31 22:57 | ||
|
||
from django.conf import settings | ||
import django.contrib.postgres.fields | ||
from django.db import migrations, models | ||
import django.db.models.deletion | ||
import django.utils.timezone | ||
|
||
|
||
def _common_job_fields():
    """Return fresh definitions of the columns ParentJob and ChildJob share."""
    return [
        (
            "id",
            models.AutoField(
                auto_created=True,
                primary_key=True,
                serialize=False,
                verbose_name="ID",
            ),
        ),
        ("task_id", models.UUIDField(blank=True, null=True, unique=True)),
        ("created_date", models.DateTimeField(default=django.utils.timezone.now)),
        ("updated_date", models.DateTimeField(default=django.utils.timezone.now)),
        ("complete", models.BooleanField(default=False)),
        (
            "status",
            models.CharField(
                choices=[
                    ("pending", "Pending"),
                    ("active", "Active"),
                    ("complete", "Complete"),
                    ("stopped", "Stopped"),
                ],
                default="pending",
                max_length=50,
                null=True,
            ),
        ),
    ]


def _parent_job_link():
    """Return the parent-link primary key that points at ParentJob."""
    return (
        "parentjob_ptr",
        models.OneToOneField(
            auto_created=True,
            on_delete=django.db.models.deletion.CASCADE,
            parent_link=True,
            primary_key=True,
            serialize=False,
            to="bookwyrm.parentjob",
        ),
    )


class Migration(migrations.Migration):
    """Create ParentJob, BookwyrmExportJob, BookwyrmImportJob and ChildJob."""

    dependencies = [
        ("bookwyrm", "0178_auto_20230328_2132"),
    ]

    operations = [
        migrations.CreateModel(
            name="ParentJob",
            fields=_common_job_fields()
            + [
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
        migrations.CreateModel(
            name="BookwyrmExportJob",
            fields=[
                _parent_job_link(),
                ("export_data", models.FileField(null=True, upload_to="")),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.parentjob",),
        ),
        migrations.CreateModel(
            name="BookwyrmImportJob",
            fields=[
                _parent_job_link(),
                (
                    "archive_file",
                    models.FileField(blank=True, null=True, upload_to=""),
                ),
                ("import_data", models.JSONField(null=True)),
                (
                    "required",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.CharField(blank=True, max_length=50),
                        blank=True,
                        size=None,
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.parentjob",),
        ),
        migrations.CreateModel(
            name="ChildJob",
            fields=_common_job_fields()
            + [
                (
                    "parent_job",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="child_jobs",
                        to="bookwyrm.parentjob",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Generated by Django 3.2.19 on 2023-09-05 22:40 | ||
|
||
from django.db import migrations | ||
|
||
|
||
class Migration(migrations.Migration):
    """Merge point for two migration branches; makes no schema changes."""

    # both branches have to be applied before this node
    dependencies = [
        (
            "bookwyrm",
            "0179_bookwyrmexportjob_bookwyrmimportjob_childjob_parentjob",
        ),
        ("bookwyrm", "0181_merge_20230806_2302"),
    ]

    # nothing to run: this migration only joins the two dependencies above
    operations = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
import logging | ||
|
||
from django.db.models import FileField | ||
from django.db.models import Q | ||
from django.core.serializers.json import DjangoJSONEncoder | ||
from django.core.files.base import ContentFile | ||
|
||
from bookwyrm import models | ||
from bookwyrm.settings import DOMAIN | ||
from bookwyrm.tasks import app, IMPORTS | ||
from bookwyrm.models.job import ParentJob, ParentTask, SubTask, create_child_job | ||
from uuid import uuid4 | ||
from bookwyrm.utils.tar import BookwyrmTarFile | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
class BookwyrmExportJob(ParentJob):
    """entry for a specific request to export a bookwyrm user"""

    # the generated archive; nullable, filled in by start_export_task
    export_data = FileField(null=True)

    def start_job(self):
        """Queue the export task for this job and return the job itself."""
        # the job id and no_children=True are passed to the task as kwargs
        task_kwargs = {"job_id": self.id, "no_children": True}
        start_export_task.delay(**task_kwargs)

        return self
|
||
@app.task(queue=IMPORTS, base=ParentTask)
def start_export_task(**kwargs):
    """Build the export archive for one BookwyrmExportJob and store it.

    Expects ``job_id`` in ``kwargs``. Does nothing when the job is already
    marked complete; otherwise generates the JSON export for the job's user,
    packs it into the job's ``export_data`` file and saves that field.
    """
    export_job = BookwyrmExportJob.objects.get(id=kwargs["job_id"])

    # don't start the job if it was stopped from the UI
    if export_job.complete:
        return

    # start from an empty file with a random UUID name; tar_export fills it
    archive_name = str(uuid4())
    export_job.export_data = ContentFile(b"", archive_name)

    serialized_user = json_export(export_job.user)
    tar_export(serialized_user, export_job.user, export_job.export_data)

    # only the archive field changed, so only that column is written
    export_job.save(update_fields=["export_data"])
|
||
def tar_export(json_data: str, user, f):
    """Write the JSON export and the user's images into a gzipped tar file.

    :param json_data: serialized export; stored UTF-8 encoded in the archive
    :param user: user whose avatar and book covers are added to the archive
    :param f: file object to write to; it is opened in ``"wb"`` mode here and
        is always closed again before this function returns or raises
    """
    f.open("wb")
    try:
        with BookwyrmTarFile.open(mode="w:gz", fileobj=f) as tar:
            tar.write_bytes(json_data.encode("utf-8"))

            # Add avatar image if present
            if getattr(user, "avatar", False):
                tar.add_image(user.avatar, filename="avatar")

            # only the editions are needed here, not the parent books
            editions, _ = get_books_for_user(user)
            for edition in editions:
                # same guard as for the avatar: an edition without a cover
                # file has nothing that could be added to the archive
                if getattr(edition, "cover", False):
                    tar.add_image(edition.cover)
    finally:
        # release the file handle even when building the archive fails
        f.close()
|
||
def _export_user_profile(user):
    """Collect the exported profile settings and, if set, the avatar URL."""
    exported_fields = (
        "username",
        "name",
        "summary",
        "manually_approves_followers",
        "hide_follows",
        "show_goal",
        "show_suggested_users",
        "discoverable",
        "preferred_timezone",
        "default_post_privacy",
    )
    profile = {field: getattr(user, field) for field in exported_fields}

    if getattr(user, "avatar", False):
        profile["avatar"] = f"https://{DOMAIN}{user.avatar.url}"

    return profile


def _export_reading_goals(user):
    """List the user's annual reading goals as plain dicts (best effort)."""
    reading_goals = models.AnnualGoal.objects.filter(user=user).distinct()
    goals = []
    try:
        for goal in reading_goals:
            goals.append(
                {"goal": goal.goal, "year": goal.year, "privacy": goal.privacy}
            )
    except Exception:  # pylint: disable=broad-except
        # goals stay best effort so the rest of the export still succeeds,
        # but the failure is recorded instead of being silently dropped
        logger.exception("Failed to export reading goals")
    return goals


def _export_book(book, editions, user):
    """Add the user's data about one book to its ``values()`` dict.

    ``book`` is modified in place and returned.
    """
    book_id = book["id"]

    edition = editions.filter(id=book_id)
    book["edition"] = edition.values()[0]
    # authors
    book["authors"] = list(edition.first().authors.all().values())

    # readthroughs
    book_readthroughs = (
        models.ReadThrough.objects.filter(user=user, book=book_id)
        .distinct()
        .values()
    )
    book["readthroughs"] = list(book_readthroughs)

    # shelves
    shelf_books = models.ShelfBook.objects.filter(
        user=user, book=book_id
    ).distinct()
    shelves_from_books = models.Shelf.objects.filter(
        shelfbook__in=shelf_books, user=user
    )
    book["shelves"] = list(shelves_from_books.values())
    # for every shelf holding this book: all of the user's entries on it
    book["shelf_books"] = {
        shelf.identifier: list(
            models.ShelfBook.objects.filter(user=user, shelf=shelf)
            .distinct()
            .values()
        )
        for shelf in shelves_from_books
    }

    # book lists
    book_lists = models.List.objects.filter(
        books__in=[book_id], user=user
    ).distinct()
    book["lists"] = list(book_lists.values())
    # NOTE(review): items are keyed by list name, so two lists sharing a name
    # would overwrite each other; kept as is because the key is part of the
    # export format
    book["list_items"] = {
        blist.name: list(
            models.ListItem.objects.filter(book_list=blist).distinct().values()
        )
        for blist in book_lists
    }

    # reviews
    reviews = models.Review.objects.filter(user=user, book=book_id).distinct()
    book["reviews"] = list(reviews.values())

    # comments
    comments = models.Comment.objects.filter(user=user, book=book_id).distinct()
    book["comments"] = list(comments.values())

    # quotes
    quotes = models.Quotation.objects.filter(user=user, book=book_id).distinct()
    book["quotes"] = list(quotes.values())

    # log counts only: the statuses themselves are user content
    logger.debug(
        "exported %d comments and %d quotes for book %s",
        len(book["comments"]),
        len(book["quotes"]),
        book_id,
    )

    return book


def json_export(user):
    """Generate an export for a user

    :param user: the user whose data is exported
    :returns: a JSON string with the keys ``user``, ``goals``, ``books``,
        ``saved_lists``, ``follows`` and ``blocked_users``
    """
    # user
    exported_user = _export_user_profile(user)

    # reading goals
    goals_list = _export_reading_goals(user)

    # books
    editions, books = get_books_for_user(user)
    final_books = [_export_book(book, editions, user) for book in books.values()]

    # saved book lists
    saved_lists = models.List.objects.filter(
        id__in=user.saved_lists.all()
    ).distinct()
    saved_lists = [saved_list.remote_id for saved_list in saved_lists]

    # follows
    follows = models.UserFollows.objects.filter(user_subject=user).distinct()
    following = models.User.objects.filter(
        userfollows_user_object__in=follows
    ).distinct()
    follows = [followed.remote_id for followed in following]

    # blocks
    blocks = models.UserBlocks.objects.filter(user_subject=user).distinct()
    blocking = models.User.objects.filter(
        userblocks_user_object__in=blocks
    ).distinct()
    blocks = [blocked.remote_id for blocked in blocking]

    data = {
        "user": exported_user,
        "goals": goals_list,
        "books": final_books,
        "saved_lists": saved_lists,
        "follows": follows,
        "blocked_users": blocks,
    }

    return DjangoJSONEncoder().encode(data)
|
||
def get_books_for_user(user):
    """Get all the books and editions related to a user

    An edition is related to the user when it is matched through a shelf,
    readthrough, review, list, comment or quotation belonging to that user.

    :returns: tuple of editions, books
    """
    # OR the per-relation lookups together, in the same order as listed
    related_to_user = Q(shelves__user=user)
    for lookup in (
        "readthrough__user",
        "review__user",
        "list__user",
        "comment__user",
        "quotation__user",
    ):
        related_to_user |= Q(**{lookup: user})

    visible_editions = models.Edition.viewer_aware_objects(user)
    editions = visible_editions.filter(related_to_user).distinct()

    # Book rows whose ids match the selected editions
    books = models.Book.objects.filter(id__in=editions).distinct()
    return editions, books
Oops, something went wrong.