Skip to content

Commit

Permalink
Implement self-contained archives to import and export entire users b…
Browse files Browse the repository at this point in the history
…etween instances (bookwyrm-social#38)

Co-authored-by: Daniel Burgess <developerdannymate@gmail.com>
Co-authored-by: Hugh Rundle <hugh@hughrundle.net>
Co-authored-by: dannymate <dannymate@noreply.codeberg.org>
Co-authored-by: hughrun <hughrun@noreply.codeberg.org>
Reviewed-on: https://codeberg.org/GuildAlpha/bookwyrm/pulls/38
Co-authored-by: CSDUMMI <csdummi.misquality@simplelogin.co>
Co-committed-by: CSDUMMI <csdummi.misquality@simplelogin.co>
  • Loading branch information
5 people committed Sep 5, 2023
1 parent bc870a3 commit 26ecea0
Show file tree
Hide file tree
Showing 24 changed files with 2,614 additions and 7 deletions.
4 changes: 4 additions & 0 deletions bookwyrm/forms/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class ImportForm(forms.Form):
csv_file = forms.FileField()


class ImportUserForm(forms.Form):
    """Form with a single required file upload field, ``archive_file``.

    NOTE(review): presumably this carries the user export archive handled by
    the Bookwyrm importer - confirm against the view that uses this form.
    """

    archive_file = forms.FileField()


class ShelfForm(CustomForm):
class Meta:
model = models.Shelf
Expand Down
1 change: 1 addition & 0 deletions bookwyrm/importers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
""" import classes """

from .importer import Importer
from .bookwyrm_import import BookwyrmImporter
from .calibre_import import CalibreImporter
from .goodreads_import import GoodreadsImporter
from .librarything_import import LibrarythingImporter
Expand Down
18 changes: 18 additions & 0 deletions bookwyrm/importers/bookwyrm_import.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Import data from Bookwyrm export files"""
from bookwyrm import settings
from bookwyrm.models.bookwyrm_import_job import BookwyrmImportJob

class BookwyrmImporter:
    """Create import jobs from a Bookwyrm user export file.

    This is kind of a combination of an importer and a connector.
    """

    def process_import(self, user, archive_file, settings):
        """Create the import job for an uploaded Bookwyrm export file.

        :param user: the user the data is imported for
        :param archive_file: the uploaded export file, stored on the job
        :param settings: mapping of option name to submitted value; every
            option whose value is exactly ``"on"`` is recorded on the job as
            required. (The parameter shadows the module-level ``settings``
            import inside this method.)
        :returns: the newly created ``BookwyrmImportJob``
        """
        enabled_options = [
            option for option in settings if settings.get(option) == "on"
        ]

        return BookwyrmImportJob.objects.create(
            user=user, archive_file=archive_file, required=enabled_options
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Generated by Django 3.2.19 on 2023-08-31 22:57

from django.conf import settings
import django.contrib.postgres.fields
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone


class Migration(migrations.Migration):
    """Create the ParentJob, ChildJob and Bookwyrm import/export job tables."""

    dependencies = [
        ("bookwyrm", "0178_auto_20230328_2132"),
    ]

    operations = [
        # generic parent job, owned by a user
        migrations.CreateModel(
            name="ParentJob",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "task_id",
                    models.UUIDField(blank=True, null=True, unique=True),
                ),
                (
                    "created_date",
                    models.DateTimeField(default=django.utils.timezone.now),
                ),
                (
                    "updated_date",
                    models.DateTimeField(default=django.utils.timezone.now),
                ),
                ("complete", models.BooleanField(default=False)),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "Pending"),
                            ("active", "Active"),
                            ("complete", "Complete"),
                            ("stopped", "Stopped"),
                        ],
                        default="pending",
                        max_length=50,
                        null=True,
                    ),
                ),
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
        # export job: a ParentJob plus the generated export file
        migrations.CreateModel(
            name="BookwyrmExportJob",
            fields=[
                (
                    "parentjob_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="bookwyrm.parentjob",
                    ),
                ),
                ("export_data", models.FileField(null=True, upload_to="")),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.parentjob",),
        ),
        # import job: a ParentJob plus the uploaded archive, its parsed data
        # and the list of required options
        migrations.CreateModel(
            name="BookwyrmImportJob",
            fields=[
                (
                    "parentjob_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="bookwyrm.parentjob",
                    ),
                ),
                (
                    "archive_file",
                    models.FileField(blank=True, null=True, upload_to=""),
                ),
                ("import_data", models.JSONField(null=True)),
                (
                    "required",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.CharField(blank=True, max_length=50),
                        blank=True,
                        size=None,
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.parentjob",),
        ),
        # child job, attached to its parent through ``child_jobs``
        migrations.CreateModel(
            name="ChildJob",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "task_id",
                    models.UUIDField(blank=True, null=True, unique=True),
                ),
                (
                    "created_date",
                    models.DateTimeField(default=django.utils.timezone.now),
                ),
                (
                    "updated_date",
                    models.DateTimeField(default=django.utils.timezone.now),
                ),
                ("complete", models.BooleanField(default=False)),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "Pending"),
                            ("active", "Active"),
                            ("complete", "Complete"),
                            ("stopped", "Stopped"),
                        ],
                        default="pending",
                        max_length=50,
                        null=True,
                    ),
                ),
                (
                    "parent_job",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="child_jobs",
                        to="bookwyrm.parentjob",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
    ]
14 changes: 14 additions & 0 deletions bookwyrm/migrations/0182_merge_20230905_2240.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Generated by Django 3.2.19 on 2023-09-05 22:40

from django.db import migrations


class Migration(migrations.Migration):
    """Merge migrations 0179 and 0181 into one history; no schema changes."""

    dependencies = [
        (
            "bookwyrm",
            "0179_bookwyrmexportjob_bookwyrmimportjob_childjob_parentjob",
        ),
        ("bookwyrm", "0181_merge_20230806_2302"),
    ]

    operations = []
1 change: 1 addition & 0 deletions bookwyrm/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from .group import Group, GroupMember, GroupMemberInvitation

from .import_job import ImportJob, ImportItem
from .bookwyrm_import_job import BookwyrmImportJob

from .site import SiteSettings, Theme, SiteInvite
from .site import PasswordReset, InviteRequest
Expand Down
222 changes: 222 additions & 0 deletions bookwyrm/models/bookwyrm_export_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
import logging

from django.db.models import FileField
from django.db.models import Q
from django.core.serializers.json import DjangoJSONEncoder
from django.core.files.base import ContentFile

from bookwyrm import models
from bookwyrm.settings import DOMAIN
from bookwyrm.tasks import app, IMPORTS
from bookwyrm.models.job import ParentJob, ParentTask, SubTask, create_child_job
from uuid import uuid4
from bookwyrm.utils.tar import BookwyrmTarFile

logger = logging.getLogger(__name__)

class BookwyrmExportJob(ParentJob):
    """A single request to export one bookwyrm user."""

    # the generated export file; nullable because it is only attached once
    # the export task has run
    export_data = FileField(null=True)

    def start_job(self):
        """Queue the export task for this job.

        :returns: this job, so calls can be chained
        """
        task_kwargs = {"job_id": self.id, "no_children": True}
        start_export_task.delay(**task_kwargs)
        return self

@app.task(queue=IMPORTS, base=ParentTask)
def start_export_task(**kwargs):
    """Build the export file for the export job given by ``kwargs["job_id"]``.

    The user's data is serialized to JSON, packed into a tar archive together
    with the related images, and stored on the job's ``export_data`` field.
    """
    export_job = BookwyrmExportJob.objects.get(id=kwargs["job_id"])

    # a job that is already complete (e.g. stopped from the UI) is left alone
    if export_job.complete:
        return

    # start from an empty file with a random name; tar_export fills it
    archive_name = str(uuid4())
    export_job.export_data = ContentFile(b"", archive_name)

    serialized_user = json_export(export_job.user)
    tar_export(serialized_user, export_job.user, export_job.export_data)

    export_job.save(update_fields=["export_data"])

def tar_export(json_data: str, user, f):
    """Write a user's export archive (JSON data plus images) into ``f``.

    :param json_data: the serialized export, written to the archive as UTF-8
    :param user: the user being exported; their avatar and the covers of
        their editions are added to the archive when present
    :param f: file object the gzipped tar archive is written to; it is opened
        for binary writing here and always closed again, even on failure
    """
    f.open("wb")
    try:
        with BookwyrmTarFile.open(mode="w:gz", fileobj=f) as tar:
            tar.write_bytes(json_data.encode("utf-8"))

            # Add avatar image if present
            if getattr(user, "avatar", False):
                tar.add_image(user.avatar, filename="avatar")

            # only the editions carry covers; the book queryset is not needed
            editions, _ = get_books_for_user(user)
            for edition in editions:
                # same guard as for the avatar: an empty image field has no
                # file behind it, so there is nothing to add to the archive
                if getattr(edition, "cover", False):
                    tar.add_image(edition.cover)
    finally:
        f.close()

def _export_user_profile(user):
    """Collect the profile fields of ``user`` that are part of an export."""
    exported_fields = [
        "username",
        "name",
        "summary",
        "manually_approves_followers",
        "hide_follows",
        "show_goal",
        "show_suggested_users",
        "discoverable",
        "preferred_timezone",
        "default_post_privacy",
    ]
    profile = {field: getattr(user, field) for field in exported_fields}

    # the avatar is exported as an absolute URL, and only when one is set
    if getattr(user, "avatar", False):
        profile["avatar"] = f"https://{DOMAIN}{user.avatar.url}"

    return profile


def _export_reading_goals(user):
    """List the goal, year and privacy of each annual goal of ``user``."""
    try:
        return [
            {"goal": goal.goal, "year": goal.year, "privacy": goal.privacy}
            for goal in models.AnnualGoal.objects.filter(user=user).distinct()
        ]
    except Exception:  # pylint: disable=broad-except
        # goals stay best-effort so a failure here does not abort the whole
        # export, but it must at least show up in the logs
        logger.exception(
            "Could not export reading goals for user %s", user.username
        )
        return []


def _export_book(user, editions, book):
    """Attach everything ``user`` did with one book to its ``values()`` dict.

    :param user: the user being exported
    :param editions: queryset of the user's editions
    :param book: ``values()`` dict of one book; extended in place
    :returns: the same ``book`` dict
    """
    book_id = book["id"]

    edition = editions.filter(id=book_id)
    book["edition"] = edition.values()[0]
    # authors
    book["authors"] = list(edition.first().authors.all().values())

    # readthroughs
    book_readthroughs = (
        models.ReadThrough.objects.filter(user=user, book=book_id)
        .distinct()
        .values()
    )
    book["readthroughs"] = list(book_readthroughs)

    # shelves
    shelf_books = models.ShelfBook.objects.filter(
        user=user, book=book_id
    ).distinct()
    shelves_from_books = models.Shelf.objects.filter(
        shelfbook__in=shelf_books, user=user
    )
    book["shelves"] = list(shelves_from_books.values())
    book["shelf_books"] = {}
    for shelf in shelves_from_books:
        # every entry of the user on that shelf, not only this book's
        shelf_contents = models.ShelfBook.objects.filter(
            user=user, shelf=shelf
        ).distinct()
        book["shelf_books"][shelf.identifier] = list(shelf_contents.values())

    # book lists
    book_lists = models.List.objects.filter(
        books__in=[book_id], user=user
    ).distinct()
    book["lists"] = list(book_lists.values())
    book["list_items"] = {}
    for book_list in book_lists:
        # NOTE(review): items are keyed by list name, so two lists of the same
        # name would overwrite each other - confirm names are unique per user
        list_items = models.ListItem.objects.filter(
            book_list=book_list
        ).distinct()
        book["list_items"][book_list.name] = list(list_items.values())

    # reviews
    reviews = models.Review.objects.filter(user=user, book=book_id).distinct()
    book["reviews"] = list(reviews.values())

    # comments
    comments = models.Comment.objects.filter(user=user, book=book_id).distinct()
    book["comments"] = list(comments.values())

    # quotes
    quotes = models.Quotation.objects.filter(user=user, book=book_id).distinct()
    book["quotes"] = list(quotes.values())

    return book


def json_export(user):
    """Generate an export for a user.

    The export is a JSON object with the keys ``user`` (profile fields),
    ``goals`` (annual reading goals), ``books`` (one entry per book with its
    edition, authors, readthroughs, shelves, lists, reviews, comments and
    quotes), ``saved_lists``, ``follows`` and ``blocked_users`` (the last
    three are lists of remote ids).

    :param user: the user to export
    :returns: the export serialized to a JSON string
    """
    # user
    exported_user = _export_user_profile(user)

    # reading goals
    goals_list = _export_reading_goals(user)

    # books
    editions, books = get_books_for_user(user)
    final_books = [
        _export_book(user, editions, book) for book in books.values()
    ]

    # saved book lists
    saved_lists = models.List.objects.filter(
        id__in=user.saved_lists.all()
    ).distinct()
    saved_list_ids = [saved_list.remote_id for saved_list in saved_lists]

    # follows
    follows = models.UserFollows.objects.filter(user_subject=user).distinct()
    following = models.User.objects.filter(
        userfollows_user_object__in=follows
    ).distinct()
    followed_ids = [followed.remote_id for followed in following]

    # blocks
    blocks = models.UserBlocks.objects.filter(user_subject=user).distinct()
    blocking = models.User.objects.filter(
        userblocks_user_object__in=blocks
    ).distinct()
    blocked_ids = [blocked.remote_id for blocked in blocking]

    data = {
        "user": exported_user,
        "goals": goals_list,
        "books": final_books,
        "saved_lists": saved_list_ids,
        "follows": followed_ids,
        "blocked_users": blocked_ids,
    }

    return DjangoJSONEncoder().encode(data)

def get_books_for_user(user):
    """Find the editions connected to a user, and the matching book rows.

    An edition counts when it is related to the user through a shelf, a
    readthrough, a review, a list, a comment or a quotation. The search starts
    from ``Edition.viewer_aware_objects(user)``.

    :returns: tuple of (editions queryset, books queryset)
    """
    related_to_user = (
        Q(shelves__user=user)
        | Q(readthrough__user=user)
        | Q(review__user=user)
        | Q(list__user=user)
        | Q(comment__user=user)
        | Q(quotation__user=user)
    )

    visible_editions = models.Edition.viewer_aware_objects(user)
    user_editions = visible_editions.filter(related_to_user).distinct()
    user_books = models.Book.objects.filter(id__in=user_editions).distinct()

    return user_editions, user_books
Loading

0 comments on commit 26ecea0

Please sign in to comment.