Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add make targets for cleaning db and inserting fake data. #122

Merged
merged 21 commits into from
Jun 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.EXPORT_ALL_VARIABLES:
BETTER_EXCEPTIONS=1
DJANGO_SETTINGS_MODULE=backend.settings.lint
DJANGO_SETTINGS_MODULE?=backend.settings.lint

migrate:
python src/manage.py migrate
Expand Down Expand Up @@ -32,3 +32,15 @@ dev-server:

# Run the test suite with coverage for `src` inside the `backend` docker-compose service.
# Depends on the `dev-server` target, which make runs first.
dev-test: dev-server
docker-compose exec backend pytest --cov=src src

# Insert fake data using the generator's default row counts.
# Extra generator options can be passed through ARGS, e.g.
#   make fake-data ARGS="--teams 50 --force"
# `python -m` takes a dotted module name, not a filesystem path, so the
# package is addressed as `scripts.fake` (same as fake-bulk-data).
fake-data:
	python -m scripts.fake generate $(ARGS)

# Insert a large fake data set: 10000 teams with 2 users each, 10 categories with
# 100 challenges each, and 1000000 solves.
fake-bulk-data:
python -m scripts.fake generate --teams 10000 --users 2 --categories 10 --challenges 100 --solves 1000000

# Drop and recreate the database named by $SQL_DATABASE (see scripts/clean_db.py).
clean-db:
python scripts/clean_db.py

# Remove the ractf-linting cache and database files in /tmp and the local .testmondata file.
clean-test:
rm -rf /tmp/ractf-linting.cache /tmp/ractf-linting.db .testmondata
141 changes: 82 additions & 59 deletions poetry.lock

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ better-exceptions = "^0.3.3"
pytest-testmon = "^1.1.1"
django-querycount = "^0.7.0"
nplusone = "^1.0.0"
faker = "^8.6.0"
docopt = "^0.6.2"

[tool.pytest.ini_options]
python_files = "tests.py test_*.py *_tests.py"
Expand Down Expand Up @@ -85,14 +87,15 @@ exclude_lines = [

[tool.black]
exclude = 'migrations'
line_length = 200
line_length = 120

[tool.isort]
profile = "black"
multi_line_output = 3

[tool.flake8]
exclude = "*migrations*,*settings*"
ignore = "W503"
max-line-length = 200
max-complexity = 25

Expand Down
16 changes: 16 additions & 0 deletions scripts/clean_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import os
from os import getenv

import psycopg2

from psycopg2 import sql

# Name of the database to reset. Fail early with a clear message instead of
# sending "DROP DATABASE None" to the server when the variable is unset.
database_name = os.getenv("SQL_DATABASE")
if not database_name:
    raise SystemExit("SQL_DATABASE must be set to the name of the database to reset.")

# Connect to the ``template1`` database rather than the target itself, so the
# target is not held open by this session while it is being dropped.
connection = psycopg2.connect(
    user=getenv("SQL_USER"),
    password=getenv("SQL_PASSWORD"),
    host=getenv("SQL_HOST"),
    port=getenv("SQL_PORT"),
    database="template1",
)
try:
    # DROP/CREATE DATABASE cannot run inside a transaction block, so every
    # statement must be committed on its own.
    connection.autocommit = True

    # Compose the identifier through psycopg2.sql so the name is quoted
    # properly instead of being pasted into the statement as raw text.
    target = sql.Identifier(database_name)
    with connection.cursor() as cursor:
        # IF EXISTS lets the script also work on a fresh server where the
        # database has not been created yet.
        cursor.execute(sql.SQL("DROP DATABASE IF EXISTS {}").format(target))
        cursor.execute(sql.SQL("CREATE DATABASE {}").format(target))
finally:
    # psycopg2's ``with connection`` only scopes a transaction; the
    # connection has to be closed explicitly.
    connection.close()
33 changes: 33 additions & 0 deletions scripts/fake/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""A command-line tool for generating and inserting many rows of fake data into the database.

Usage:
fake generate [--teams=<teams>] [--users=<users>] [--categories=<categories>] [--challenges=<challenges>] [--solves=<solves>] [--force]
fake -h | --help

Options:
--help -h Show this screen.
--force Run even when the database is populated.

--users=<users> The number of users to generate per team. [default: 2]
--categories=<categories> The number of categories to generate. [default: 5]
--teams=<teams> The number of teams to generate. [default: 10]
--challenges=<challenges> The number of challenges to generate per category. [default: 10]
--solves=<solves> The number of solves to generate. [default: 100]
"""

import sys
from os import environ
from pathlib import Path

import django
from docopt import docopt


# Parse the command line against the usage text in this module's docstring.
# This runs when the package is imported, before Django is configured.
arguments = docopt(__doc__)

# The Django project lives in the "src" directory that sits next to the
# "scripts" directory (two levels above this package).
PROJECT_BASE = str(Path(__file__).parents[2].joinpath("src"))

# Make the project's apps importable without clobbering sys.path[0].
if PROJECT_BASE not in sys.path:
    sys.path.insert(1, PROJECT_BASE)

# Respect a settings module chosen by the caller; otherwise use the local one.
environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings.local")
django.setup()
177 changes: 177 additions & 0 deletions scripts/fake/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
"""A command-line tool for generating and inserting many rows of fake data into the database."""

import random

from django import db
from faker import Faker

from challenge.models import Category, Challenge, Score, Solve
from member.models import Member
from scripts.fake.config import (
CATEGORIES,
CHALLENGES,
SOLVES,
TABLE_NAMES,
TEAMS,
USERS,
arguments,
)
from scripts.fake.utils import TimedLog, random_rpn_op
from team.models import Team


# Refuse to add fake rows to a database that already contains members unless
# the caller explicitly opted in with --force.
if not arguments.get("--force") and Member.objects.exists():
    print("The database is already populated, clear the db or use --force to run anyway.")
    # ``exit()`` is a helper injected by the ``site`` module for interactive
    # sessions and is not guaranteed to exist; raise SystemExit directly.
    raise SystemExit(1)


cursor = db.connection.cursor()

# Definitions of every index / constraint dropped below, keyed by table name,
# so that the ``finally`` block can recreate them.
db_constraints = {}
db_indexes = {}

# Visit each table exactly once (order preserved). If a table were processed a
# second time, its indexes and constraints would already be gone and the empty
# result would overwrite the saved definitions, so they would never be restored.
tables = list(dict.fromkeys(TABLE_NAMES))

try:
    # Drop secondary indexes and constraints so the bulk inserts are not slowed
    # down by index maintenance and constraint checks.
    for table in tables:
        cursor.execute(
            f"SELECT indexname, indexdef FROM pg_indexes WHERE tablename='{table}' AND indexname != '{table}_pkey';"
        )
        indexes = cursor.fetchall()

        cursor.execute(
            f"SELECT conname, contype, pg_catalog.pg_get_constraintdef(r.oid, true) as condef "
            f"FROM pg_catalog.pg_constraint r WHERE r.conrelid = '{table}'::regclass AND conname != '{table}_pkey';"
        )
        constraints = cursor.fetchall()

        # Remember the definitions *before* dropping anything, so a failure
        # part-way through the drops can still be undone in ``finally``.
        db_indexes[table] = indexes
        db_constraints[table] = constraints

        for constraint_name, _constraint_type, _constraint_sql in constraints:
            cursor.execute(f"ALTER TABLE {table} DROP CONSTRAINT IF EXISTS {constraint_name}")
        for index_name, _index_sql in indexes:
            cursor.execute(f"DROP INDEX IF EXISTS {index_name}")
        db.connection.commit()

    # NOTE(review): the tables are switched to UNLOGGED here and nothing in this
    # script switches them back with SET LOGGED -- confirm that this is intended.
    for table in tables:
        cursor.execute(f"ALTER TABLE {table} SET UNLOGGED")
    db.connection.commit()

    with TimedLog("Inserting data... ", ending="\n"):
        fake = Faker()

        category_ids = []
        with TimedLog(f"Creating {CATEGORIES} categories..."):
            for display_order in range(CATEGORIES):
                category = Category(
                    name=" ".join(fake.words()),
                    display_order=display_order,
                    contained_type="test",
                    description=fake.unique.text(),
                )
                category.save()
                category_ids.append(category.pk)

        challenge_ids = []
        with TimedLog(f"Creating {CHALLENGES} challenges for each category..."):
            for pk in range(CATEGORIES):
                category = Category.objects.get(pk=category_ids[pk])
                for j in range(CHALLENGES):
                    # Roughly one challenge in five has no unlock requirements.
                    auto_unlock = random.randint(1, 5) == 1
                    challenge = Challenge(
                        name=" ".join(fake.words())[:36],
                        category=category,
                        description=fake.unique.text(),
                        flag_metadata={"flag": f"ractf{{{fake.word()}}}"},
                        author=fake.unique.user_name(),
                        score=j,
                        challenge_metadata={},
                        unlock_requirements=random_rpn_op() if not auto_unlock else "",
                    )
                    challenge.save()
                    challenge_ids.append(challenge.pk)

        with TimedLog(f"Creating {USERS * TEAMS} users in memory..."):
            users_to_create = [
                Member(username=fake.unique.user_name(), email=fake.unique.email()) for _ in range(USERS * TEAMS)
            ]

        with TimedLog("Inserting to database..."):
            Member.objects.bulk_create(users_to_create)

        with TimedLog(f"Creating {TEAMS} teams in memory...."):
            teams_to_create = []
            members = list(Member.objects.all())
            for index in range(TEAMS):
                # The first member of each block of USERS members owns the team.
                team = Team(
                    name=fake.unique.user_name(),
                    password=" ".join(fake.words()),
                    owner=members[index * USERS],
                )
                teams_to_create.append(team)

        with TimedLog("Inserting to database..."):
            Team.objects.bulk_create(teams_to_create)

        with TimedLog("Adding members to teams in memory..."):
            members_to_update = []
            teams = list(Team.objects.all())
            for index, team_member in enumerate(members):
                # Consecutive blocks of USERS members share a team.
                team_member.team = teams[index // USERS % len(teams)]
                members_to_update.append(team_member)

        with TimedLog("Saving to database..."):
            Member.objects.bulk_update(members_to_update, ["team"])

        with TimedLog(f"Creating {SOLVES} solves and scores in memory..."):
            scores_to_create = []
            solves_to_create = []
            users_to_update = set()
            teams_to_update = set()
            teams = list(Team.objects.prefetch_related("members").all())
            team_index = 0

            # NOTE: each team gets one solve per challenge before the next team
            # starts, so SOLVES must not exceed len(teams) * len(challenge_ids);
            # beyond that ``teams[team_index]`` raises IndexError.
            for index in range(SOLVES):
                if index != 0 and index % len(challenge_ids) == 0:
                    team_index += 1
                team = teams[team_index]
                user = team.members.all()[index % USERS]

                points = random.randint(0, 999)
                penalty = 0 if random.randint(0, 10) != 5 else random.randint(0, points)
                score = Score(team=team, reason="challenge", points=points, penalty=penalty, leaderboard=True)
                scores_to_create.append(score)
                solve = Solve(
                    team=team,
                    solved_by=user,
                    challenge_id=challenge_ids[index % len(challenge_ids)],
                    first_blood=False,
                    flag="ractf{a}",
                    score=score,
                    correct=True,
                )
                solves_to_create.append(solve)

                user.points += points - penalty
                team.points += points - penalty
                user.leaderboard_points += points - penalty
                team.leaderboard_points += points - penalty

                teams_to_update.add(team)
                users_to_update.add(user)

        with TimedLog("Saving all to database...", ending="\n"):
            with TimedLog("[1/4] Saving Scores in database..."):
                Score.objects.bulk_create(scores_to_create)
            with TimedLog("[2/4] Saving Solves in database..."):
                Solve.objects.bulk_create(solves_to_create)
            # ``points`` is accumulated above together with ``leaderboard_points``,
            # so both fields have to be written back.
            with TimedLog("[3/4] Saving Members in database..."):
                Member.objects.bulk_update(users_to_update, ["points", "leaderboard_points"])
            with TimedLog("[4/4] Saving Teams in database..."):
                Team.objects.bulk_update(teams_to_update, ["points", "leaderboard_points"])
finally:
    # Recreate whatever was dropped, even if the run failed. ``.get`` keeps this
    # from raising KeyError (and hiding the real error) for tables that were
    # never reached by the drop loop.
    for table in tables:
        for _index_name, index_sql in db_indexes.get(table, ()):
            cursor.execute(index_sql)
    db.connection.commit()
    for table in tables:
        for constraint_name, _constraint_type, constraint_sql in db_constraints.get(table, ()):
            try:
                cursor.execute(f"ALTER TABLE {table} ADD CONSTRAINT {constraint_name} {constraint_sql}")
                db.connection.commit()
            except db.ProgrammingError:  # Some constraints seem to get added implicitly so adding them throws an error
                pass
52 changes: 52 additions & 0 deletions scripts/fake/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Set any relevant config options."""

from os import getenv

from django.utils.functional import classproperty

from scripts.fake import arguments


class PostgreSQL:
    """PostgreSQL connection settings taken from the ``SQL_*`` environment variables.

    The variables are read once, when the class body is executed; a variable
    that is not set yields an empty string.
    """

    HOST: str = getenv("SQL_HOST", "")
    PORT: str = getenv("SQL_PORT", "")
    DATABASE: str = getenv("SQL_DATABASE", "")
    USER: str = getenv("SQL_USER", "")
    PASSWORD: str = getenv("SQL_PASSWORD", "")

    @classproperty
    def dsn(cls) -> str:
        """Return a ``postgres://`` URL that points at the ``template1`` database.

        The user name and password are inserted verbatim (no URL escaping).
        """
        credentials = f"{cls.USER}:{cls.PASSWORD}"
        address = f"{cls.HOST}:{cls.PORT}"
        return f"postgres://{credentials}@{address}/template1"


# Row counts requested on the command line, converted to integers.
# USERS is counted per team and CHALLENGES per category.
USERS = int(arguments.get("--users", "0"))
CATEGORIES = int(arguments.get("--categories", "0"))
TEAMS = int(arguments.get("--teams", "0"))
CHALLENGES = int(arguments.get("--challenges", "0"))
SOLVES = int(arguments.get("--solves", "0"))

# Whether the generator may run against an already populated database.
FORCE = arguments.get("--force")


# Tables that the generator prepares for bulk loading: for each of them the
# non-primary-key indexes and constraints are dropped before the inserts and
# recreated afterwards.
#
# Every table must appear exactly once. A repeated entry is processed a second
# time after its indexes and constraints are already gone, which replaces the
# saved definitions with empty lists so they are never recreated.
TABLE_NAMES = [
    "member_member_groups",
    "authentication_token",
    "member_member_user_permissions",
    "authtoken_token",
    "authentication_totpdevice",
    "authentication_backupcode",
    "authentication_passwordresettoken",
    "member_userip",
    "authentication_invitecode",
    "challenge_file",
    "challenge_tag",
    "challenge_challengefeedback",
    "hint_hintuse",
    "hint_hint",
    "challenge_challengevote",
    "challenge_solve",
    "challenge_score",
    "challenge_challenge",
    "team_team",
    "member_member",
]
43 changes: 43 additions & 0 deletions scripts/fake/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""A set of classes and utility methods for use in the faker script."""

import sys
import time
from dataclasses import dataclass
from random import randint


@dataclass
class TimedLog:
    """Context manager that logs a message on entry and the elapsed time on exit.

    Both the start message and the completion line are written to stderr, so
    they stay together when stdout is redirected. The completion line reads
    ``Done (<seconds>s)`` when the block finished normally and
    ``Failed (<seconds>s)`` when it raised; exceptions are never suppressed.
    """

    # Text printed when the block is entered.
    message: str
    # Terminator printed after ``message``; the default keeps the completion
    # line on the same output line as the message.
    ending: str = " "

    # Monotonic clock reading taken on entry (only meaningful as a difference).
    entry_time: float = 0.0

    @property
    def time_elapsed(self) -> float:
        """Return the number of seconds since the block was entered."""
        return time.monotonic() - self.entry_time

    def __enter__(self) -> "TimedLog":
        """Start the timer, print the message and return this instance."""
        # A monotonic clock cannot jump backwards when the system time changes.
        self.entry_time = time.monotonic()
        print(self.message, end=self.ending, flush=True, file=sys.stderr)
        return self

    def __exit__(self, exc_type, *_) -> None:
        """Print how long the block ran and whether it completed or raised."""
        outcome = "Done" if exc_type is None else "Failed"
        print(f"{outcome} ({self.time_elapsed}s)", flush=True, file=sys.stderr)


def random_rpn_op(depth: int = 0) -> str:
    """Build a random unlock-requirement expression in reverse Polish notation.

    A leaf is a random integer between 1 and 1000 rendered as a string; an
    inner node is two sub-expressions followed by ``OR`` or ``AND``. The
    outermost call always produces an inner node, and nesting stops once four
    levels have been generated.

    ``depth`` is the nesting level of the caller; external callers leave it at 0.
    """
    level = depth + 1

    # Hard limit on nesting: anything deeper than four levels is a leaf.
    if level > 4:
        return str(randint(1, 1000))

    # Below the top level, stop early about half of the time. The roll is
    # drawn at the top level as well, where its value is ignored.
    stop_roll = randint(1, 4)
    if level > 1 and stop_roll < 3:
        return str(randint(1, 1000))

    # Pick the operator first, then build the operands from left to right.
    operator = "OR" if randint(1, 2) == 1 else "AND"
    left = random_rpn_op(level)
    right = random_rpn_op(level)
    return f"{left} {right} {operator}"
Loading