Skip to content
This repository has been archived by the owner on May 20, 2024. It is now read-only.

Commit

Permalink
Merge pull request #135 from uvacw/feature/dependency-updates-and-bui…
Browse files Browse the repository at this point in the history
…ld-fix

Final update to bump python dependency versions and change the Readme
  • Loading branch information
theoaraujo authored May 20, 2024
2 parents 7142e30 + c6eab89 commit 90fc288
Show file tree
Hide file tree
Showing 33 changed files with 963 additions and 550 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install dependencies
Expand Down Expand Up @@ -45,9 +45,9 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install with plain pip
Expand All @@ -60,9 +60,9 @@ jobs:

steps:
- name: checkout files
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: build Docker image
uses: docker/build-push-action@v2
uses: docker/build-push-action@v4
with:
file: Dockerfile
push: false
Expand All @@ -74,9 +74,9 @@ jobs:

steps:
- name: checkout files
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: build Docker image
uses: docker/build-push-action@v2
uses: docker/build-push-action@v4
with:
file: Dockerfile-test
push: false
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
![Python application](https://github.com/uvacw/osd2f/workflows/Python%20application/badge.svg?branch=main)
<a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
# OSD2F: Open Source Data Donation Framework
# OSD2F: Open Source Data Donation Framework (No longer maintained)

## ⚠️ Update: this repository is archived ⚠️

This repository is being archived as is. The code can be reused by others as specified in the license, yet security updates and maintenance are not currently being done. Those interested in using the code must therefore consider performing any relevant security updates prior to using the tool. The OSD2F authors are now working on a new data donation infrastructure, which can be found here: [https://datadonation.eu](https://datadonation.eu). This infrastructure contains a stand-alone tool (PORT) which is actively maintained and updated.



## Goal

Expand Down Expand Up @@ -96,4 +102,4 @@ Araujo, T., Ausloos, J., van Atteveldt, W., Loecherbach, F., Moeller, J., Ohme,
```


This tool is inspired in earlier approaches that enable researchers to partner with individuals willing to donate their data for academic research, including [Web Historian](https://github.com/erickaakcire/webhistorian) (Menchen-Trevino, 2016), among others.
This tool is inspired by earlier approaches that enable researchers to partner with individuals willing to donate their data for academic research, including [Web Historian](https://github.com/erickaakcire/webhistorian) (Menchen-Trevino, 2016), among others.
2 changes: 1 addition & 1 deletion osd2f/anonymizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ async def anonymize_submission(submission: Submission, settings: UploadSettings)
async def anonymize_submission_list(
submission_list: SubmissionList, settings: UploadSettings
) -> SubmissionList:
for i, submission in enumerate(submission_list.__root__):
for i, submission in enumerate(submission_list.root):
logger.debug(f"at submission {i}")
await anonymize_submission(submission, settings)
return submission_list
2 changes: 1 addition & 1 deletion osd2f/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def parse_and_run():
asyncio.run(app.startup())
settings = asyncio.run(load_content_settings(use_cache=False))
with open(args.generate_current_config, "w") as outputfile:
yaml.dump(settings.dict(by_alias=True), outputfile)
yaml.dump(settings.model_dump(by_alias=True), outputfile)
asyncio.run(app.shutdown())

else:
Expand Down
4 changes: 2 additions & 2 deletions osd2f/database/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ async def get_content_config() -> typing.Optional[DBConfigurationBlobs]:

async def set_content_config(user: str, content: ContentSettings):
await DBConfigurationBlobs.create(
insert_user=user, config_type="content", config_blob=content.json()
insert_user=user, config_type="content", config_blob=content.model_dump_json()
)


async def set_upload_config(user: str, content: UploadSettings):
await DBConfigurationBlobs.create(
insert_user=user, config_type="upload", config_blob=content.json()
insert_user=user, config_type="upload", config_blob=content.model_dump_json()
)
8 changes: 4 additions & 4 deletions osd2f/database/logs.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import asyncio
import logging
import queue
import time
import typing
from logging.handlers import QueueHandler

from tortoise import fields
from tortoise.models import Model
Expand Down Expand Up @@ -62,7 +62,7 @@ async def background_insert_log(
log_level: str,
log_position: str,
log_sid: typing.Optional[str] = None,
entry: typing.Dict = None,
entry: typing.Optional[typing.Dict] = None,
user_agent_string: typing.Optional[str] = None,
):

Expand All @@ -83,7 +83,7 @@ async def insert_log(
log_level: str,
log_position: str,
log_sid: typing.Optional[str] = None,
entry: typing.Dict = None,
entry: typing.Optional[typing.Dict] = None,
user_agent_string: typing.Optional[str] = None,
):
clientLogQueue.put(
Expand Down Expand Up @@ -142,7 +142,7 @@ async def async_log_worker(q: queue.SimpleQueue):
await asyncio.sleep(0.1)

logQueue: queue.SimpleQueue = queue.SimpleQueue()
h = logging.handlers.QueueHandler(logQueue)
h = QueueHandler(logQueue)
h.setLevel(logger.level)
print(h.level)
logger.addHandler(h)
Expand Down
41 changes: 24 additions & 17 deletions osd2f/database/submissions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any, Dict, List

from tortoise import Tortoise, fields
from tortoise.models import Model

Expand All @@ -13,7 +15,7 @@ class DBSubmission(Model):
n_deleted = fields.IntField()
insert_timestamp = fields.DatetimeField(auto_now_add=True)
update_timestamp = fields.DatetimeField(auto_now=True)
entry = fields.JSONField()
entry: Dict[str, Any] = fields.JSONField()

class Meta:
table = "submissions"
Expand All @@ -30,34 +32,39 @@ async def insert_submission(submission: Submission):
)


async def get_submissions():
async def get_submissions() -> List[OutputSubmission]:
submissions = await DBSubmission.all()
submission_dict = [
OutputSubmission(
db_id=si.id,
submission_id=si.submission_id,
filename=si.filename,
n_deleted_across_file=si.n_deleted,
insert_timestamp=si.insert_timestamp.isoformat(),
entry=SecureEntry.read_entry_field(dict(si.entry)),
).dict()
for si in submissions
]
submission_dict: List[OutputSubmission] = []

for si in submissions:
entry = SecureEntry.read_entry_field(si.entry)
sub = OutputSubmission.model_validate(
dict(
db_id=si.id,
submission_id=si.submission_id,
filename=si.filename,
n_deleted_across_file=si.n_deleted,
insert_timestamp=si.insert_timestamp.isoformat(),
entry=dict(entry),
),
)
submission_dict.append(sub)

return submission_dict


async def insert_submission_list(submissionlist: SubmissionList):
if len(submissionlist.__root__) < 1:
if len(submissionlist.root) < 1:
logger.info("Empty submissionlist")
return

logger.debug(
f"Inserting {len(submissionlist.__root__)} files of data for submission "
f"'{submissionlist.__root__[0].submission_id}'"
f"Inserting {len(submissionlist.root)} files of data for submission "
f"'{submissionlist.root[0].submission_id}'"
)

def subgenerator():
for sub in submissionlist.__root__:
for sub in submissionlist.root:
for entry in sub.entries:
yield DBSubmission(
submission_id=sub.submission_id,
Expand Down
28 changes: 13 additions & 15 deletions osd2f/definitions/content_settings.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from enum import Enum
from typing import Dict, List, Optional

from pydantic import BaseModel, EmailStr
from pydantic import BaseModel, ConfigDict, EmailStr


class FileSetting(BaseModel):
in_key: Optional[str]
in_key: Optional[str] = None
accepted_fields: List[str]
anonymizers: Optional[List[Dict[str, str]]]
anonymizers: Optional[List[Dict[str, str]]] = None


class UploadSettings(BaseModel):
Expand Down Expand Up @@ -38,22 +38,21 @@ class PageTypeEnum(str, Enum):

class CirclesRowCircle(BaseModel):
image: str
title: Optional[str]
subtitle: Optional[str]
title: Optional[str] = None
subtitle: Optional[str] = None


class ContentBlock(BaseModel):
type: BlockTypeEnum
id: str
title: Optional[str]
title: Optional[str] = None
lines: List[str]
buttons: List[ContentButton]
image: Optional[str]
image_pos: Optional[ImagePositionEnum]
circles_row: Optional[List[CirclesRowCircle]]
image: Optional[str] = None
image_pos: Optional[ImagePositionEnum] = None
circles_row: Optional[List[CirclesRowCircle]] = None

class Config:
use_enum_values = True
model_config = ConfigDict(use_enum_values=True)


class ContentPage(BaseModel):
Expand All @@ -63,7 +62,7 @@ class ContentPage(BaseModel):


class UploadBox(BaseModel):
header: Optional[str]
header: Optional[str] = None
explanation: List[str]


Expand All @@ -81,7 +80,7 @@ class PreviewComponent(BaseModel):
class ConsentPopup(BaseModel):
title: str
lead: str
points: Optional[List[str]]
points: Optional[List[str]] = None
end_text: str
decline_button: str
accept_button: str
Expand All @@ -106,5 +105,4 @@ class ContentSettings(BaseModel):
static_pages: Dict[PageTypeEnum, ContentPage]
upload_page: UploadPage

class Config:
use_enum_values = True
model_config = ConfigDict(use_enum_values=True)
4 changes: 2 additions & 2 deletions osd2f/definitions/security_settings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List, Optional

from pydantic import BaseModel, validator
from pydantic import BaseModel, field_validator


class MSALConfiguration(BaseModel):
Expand All @@ -13,6 +13,6 @@ class MSALConfiguration(BaseModel):
authority: Optional[str] = None
scope: List[str] = ["User.Read"]

@validator("authority", pre=True, always=True)
@field_validator("authority", mode="before", check_fields=True) # type: ignore
def set_authority(cls, v, *, values, **kwargs):
return f"https://login.microsoftonline.com/{values['tenant_id']}"
6 changes: 3 additions & 3 deletions osd2f/definitions/submissions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any, Dict, List

from pydantic import BaseModel
from pydantic import BaseModel, RootModel


class Submission(BaseModel):
Expand Down Expand Up @@ -34,8 +34,8 @@ class EncryptedSubmission(BaseModel):
entry: EncryptedEntry


class SubmissionList(BaseModel):
class SubmissionList(RootModel):
"""Submissions as send from the webbrowser.
Basically, a list of file submissions as one List."""

__root__: List[Submission]
root: List[Submission]
2 changes: 1 addition & 1 deletion osd2f/security/authorization/microsoft_msal.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ async def microsoft_msal_authentication(func, *args, **kwargs):
return await func(*args, **kwargs)

msal_auth = os.environ.get("MSAL_CONFIG")
config = MSALConfiguration.parse_raw(msal_auth)
config = MSALConfiguration.model_validate_json(msal_auth)

authorizer = msal.ConfidentialClientApplication(
config.client_id,
Expand Down
16 changes: 8 additions & 8 deletions osd2f/security/entry_encryption/file_decryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,18 @@ def __init__(self, filename: pathlib.Path, read_mode: bool):

def read_entries(self) -> Iterable[OutputSubmission]:
for raw_submission in json.load(self.file_obj):
submission = EncryptedSubmission.parse_obj(raw_submission)
submission = EncryptedSubmission.model_validate(raw_submission)
try:
EncryptedEntry.parse_obj(submission.entry)
EncryptedEntry.model_validate(submission.entry)
except ValueError:
logger.warning("Encountered an unencrypted entry!")
yield OutputSubmission.parse_obj(raw_submission)
decrypted_sub = OutputSubmission.parse_obj(raw_submission)
yield OutputSubmission.model_validate(raw_submission)
decrypted_sub = OutputSubmission.model_validate(raw_submission)
decrypted_sub.entry = SecureEntry.read_entry_field(decrypted_sub.entry)
yield decrypted_sub

def append(self, entry: OutputSubmission) -> None:
self.entries.append(entry.dict())
self.entries.append(entry.model_dump())

def __del__(self):
if (
Expand All @@ -81,7 +81,7 @@ class CSVFile(EntryFile):
def __init__(self, filename: pathlib.Path, read_mode: bool):
super().__init__(filename, read_mode)
if not read_mode:
headers = OutputSubmission.__fields__.keys()
headers = OutputSubmission.model_fields.keys()
self.writer = csv.DictWriter(self.file_obj, fieldnames=headers)
self.writer.writeheader()

Expand All @@ -92,11 +92,11 @@ def read_entries(self) -> Iterable[OutputSubmission]:
for e in reader:
re: Dict[str, Any] = {k: v for k, v in e.items() if k != "entry"}
re["entry"] = SecureEntry.read_entry_field(eval(e["entry"]))
yield OutputSubmission.parse_obj(re)
yield OutputSubmission.model_validate(re)

def append(self, entry: OutputSubmission) -> None:

self.writer.writerow(entry.dict())
self.writer.writerow(entry.model_dump())


def decrypt_file(input_path: pathlib.Path, output_path: pathlib.Path) -> int:
Expand Down
2 changes: 1 addition & 1 deletion osd2f/security/secrets/azure_keyvault.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ def azure_keyvault_replace(value: str) -> str:
cred = DefaultAzureCredential()
client = SecretClient(keyvault_url, cred)
secret = client.get_secret(secret_name).value
return secret
return secret or ""
Loading

0 comments on commit 90fc288

Please sign in to comment.