diff --git a/Dockerfile b/Dockerfile
index e00af53..a9a348c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,23 +1,17 @@
-FROM python:3-bookworm
+# Fixed alpine version due to: https://github.com/ocrmypdf/OCRmyPDF/issues/1395
+FROM python:3-alpine3.19
LABEL maintainer=LasseR15
-LABEL email=lasse.roth@lasse-it.de
-
-
-RUN apt update
-RUN apt install -y tesseract-ocr tesseract-ocr-deu ghostscript
-
-
-COPY /src /app/src
-COPY /requirements.txt /app/requirements.txt
+LABEL email=lasse.roth@nexy.dev
WORKDIR /app
-RUN pip3 install -r requirements.txt
+RUN apk add --no-cache tesseract-ocr tesseract-ocr-data-deu ghostscript
+# Install the Python dependencies before copying the sources so this layer
+# stays cached when only files under src/ change.
+COPY /requirements.txt ./requirements.txt
+RUN pip3 install --no-cache-dir -r requirements.txt
+COPY /src ./src
-RUN python -m playwright install-deps chromium
-RUN python -m playwright install chromium
+# Port of the local webserver that receives the OAuth login callback.
+EXPOSE 4200
ENV PYTHONPATH=/app/src/
ENV BASE_OUTPUT_PATH=/app/output
diff --git a/README.md b/README.md
index 178d8b6..9a4c258 100644
--- a/README.md
+++ b/README.md
@@ -92,35 +92,29 @@ There are two variants/tags available:
#### Docker Compose
To use the ocr version of the script with Docker Compose run the following command:
```bash
-docker compose run --rm -it bibox-to-pdf \
- '{USERNAME}' '{PASSWORD}' {BOOK_ID}
+docker compose run --rm -Pit bibox-to-pdf {BOOK_ID}
```
#### Docker CLI
To use the script with ocr via Docker run the following command:
```bash
-docker run --rm -it \
- -v ./books:/app/output/books \
- ghcr.io/lasser15/bibox-to-pdf:latest \
- '{USERNAME}' '{PASSWORD}' {book_id}
+docker run --rm -it -p 4200:4200 -v ./books:/app/output/books \
+  ghcr.io/lasser15/bibox-to-pdf:latest {BOOK_ID}
```
diff --git a/docker-compose.yml b/docker-compose.yml
index 4d407cc..cc09da7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,7 +1,10 @@
services:
bibox-to-pdf:
image: ghcr.io/lasser15/bibox-to-pdf:latest
+    restart: 'no'  # quoted: a bare `no` is a YAML boolean, Compose expects the string "no"
build:
context: .
volumes:
- ./books:/app/output/books
+ ports:
+ - '4200:4200'
diff --git a/requirements.txt b/requirements.txt
index 498baf2..538beb7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
-typer~=0.12.5
-playwright~=1.47.0
-ocrmypdf~=16.5.0
+typer~=0.14.0
+ocrmypdf~=16.6.2
img2pdf~=0.5.1
requests~=2.32.3
+fastapi~=0.115.5
+uvicorn~=0.32.1
diff --git a/src/bibox_to_pdf/bibox/BiboxImageDownloader.py b/src/bibox_to_pdf/bibox/BiboxImageDownloader.py
index 79e4ae0..452b3cb 100644
--- a/src/bibox_to_pdf/bibox/BiboxImageDownloader.py
+++ b/src/bibox_to_pdf/bibox/BiboxImageDownloader.py
@@ -14,7 +14,7 @@ def get_bibox_images(access_token: str, book_id: int):
if response.status_code != 200:
print(f"Response code from server was not 200. "
- f"Either the book id '{book_id}' doesn't exist or the login wasn't successful. "
+ f"Are you sure the book id '{book_id}' exists and you have access to it? Response code was {response.status_code}.\n"
f"Exiting!")
raise typer.Exit(1)
diff --git a/src/bibox_to_pdf/bibox/BiboxLogin.py b/src/bibox_to_pdf/bibox/BiboxLogin.py
index 9d5d91e..e9b5a11 100644
--- a/src/bibox_to_pdf/bibox/BiboxLogin.py
+++ b/src/bibox_to_pdf/bibox/BiboxLogin.py
@@ -1,35 +1,94 @@
-import typer
-from playwright.sync_api import sync_playwright
-from bibox_to_pdf.values.BiboxSelectors import BiboxSelectors
+import asyncio
+import base64
+import hashlib
+import secrets
+import sys
+
+import requests
+import uvicorn
+from fastapi import FastAPI, Request, BackgroundTasks
+from fastapi.responses import HTMLResponse
+from rich import print as rprint
+
from bibox_to_pdf.values.Constants import Constants
-from rich import print
+# Queue that hands the authorization code from the /login handler to login_to_bibox().
+login_endpoint_queue = asyncio.Queue()
+
+# Callback webserver: bound to all interfaces on port 4200, the port used in the OAuth redirect URI.
+app = FastAPI()
+config = uvicorn.Config(app, host='0.0.0.0', port=4200, log_level="warning")
+server = uvicorn.Server(config)
+
+@app.get('/login', response_class=HTMLResponse)
+async def login(req: Request, background_tasks: BackgroundTasks):
+    """Handle the OAuth redirect and pass the authorization code on.
+
+    Reads the `code` query parameter; when it is present it is queued on
+    `login_endpoint_queue` via a background task and a short confirmation
+    page is returned, otherwise an error page is returned.
+    """
+    code = req.query_params.get('code')
+    # A missing or empty code cannot be exchanged for a token, so reject both.
+    if not code:
+        return '<p>Error: Code is missing from request params</p>'
+
+    background_tasks.add_task(login_endpoint_queue.put, code)
+
+    return '<p>You can close this window now</p>'
+
+
+# Awaits server.serve(); an Exception raised by it is printed and the process exits with code 1.
+async def start_webserver():
+ try:
+ await server.serve()
+ except Exception as e:
+ rprint(f'Error starting webserver: {e}')
+ sys.exit(1)
+
+def create_login_link():
+    """Build the OAuth login URL together with a fresh PKCE code verifier.
+
+    Returns:
+        A dict with the keys 'redirect_uri', 'code_verifier' and 'login_url'.
+    """
+    login_url = Constants.biboxOauthLoginUrl
+    client_id = Constants.biboxOauthClientId
+    redirect_uri = 'http://localhost:4200/login'
+    code_verifier = secrets.token_urlsafe(96)[:96]
+
+    # S256 challenge: base64url(sha256(verifier)) with the '=' padding removed.
+    code_verifier_hashed = hashlib.sha256(code_verifier.encode('ascii')).digest()
+    code_verifier_encoded = base64.urlsafe_b64encode(code_verifier_hashed)
+    code_challenge = code_verifier_encoded.decode('ascii').rstrip('=')
+
+    login_url = login_url + f'?client_id={client_id}&response_type=code&scope=openid&redirect_uri={redirect_uri}&code_challenge_method=S256&code_challenge={code_challenge}'
+
+    return {
+        'redirect_uri': redirect_uri,
+        'code_verifier': code_verifier,
+        'login_url': login_url,
+    }
+
+def get_access_token(code: str, code_verifier: str, redirect_uri: str) -> str:
+    """Exchange an OAuth authorization code for an access token.
+
+    Args:
+        code: Authorization code received on the login callback.
+        code_verifier: PKCE verifier that belongs to the login link used.
+        redirect_uri: Redirect URI that was sent with the login request.
+
+    Returns:
+        The access token from the token endpoint's JSON response.
+
+    Raises:
+        Exception: If the endpoint does not answer with 200 or 201, or the
+            response contains no access token.
+    """
+    token_endpoint = Constants.biboxOauthTokenUrl
+
+    # The timeout keeps a stalled connection from hanging the login forever.
+    token_result = requests.post(token_endpoint, data={
+        'redirect_uri': redirect_uri,
+        'code': code,
+        'code_verifier': code_verifier,
+    }, timeout=30)
+
+    # Both 200 and 201 count as success (a bitwise `201 | 200` would only accept 201).
+    if token_result.status_code not in (200, 201):
+        raise Exception(f'Error getting access token: {token_result.text}')
-def login_to_bibox(username: str, password: str) -> str:
-    with sync_playwright() as p:
-        print(f"Logging in to BiBox with user '{username}'")
+
+    access_token = token_result.json().get('access_token')
+    if not access_token:
+        raise Exception('Error getting access token: response did not contain an access token')
+    return access_token
- browser = p.chromium.launch()
- page = browser.new_page()
- page.goto(Constants.biboxLoginUrl)
- page.wait_for_selector(BiboxSelectors.loginBtn)
+async def login_to_bibox() -> str:
+    """Run the interactive OAuth login and return an access token.
+
+    Starts the callback webserver, prints a login link, waits for the
+    authorization code queued by the /login handler and exchanges it for a
+    token. If the exchange fails, a new link with a new code verifier is
+    printed and the wait starts again.
+    """
+    # The webserver runs as a task on this same event loop (not in a thread),
+    # so blocking calls below must be moved off the loop.
+    webserver_task = asyncio.create_task(start_webserver())
-        page.type(BiboxSelectors.loginUsernameField, username)
-        page.type(BiboxSelectors.loginPasswordField, password)
+    while True:
+        login_result = create_login_link()
+        rprint('To log in to bibox open the following link in your browser: ')
+        print(login_result["login_url"])
-        with page.expect_navigation():
-            page.click(BiboxSelectors.loginBtn)
+        code_result = await login_endpoint_queue.get()
         try:
-            page.wait_for_selector(BiboxSelectors.logoutBtn, timeout=10000)
-        except:
-            print('Login credentials incorrect or a network error occurred.')
-            raise typer.Exit(1)
+            # get_access_token() does blocking HTTP; run it in a worker thread
+            # so the callback server keeps answering requests meanwhile.
+            access_token = await asyncio.to_thread(
+                get_access_token, code_result, login_result['code_verifier'], login_result['redirect_uri'])
+        except Exception as e:
+            rprint(f'Error getting access token: {e}')
+            continue
-        access_token = page.evaluate('() => window.localStorage.getItem("oauth.accessToken")')
+        rprint('Successfully logged in to bibox')
-        page.close()
-        browser.close()
+        await server.shutdown()
+        webserver_task.cancel()
         return access_token
diff --git a/src/bibox_to_pdf/pdf/PdfOcr.py b/src/bibox_to_pdf/pdf/PdfOcr.py
index 684daa5..e98b360 100644
--- a/src/bibox_to_pdf/pdf/PdfOcr.py
+++ b/src/bibox_to_pdf/pdf/PdfOcr.py
@@ -8,7 +8,7 @@ def ocr_pdf(book_id: int, pdf_non_ocr_path: str):
pdf_output_dir = Constants.pdfOutputDir.format(book_id)
os.makedirs(pdf_output_dir, exist_ok=True)
- print("Starting PDF ocr in German...")
+ print("Starting PDF ocr in German (if you need another language please open an issue on GitHub)...")
pdf_output_file = Constants.pdfOutputFile.format(book_id, 'ocr-version')
diff --git a/src/bibox_to_pdf/values/BiboxSelectors.py b/src/bibox_to_pdf/values/BiboxSelectors.py
deleted file mode 100644
index 2c78e34..0000000
--- a/src/bibox_to_pdf/values/BiboxSelectors.py
+++ /dev/null
@@ -1,5 +0,0 @@
-class BiboxSelectors:
- loginUsernameField = '#account'
- loginPasswordField = '#password'
- loginBtn = '#form_login > div > div:nth-child(4) > button'
- logoutBtn = '#bbx > app-root > app-shelf > div.header > div > div.right-side > div > button'
diff --git a/src/bibox_to_pdf/values/Constants.py b/src/bibox_to_pdf/values/Constants.py
index 5d63dfb..33adc22 100644
--- a/src/bibox_to_pdf/values/Constants.py
+++ b/src/bibox_to_pdf/values/Constants.py
@@ -2,7 +2,9 @@
class Constants:
- biboxLoginUrl = 'https://bibox2.westermann.de'
+ biboxOauthLoginUrl = 'https://mein.westermann.de/auth/login'
+ biboxOauthTokenUrl = 'https://backend.bibox2.westermann.de/token'
+ biboxOauthClientId = 'Nvw0ZA8Z'
biboxBookInfoUrl = 'https://backend.bibox2.westermann.de/v1/api/sync/{}?materialtypes[]=default&materialtypes[]=addon'
baseOutputPath = os.getenv('BASE_OUTPUT_PATH', default='.')
diff --git a/src/main.py b/src/main.py
index 6dd5c5e..81ad30d 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,22 +1,21 @@
+import asyncio
+
import typer
+from rich import print
from typing_extensions import Annotated
+
from bibox_to_pdf.bibox.BiboxImageDownloader import get_bibox_images, download_images_from_bibox
from bibox_to_pdf.bibox.BiboxLogin import login_to_bibox
from bibox_to_pdf.pdf.PdfCreator import create_pdf_from_images
from bibox_to_pdf.pdf.PdfOcr import ocr_pdf
from bibox_to_pdf.values.Constants import Constants
-from rich import print
-
-def main(
- username: Annotated[str, typer.Argument()],
- password: Annotated[str, typer.Argument()],
- book_id: Annotated[int, typer.Argument()]):
+def main(book_id: Annotated[int, typer.Argument()]):
book_dest_path = Constants.bookBaseOutputDir.format(book_id)
print(f"Downloading book with id '{book_id}' to '{book_dest_path}'...")
- access_token = login_to_bibox(username, password)
+ access_token = asyncio.run(login_to_bibox())
bibox_images = get_bibox_images(access_token, book_id)
image_paths = download_images_from_bibox(bibox_images, book_id)