From 5e70611972e85c4fe3bf1b94abff19119f589805 Mon Sep 17 00:00:00 2001
From: Anonyo Noor <100anonyo@gmail.com>
Date: Fri, 28 Jun 2024 04:06:25 +0000
Subject: [PATCH] fixed querying by changing user agent

---
 .vscode/launch.json                           | 57 ++++++++-----------
 README.md                                     |  8 ++-
 backend/docker-compose.dev.yaml               |  2 +-
 backend/docker-compose.prod.yaml              |  4 +-
 backend/routers/filer.py                      | 30 +++++++---
 backend/routers/general.py                    | 13 +++--
 backend/routers/lib/api.py                    | 14 ++---
 backend/routers/lib/database.py               |  5 +-
 backend/routers/lib/web.py                    |  1 +
 backend/routers/utils.py                      |  9 ---
 backend/routers/worker.py                     |  2 +
 backend/search/{Dockerfile => Dockerfile.dev} |  3 +-
 backend/search/Dockerfile.prod                |  0
 13 files changed, 79 insertions(+), 69 deletions(-)
 rename backend/search/{Dockerfile => Dockerfile.dev} (50%)
 create mode 100644 backend/search/Dockerfile.prod

diff --git a/.vscode/launch.json b/.vscode/launch.json
index f73b37dc..aa5fa68d 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -6,44 +6,35 @@
   "configurations": [
     {
       "name": "FastAPI: Module",
-      "type": "python",
-      "module": "poetry",
+      "type": "debugpy",
+      "module": "uvicorn",
       "justMyCode": false,
-      "args": [
-        "run",
-        "uvicorn",
-        "main:app",
-        "--host",
-        "0.0.0.0",
-        "--port",
-        "8000",
-        "--reload"
-      ],
+      "args": ["main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"],
       "request": "launch",
       "cwd": "${workspaceFolder}/backend"
-    },
-    {
-      "name": "Celery: Remote Attach",
-      "type": "debugpy",
-      "request": "attach",
-      "connect": {
-        "host": "localhost",
-        "port": 6900
-      },
-      "pathMappings": [
-        {
-          "localRoot": "${workspaceFolder}",
-          "remoteRoot": "/app"
-        }
-      ],
-      "preLaunchTask": "preLaunch",
-      "postDebugTask": "killProcess"
     }
+    // {
+    //   "name": "Celery: Remote Attach",
+    //   "type": "debugpy",
+    //   "request": "attach",
+    //   "connect": {
+    //     "host": "localhost",
+    //     "port": 6900
+    //   },
+    //   "pathMappings": [
+    //     {
+    //       "localRoot": "${workspaceFolder}",
+    //       "remoteRoot": "/app"
+    //     }
+    //   ],
+    //   "preLaunchTask": "preLaunch",
+    //   "postDebugTask": "killProcess"
+    // }
   ],
   "compounds": [
-    {
-      "name": "Debug Full Backend",
-      "configurations": ["Celery: Remote Attach", "FastAPI: Module"]
-    }
+    // {
+    //   "name": "Debug Full Backend",
+    //   "configurations": ["Celery: Remote Attach", "FastAPI: Module"]
+    // }
   ]
 }
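
The launch configuration above now uses the `debugpy` debugger type and runs `uvicorn` directly as a module instead of going through `poetry run`. For orientation only (this snippet is not part of the patch), the equivalent programmatic entry point would look roughly like this, mirroring the `main:app`, host, port, and `--reload` arguments in the configuration; the `dev_server.py` name is illustrative:

```python
# dev_server.py -- illustrative sketch, not included in this patch.
# Runs the same server the debug configuration launches: uvicorn serving
# main:app on 0.0.0.0:8000 with auto-reload for development.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
```
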
diff --git a/README.md b/README.md
index 252f26c9..beafaca3 100644
--- a/README.md
+++ b/README.md
@@ -130,7 +130,13 @@ Once you have all the configuration files ready, to start the app, run the follo
 2. Run the microservices by calling the development compose file.
 
    ```bash
-   docker compose -f docker-compose.dev.yaml up
+   docker compose -f docker-compose.dev.yaml up -d
+   ```
+
+   **Note:** You should stop these microservices after you're done using them (otherwise they will waste resources indefinitely). To stop them, run the following.
+
+   ```
+   docker compose -f docker-compose.dev.yaml down # Run after you've finished
    ```
 
 3. Install dependencies with Poetry.
diff --git a/backend/docker-compose.dev.yaml b/backend/docker-compose.dev.yaml
index 7798cb7f..b616ab1b 100644
--- a/backend/docker-compose.dev.yaml
+++ b/backend/docker-compose.dev.yaml
@@ -21,7 +21,7 @@ services:
     container_name: search
     build:
       context: ./search
-      dockerfile: Dockerfile
+      dockerfile: Dockerfile.dev
     volumes:
       - ./search/search_db:/meili_data
     restart: always
diff --git a/backend/docker-compose.prod.yaml b/backend/docker-compose.prod.yaml
index 6a229d28..bf936002 100644
--- a/backend/docker-compose.prod.yaml
+++ b/backend/docker-compose.prod.yaml
@@ -62,12 +62,14 @@ services:
     container_name: search
     build:
       context: ./search
-      dockerfile: Dockerfile
+      dockerfile: Dockerfile.prod
     volumes:
       - ./search/search_db:/meili_data
     networks:
      - proxy-network
     restart: always
+    environment:
+      MEILI_MASTER_KEY: "***********"
 
 networks:
   proxy-network:
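
In production, the Meilisearch master key now arrives through the compose `environment` block instead of being baked into the image's `CMD`, and the development image (see the Dockerfile.dev rename further down) runs without a key at all. Backend code that talks to the search container would therefore read the key from the environment at runtime; a minimal sketch with the official `meilisearch` Python client is below. The `SEARCH_SERVER_URL` variable and the `companies` index name are illustrative assumptions, not values taken from this patch:

```python
# Illustrative sketch: connect to Meilisearch with a key supplied via the
# environment (as in docker-compose.prod.yaml) rather than a hard-coded value.
import os

import meilisearch

search_url = os.environ.get("SEARCH_SERVER_URL", "http://search:7700")  # assumed variable name
master_key = os.environ.get("MEILI_MASTER_KEY")  # set by the compose file in production

client = meilisearch.Client(search_url, master_key)
index = client.index("companies")  # index name is an assumption
```
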
diff --git a/backend/routers/filer.py b/backend/routers/filer.py
index fe25eedb..419b8144 100644
--- a/backend/routers/filer.py
+++ b/backend/routers/filer.py
@@ -9,6 +9,7 @@
 from datetime import datetime
 
 from . import worker
+from .worker import production_environment
 from .lib import web
 from .lib import database
 
@@ -90,7 +91,7 @@ def create_recent(cik, company, stamp):
 
     try:
         database.add_log(cik, "Creating Filer (Newest)", company_name, cik)
-
+        recent_filing = database.find_filing(cik, last_report)
         for (
             access_number,
             filing_stock,
@@ -209,11 +210,11 @@ def update_filer(company):
     operation = database.find_log(cik)
     if operation is None:
         raise HTTPException(404, detail="CIK not found.")
-    if operation["status"] == 2 or operation["status"] == 1:
-        raise HTTPException(  # @IgnoreException
-            302, detail="Filer continuous building."
-        )  # @IgnoreException
-    if operation["status"] > 2:
+    # elif operation["status"] == 2 or operation["status"] == 1:
+    #     raise HTTPException(  # @IgnoreException
+    #         302, detail="Filer continuous building."
+    #     )  # @IgnoreException
+    elif operation["status"] >= 2:
         raise HTTPException(409, detail="Filer still building.")
 
     update, last_report = web.check_new(cik)
@@ -224,7 +225,10 @@ def update_filer(company):
     database.edit_filer({"cik": cik}, {"$set": {"last_report": last_report}})
 
     stamp = {"name": company["name"], "start": time}
-    worker.create_historical.delay(cik, company, stamp)
+    if production_environment:
+        worker.create_historical.delay(cik, company, stamp)
+    else:
+        create_historical(cik, company, stamp)
 
     return {"description": "Filer update started."}
 
@@ -243,7 +247,11 @@ async def query_filer(cik: str):
             logging.error(e)
             raise HTTPException(404, detail="CIK not found.")
 
-        worker.create_filer.delay(cik, sec_data)
+        if production_environment:
+            worker.create_filer.delay(cik, sec_data)
+        else:
+            create_filer(cik, sec_data)
+
         res = {"description": "Filer creation started."}
     else:
         res = update_filer(filer)
@@ -292,7 +300,11 @@ async def rollback_filer(cik: str, password: str):
 
     start = datetime.now().timestamp()
     stamp = {"name": filer["name"], "start": start}
-    worker.create_historical(cik, filer, stamp)
+
+    if production_environment:
+        worker.create_historical.delay(cik, filer, stamp)
+    else:
+        create_historical(cik, filer, stamp)
 
     return {"description": "Filer rollback started."}
diff --git a/backend/routers/general.py b/backend/routers/general.py
index 794e7d2f..e6370c1f 100644
--- a/backend/routers/general.py
+++ b/backend/routers/general.py
@@ -10,7 +10,7 @@
 
 from .lib.backup import save_collections
 from .filer import popular_cik_list, top_cik_list
-from .worker import try_filer, replace_filer, delay_error
+from .worker import try_filer, replace_filer, delay_error, production_environment
 
 cache = cm.cache
 router = APIRouter(
@@ -32,7 +32,6 @@ async def info():
 async def info_undefined():
     return {"message": "Hello World!"}
 
-
 @cache(4)
 @router.get("/health", status_code=200)
 async def health():
@@ -105,7 +104,10 @@ async def query_top(password: str):
     filer_ciks = popular_cik_list
     filer_ciks.extend(top_cik_list)
 
-    background_query("query", filer_ciks, try_filer.delay)
+    if production_environment:
+        background_query("query", filer_ciks, try_filer.delay)
+    else:
+        background_query("query", filer_ciks, try_filer)
 
     return {"description": "Started querying filers."}
 
@@ -118,7 +120,10 @@ async def progressive_restore(password: str):
     filers = database.find_filers({}, {"cik": 1})
     all_ciks = [filer["cik"] for filer in filers]
 
-    background_query("restore", all_ciks, replace_filer.delay)
+    if production_environment:
+        background_query("restore", all_ciks, replace_filer.delay)
+    else:
+        background_query("restore", all_ciks, replace_filer)
 
     return {"description": "Started progressive restore of filers."}
 
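
The router changes above all follow the same pattern: when `production_environment` is set, work is queued through Celery with `.delay()`, and in development the function is called inline so it can be stepped through without a running worker (the `worker.py` hunk further down also flips `celery_task_always_eager` for any remaining `.delay()` calls). A hypothetical helper that centralizes the branching is sketched here; `dispatch` is not a function the patch adds, and deriving `production_environment` from an `ENVIRONMENT` variable is an assumption based on the rest of the backend:

```python
# Sketch of the dispatch pattern used in filer.py and general.py above.
# `dispatch` is hypothetical; the patch writes the if/else out at each call site.
import os

production_environment = os.environ.get("ENVIRONMENT", "development") == "production"


def dispatch(task, *args, **kwargs):
    """Queue `task` through Celery in production, run it inline in development."""
    if production_environment:
        return task.delay(*args, **kwargs)  # hand off to the broker
    return task(*args, **kwargs)  # synchronous call, easy to debug
```
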
diff --git a/backend/routers/lib/api.py b/backend/routers/lib/api.py
index c49d7f80..acae5e2d 100644
--- a/backend/routers/lib/api.py
+++ b/backend/routers/lib/api.py
@@ -6,13 +6,14 @@
 from datetime import datetime
 
 from . import database
+from . import analysis
 
 logging.info("[ APIs Initializing ] ...")
 
 # Requests
 session = requests.Session()
 headers = {
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0"
+    "User-Agent": "wallstreetlocal admin@wallstreetlocal.com ",
 }
 
 ENVIRONMENT = os.environ.get("ENVIRONMENT", "development")
@@ -139,14 +140,13 @@ def sec_filer_search(cik):
         cik,
         custom_wait=600,
     )
-    data = res.json()
-
-    if res.status_code == 400:
+
+    if res.ok:
+        data = res.json()
+    else:
         raise LookupError
 
-    from .analysis import convert_underscore
-
-    data_converted = convert_underscore(data, {})
+    data_converted = analysis.convert_underscore(data, {})
 
     return data_converted
 
diff --git a/backend/routers/lib/database.py b/backend/routers/lib/database.py
index b3df4712..49ce6e4f 100644
--- a/backend/routers/lib/database.py
+++ b/backend/routers/lib/database.py
@@ -94,8 +94,9 @@ def edit_filer(query, value):
 
 def delete_filer(cik):
     filer_query = {"cik": cik}
-    logs.delete_one(filer_query)
-    main.delete_one(filer_query)
+    logs.delete_many(filer_query)
+    filings.delete_many(filer_query)
+    main.delete_many(filer_query)
 
 
 def delete_filers(query):
diff --git a/backend/routers/lib/web.py b/backend/routers/lib/web.py
index 233c7b21..3db7be75 100644
--- a/backend/routers/lib/web.py
+++ b/backend/routers/lib/web.py
@@ -460,6 +460,7 @@ def process_stocks(cik, filings):
     for document in filings_list:
         access_number = document["access_number"]
         form_type = document["form"]
+
         if "13F-HR" not in form_type:
             continue
 
diff --git a/backend/routers/utils.py b/backend/routers/utils.py
index dac5b319..e57411cb 100644
--- a/backend/routers/utils.py
+++ b/backend/routers/utils.py
@@ -347,13 +347,4 @@ def insert_search(document_list):
     worker = threading.Thread(target=start_worker)
     worker.start()
 
-    print("Setting Up Environment ...")
-
-    if not production_environment:
-        filer_query = {"cik": DEBUG_CIK}
-
-        logs.delete_one(filer_query)
-        filers.delete_one(filer_query)
-        filings.delete_many(filer_query)
-
     print("Done!")
diff --git a/backend/routers/worker.py b/backend/routers/worker.py
index 5387e591..22e311dc 100644
--- a/backend/routers/worker.py
+++ b/backend/routers/worker.py
@@ -26,6 +26,8 @@ class Config:
     worker_concurrency = WORKERS
     conccurrency = 4
     broker_connection_retry_on_startup = True
+    celery_task_always_eager = False if production_environment else True
+
 
 queue = Celery("worker", broker=BROKER)
 queue.config_from_object(Config)
diff --git a/backend/search/Dockerfile b/backend/search/Dockerfile.dev
similarity index 50%
rename from backend/search/Dockerfile
rename to backend/search/Dockerfile.dev
index 0bb6dced..e346e6ce 100644
--- a/backend/search/Dockerfile
+++ b/backend/search/Dockerfile.dev
@@ -3,8 +3,7 @@ FROM getmeili/meilisearch:v1.5
 WORKDIR /search
 
 COPY ./config.toml /search/config.toml
-ARG MEILI_MASTER_KEY
 
 EXPOSE 7700
 
-CMD ["meilisearch", "--master-key=qq80RvopBK1kjvdlSVG_8VaxsRZICP0uniq5F2v0nlM"]
+CMD ["meilisearch"]
diff --git a/backend/search/Dockerfile.prod b/backend/search/Dockerfile.prod
new file mode 100644
index 00000000..e69de29b
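
The `backend/routers/lib/api.py` hunk is the fix the commit message refers to: SEC EDGAR's fair-access guidelines expect automated clients to send a declarative `User-Agent` naming the application and a contact address, and requests that spoof a browser string are liable to be blocked, which is the likely reason querying broke with the old Firefox user agent. A minimal sketch of a compliant request is below; the submissions URL and CIK are examples for illustration, not endpoints taken from this patch:

```python
# Illustrative sketch: querying SEC EDGAR with the declarative User-Agent
# format the patch switches to (application name plus contact email).
import requests

session = requests.Session()
session.headers.update(
    {"User-Agent": "wallstreetlocal admin@wallstreetlocal.com"}
)

# Example: the submissions feed for a 10-digit, zero-padded CIK.
url = "https://data.sec.gov/submissions/CIK0000320193.json"
res = session.get(url, timeout=30)

if res.ok:  # mirrors the res.ok check added in sec_filer_search
    data = res.json()
else:
    raise LookupError(f"EDGAR request failed with status {res.status_code}")
```
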