Skip to content

Commit

Permalink
Add script to query Danni for the number of valid Dans at a given dat…
Browse files Browse the repository at this point in the history
…e in a specific Batch (kubeflow#41)

* Add script to query Danni for the number of valid Dans at a given date

* Add launcher for danni_count_batch.py
  • Loading branch information
Jerome-Kaleido authored Oct 8, 2021
1 parent b9466f7 commit a729c53
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 0 deletions.
77 changes: 77 additions & 0 deletions core/bin/danni_count_batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import os
import time
from datetime import datetime
import argparse
import requests
import json

from kaleido.data.danni.connection import DanniConnection


def _build_parser():
    """Create the command-line parser for the batch counter.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``--user``, ``--token``,
        ``--batch`` (required) and ``--date`` (defaults to today).
    """
    parser = argparse.ArgumentParser(
        description='Count how many valid Dans belong to a batch, at a specific date.')
    parser.add_argument('-u', '--user', default=None, type=str, help='danni user.')
    parser.add_argument('-t', '--token', default=None, type=str, help='danni password.')
    parser.add_argument('-b', '--batch', required=True, type=str, help='batch name.')
    # argparse %-formats help strings, so literal percent signs must be
    # doubled; a bare "%Y" makes `--help` fail with a ValueError.
    parser.add_argument('-d', '--date', default=datetime.today().strftime('%Y-%m-%d'), type=str,
                        help='Get Dans up until this date, in format %%Y-%%m-%%d')
    return parser


def _epoch_seconds(date_str):
    """Convert a ``YYYY-MM-DD`` string to whole seconds since 1970-01-01.

    The date is parsed as a naive datetime at midnight and measured against
    a naive epoch, so no local-timezone offset is applied.

    Raises:
        ValueError: if ``date_str`` does not match ``%Y-%m-%d``.
    """
    parsed = datetime.strptime(str(date_str), '%Y-%m-%d')
    return int((parsed - datetime(1970, 1, 1)).total_seconds())


def _count_dans(conn, params):
    """Page through ``/api/dans`` and count the returned entries.

    ``params`` is updated in place with the current ``page`` number before
    each request. Paging stops on the first empty response.

    Returns:
        A ``(count, complete)`` tuple. ``complete`` is False when a request
        raised, in which case ``count`` only covers the pages fetched so far.
    """
    num_samples = 0
    page = 1
    while True:
        params["page"] = page
        try:
            data = conn.req(requests.get, "/api/dans", params=params)
        except Exception as e:
            # Best effort: report what was counted so far, but let the caller
            # know that the total is not trustworthy.
            print("got exception while fetching danni data: {}".format(e))
            return num_samples, False

        # no more dans, stop paging
        if not data:
            return num_samples, True

        num_samples += len(data)
        print(f"Page {page} -> {num_samples} samples")
        page += 1


def main():
    """Count the valid Dans of a batch whose QC was created up to a date.

    Credentials come from ``--user``/``--token`` or from the ``DANNI_USER``/
    ``DANNI_TOKEN`` environment variables; command-line values override the
    environment. ``DANNI_HOST`` is always set to the production URL.
    NOTE(review): presumably ``DanniConnection`` reads these environment
    variables -- confirm against its implementation.

    Raises:
        RuntimeError: if a credential is given neither as argument nor as
            environment variable.
        SystemExit: with code 1 if ``--date`` is not in ``%Y-%m-%d`` format.
    """
    args = _build_parser().parse_args()

    if "DANNI_USER" not in os.environ and args.user is None:
        raise RuntimeError(
            "Missing user credential for Danni. Either as environment variable DANNI_USER or as argument --user")
    if "DANNI_TOKEN" not in os.environ and args.token is None:
        raise RuntimeError(
            "Missing token credential for Danni. Either as environment variable DANNI_TOKEN or as argument --token")

    os.environ["DANNI_HOST"] = "https://danni.kaleido.ai/"
    if args.user:
        os.environ["DANNI_USER"] = args.user
    if args.token:
        os.environ["DANNI_TOKEN"] = args.token

    # Compute date
    try:
        date_boundary = _epoch_seconds(args.date)
    except ValueError:
        print("Incorrect format for argument date")
        # SystemExit instead of the interactive `exit()` helper, which is
        # only injected by the `site` module and may be missing.
        raise SystemExit(1)

    print(f"{args.date} -> {date_boundary} seconds since epoch.")

    # Only Dans of the requested batch that are not marked for deletion and
    # whose alpha QC is "ok" and was created on or before the boundary.
    filter_dict = {"image.remove_background.batches": args.batch, "to_delete": False,
                   "$and": [{"image.remove_background.alpha.qc.status": "ok"},
                            {"image.remove_background.alpha.qc.created_at": {"$lte": date_boundary}}]}

    params = {"filter": json.dumps(filter_dict), "bucket_uris": True, "limit": 1000}

    # create connection
    conn = DanniConnection()

    num_samples, complete = _count_dans(conn, params)

    print(f"Until {args.date}, {num_samples} valid images in batch \"{args.batch}\"")
    if not complete:
        print("Warning: fetching stopped early because of an error; the count above may be incomplete.")


if __name__ == '__main__':
    main()
8 changes: 8 additions & 0 deletions core/scripts/danni_count_batch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Launcher for danni_count_batch.py.
#
# Reads the Danni credentials from files in ~/Documents, exports them as
# DANNI_USER / DANNI_TOKEN and runs the counter for one batch and date.
#
# Usage: danni_count_batch.sh [BATCH] [DATE]
#   BATCH  batch name (default: Carl)
#   DATE   upper date bound, YYYY-MM-DD (default: 2021-08-16)

# Abort on the first failing command, unset variable or broken pipeline.
set -euo pipefail

# Resolve the Python script relative to this file, not to the caller's
# working directory, so the launcher can be started from anywhere.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Assign first, export afterwards: `export VAR=$(cmd)` would hide a failing
# `cat` (e.g. a missing credential file) behind export's own exit status.
DANNI_USER="$(cat ~/Documents/danni_auth_user.txt)"
DANNI_TOKEN="$(cat ~/Documents/danni_auth_token.txt)"
export DANNI_USER DANNI_TOKEN

BATCH="${1:-Carl}"
DATE="${2:-2021-08-16}"

python "${SCRIPT_DIR}/../bin/danni_count_batch.py" --batch="${BATCH}" --date="${DATE}"

0 comments on commit a729c53

Please sign in to comment.