Skip to content

Commit

Permalink
example(dataset): add caltech-101 dataset (#1631)
Browse files Browse the repository at this point in the history
add caltech-101 dataset
  • Loading branch information
anda-ren authored Dec 20, 2022
1 parent 398cc6a commit 2d9b8b1
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 0 deletions.
29 changes: 29 additions & 0 deletions example/datasets/caltech_101/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
title: The `caltech-101` Dataset
---

## The Caltech 101 Dataset Description

- [Homepage](https://data.caltech.edu/records/mzrjq-6wc02)

## The `caltech-101` Dataset Structure

### Data Fields

- `data`: a `starwhale.Image` whose content is loaded as a byte array
- `annotations`: a dict with the following key:
  - `label`: the label for the image

## Build `caltech-101` Dataset locally

```shell
python3 dataset.py
```

## Example

Output the first record of the `caltech-101` dataset.

```shell
python3 example.py
```
47 changes: 47 additions & 0 deletions example/datasets/caltech_101/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import json

import requests

from starwhale import Link, Image, dataset, MIMEType # noqa: F401
from starwhale.utils.retry import http_retry

# Base URL of the hosted Caltech-101 example data. Both the file tree index
# (tree.json) and the individual image URIs are built relative to it.
PATH_ROOT = "https://starwhale-examples.oss-cn-beijing.aliyuncs.com/dataset/caltech-101"
# Name of the directory under PATH_ROOT whose sub-directories are treated as
# the image categories when the dataset is built.
DATA_PATH = "101_ObjectCategories"


@http_retry
def request_link_text(anno_link):
    """Fetch ``anno_link`` over HTTP and return the response body as text.

    Args:
        anno_link: URL of the resource to download.

    Returns:
        The decoded response body (``requests.Response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or when the
            10-second timeout expires.
    """
    response = requests.get(anno_link, timeout=10)
    # Turn HTTP error statuses into exceptions instead of handing the error
    # page back as if it were the payload (it would otherwise only fail later,
    # and confusingly, when parsed as JSON).
    # NOTE(review): presumably http_retry retries on the raised error - confirm.
    response.raise_for_status()
    return response.text


def build_ds():
    """Build and commit the ``caltech-101`` dataset from the remote file tree.

    Downloads ``tree.json`` from ``PATH_ROOT``, walks every category directory
    found under the ``DATA_PATH`` entry and appends one record per listed file.
    Each record is keyed by ``"<category>/<file name>"``, carries a remote
    ``Link`` to the image (declared as JPEG) and a ``{"label": category}``
    annotation. The dataset is committed only when every record was appended;
    the dataset handle is closed in all cases.

    Raises:
        Whatever ``request_link_text`` / ``json.loads`` raise when the tree
        index cannot be fetched or parsed, and ``KeyError`` when an entry of
        the tree lacks the expected ``contents`` / ``type`` / ``name`` keys.
    """
    ds = dataset("caltech-101", create=True)
    try:
        tree = json.loads(request_link_text(f"{PATH_ROOT}/tree.json"))
        for root_entry in tree:
            # Top-level entries without a matching "name" are ignored, which
            # also skips entries that carry no "name" key at all.
            if DATA_PATH not in root_entry.get("name", ""):
                continue
            for category_entry in root_entry["contents"]:
                # Only directories represent categories; skip stray files.
                if category_entry["type"] != "directory":
                    continue
                category = category_entry["name"]
                for file_entry in category_entry["contents"]:
                    file_name = file_entry["name"]
                    ds.append(
                        (
                            f"{category}/{file_name}",
                            Link(
                                uri=f"{PATH_ROOT}/{DATA_PATH}/{category}/{file_name}",
                                data_type=Image(
                                    display_name=file_name,
                                    mime_type=MIMEType.JPEG,
                                ),
                                with_local_fs_data=False,
                            ),
                            {"label": category},
                        )
                    )
        # Commit only after the whole tree was processed successfully.
        ds.commit()
    finally:
        # Always release the dataset handle, even if the build failed midway.
        ds.close()


# Build the dataset only when this file is executed as a script
# (``python3 dataset.py``), not when it is imported.
if __name__ == "__main__":
    build_ds()
16 changes: 16 additions & 0 deletions example/datasets/caltech_101/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import io

from PIL import Image as PILImage
from PIL import ImageDraw

from starwhale import dataset

# Open the latest version of the locally built caltech-101 dataset.
ds = dataset("caltech-101/version/latest")

# Fetch a single record and keep hold of its label annotations.
row = ds.fetch_one()
annotations = row.annotations

# Decode the image payload, stamp the label onto it in red and display it.
with PILImage.open(io.BytesIO(row.data.fp)) as img:
    ImageDraw.Draw(img).text((28, 36), annotations["label"], fill="red")
    img.show()

0 comments on commit 2d9b8b1

Please sign in to comment.