# cubeplusplus_demo.py
import os
import sys
import time
import numpy as np
import pandas as pd
import seaborn as sns
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
import matplotlib.pylab as plt
from pathlib import Path
from presto import pipeline
from presto.analysis import StrategyAnalysis
from presto.strategy import Strategy
# Positional command-line arguments (all five are required):
#   argv[1]  thread count, also used as the shard count
#   argv[2]  compression type handed to every Strategy
#   argv[3]  sample count passed to profile_strategy
#   argv[4]  number of profiling runs
#   argv[5]  pipeline variant: 'jpg' selects the JPG setup, any other value PNG
thread_shard_count = int(sys.argv[1])
compression_type = sys.argv[2]  # argv entries are already str
sample_count = int(sys.argv[3])
runs = int(sys.argv[4])
pipeline_mod = sys.argv[5]

# Pick the pipeline module together with its dataset and log locations.
# Only the selected variant's module is imported.
if pipeline_mod == "jpg":
    from cubeplusplus_pipeline_jpg import pipeline_definition
    source_path, log_path, datatype = "/dataset/JPG", "/logs/jpg", "JPG"
else:
    from cubeplusplus_pipeline_png import pipeline_definition
    source_path, log_path, datatype = "/dataset/PNG", "/logs/png", "PNG"

# Settings shared by both variants: storage_type is passed to every Strategy,
# target_path is the root of the shard directory prefix.
storage_type = "remote"
target_path = "/tmp"
# Build the pipeline for the selected dataset location.
imagenet_pipeline = pipeline_definition(source_path)
imagenet_loading_pipeline_op = imagenet_pipeline[0]["op"]

# Start with one step index per pipeline entry, then drop the steps that
# should not be profiled. The toggles are listed from the highest index down
# so that an earlier deletion never shifts the index used by a later one.
imagenet_pipeline_steps = [*range(len(imagenet_pipeline))]
# del imagenet_pipeline_steps[3]  # drop the 3-resized strategy for application cache
# del imagenet_pipeline_steps[2]  # drop the 2-decode-image strategy for application cache
del imagenet_pipeline_steps[1]  # drop the 1-list-files strategy from profiling
# del imagenet_pipeline_steps[0]  # drop the 0-fully-online strategy for compression
# Threads and shards are both driven by the single CLI-provided count.
thread_counts = [thread_shard_count]
shard_counts = [thread_shard_count]
# NOTE: zip() returns a one-shot iterator; the comprehension below consumes it.
thread_shard_counts = zip(thread_counts, shard_counts)

# One Strategy per (thread count, shard count) pairing and remaining step.
# Step 0 is passed as split_position=None; every other step is passed as-is.
strategies = [
    Strategy(
        pipeline=imagenet_pipeline,
        split_position=step if step != 0 else None,
        shard_count=shard_count,
        thread_count=thread_count,
        shard_directory_prefix=f"{target_path}/cubeplusplus-{datatype}-split",
        compression_type=compression_type,
        storage_type=storage_type,
    )
    for thread_count, shard_count in thread_shard_counts
    for step in imagenet_pipeline_steps
]
# Profiling parameters taken from the command line.
sample_counts = [sample_count]
runs_total = runs

# Profile every strategy for every sample count and print its stats right
# after the run.
# NOTE(review): the nesting is reconstructed (the viewed source had no
# indentation); print_stats() is assumed to belong to the inner loop - confirm.
for sample_count in sample_counts:
    for strategy in strategies:
        strategy.profile_strategy(
            sample_count=sample_count,
            runs_total=runs_total,
            system_cache_enabled=True,
        )
        strategy.print_stats()
# Gather the per-strategy profile and dstat dataframes, in strategy order.
strategy_dfs = [profiled.profile_as_df() for profiled in strategies]
dstat_dfs = [profiled.profile_as_dstat_df() for profiled in strategies]

# Bundle both sets of dataframes and write them as CSV files under log_path,
# using a prefix that names the dataset variant.
strat_analysis = StrategyAnalysis(
    strategy_dataframes=strategy_dfs,
    dstat_dataframes=dstat_dfs,
)
strat_analysis.save_dfs_as_csv(
    path=log_path,
    prefix=f"cubeplusplus-{datatype}",
)