Skip to content

Commit

Permalink
DOCS-modin-project#1835: add runner of taxi benchmark as example (mod…
Browse files Browse the repository at this point in the history
…in-project#1836)

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev authored and aregm committed Sep 16, 2020
1 parent b89205b commit bebb062
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions examples/cluster/taxi-runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


# pip install git+https://github.com/intel-go/ibis.git@develop

# NOTE: expects https://github.com/intel-go/omniscripts/ checked out and in PYTHONPATH

# the following import turns on experimental mode in Modin,
# including enabling running things in remote cloud
import modin.experimental.pandas as pd # noqa: F401
from modin.experimental.cloud import create_cluster, get_connection

from taxi import run_benchmark as run_benchmark
from taxi.taxibench_pandas_ibis import etl_pandas

# Describe the cluster the benchmark should run on: provider "aws", the
# credentials argument "aws_credentials", plus the cluster name, region,
# availability zone and machine image to use.
# NOTE(review): "aws_credentials" is presumably a path to a credentials file
# relative to the working directory - confirm against create_cluster docs.
test_cluster = create_cluster(
    "aws",
    "aws_credentials",
    cluster_name="rayscale-test",
    region="eu-north-1",
    zone="eu-north-1b",
    image="ami-00e1e82d7d4ca80d3",
)

# Everything that needs the cluster must live inside the ``with`` body:
# the connection lookup, the numpy patching and the benchmark run itself.
with test_cluster:
    conn = get_connection()

    # Swap the ``np`` global seen by ``etl_pandas`` for the numpy module
    # exposed by the cluster connection, so numpy calls made inside the ETL
    # function go through that module instead of the locally imported one.
    # NOTE(review): presumably ``conn.modules`` yields remote-side modules -
    # confirm. The patch is not reverted when the block exits.
    np = conn.modules["numpy"]
    etl_pandas.__globals__["np"] = np

    # Options handed to the taxi benchmark runner.
    parameters = {
        "data_file": "https://modin-datasets.s3.amazonaws.com/trips_data.csv",
        # Alternative location of the same dataset, addressed via S3:
        # "data_file": "s3://modin-datasets/trips_data.csv",
        "dfiles_num": 1,
        "validation": False,
        "no_ibis": True,
        "no_pandas": False,
        "pandas_mode": "Modin_on_ray",
        "ray_tmpdir": "/tmp",
        # 1024**3; presumably a byte count (1 GiB) - confirm with the runner.
        "ray_memory": 1024 * 1024 * 1024,
    }

    run_benchmark(parameters)

0 comments on commit bebb062

Please sign in to comment.