Skip to content

Commit

Permalink
fix no_restart
Browse files Browse the repository at this point in the history
fix ci

fix autoscale
  • Loading branch information
chaokunyang committed Nov 1, 2021
1 parent 0692169 commit f169b41
Show file tree
Hide file tree
Showing 8 changed files with 20 additions and 47 deletions.
31 changes: 4 additions & 27 deletions .upload.aci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ upload_py36_wheel_to_oss:
inputs:
image: ${ACI_VAR_MARS_IMAGES_FOR_RELEASE_PY36}
command:
- bash ci/antbuild.sh
- |
python setup.py bdist_wheel
pluginConfig:
appname: mars
source: ray-project
Expand Down Expand Up @@ -79,7 +80,8 @@ upload_py38_wheel_to_oss:
inputs:
image: ${ACI_VAR_MARS_IMAGES_FOR_RELEASE_PY38}
command:
- bash ci/antbuild.sh
- |
python setup.py bdist_wheel
pluginConfig:
appname: mars
source: ray-project
Expand All @@ -91,28 +93,3 @@ upload_py38_wheel_to_oss:
only:
change:
- ^mars/*

upload_py39_wheel_to_oss:
stage: 上传wheel包
aciTags: DOCKER
agent:
resourceClass: L
steps:
- plugin: clone
- plugin: shell
defaultWebTerminal: true
inputs:
image: ${ACI_VAR_MARS_IMAGES_FOR_RELEASE_PY39}
command:
- bash ci/antbuild.sh
pluginConfig:
appname: mars
source: ray-project
type: UT
publisher:
archiveArtifacts:
artifacts: '**/dist/pymars-*-cp39-cp39-linux_x86_64.whl'
allowEmptyArchive: false
only:
change:
- ^mars/*
16 changes: 3 additions & 13 deletions ci/antbuild.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,24 +54,14 @@ $OSS_HTTP_PREFIX/$(ls pyodps*)
EOF

echo "Upload files..."
FILES=(dist/"$(ls dist)" dsmp/dist/"$(ls dsmp/dist)" "$(ls pyodp*)" package.txt requirements.txt
client/mars_driver.py client/odps_op_with_dsmp.py)
FILES=(dist/"$(ls dist)" dsmp/dist/"$(ls dsmp/dist)" "$(ls pyodp*)" package.txt requirements.txt client/mars_driver.py client/odps_op_with_dsmp.py)
for f in "${FILES[@]}"; do
set +x

FILE_NAME=$(basename $f)
if [[ $FILE_NAME == pymars-* ]]; then
IFS='-' read -ra wheel_name_array <<< "$FILE_NAME"
# If in release branch, wheel_name_array[1] == $VERSION
wheel_name_array[1]=$VERSION
WHEEL_NAME=$(IFS="-" ; echo "${wheel_name_array[*]}")
else
WHEEL_NAME=$FILE_NAME
fi
echo "Upload $WHEEL_NAME to oss"

echo "Upload $FILE_NAME to oss"
osscmd put --headers="x-oss-forbid-overwrite:$FORBID_OVERWRITE" "$WORKSPACE_DIR/$f" \
oss://rayoltest/"$PKG_PATH/$WHEEL_NAME" \
oss://rayoltest/"$PKG_PATH/$FILE_NAME" \
config --id=LTAI4GKK6Chbk8gNULPpYV4j --key="${ACI_VAR_MARS_PACK_OSS_KEY}" \
--host=oss-cn-hangzhou-zmf.aliyuncs.com;
set -x
Expand Down
2 changes: 1 addition & 1 deletion mars/deploy/oscar/base_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ storage:
default_config:
transfer_block_size: 5 * 1024 ** 2
plasma:
store_memory: 20%
store_memory: 1%
"@overriding_fields": ["backends"]
meta:
store: dict
Expand Down
4 changes: 4 additions & 0 deletions mars/deploy/oscar/tests/fault_injection_config.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"@inherits": '@default'
third_party_modules:
- mars.services.tests.fault_injection_patch
storage:
backends: [plasma]
plasma:
store_memory: 1%
2 changes: 2 additions & 0 deletions mars/deploy/oscar/tests/fault_injection_config_with_rerun.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ scheduling:
storage:
# shared-memory38 may lose objects if the process crashes after a successful put.
backends: [plasma]
plasma:
store_memory: 1%
8 changes: 4 additions & 4 deletions mars/deploy/oscar/tests/test_ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def time_consuming(x):
assert await autoscaler_ref.get_dynamic_worker_nums() > 0


@pytest.mark.timeout(timeout=120)
@pytest.mark.timeout(timeout=240)
@pytest.mark.parametrize('ray_large_cluster', [{'num_nodes': 4}], indirect=True)
@require_ray
@pytest.mark.asyncio
Expand Down Expand Up @@ -422,7 +422,7 @@ async def test_auto_scale_in(ray_large_cluster):
assert await autoscaler_ref.get_dynamic_worker_nums() == 2


@pytest.mark.timeout(timeout=120)
@pytest.mark.timeout(timeout=240)
@pytest.mark.parametrize('ray_large_cluster', [{'num_nodes': 4}], indirect=True)
@require_ray
@pytest.mark.asyncio
Expand All @@ -431,7 +431,7 @@ async def test_ownership_when_scale_in(ray_large_cluster):
worker_num=0,
worker_cpu=2,
worker_mem=200 * 1024 ** 2,
supervisor_mem=1 * 1024 ** 3,
supervisor_mem=200 * 1024 ** 2,
config={
'scheduling.autoscale.enabled': True,
'scheduling.autoscale.scheduler_check_interval': 1,
Expand All @@ -444,7 +444,7 @@ async def test_ownership_when_scale_in(ray_large_cluster):
autoscaler_ref = mo.create_actor_ref(
uid=AutoscalerActor.default_uid(), address=client._cluster.supervisor_address)
await asyncio.gather(*[autoscaler_ref.request_worker() for _ in range(2)])
df = md.DataFrame(mt.random.rand(100, 4, chunk_size=2), columns=list('abcd'))
df = md.DataFrame(mt.random.rand(400, 4, chunk_size=2), columns=list('abcd'))
print(df.execute())
assert await autoscaler_ref.get_dynamic_worker_nums() > 1
while await autoscaler_ref.get_dynamic_worker_nums() > 1:
Expand Down
2 changes: 1 addition & 1 deletion mars/oscar/backends/mars/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ async def _create_sub_pool(
await pool.join()

async def kill_sub_pool(self, process: multiprocessing.Process,
force: bool = False, *_):
force: bool = False, no_restart: bool = False):
if 'COV_CORE_SOURCE' in os.environ and not force and not _is_windows: # pragma: no cover
# must shutdown gracefully, or coverage info lost
try:
Expand Down
2 changes: 1 addition & 1 deletion mars/oscar/backends/test/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ async def _create_sub_pool(
await pool.join()

async def kill_sub_pool(self, process: multiprocessing.Process,
force: bool = False):
force: bool = False, no_restart: bool = False):
process.cancel()

async def is_sub_pool_alive(self, process: multiprocessing.Process):
Expand Down

0 comments on commit f169b41

Please sign in to comment.