You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Traceback (most recent call last):
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/mpire/worker.py", line 458, in _run_safely
results = func()
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/mpire/worker.py", line 391, in _func
_results = func(*args) if is_apply_func else func(args)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/mpire/worker.py", line 571, in _helper_func_with_idx
return args[0], self._call_func(func, args[1])
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/mpire/worker.py", line 596, in _call_func
return func(*args, **kwargs)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/framework/parallel.py", line 205, in _subprocess_runner
router._supervised_runner(main_process, *args, **kwargs)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/framework/parallel.py", line 236, in _supervised_runner
main(*args, **kwargs)
File "/mnt/sda/share_space/xxx/DI-engine/ding/example/./ppo.py", line 56, in main
task.run()
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/framework/task.py", line 206, in run
self.forward(fn)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/framework/task.py", line 51, in runtime_handler
return func(task, *args, **kwargs)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/framework/task.py", line 274, in forward
g = fn(ctx)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/framework/task.py", line 238, in forward
g = self.forward(fn, ctx, async_mode=False)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/framework/task.py", line 51, in runtime_handler
return func(task, *args, **kwargs)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/framework/task.py", line 274, in forward
g = fn(ctx)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/framework/middleware/functional/advantage_estimator.py", line 45, in _gae
data = ttorch_collate(data) # ttorch.Tensor
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/utils/data/collate_fn.py", line 30, in ttorch_collate
x = ttorch.stack(x)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/treevalue/tree/func/func.py", line 56, in _new_func
return _treelized(*args, **kwargs)
File "treevalue/tree/func/cfunc.pyx", line 181, in treevalue.tree.func.cfunc._w_func_treelize_run
File "treevalue/tree/func/cfunc.pyx", line 76, in treevalue.tree.func.cfunc._c_func_treelize_run
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/treetensor/torch/funcs/operation.py", line 356, in stack
return stream_call(torch.stack, tensors, *args, **kwargs)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/treetensor/torch/stream.py", line 40, in stream_call
return func(*args, **kwargs)
TypeError: stack(): argument 'tensors' (position 1) must be tuple of Tensors, not NoneType
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/xxx/miniconda3/envs/fjsp/bin/ditask", line 8, in <module>
sys.exit(cli_ditask())
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/entry/cli_ditask.py", line 67, in cli_ditask
return _cli_ditask(*args, **kwargs)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/entry/cli_ditask.py", line 148, in _cli_ditask
Parallel.runner(
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/ding/framework/parallel.py", line 129, in _runner
pool.map(cls._subprocess_runner, params_group)
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/mpire/pool.py", line 450, in map
results = self.map_unordered(
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/mpire/pool.py", line 522, in map_unordered
return list(self.imap_unordered(func, iterable_of_args, iterable_len, max_tasks_active, chunk_size,
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/mpire/pool.py", line 786, in imap_unordered
self._handle_exception()
File "/home/xxx/miniconda3/envs/fjsp/lib/python3.10/site-packages/mpire/pool.py", line 924, in _handle_exception
raise exception
TypeError: stack(): argument 'tensors' (position 1) must be tuple of Tensors, not NoneType
The text was updated successfully, but these errors were encountered:
我尝试使用两种命令并行运行ding.example.dqn文件,分别如下:
由于我已经在环境中安装di-engine包,第二个命令实际运行的是安装包中的dqn文件,而第一个命令运行的是我下载下来的源代码。现在出现如下问题:
当我运行第二个命令时,可以正常并行化训练,但是第一个命令却无法正常训练,会卡住。
类似的问题也出现在example中的ppo.py中,但是ppo出现的bug不一样,如下:
The text was updated successfully, but these errors were encountered: