#input 값
#return 값 -line7 from humanoid.py
return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
Input
self.sim = mujoco_py.MjSim(self.model)
self.data = self.sim.data
#mujoco_py에서 sim 가져옵니다.
class mujoco_py.MjSim(model, data=None, nsubsteps=1, udd_callback=None)
MjSim represents a running simulation including its state.
Similar to Gym's MujocoEnv, it internally wraps a PyMjModel and a PyMjData.
self.init_qpos = self.sim.data.qpos.ravel().copy() # 다차원 배열을 1차원 배열로
self.init_qvel = self.sim.data.qvel.ravel().copy()
observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
assert not done
self.obs_dim = observation.size
bounds = self.model.actuator_ctrlrange.copy()
low = bounds[:, 0]
high = bounds[:, 1]
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
high = np.inf*np.ones(self.obs_dim)
low = -high
self.observation_space = spaces.Box(low, high, dtype=np.float32)
self.seed() # random위해 seed 생성
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
data에 들어있는 것은
- act
- act_dot
- active_contacts_efc_pos
- actuator_force
- actuator_length
- actuator_moment
- actuator_velocity
- body_jacp
- body_jacr
- body_xmat
- body_xpos
- body_xquat
- body_xvelp
- body_xvelr
- cacc
- cam_xmat
- cam_xpos
- cdof
- cdof_dot
- cfrc_ext
- cfrc_int
- cinert
- contact
- crb
- ctrl
- cvel
- efc_AR
- efc_AR_colind
- efc_AR_rowadr
- efc_AR_rownnz
- efc_D
- efc_J
- efc_JT
- efc_JT_colind
- efc_JT_rowadr
- efc_JT_rownnz
- efc_J_colind
- efc_J_rowadr
- efc_J_rownnz
- efc_R
- efc_aref
- efc_b
- efc_diagApprox
- efc_force
- efc_frictionloss
- efc_id
- efc_margin
- efc_solimp
- efc_solref
- efc_state
- efc_type
- efc_vel
- energy
- geom_jacp
- geom_jacr
- geom_xmat
- geom_xpos
- geom_xvelp
- geom_xvelr
- light_xdir
- light_xpos
- maxuse_con
- maxuse_efc
- maxuse_stack
- mocap_pos
- mocap_quat
- nbuffer
- ncon
- ne
- nefc
- nf
- nstack
- pstack
- qLD
- qLDiagInv
- qLDiagSqrtInv
- qM
- qacc
- qacc_unc
- qacc_warmstart
- qfrc_actuator
- qfrc_applied
- qfrc_bias
- qfrc_constraint
- qfrc_inverse
- qfrc_passive
- qfrc_unc
- qpos
- qvel
- sensordata
- set_joint_qpos
- set_joint_qvel
- set_mocap_pos
- set_mocap_quat
- site_jacp
- site_jacr
- site_xmat
- site_xpos
- site_xvelp
- site_xvelr
- solver
- solver_fwdinv
- solver_iter
- solver_nnz
- subtree_angmom
- subtree_com
- subtree_linvel
- ten_length
- ten_moment
- ten_velocity
- ten_wrapadr
- ten_wrapnum
- time
- timer
- userdata
- warning
- wrap_obj
- wrap_xpos
- xanchor
- xaxis
- xfrc_applied
- ximat
- xipos
observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
# observation은
>>> print(sim.data.qpos)
[0. 0. 1.4 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]
>>> print(sim.data.qvel)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
>>> print(len(sim.data.qpos))
28
>>> print(len(sim.data.qvel))
27
Import gym
>>> import gym
>>> import numpy as np
>>> env = gym.make('Humanoid-v2')
>>> env.reset()
array([ 1.40919187e+00, 9.99939372e-01, -7.71515557e-03, 6.19486342e-04,
-7.83231679e-03, 7.32990013e-03, 1.10215862e-03, 9.22152196e-03,
-3.53687229e-03, -9.47177185e-03, 2.24031908e-03, -7.53064783e-03,
-9.78305413e-03, -7.90378196e-03, -2.35972876e-03, 7.21348110e-03,
-9.63757539e-03, 9.81365608e-03, -5.37928060e-03, -5.45530816e-03,
4.62269139e-03, -2.30846754e-03, -6.07179910e-03, -7.80136491e-03,
-4.62899115e-03, 3.13089568e-03, -4.99864454e-03, -9.77169365e-03,
7.79317196e-03, -7.51343512e-03, -8.70429433e-03, 9.04579738e-03,
8.86997241e-03, 1.35371253e-04, 2.62731035e-03, -5.96273831e-04,
-4.16089135e-03, -1.42343764e-03, 9.92559119e-03, 7.25291998e-03,
2.39257416e-03, 3.21586491e-03, 8.03763845e-03, -8.14553818e-03,
-1.73986608e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.23048629e+00,
2.22138142e+00, 3.60175818e-02, 5.08580438e-04, 6.92649338e-02,
-2.28067340e-02, -1.45719862e-01, 4.22829666e-02, 4.15173691e+00,
8.32207894e+00, 8.75941005e-02, 8.52586837e-02, 8.65669965e-03,
-9.48430053e-07, 1.03256258e-02, -1.73086837e-04, -5.19991100e-02,
1.15694768e-03, 4.04227184e-01, 2.03575204e+00, 4.42095777e-02,
4.22583228e-02, 4.96572605e-02, -3.53365805e-04, 8.86839205e-03,
2.62871584e-04, -2.65017636e-01, -5.25535349e-03, 1.95838275e-01,
5.85278711e+00, 2.47825773e-01, 2.07245699e-01, 5.44878712e-02,
-1.11894233e-02, -1.98573257e-02, -7.68599391e-02, -1.14165924e-01,
-4.43381354e-01, -7.95774731e-01, 4.52555626e+00, 8.75130894e-01,
8.52177371e-01, 2.94415167e-02, -6.39981824e-03, -3.70873002e-02,
-1.42505935e-01, -6.61732318e-02, -2.54556309e-01, -1.47121696e+00,
2.63249442e+00, 1.03880128e+00, 1.02280378e+00, 2.23579875e-02,
-4.52699073e-03, -3.48314474e-02, -1.32260552e-01, -4.58998850e-02,
-1.74289172e-01, -1.34101095e+00, 1.76714587e+00, 2.46552906e-01,
2.08662074e-01, 5.07748816e-02, 9.76573369e-03, -1.78664934e-02,
7.40836067e-02, -1.03463383e-01, 4.26646504e-01, -8.01292363e-01,
4.52555626e+00, 8.75600634e-01, 8.54897329e-01, 2.56875352e-02,
4.42418949e-03, -2.69687268e-02, 1.33681550e-01, -4.88395732e-02,
2.38539846e-01, -1.47432285e+00, 2.63249442e+00, 1.03947655e+00,
1.02516668e+00, 1.91349593e-02, 2.50097375e-03, -2.08164387e-02,
1.22639095e-01, -2.73892105e-02, 1.61362279e-01, -1.34307207e+00,
1.76714587e+00, 4.21369777e-01, 3.29555083e-01, 1.13780212e-01,
2.88445051e-02, -3.97973027e-02, 1.70819412e-01, 9.88779871e-02,
-3.94461574e-01, 7.07453341e-01, 1.59405984e+00, 3.26212158e-01,
3.46453814e-01, 1.67310656e-01, 7.44276068e-02, -1.52998263e-01,
1.27011610e-01, 3.27153627e-01, -2.87772277e-01, 5.45640552e-01,
1.19834313e+00, 4.11293226e-01, 3.17421761e-01, 1.18596914e-01,
-3.15872654e-02, -4.27927247e-02, -1.70411037e-01, 1.08758061e-01,
4.02529474e-01, 6.91480909e-01, 1.59405984e+00, 3.12298760e-01,
3.36511289e-01, 1.70509615e-01, -7.59998737e-02, -1.51845891e-01,
-1.23013870e-01, 3.33878141e-01, 2.87198022e-01, 5.30312746e-01,
1.19834313e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 3.03897265e-03,
-5.19718377e-03, -9.69684886e-03, -3.73163767e-03, -6.55453620e-03,
-4.56384745e-03, 2.95585284e-03, -1.25888048e-02, -1.78848202e-03,
-1.77108184e-03, -6.37364953e-03, -4.37417405e-03, -5.74800641e-03,
-1.25170983e-02, -1.83779786e-03, -1.78063829e-03, -7.53750238e-03,
-4.37980576e-03, 3.25020748e-03, -1.23691521e-02, 7.08154301e-03,
-2.68338727e-03, -7.35882667e-03, -3.47203757e-03, 3.20353603e-03,
-1.49959237e-02, 7.10708463e-03, -3.70730737e-03, -7.34005333e-03,
-3.41231334e-03, 3.20353603e-03, -1.49959237e-02, 7.10708463e-03,
-3.70730737e-03, -7.34005333e-03, -3.41231334e-03, -5.17590491e-03,
-1.39599905e-02, 2.32139831e-03, -1.38037683e-03, -7.44075880e-03,
-4.40130021e-03, -5.17902649e-03, -2.38855204e-02, 2.28666895e-03,
-5.25514174e-03, -7.44027562e-03, -4.19111384e-03, -5.17902649e-03,
-2.38855204e-02, 2.28666895e-03, -5.25514174e-03, -7.44027562e-03,
-4.19111384e-03, 8.97333714e-03, -3.93557516e-03, -5.05778417e-03,
-5.15276479e-03, -3.37980035e-03, -3.60929896e-03, 8.92324685e-03,
-6.15537201e-03, -2.73146090e-03, -5.19875253e-03, -3.77332862e-03,
-3.98579708e-03, 9.42994859e-03, -1.44019552e-02, -1.20165546e-02,
6.10358184e-04, -3.28620450e-03, -5.57029198e-03, 9.45643751e-03,
-1.31477215e-02, -1.08110131e-02, 6.15788761e-04, -3.48114641e-03,
-5.36759563e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00])
>>> len(env.reset())
376
>>> env2 = gym.make('FetchReach-v0')
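>>> len(env2.reset())  # reset() returns a dict here, so len() counts its 3 keys (observation, achieved_goal, desired_goal), not the observation size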
3
>>> env2.reset()
{'observation': array([ 1.34184371e+00, 7.49100477e-01, 5.34717228e-01, 1.89027457e-04,
7.77191143e-05, 3.43749435e-06, -1.26100357e-08, -9.04671898e-08,
4.55387076e-06, -2.13287826e-06]), 'achieved_goal': array([1.34184371, 0.74910048, 0.53471723]), 'desired_goal': array([1.34356719, 0.68918438, 0.65263931])}
>>> env3 = gym.make('Humanoid-v2')
>>> len(env3.reset())
376
>>> env3.reset()
array([ 1.39758085e+00, 9.99928195e-01, -6.80477034e-03, -5.52128948e-03,
8.17407325e-03, 9.57742986e-03, -2.06755463e-03, -4.98516752e-03,
-4.22545190e-04, -8.14896674e-03, 7.21117984e-03, 2.71079697e-03,
4.81616984e-03, -2.75377264e-03, -1.99285924e-03, 5.39214170e-03,
4.54261420e-03, 4.31337441e-03, 2.64508200e-03, 3.82645025e-03,
-9.27845242e-03, 8.67776269e-03, -3.75362974e-04, 2.73493928e-03,
-3.95066969e-03, 7.01573648e-03, -6.58934185e-03, 6.14901783e-03,
3.65067188e-03, -9.99626086e-03, 1.65606996e-03, -3.11581211e-03,
-7.99289266e-03, -3.34975850e-03, -7.94484353e-03, 3.47208608e-03,
-6.70389044e-03, -4.56555053e-03, -7.86088592e-04, 4.38323916e-03,
-8.04447470e-03, 9.87397096e-03, -5.99994562e-03, 4.24977584e-03,
1.03941086e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.22762477e+00,
2.22058522e+00, 3.87577890e-02, 1.83185348e-03, 1.00029290e-01,
-3.55013090e-02, -2.04401169e-01, 6.80114182e-02, 4.14845975e+00,
8.32207894e+00, 8.72422420e-02, 8.52196365e-02, 9.02902236e-03,
3.27300186e-04, 1.15439215e-02, -1.55892575e-03, -5.82822680e-02,
8.12564638e-03, 4.03265543e-01, 2.03575204e+00, 4.39137486e-02,
4.22425199e-02, 4.99092316e-02, 5.62374157e-04, 8.75772271e-03,
1.16944094e-04, -2.67848236e-01, 4.44685999e-03, 1.91513844e-01,
5.85278711e+00, 2.46169721e-01, 2.04879963e-01, 5.30881631e-02,
-8.85550102e-03, -1.50975075e-02, -7.67065212e-02, -9.06491922e-02,
-4.41695865e-01, -7.92021558e-01, 4.52555626e+00, 8.74159029e-01,
8.48468620e-01, 2.99325316e-02, -3.73323772e-03, -2.04927199e-02,
-1.47077099e-01, -3.74850095e-02, -2.62736468e-01, -1.46891355e+00,
2.63249442e+00, 1.03807767e+00, 1.01933390e+00, 2.31372856e-02,
-1.98890905e-03, -1.45561570e-02, -1.38706888e-01, -1.92051637e-02,
-1.83007678e-01, -1.33937170e+00, 1.76714587e+00, 2.46609326e-01,
2.10996335e-01, 4.87667759e-02, 9.75632076e-03, -1.75113699e-02,
7.10472946e-02, -1.05741226e-01, 4.14488006e-01, -8.07889553e-01,
4.52555626e+00, 8.73210407e-01, 8.57752669e-01, 1.95943073e-02,
2.74493919e-03, -1.89555428e-02, 1.14352175e-01, -3.50702322e-02,
2.04799348e-01, -1.47727240e+00, 2.63249442e+00, 1.03692960e+00,
1.02755607e+00, 1.35511932e-02, 9.77217184e-04, -1.01567919e-02,
9.84721819e-02, -1.33460653e-02, 1.29392842e-01, -1.34485577e+00,
1.76714587e+00, 4.13484662e-01, 3.27985562e-01, 1.08516869e-01,
2.85192478e-02, -4.01916413e-02, 1.65477874e-01, 1.00565021e-01,
-3.83430859e-01, 7.04984965e-01, 1.59405984e+00, 3.18164650e-01,
3.45510159e-01, 1.58640156e-01, 6.96110260e-02, -1.52458942e-01,
1.18814977e-01, 3.26428209e-01, -2.70376487e-01, 5.45274981e-01,
1.19834313e+00, 4.19994145e-01, 3.18419339e-01, 1.21286265e-01,
-2.71550487e-02, -3.43929060e-02, -1.75690562e-01, 8.84895015e-02,
4.12111052e-01, 6.95853228e-01, 1.59405984e+00, 3.28165434e-01,
3.36703997e-01, 1.67112062e-01, -7.52935180e-02, -1.47021039e-01,
-1.31230544e-01, 3.16545830e-01, 2.99872185e-01, 5.40972220e-01,
1.19834313e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.05301168e-03,
-6.38951357e-03, 6.31502552e-03, 2.60046669e-03, 6.10315722e-03,
-3.86631328e-03, 7.25605952e-03, -1.63323665e-02, 1.01033080e-02,
5.23461480e-03, 6.26927027e-03, -3.57151489e-03, 8.91120334e-03,
-1.62890908e-02, 1.01378602e-02, 5.22896229e-03, 6.49053361e-03,
-3.57787041e-03, 6.02673874e-03, -1.98686046e-02, 2.14708123e-03,
6.00546433e-03, 6.32627343e-03, -3.78458630e-03, 5.88702298e-03,
-1.19264357e-02, 1.99552288e-03, 9.10204302e-03, 6.37789731e-03,
-3.93393605e-03, 5.88702298e-03, -1.19264357e-02, 1.99552288e-03,
9.10204302e-03, 6.37789731e-03, -3.93393605e-03, 5.42674458e-03,
-2.07874534e-02, 1.68741615e-02, 5.86670403e-03, 6.70616301e-03,
-3.10399553e-03, 5.40442836e-03, -2.00019024e-02, 1.68555344e-02,
6.17255563e-03, 6.71455548e-03, -3.11649031e-03, 5.40442836e-03,
-2.00019024e-02, 1.68555344e-02, 6.17255563e-03, 6.71455548e-03,
-3.11649031e-03, 1.05313501e-02, 1.11358849e-03, 2.37408908e-03,
-6.68340541e-04, 7.83643986e-03, -3.45144266e-03, 1.05936903e-02,
-5.79814367e-03, 9.42528576e-03, -6.80233913e-04, 6.68454613e-03,
-4.58044811e-03, 2.05074054e-03, -1.01861117e-03, 6.75164098e-03,
-9.20476101e-05, 3.53384056e-03, -3.10852410e-03, 2.07318103e-03,
-1.76104710e-03, 6.02455071e-03, -1.02303955e-04, 3.65504057e-03,
-3.23259865e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00])
>>> env4 = gym.make('HalfCheetah-v2')
>>> len(env4.reset())
17
>>> env4.reset()
array([-0.01354274, 0.09147245, 0.01138785, 0.04450717, 0.06320186,
-0.02112325, -0.01080774, 0.09767808, 0.05346551, 0.07971385,
-0.09114062, 0.00974805, 0.02388392, 0.09041859, 0.16462125,
-0.00306638, -0.21058064])
>>> env5 = gym.make('Hopper-v2')
>>> env5.reset()
array([ 1.24865273e+00, 2.84226244e-03, -3.36845412e-03, -1.87042456e-03,
-4.85806257e-03, -3.21682134e-03, -4.93706795e-03, -6.09194722e-04,
4.74014768e-03, 2.52174078e-03, -3.99487536e-03])
>>> len(env5.reset())
11