from tf_agents.specs import array_spec
from tf_agents.policies import random_py_policy
import numpy as np

from tf_agents.policies import scripted_py_policy

# Random Python Policy: build a RandomPyPolicy over a bounded int32 action
# spec (shape (1,), values limited to the range 0..10).
action_spec = array_spec.BoundedArraySpec(
    shape=(1,),
    dtype=np.int32,
    minimum=0,
    maximum=10,
)
my_random_py_policy = random_py_policy.RandomPyPolicy(
    time_step_spec=None,
    action_spec=action_spec,
)

# No real time step is supplied in this demo; None is passed on every call.
time_step = None

# Query the policy twice and print each resulting policy step.
for _ in range(2):
    action_step = my_random_py_policy.action(time_step)
    print(action_step)

# Visual separator between the demo sections.
print("*" * 100)

# Scripted Python Policy: replay a fixed script of (num_repeats, action) pairs.
action_spec = array_spec.BoundedArraySpec(
    shape=(2,),
    dtype=np.int32,
    minimum=-10,
    maximum=10,
)

# Each entry is (num_repeats, action values); the values are converted to
# int32 arrays below. Setting `num_repeats` to 0 will skip that action.
action_script = [
    (num_repeats, np.array(values, dtype=np.int32))
    for num_repeats, values in (
        (1, [5, 2]),
        (0, [0, 0]),
        (2, [1, 2]),
        (1, [3, 4]),
    )
]

my_scripted_py_policy = scripted_py_policy.ScriptedPyPolicy(
    time_step_spec=None,
    action_spec=action_spec,
    action_script=action_script,
)

policy_state = my_scripted_py_policy.get_initial_state()
time_step = None  # No real time step is supplied in this demo.

print('Executing scripted policy...')
# Step the policy four times (1 + 0 + 2 + 1 repeats in the script above),
# feeding the state returned by each call into the next one. Only the first
# component of each returned action is printed.
current_state = policy_state
for _ in range(4):
    action_step = my_scripted_py_policy.action(time_step, current_state)
    print(action_step.action[0])
    current_state = action_step.state


print('Resetting my_scripted_py_policy...')
# Start again from a fresh initial state and print the full policy step.
policy_state = my_scripted_py_policy.get_initial_state()
action_step = my_scripted_py_policy.action(time_step, policy_state)
print(action_step)
print("*" * 100)





