import gym
import torch

env = gym.make("CartPole-v1")

model = torch.nn.Sequential(
    torch.nn.Linear(4, 2))
model.load_state_dict(torch.load("entropy.pt"))

state0 = torch.tensor(env.reset(), dtype = torch.float)
done1 = False
while not done1:
    env.render()
    preference0 = model(state0)
    policy0 = torch.nn.functional.softmax(preference0, -1)
    action0 = torch.multinomial(policy0, 1).item()
    state1, reward1, done1, info = env.step(action0)
    state1 = torch.tensor(state1, dtype = torch.float)
    state0 = state1

env.close()
