python value iteration算法玩倒立摆(inverted pendulum)-FinClip官网

python value iteration算法玩倒立摆(inverted pendulum)

网友投稿 692 2022-10-02

python value iteration算法玩倒立摆(inverted pendulum)

最近用value iteration的方法实现了一下倒立摆，看一下效果：

pendulum

我这里分享一下我的实现，倒立摆的代码为：

# a few packages we need to importimport numpy as npimport matplotlib.pyplot as pltimport matplotlibimport matplotlib.animation as animationimport IPython class Pendulum: """ This class describes an inverted pendulum and provides some helper functions """ def __init__(self): """ constructor of the class """ #gravity constant self.g=9.81 # number of dimensions (angle and angular velocity) self.state_dims = 2 # the maximum velocity self.vmax = 6. # the range of allowable states self.state_range = np.array([[0, 2*np.pi],[-self.vmax, self.vmax]]) #simulation step self.delta_t = 0.1 # internal integration step self._internaldt = 0.01 self._integration_ratio = int(round(self.delta_t / self._internaldt)) def next_state(self,x,u): """ This function integrates the pendulum for one step of self.delta_t seconds Inputs: x: state of the pendulum (x,v) as a 2D numpy array u: control as a scalar Output: the state of the pendulum as a 2D numpy array at the end of the integration """ x_next = x[0] v_next = x[1] for i in range(self._integration_ratio): xx_next = (x_next + self._internaldt * v_next)%(2*np.pi) v_next = np.clip(v_next + self._internaldt * (u-self.g*np.sin(x_next)), -self.vmax, self.vmax) x_next = xx_next return np.array([x_next,v_next]) def simulate(self, x0, policy, T): """ This function simulates the pendulum for T seconds from initial state x0 using a policy (policy is called as policy(x) and returns one control) Inputs: x0: the initial conditions of the pendulum as a 2D array (angle and velocity) T: the time to integrate for Output: x (2D array) and u (1D array) containing the time evolution of states and control """ horizon_length = int(T/self.delta_t) x=np.empty([2, horizon_length+1]) x[:,0] = x0 u=np.empty([horizon_length]) for i in range(horizon_length): u[i] = policy(x[:,i]) x[:,i+1] = self.next_state(x[:,i], u[i]) return x, u def animate_robot(self, x, dt = 0.01): """ This function makes an animation showing the behavior of the pendulum takes as input the result of a simulation - dt is the sampling time (0.1s normally) """ # here we check if we need to down-sample the data for display #downsampling (we want 100ms DT or higher) min_dt = 0.1 if(dt < min_dt): steps = int(min_dt/dt) use_dt = int(min_dt * 1000) else: steps = 1 use_dt = int(dt * 1000) plotx = x[:,::steps] fig = matplotlib.figure.Figure(figsize=[6,6]) matplotlib.backends.backend_agg.FigureCanvasAgg(fig) ax = fig.add_subplot(111, autoscale_on=False, xlim=[-1.3,1.3], ylim=[-1.3,1.3]) ax.grid() list_of_lines = [] #create the cart pole line, = ax.plot([], [], 'k', lw=2) list_of_lines.append(line) line, = ax.plot([], [], 'o', lw=2) list_of_lines.append(line) cart_height = 0.25 def animate(i): for l in list_of_lines: #reset all lines l.set_data([],[]) x_pend = np.sin(plotx[0,i]) y_pend = -np.cos(plotx[0,i]) list_of_lines[0].set_data([0., x_pend], [0., y_pend]) list_of_lines[1].set_data([x_pend, x_pend], [y_pend, y_pend]) return list_of_lines def init(): return animate(0) ani = animation.FuncAnimation(fig, animate, np.arange(0, len(plotx[0,:])), interval=use_dt, blit=True, init_func=init) plt.close(fig) plt.close(ani._fig) IPython.display.display_html(IPython.core.display.HTML(ani.to_html5_video()))

value iteration

定义配置和辅助函数

nq=50nv=50nu = 3v_max = 6u_max=5# create lookup tables for discretized statesu_table = np.linspace(-u_max, u_max, nu)q_table = np.linspace(0., 2*np.pi, nq, endpoint=False)v_table = np.linspace(-v_max, v_max, nv)num_states = nq * nvrobot = Pendulum()def get_index(x): ind_q = np.argmin((x[0]-q_table)**2) ind_v = np.argmin((x[1]-v_table)**2) return ind_q + ind_v*nqdef get_states(index): iv,ix = np.divmod(index, nq) return np.array([q_table[ix], v_table[iv]]) next_state_index = np.empty([num_states, nu], dtype=np.int32)for i in range(num_states): for k in range(nu): x_next = robot.next_state(get_states(i),u_table[k]) next_state_index[i,k] = get_index(x_next)

定义代价函数

def cost(x,u): """ a cost function for the inverted pendulum """ return (x[0]-np.pi)**2 + 0.01*x[1]**2 + 0.0001*u**2

value iteration的实现

class ValueIteration: """ This class is used to implement value iteration and store the state of the value function and policy as we iterate """ def __init__(self, model,num_states, cost, discount_factor=0.99): """ receives as input a pendulum and cost function and potentially a discount factor """ # value function stored as a 1D array (indexed as we indexed states in pendulum) self.num_states=num_states self.value_function = np.zeros([self.num_states]) # we also store the policy similarly self.policy = np.zeros([self.num_states]) # references to the pendulum and cost function self.model = model self.cost = cost #discount factor for cost self.gamma = discount_factor def iterate(self,num_states,nu,u_table, num_iter=1): """ the main iteration of value iteration num_iter: maximum number of iterations to be performed. If after an iteration the value function does not change (e.g. less thant 10e-5) the function returns and print success """ for i in range(num_iter): J_new = self.value_function.copy() for j in range(num_states): #for each possible control input we compute the cost r = np.zeros([nu]) for l in range(nu): # the current states and control x = get_states(j) u = u_table[l] # the index for the next state next_index = next_state_index[j,l] #compute the cost r[l] = self.cost(x, u) + self.gamma*self.value_function[next_index] # we take the smallest cost value to update the value function J_new[j] = np.min(r) #here we also store the policy (so we have it for later) self.policy[j] = u_table[np.argmin(r)] #we update the current value function if there is any change otherwise we are done if ((self.value_function-J_new)**2 < 10e-2).all(): print("CONVERGED after iteration " + str(i)) break else: self.value_function = J_new.copy()

训练

# we instanciate a value iteration object for a pendulum model and a cost functionvalue_iteration = ValueIteration(robot,num_states, cost)# we run the iterations (with maximum number 2000).value_iteration.iterate(num_states,nu,u_table,2000)

可视化

my_policy=value_iteration.policydef policy(x):# print(x) index=get_index(x) return my_policy[index]

def plot_results(robot, value_function, policy,my_policy, animate=True): """ This function plots the results. It displays the value function, the policy for all states. Then it integrates the pendulum from state [0,0] and displays the states and control as a function of time Finally it shows an animation of the result """ x0 = np.array([0.,0.]) x, u = robot.simulate(x0, policy, 20) if animate: robot.animate_robot(x, robot.delta_t)

plot_results(robot, value_iteration.value_function, policy,my_policy, animate=True)

后续把跑的视频摆出来，哈哈哈，今天分享就到这里，这是实践代码，理论部分就不讲了哈。

小程序技术标准，打造卓越的小程序应用

692 2022-10-02

python value iteration算法玩倒立摆(inverted pendulum)

在数字化转型中，选择合适的跨平台开发框架不仅能提高效率，还有助于确保数据安全与合规性。

如何设计一个优秀的小程序开发平台？

小程序技术标准，打造卓越的小程序应用

最近发表

更多内容

小程序SDK

Finclip技术文档

小程序开发

小程序容器

小程序框架

Finclip小程序平台

Finclip用户投稿

车联网

推荐文章

小程序SDK是什么意思？小程序sdk和插件有什么区别？

小程序支付功能怎么实现？

企业app开发流程是什么？

app运营模式有哪些？

小程序多端引流怎么做？

小程序生态分析的机会和威胁

Flutter入门这一篇效率文章就够了

原生与跨平台解决方案分析,跨平台软件开发技术方案

热更新技术：让软件更新变得更加轻松快速

解决方案

银行解决方案

证券解决方案

互联网解决方案

政企OA解决方案

科技解决方案

loT解决方案

信任解决方案

热评文章

AppCan:基于混合模式的移动应用开发,移动混合模

Hybrid App混合模式开发的了解

小程序容器技术助力券商数字营销突围，小程序容器化的意

用mpvue开发微信小程序基础知识（vue.js开发

小程序多端框架全面测评对比，强烈推荐！

券商app架构 - 解析券商应用程序的构建与设计