Consider the maze below:
The goal is to travel from the initial state S to the goal state G. In Maze problem - Using Online DFS Search we already saw that this is an online search problem, so here I introduce another online local search algorithm, LRTA*, which learns by updating H(s) and thereby finds a path to the goal state.
LRTA*:
LRTA* is short for Learning Real-Time A*. Like A*, it keeps a heuristic estimate H(s) (the counterpart of A*'s h(s)); the difference is that H(s) is updated by the agent itself as it transitions between states:
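In the textbook formulation (LRTA*-AGENT in AIMA), the learning step adjusts the estimate of the state the agent has just left: after taking action a in state s and observing the successor s', the agent sets H(s) = min over all actions b of [ c(s, b, result[s, b]) + H(result[s, b]) ], where an outcome result[s, b] that has never been observed falls back to the plain heuristic h(s). In the maze below every step cost c(s, a, s') is simply 1, so H(s) rises whenever the neighbourhood turns out to be worse than first estimated.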
The pseudo code of the algorithm is as follows:
Still a little dizzy after the description and the pseudo code? So was I ><". Let's walk through an example. Consider the state transitions and the corresponding H(s) mapping below (the dark circle is the current state):
The number inside each circle is that state's H(s), and the current state is the third circle from the left.
- (a)->(b)
- (b)->(c)
- (c)->(d)
- (d)->(e)
In this way, the agent that started in the local-minimum region shown in (a) escapes it through the H(s) updates and keeps moving toward the global minimum; a stand-alone toy sketch of the same update is given below.
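Since the figure itself is not reproduced here, the following self-contained sketch illustrates the same idea (the class name LrtaLineDemo and the initial H values are made up for illustration and are not part of the maze code further down): five states sit on a line, the rightmost one (index 4) is the goal, every move costs 1, and the agent repeatedly applies H(s) = min over neighbours s' of [1 + H(s')] to the state it is about to leave. Running it shows the under-estimated "valley" around the starting state being raised until the agent walks out to the goal:

// Toy illustration with made-up numbers (not the figure above): five states on a
// line, state 4 is the goal (H=0), and each move costs 1. The agent repeatedly
// applies the LRTA* update H(s) = min over neighbours s' of [1 + H(s')] to the
// state it is about to leave, which gradually raises the under-estimated values
// in the middle until the path toward the goal looks cheapest.
public class LrtaLineDemo {
    public static void main(String[] args) {
        double[] H = {9, 2, 2, 4, 0}; // made-up initial estimates; index 4 is the goal
        int cur = 1;                  // start inside the shallow "valley"
        for (int step = 0; cur != 4 && step < 20; step++) {
            int best = -1;
            double bestCost = Double.MAX_VALUE;
            for (int next : new int[]{cur - 1, cur + 1}) {  // 1-D neighbours
                if (next < 0 || next >= H.length) continue;
                double cost = 1 + H[next];                  // c(s,a,s')=1 plus the learned estimate
                if (cost < bestCost) { bestCost = cost; best = next; }
            }
            H[cur] = bestCost; // learning step: remember a better estimate for the state we leave
            System.out.printf("step %d: %d -> %d, H=%s%n", step, cur, best, java.util.Arrays.toString(H));
            cur = best;
        }
    }
}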
Coding and definition:
For the detailed state representation and the Action definitions, please refer to Maze problem - Using Online DFS Search. For the LRTA* side, I implement the agent that solves the maze problem in the class LRTAAgent:
package c4.maze;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.Queue;

/**
 * BD: Learning in Real Time A* Search Agent
 * http://localhost/JJWiki/Wiki.jsp?page=Beyond%20Classical%20Search#section-Beyond+Classical+Search-OnlineLocalSearch
 * @author John
 *
 */
public class LRTAAgent {
    public boolean useUncertainOptimal = false;
    public boolean debug = false;
    private Maze maze;
    private State s = null;  // Previous state
    private Action a = null; // Previous action
    private HashMap<String, State> result = new HashMap<String, State>(); // a table, indexed by state and action.
    private HashMap<String, Double> H = new HashMap<String, Double>();    // a table of cost estimates indexed by state.

    public static class AState {
        public State st;
        public Action act;

        public AState(State s, Action a) { this.st = s; this.act = a; }

        @Override
        public boolean equals(Object o)
        {
            if (o != null && o instanceof AState)
            {
                AState as = (AState) o;
                if (st.equals(as.st) && act.equals(as.act)) return true;
            }
            return false;
        }

        @Override
        public int hashCode()
        {
            return st.x * 100 + st.y + act.hashCode();
        }
    }

    public LRTAAgent(Maze m) { maze = m; }

    public void printf(String fmt, Object... args)
    {
        if (debug) System.out.printf(fmt, args);
    }

    /**
     * BD: Use straight-line distance as h(s). (Not used by default; h1 is used instead.)
     * @param s
     * @param g
     * @return
     */
    public double h(State s, State g)
    {
        double dist = Math.pow((double) (g.x - s.x), 2) + Math.pow((double) (g.y - s.y), 2);
        //System.out.printf("\t[Test] sd=%.01f for %s to %s...\n", Math.sqrt(dist), s, g);
        return Math.sqrt(dist);
    }

    /**
     * BD: Always return 1.
     * @param s
     * @param g
     * @return
     */
    public double h1(State s, State g)
    {
        return 1;
    }

    /**
     * BD: LRTA*-COST(s,a,s',H)
     * @param ps
     * @param a
     * @param cs
     * @return
     */
    public double lrtaCost(State ps, Action a, State cs)
    {
        Double i = cs != null ? H.get(cs.toString()) : null;
        if (i == null)
        {
            printf("\t[Test] Outcome of %s taking %s is unknown! Using heuristic...\n", ps, a);
            return h1(ps, maze.goalState);
        }
        else
        {
            if (Action.STOP.equals(a))
            {
                printf("\t[Test] lrtaCost[%s, %s, %s]=%.1f(%.1f)...\n", ps, a, cs, i, i);
                return 0 + i; // STOP takes no step, so c(s,a,s')=0.
            }
            else
            {
                printf("\t[Test] lrtaCost[%s, %s, %s]=%.1f(%.1f)...\n", ps, a, cs, i + 1, i);
                return 1 + i; // Here c(s,a,s')=1. Every action takes unit cost=1.
            }
        }
    }

    public void showResult()
    {
        Iterator<Entry<String, State>> iter = result.entrySet().iterator();
        while (iter.hasNext())
        {
            Entry<String, State> e = iter.next();
            System.out.printf("\t[Result] %s->%s\n", e.getKey(), e.getValue());
        }
    }

    public void showH()
    {
        Iterator<Entry<String, Double>> iter = H.entrySet().iterator();
        while (iter.hasNext())
        {
            Entry<String, Double> e = iter.next();
            System.out.printf("\t[H] %s->%.1f\n", e.getKey(), e.getValue());
        }
    }

    /**
     * BD: Learning in Real Time A*
     * @param cs current state
     * @return the chosen action
     */
    public Action lrta(State cs)
    {
        if (debug)
        {
            showResult();
            showH();
        }
        if (maze.goalTest()) return Action.STOP;
        if (!H.containsKey(cs.toString())) H.put(cs.toString(), h1(cs, maze.goalState));
        if (s != null)
        {
            // Update result table (both directions, since every action is reversible).
            printf("\t[Test] Update result: %s take %s to %s...\n", s, a, cs);
            result.put(String.format("%s_%s", s, a), cs.cloneself());
            printf("\t[Test] Update result: %s take %s to %s...\n", cs, a.Reverse(), s);
            result.put(String.format("%s_%s", cs, a.Reverse()), s.cloneself());
            printf("\t[Test] Update %s to have cost=%.01f...\n", s, lrtaCost(s, a, cs));
            // Update estimated cost for previous state.
            H.put(s.toString(), lrtaCost(s, a, cs));
        }
        Queue<Action> queue = maze.actions(cs);
        double minCost = Integer.MAX_VALUE, tmpCost = 0;
        while (!queue.isEmpty())
        {
            Action na = queue.poll();
            State ns = result.get(String.format("%s_%s", cs, na));
            if (useUncertainOptimal && ns == null)
            {
                printf("\t[Test] Uncertainty takes the lead on action=%s...\n", na);
                a = na;
                break;
            }
            else if ((tmpCost = lrtaCost(cs, na, ns)) < minCost)
            {
                printf("\t[Test] Take %s from %s to %s with cost=%.01f...\n", na, cs, ns, tmpCost);
                minCost = tmpCost;
                a = na;
            }
        }
        s = cs.cloneself();
        return a;
    }

    public void run() throws Exception
    {
        State is = maze.curState;
        Action a = null;
        do
        {
            a = lrta(is); // Retrieve action based on current state.
            System.out.printf("\t[Info] %s...\n", a);
            is = maze.result(a); // Execute action and get the resulting state.
            System.out.printf("\t[Info] Maze:\n%s\n", maze);
            Thread.sleep(1000);
        } while (!a.equals(Action.STOP));
    }

    public static void main(String[] args) throws Exception {
        State cs = new State(0, 0); // Initial state
        State gs = new State(4, 3); // Goal state
        int mb[][] = {{1,1,0,1},{1,0,0,1},{1,1,1,1},{1,0,0,0},{1,1,1,1}};
        Maze maze = new Maze(mb, cs, gs);
        //System.out.printf("\t[Info] Maze:\n%s\n", maze);
        LRTAAgent lrtaAgent = new LRTAAgent(maze);
        lrtaAgent.useUncertainOptimal = false; // Set true to prefer unexplored actions first (uncertainty-optimal approach).
        lrtaAgent.debug = true;                // Debug use: print verbose trace messages.
        lrtaAgent.run();
    }
}
The complete source code can be downloaded here.