001    package ai;
002    
003    import java.io.PrintStream;
004    import java.util.ArrayList;
005    import java.util.List;
006    import java.util.Map;
007    import java.util.HashMap;
008    import java.util.concurrent.locks.Lock;
009    import java.util.concurrent.locks.ReentrantLock;
010    
011    import rules.GameState;
012    import core.Color;
013    import core.Game;
014    import core.Move;
015    import core.Piece;
016    import core.Player;
017    import core.Position;
018    
019    /**
020     * An ai player that uses alpha-beta pruning for some heuristic.
021     * 
022     * @specfield state : game state // the current state of the came according to
023     *            the ai
024     */
025    public class OptimizingAlphaBetaPlayer extends Player implements Runnable
026    {
027    
028        private GameState state;
029        private final int NUM_THREADS;
030        private static final int INF = 1000000000;
031        private int numEvals;
032        private PrintStream output;
033    
034        // Table of Variations from old states.
035        private LookupTable lookupTable;
036    
037        // Experience heuristic for ordering moves.
038        private Map<Move, Integer> experience;
039    
040        //Variables for threading.
041        private Lock threadLock;
042        private int curMove;
043        private int threadDepth;
044        private List<Move> movesToSearch;
045        private int curAlpha, curBeta;
046        private Variation threadBestVariation;
047        
048        
049        
050        private Heuristic heuristic;
051    
052        /**
053         * Creates a new OptimizingAlphaBetaPlayer player of the given color
054         * 
055         * @param c
056         *            color of the player
057         * @param h
058         *            Static evaluator to use.
059         * 
060         * @requires c not null, h not null
061         */
062        public OptimizingAlphaBetaPlayer(Color c, Heuristic h)
063            {
064                this(c, h, 1, System.out, null);
065            }
066    
067        /**
068         * Creates a new OptimizingAlphaBetaPlayer player of the given color
069         * 
070         * @param c
071         *            color of the player
072         * @param h
073         *            Static evaluator to use.
074         * @param num_threads
075         *            Maximum number of threads to use at a time
076         * @param output
077         *            Place to spew debugging output.
078         * @param powerupDiffs
079         *            Positions of powerups.
080         * 
081         * @requires c not null, h not null, output not null, num_threads >= 1
082         */
083        public OptimizingAlphaBetaPlayer(Color c, Heuristic h, int num_threads, 
084                                         PrintStream output,
085                                         Map<Position, Piece> powerupDiffs)
086        {
087            super(c);
088            this.output = output;
089            if (powerupDiffs != null)
090                state = new GameState(powerupDiffs);
091            else 
092                state = new GameState();
093            heuristic = h;
094            NUM_THREADS = num_threads;
095            threadLock = new ReentrantLock();
096            lookupTable = new LookupTable();
097            experience = new HashMap<Move, Integer>();
098        }
099    
100        /**
101         * @see Player
102         */
103        public Move askForMove(Move opponentsMove) {
104            return askForMove(opponentsMove, -1, -1);
105        }
106    
107        /**
108         * @see Player
109         */
110        public Move askForMove(Move opponentsMove, long myTime, long opponentsTime) {
111            if (opponentsMove == null)
112                output.println("Got null move");
113            else
114                output.println("Got move " + opponentsMove);
115            output.println("Current move " + state.getMoveNum());
116            output.println("My color " + this.getColor());
117                    // If this is not the game's very first move
118                    if (state.getMoveNum() > 0 || this.getColor() == Color.BLACK)
119                    {
120                            // Update the board with the opponent's move
121                        if (opponentsMove == null) {
122                            output.println("WTF, ref?  You gave me a null move!");
123                            opponentsMove = Move.fromString("");
124                        }
125                        state.makeMove(opponentsMove);
126                    }
127                    Move m = getBestMove(myTime);
128                    if (m == null)
129                        output.println("ACK!  Got null?");
130                    state.makeMove(m);
131                    return m;
132            }
133        private static List<Move> sortList(List<Move> a,
134                                                      List<Integer> vs){
135           //In Python: [x[1] for x in sorted(zip(vs, a))].  Thanks, Java.
136          
137            List<Integer> indices = new ArrayList<Integer>();
138            int j = -1;
139            for(int v: vs){
140                j++;
141                int i = 0;
142                for(; i < indices.size(); i++){
143                    if (v < vs.get(indices.get(i)))
144                        break;
145                }
146                indices.add(i, j);
147            }
148            List<Move> lst = new ArrayList<Move>();
149            for(int i = 0; i < indices.size(); i++){
150                lst.add(a.get(indices.get(i)));
151            }
152            return lst;
153        }
154    
155        /**
156         * Return the moves for a given state in the order to search them.
157         * 
158         * @param firstMove  A preference for the first move; if it is a possible
159         *                   first move, place it first.
160         */
161        private List<Move> sortedMoves(GameState gameState,
162                                       Move firstMove){
163            List<Move> answer = new ArrayList<Move>(gameState.legalMoves());
164            // because firstMove often comes from our collision-ignoring
165            // hash table, it may not actually be a valid move.
166            
167            List<Integer> values = new ArrayList<Integer>();
168            for (Move m: answer){
169                values.add(-getExperience(m));
170            }
171            List<Move> ans = sortList(answer, values);
172            if (firstMove != null && ans.remove(firstMove))
173                ans.add(0, firstMove);
174            return ans;
175        }
176        
177        /**
178         * This method is called concurrently by several threads, all on
179         * the same threadState.  They find the best variation for this
180         * state, communicating by the curMove, movesToSearch, curAlpha,
181         * curBeta, threadBestVariation variables.
182         */
183        private void threadSearch(GameState threadState){
184            while(true){
185                Move m;
186                synchronized(threadLock) {
187                    if (curMove == movesToSearch.size())
188                        break;
189                    m = movesToSearch.get(curMove);
190                    curMove++;
191                }
192                Map<Position, Piece> moveMap = m.toMap(threadState);
193                threadState.reversibleMakeMove(moveMap, true);
194                Variation v = getValue(threadState, threadDepth-1, -curBeta, -curAlpha);
195                threadState.reversibleMakeMove(moveMap, false);
196                if (Thread.currentThread().isInterrupted()) {
197                    return;
198                }
199                v = v.cons(m);
200    
201                synchronized(threadLock) {
202                    if (v.getValue() > threadBestVariation.getValue()) {
203                        threadBestVariation = v;
204                        if (v.getValue() > curAlpha){
205                            curAlpha = v.getValue();
206                        }
207                    }
208                    if (threadBestVariation.getValue() >= curBeta) {
209                        break;
210                    }
211                }
212            }       
213        }
214    
215        /**
216         * Start a new threadSearch on the current state.
217         */
218        public void run(){
219            threadSearch(state.copy());  //Since searching mutates the state.
220        }
221        
222        private synchronized void incrementCounter(){
223            numEvals++;
224        }
225    
226        /**
227         * Get the value of this move from the experience table, default 0.
228         * Thread-safe.
229         */
230        private int getExperience(Move m){
231            Integer v;
232            synchronized (experience) {
233                v = experience.get(m);
234            }
235            if (v == null)
236                return 0;
237            return v;
238            
239        }
240        
241        /**
242         * Update the experience table with a given good Variation.
243         * Thread-safe.
244         */
245        private void updateExperience(Variation v){
246            synchronized (experience) {
247                int val = getExperience(v.getMove());
248                experience.put(v.getMove(), (1 << v.getDepth()) + val);
249            }
250        }
251    
252        /**
253         * Returns any variation that the current player can force to have
254         * value &gt; beta, or the best possible variation, given the
255         * current state.  Static evaluate at a given depth.
256         */
257        private Variation getValue(GameState gameState, int depth, int alpha, int beta){
258            Variation lastTime = lookupTable.getTable(gameState);
259            if (lastTime != null && lastTime.getDepth() >= depth) {
260                return lastTime; //(probably) already seen.
261            }
262            Color c = gameState.getWinner();
263            if (c != Color.NONE){
264                incrementCounter();
265                if (c == gameState.getCurrentColor())
266                    return new Variation(INF + depth); //Incentive to win early
267                else
268                    return new Variation(-INF - depth);
269            }
270            if (depth == 0){
271                incrementCounter();
272                return new Variation(heuristic.heuristic(gameState));
273            }
274            Variation myBestVariation = new Variation(-2*INF);
275            Move firstMove = null;
276            //If possible, do the best move from last time (at a smaller
277            //depth); this is probably a good move, so it will improve
278            //alpha and likely allow us to break out.
279            if (lastTime != null)
280                firstMove = lastTime.getMove();
281            for(Move m: sortedMoves(gameState, firstMove)){
282                if (myBestVariation.getValue() > alpha)
283                    alpha = myBestVariation.getValue();
284                Map<Position, Piece> moveMap = m.toMap(gameState);
285                gameState.reversibleMakeMove(moveMap, true);
286                Variation v = getValue(gameState, depth-1, -beta, -alpha);
287                gameState.reversibleMakeMove(moveMap, false);
288                if (Thread.currentThread().isInterrupted()) {
289                    return null;
290                }
291                v = v.cons(m); //Also negates v.getValue
292                if (v.getValue() > myBestVariation.getValue()) {
293                    myBestVariation = v;
294                    if (v.getValue() >= beta) {
295                        break;
296                    }
297                }
298            }
299            updateExperience(myBestVariation);
300            lookupTable.update(gameState, myBestVariation);
301            return myBestVariation;
302        }
303    
304        /**
305         * Use NUM_THREADS threads to compute the best variation from
306         * state to some depth. First estimate alpha and beta based off
307         * the most likely answer, taking the threads with you.  Then
308         * split the rest of the possible moves up among the threads, and
309         * let them do each move with a single thread.
310         *
311         * @param principal  The best variation for this position at the previous
312         *                   depth.
313         * @param depth      The depth to run to.
314         * @param timeLimit  The millisecond clock time to quit at.
315         * @param alpha, beta  The alpha and beta values for this state.
316         */
317        private Variation threadedGetBestMove(int depth, int alpha, int beta, 
318                                              Variation principal, long timeLimit){
319            Variation lastTime = lookupTable.getTable(state);
320            if (lastTime != null && lastTime.getDepth() >= depth) {
321                // Don't try to use the lookup table here; it isn't too costly to
322                // compute, and we want to avoid collisions.
323                //return lastTime;
324            }
325            Color c = state.getWinner();
326            if (c != Color.NONE){
327                incrementCounter();
328                if (c == state.getCurrentColor()){
329                    return new Variation(INF + depth); //Incentive to win early
330                } else {
331                    return new Variation(-INF - depth);
332                }
333            }
334            if (depth == 0){
335                incrementCounter();
336                return new Variation(heuristic.heuristic(state));
337            }
338            if (principal == null)
339                principal = new Variation(0);
340            Move firstMove = principal.getMove();
341            List<Move> moves = sortedMoves(state, firstMove);
342            firstMove = moves.get(0); // firstMove could have been null
343            moves.remove(firstMove);
344            Map<Position, Piece> moveMap = firstMove.toMap(state);
345            state.reversibleMakeMove(moveMap, true);
346            threadBestVariation = threadedGetBestMove(depth-1, -beta, -alpha, principal.cdr(), timeLimit);
347            state.reversibleMakeMove(moveMap, false);
348            if (System.currentTimeMillis() > timeLimit || 
349                Thread.currentThread().isInterrupted()){
350                return null;
351            }
352            threadBestVariation = threadBestVariation.cons(firstMove);
353            if (threadBestVariation.getValue() > alpha)
354                alpha = threadBestVariation.getValue();
355    
356            boolean killThreads = false;
357            if (moves.size() == 0) {
358            } else if (threadBestVariation.getValue() >= beta) {
359            } else {
360                movesToSearch = moves;
361                curMove = 0;
362                curAlpha = alpha;
363                curBeta = beta;
364                threadDepth = depth;
365    
366                //output.println("Spawning threads " + NUM_THREADS);
367                //All set to spawn threads.
368                List<Thread> threads = new ArrayList<Thread>();
369                int num_threads = Math.min(NUM_THREADS, moves.size());
370                for (int i = 0; i < num_threads; i++) {
371                    Thread t = new Thread(this);
372                    t.start();
373                    threads.add(t);
374                }
375                for (Thread t: threads){
376                    if (killThreads){
377                        t.interrupt();
378                        continue;
379                    }
380                    while (t.isAlive()) {
381                        if (System.currentTimeMillis() > timeLimit){
382                            t.interrupt();
383                            killThreads = true;
384                            break;
385                        }
386                        try {
387                            t.join(100);
388                        } catch (InterruptedException e) {
389                            output.println("Interrupted by GraphicUI.");
390                            //Asked to die by GraphicUI
391                            Thread.currentThread().interrupt();
392                            t.interrupt();
393                            killThreads = true;
394                            break;
395                        }
396                    }
397                }
398                if (killThreads) { //Interrupted them, now need to rejoin them
399                    for (Thread t: threads){
400                        try {
401                            t.join();
402                        } catch (InterruptedException e) {
403                            output.println("Interrupted by GraphicUI.");
404                            Thread.currentThread().interrupt();
405                            try {
406                                t.join();
407                            } catch (InterruptedException e2) {
408                                output.println("Interrupted twice?  Dying badly.");
409                            }
410                        }
411                    }
412                }
413            }
414            //Only update if it succeeded, but return answer anyway
415            if (killThreads) { 
416                Thread.currentThread().interrupt();     
417            } else {
418                updateExperience(threadBestVariation);
419                lookupTable.update(state, threadBestVariation);
420            }
421            return threadBestVariation;
422        }
423    
424        /** Using iterated deepening, compute the best move until 
425         *  a time limit.
426         *  @param lowTimeLimit The time after which you shouldn't 
427         *         start a new depth.
428         *  @param timeLimit The time after which a search at a given depth
429         *         should break out.
430         */
431        private Move getBestMoveID(long lowTimeLimit, long timeLimit){
432            Variation ans = new Variation(0);
433            long startTime = System.currentTimeMillis();
434            Thread.interrupted(); //clear interrupted status
435            output.println("Current value " + heuristic.heuristic(state));
436            for(int depth = 3; ; depth++){
437                numEvals = 0;
438                Variation suggested = threadedGetBestMove(depth, -2*INF, 2*INF, ans, timeLimit);
439                if (suggested != null)
440                    ans = suggested;
441                if (suggested == null || Thread.currentThread().isInterrupted()) {
442                    output.println("Breaking out depth " + depth + " in " + 
443                                   ((System.currentTimeMillis() - startTime) * 0.001) + 
444                                   " seconds.");
445                    break;
446                }
447                if (ans.getValue() >= INF){
448                    output.println("Breaking out to win.");
449                    break;
450                }  else if (ans.getValue() <= -INF){
451                    output.println("Breaking out to lose.");
452                    break;
453                } 
454                double timePassed = (System.currentTimeMillis() - startTime) * 0.001;
455                output.println("Searched to depth " + depth + " in " + numEvals + " evaluations and " + timePassed + " seconds, value " + suggested.getValue() + " move " + suggested.getMove() + ".");
456                if (System.currentTimeMillis() > lowTimeLimit ||
457                    timePassed > 5.0 && depth > 13)
458                    break;
459            }
460            if (ans.getMove() == null) {
461                output.println("ACK!!!! NULL MOVE!!!");
462                return (new ArrayList<Move>(state.legalMoves())).get(0);
463            }
464            return ans.getMove();
465        }
466    
467        /**
468         * Given the amount of player time left, compute the best move to
469         * make.
470         *
471         * @param timeLeft the number of milliseconds on this player's clock.
472         */
473        private Move getBestMove(long timeLeft){
474            numEvals = 0;
475            long startTime = System.currentTimeMillis();
476            long timeLimitDiff = 20000; //Quit at 20 seconds
477            if (timeLeft > 0 && timeLeft  < 225000){
478                timeLimitDiff = timeLeft / 15;
479            }
480            if (state.legalMoves().size() == 1 || timeLimitDiff < 100){
481                     Move m = (new ArrayList<Move>(state.legalMoves())).get(0);
482                     if(state.legalMoves().size() == 1)
483                            output.println("Forced to make move " + m + ".");
484                     else if(timeLimitDiff < 100)
485                            output.println("Time low, making first move " + m + ".");
486                return m;
487            }
488            
489            //Decay the experience heuristic over time.
490            for(Map.Entry<Move, Integer> e: experience.entrySet()) {
491                e.setValue(e.getValue() / 2);
492            }
493            long lowTimeLimitDiff = timeLimitDiff / 3;
494            output.println("Max time: " + ((timeLimitDiff) / 1000.0) + ".");
495            return getBestMoveID(lowTimeLimitDiff + startTime, timeLimitDiff + startTime);
496        }
497        
498        /**
499         * Has the player make a series of moves. Used to get the AI player up to
500         * speed on a newly loaded game
501         * 
502         * @param game
503         *            the new game
504         * 
505         * @modifies state
506         * 
507         * @requires game not null
508         */
509        public void update(Game game)
510        {
511            state = new GameState();
512            for (int i = 0; i < game.getMoveHistory().size() - 1; i++)
513                {
514                    state.makeMove(game.getMoveHistory().get(i));
515                }
516            
517            if (this.getColor() != game.getCurrentPlayer().getColor())
518                {
519                    state.makeMove(game.getMoveHistory().get(
520                                                             game.getMoveHistory().size() - 1));
521                }
522        }
523    }