001 package ai;
002
003 import java.io.PrintStream;
004 import java.util.ArrayList;
005 import java.util.List;
006 import java.util.Map;
007 import java.util.HashMap;
008 import java.util.concurrent.locks.Lock;
009 import java.util.concurrent.locks.ReentrantLock;
010
011 import rules.GameState;
012 import core.Color;
013 import core.Game;
014 import core.Move;
015 import core.Piece;
016 import core.Player;
017 import core.Position;
018
019 /**
020 * An ai player that uses alpha-beta pruning for some heuristic.
021 *
022 * @specfield state : game state // the current state of the came according to
023 * the ai
024 */
025 public class OptimizingAlphaBetaPlayer extends Player implements Runnable
026 {
027
028 private GameState state;
029 private final int NUM_THREADS;
030 private static final int INF = 1000000000;
031 private int numEvals;
032 private PrintStream output;
033
034 // Table of Variations from old states.
035 private LookupTable lookupTable;
036
037 // Experience heuristic for ordering moves.
038 private Map<Move, Integer> experience;
039
040 //Variables for threading.
041 private Lock threadLock;
042 private int curMove;
043 private int threadDepth;
044 private List<Move> movesToSearch;
045 private int curAlpha, curBeta;
046 private Variation threadBestVariation;
047
048
049
050 private Heuristic heuristic;
051
052 /**
053 * Creates a new OptimizingAlphaBetaPlayer player of the given color
054 *
055 * @param c
056 * color of the player
057 * @param h
058 * Static evaluator to use.
059 *
060 * @requires c not null, h not null
061 */
062 public OptimizingAlphaBetaPlayer(Color c, Heuristic h)
063 {
064 this(c, h, 1, System.out, null);
065 }
066
067 /**
068 * Creates a new OptimizingAlphaBetaPlayer player of the given color
069 *
070 * @param c
071 * color of the player
072 * @param h
073 * Static evaluator to use.
074 * @param num_threads
075 * Maximum number of threads to use at a time
076 * @param output
077 * Place to spew debugging output.
078 * @param powerupDiffs
079 * Positions of powerups.
080 *
081 * @requires c not null, h not null, output not null, num_threads >= 1
082 */
083 public OptimizingAlphaBetaPlayer(Color c, Heuristic h, int num_threads,
084 PrintStream output,
085 Map<Position, Piece> powerupDiffs)
086 {
087 super(c);
088 this.output = output;
089 if (powerupDiffs != null)
090 state = new GameState(powerupDiffs);
091 else
092 state = new GameState();
093 heuristic = h;
094 NUM_THREADS = num_threads;
095 threadLock = new ReentrantLock();
096 lookupTable = new LookupTable();
097 experience = new HashMap<Move, Integer>();
098 }
099
100 /**
101 * @see Player
102 */
103 public Move askForMove(Move opponentsMove) {
104 return askForMove(opponentsMove, -1, -1);
105 }
106
107 /**
108 * @see Player
109 */
110 public Move askForMove(Move opponentsMove, long myTime, long opponentsTime) {
111 if (opponentsMove == null)
112 output.println("Got null move");
113 else
114 output.println("Got move " + opponentsMove);
115 output.println("Current move " + state.getMoveNum());
116 output.println("My color " + this.getColor());
117 // If this is not the game's very first move
118 if (state.getMoveNum() > 0 || this.getColor() == Color.BLACK)
119 {
120 // Update the board with the opponent's move
121 if (opponentsMove == null) {
122 output.println("WTF, ref? You gave me a null move!");
123 opponentsMove = Move.fromString("");
124 }
125 state.makeMove(opponentsMove);
126 }
127 Move m = getBestMove(myTime);
128 if (m == null)
129 output.println("ACK! Got null?");
130 state.makeMove(m);
131 return m;
132 }
133 private static List<Move> sortList(List<Move> a,
134 List<Integer> vs){
135 //In Python: [x[1] for x in sorted(zip(vs, a))]. Thanks, Java.
136
137 List<Integer> indices = new ArrayList<Integer>();
138 int j = -1;
139 for(int v: vs){
140 j++;
141 int i = 0;
142 for(; i < indices.size(); i++){
143 if (v < vs.get(indices.get(i)))
144 break;
145 }
146 indices.add(i, j);
147 }
148 List<Move> lst = new ArrayList<Move>();
149 for(int i = 0; i < indices.size(); i++){
150 lst.add(a.get(indices.get(i)));
151 }
152 return lst;
153 }
154
155 /**
156 * Return the moves for a given state in the order to search them.
157 *
158 * @param firstMove A preference for the first move; if it is a possible
159 * first move, place it first.
160 */
161 private List<Move> sortedMoves(GameState gameState,
162 Move firstMove){
163 List<Move> answer = new ArrayList<Move>(gameState.legalMoves());
164 // because firstMove often comes from our collision-ignoring
165 // hash table, it may not actually be a valid move.
166
167 List<Integer> values = new ArrayList<Integer>();
168 for (Move m: answer){
169 values.add(-getExperience(m));
170 }
171 List<Move> ans = sortList(answer, values);
172 if (firstMove != null && ans.remove(firstMove))
173 ans.add(0, firstMove);
174 return ans;
175 }
176
177 /**
178 * This method is called concurrently by several threads, all on
179 * the same threadState. They find the best variation for this
180 * state, communicating by the curMove, movesToSearch, curAlpha,
181 * curBeta, threadBestVariation variables.
182 */
183 private void threadSearch(GameState threadState){
184 while(true){
185 Move m;
186 synchronized(threadLock) {
187 if (curMove == movesToSearch.size())
188 break;
189 m = movesToSearch.get(curMove);
190 curMove++;
191 }
192 Map<Position, Piece> moveMap = m.toMap(threadState);
193 threadState.reversibleMakeMove(moveMap, true);
194 Variation v = getValue(threadState, threadDepth-1, -curBeta, -curAlpha);
195 threadState.reversibleMakeMove(moveMap, false);
196 if (Thread.currentThread().isInterrupted()) {
197 return;
198 }
199 v = v.cons(m);
200
201 synchronized(threadLock) {
202 if (v.getValue() > threadBestVariation.getValue()) {
203 threadBestVariation = v;
204 if (v.getValue() > curAlpha){
205 curAlpha = v.getValue();
206 }
207 }
208 if (threadBestVariation.getValue() >= curBeta) {
209 break;
210 }
211 }
212 }
213 }
214
215 /**
216 * Start a new threadSearch on the current state.
217 */
218 public void run(){
219 threadSearch(state.copy()); //Since searching mutates the state.
220 }
221
222 private synchronized void incrementCounter(){
223 numEvals++;
224 }
225
226 /**
227 * Get the value of this move from the experience table, default 0.
228 * Thread-safe.
229 */
230 private int getExperience(Move m){
231 Integer v;
232 synchronized (experience) {
233 v = experience.get(m);
234 }
235 if (v == null)
236 return 0;
237 return v;
238
239 }
240
241 /**
242 * Update the experience table with a given good Variation.
243 * Thread-safe.
244 */
245 private void updateExperience(Variation v){
246 synchronized (experience) {
247 int val = getExperience(v.getMove());
248 experience.put(v.getMove(), (1 << v.getDepth()) + val);
249 }
250 }
251
252 /**
253 * Returns any variation that the current player can force to have
254 * value > beta, or the best possible variation, given the
255 * current state. Static evaluate at a given depth.
256 */
257 private Variation getValue(GameState gameState, int depth, int alpha, int beta){
258 Variation lastTime = lookupTable.getTable(gameState);
259 if (lastTime != null && lastTime.getDepth() >= depth) {
260 return lastTime; //(probably) already seen.
261 }
262 Color c = gameState.getWinner();
263 if (c != Color.NONE){
264 incrementCounter();
265 if (c == gameState.getCurrentColor())
266 return new Variation(INF + depth); //Incentive to win early
267 else
268 return new Variation(-INF - depth);
269 }
270 if (depth == 0){
271 incrementCounter();
272 return new Variation(heuristic.heuristic(gameState));
273 }
274 Variation myBestVariation = new Variation(-2*INF);
275 Move firstMove = null;
276 //If possible, do the best move from last time (at a smaller
277 //depth); this is probably a good move, so it will improve
278 //alpha and likely allow us to break out.
279 if (lastTime != null)
280 firstMove = lastTime.getMove();
281 for(Move m: sortedMoves(gameState, firstMove)){
282 if (myBestVariation.getValue() > alpha)
283 alpha = myBestVariation.getValue();
284 Map<Position, Piece> moveMap = m.toMap(gameState);
285 gameState.reversibleMakeMove(moveMap, true);
286 Variation v = getValue(gameState, depth-1, -beta, -alpha);
287 gameState.reversibleMakeMove(moveMap, false);
288 if (Thread.currentThread().isInterrupted()) {
289 return null;
290 }
291 v = v.cons(m); //Also negates v.getValue
292 if (v.getValue() > myBestVariation.getValue()) {
293 myBestVariation = v;
294 if (v.getValue() >= beta) {
295 break;
296 }
297 }
298 }
299 updateExperience(myBestVariation);
300 lookupTable.update(gameState, myBestVariation);
301 return myBestVariation;
302 }
303
304 /**
305 * Use NUM_THREADS threads to compute the best variation from
306 * state to some depth. First estimate alpha and beta based off
307 * the most likely answer, taking the threads with you. Then
308 * split the rest of the possible moves up among the threads, and
309 * let them do each move with a single thread.
310 *
311 * @param principal The best variation for this position at the previous
312 * depth.
313 * @param depth The depth to run to.
314 * @param timeLimit The millisecond clock time to quit at.
315 * @param alpha, beta The alpha and beta values for this state.
316 */
317 private Variation threadedGetBestMove(int depth, int alpha, int beta,
318 Variation principal, long timeLimit){
319 Variation lastTime = lookupTable.getTable(state);
320 if (lastTime != null && lastTime.getDepth() >= depth) {
321 // Don't try to use the lookup table here; it isn't too costly to
322 // compute, and we want to avoid collisions.
323 //return lastTime;
324 }
325 Color c = state.getWinner();
326 if (c != Color.NONE){
327 incrementCounter();
328 if (c == state.getCurrentColor()){
329 return new Variation(INF + depth); //Incentive to win early
330 } else {
331 return new Variation(-INF - depth);
332 }
333 }
334 if (depth == 0){
335 incrementCounter();
336 return new Variation(heuristic.heuristic(state));
337 }
338 if (principal == null)
339 principal = new Variation(0);
340 Move firstMove = principal.getMove();
341 List<Move> moves = sortedMoves(state, firstMove);
342 firstMove = moves.get(0); // firstMove could have been null
343 moves.remove(firstMove);
344 Map<Position, Piece> moveMap = firstMove.toMap(state);
345 state.reversibleMakeMove(moveMap, true);
346 threadBestVariation = threadedGetBestMove(depth-1, -beta, -alpha, principal.cdr(), timeLimit);
347 state.reversibleMakeMove(moveMap, false);
348 if (System.currentTimeMillis() > timeLimit ||
349 Thread.currentThread().isInterrupted()){
350 return null;
351 }
352 threadBestVariation = threadBestVariation.cons(firstMove);
353 if (threadBestVariation.getValue() > alpha)
354 alpha = threadBestVariation.getValue();
355
356 boolean killThreads = false;
357 if (moves.size() == 0) {
358 } else if (threadBestVariation.getValue() >= beta) {
359 } else {
360 movesToSearch = moves;
361 curMove = 0;
362 curAlpha = alpha;
363 curBeta = beta;
364 threadDepth = depth;
365
366 //output.println("Spawning threads " + NUM_THREADS);
367 //All set to spawn threads.
368 List<Thread> threads = new ArrayList<Thread>();
369 int num_threads = Math.min(NUM_THREADS, moves.size());
370 for (int i = 0; i < num_threads; i++) {
371 Thread t = new Thread(this);
372 t.start();
373 threads.add(t);
374 }
375 for (Thread t: threads){
376 if (killThreads){
377 t.interrupt();
378 continue;
379 }
380 while (t.isAlive()) {
381 if (System.currentTimeMillis() > timeLimit){
382 t.interrupt();
383 killThreads = true;
384 break;
385 }
386 try {
387 t.join(100);
388 } catch (InterruptedException e) {
389 output.println("Interrupted by GraphicUI.");
390 //Asked to die by GraphicUI
391 Thread.currentThread().interrupt();
392 t.interrupt();
393 killThreads = true;
394 break;
395 }
396 }
397 }
398 if (killThreads) { //Interrupted them, now need to rejoin them
399 for (Thread t: threads){
400 try {
401 t.join();
402 } catch (InterruptedException e) {
403 output.println("Interrupted by GraphicUI.");
404 Thread.currentThread().interrupt();
405 try {
406 t.join();
407 } catch (InterruptedException e2) {
408 output.println("Interrupted twice? Dying badly.");
409 }
410 }
411 }
412 }
413 }
414 //Only update if it succeeded, but return answer anyway
415 if (killThreads) {
416 Thread.currentThread().interrupt();
417 } else {
418 updateExperience(threadBestVariation);
419 lookupTable.update(state, threadBestVariation);
420 }
421 return threadBestVariation;
422 }
423
424 /** Using iterated deepening, compute the best move until
425 * a time limit.
426 * @param lowTimeLimit The time after which you shouldn't
427 * start a new depth.
428 * @param timeLimit The time after which a search at a given depth
429 * should break out.
430 */
431 private Move getBestMoveID(long lowTimeLimit, long timeLimit){
432 Variation ans = new Variation(0);
433 long startTime = System.currentTimeMillis();
434 Thread.interrupted(); //clear interrupted status
435 output.println("Current value " + heuristic.heuristic(state));
436 for(int depth = 3; ; depth++){
437 numEvals = 0;
438 Variation suggested = threadedGetBestMove(depth, -2*INF, 2*INF, ans, timeLimit);
439 if (suggested != null)
440 ans = suggested;
441 if (suggested == null || Thread.currentThread().isInterrupted()) {
442 output.println("Breaking out depth " + depth + " in " +
443 ((System.currentTimeMillis() - startTime) * 0.001) +
444 " seconds.");
445 break;
446 }
447 if (ans.getValue() >= INF){
448 output.println("Breaking out to win.");
449 break;
450 } else if (ans.getValue() <= -INF){
451 output.println("Breaking out to lose.");
452 break;
453 }
454 double timePassed = (System.currentTimeMillis() - startTime) * 0.001;
455 output.println("Searched to depth " + depth + " in " + numEvals + " evaluations and " + timePassed + " seconds, value " + suggested.getValue() + " move " + suggested.getMove() + ".");
456 if (System.currentTimeMillis() > lowTimeLimit ||
457 timePassed > 5.0 && depth > 13)
458 break;
459 }
460 if (ans.getMove() == null) {
461 output.println("ACK!!!! NULL MOVE!!!");
462 return (new ArrayList<Move>(state.legalMoves())).get(0);
463 }
464 return ans.getMove();
465 }
466
467 /**
468 * Given the amount of player time left, compute the best move to
469 * make.
470 *
471 * @param timeLeft the number of milliseconds on this player's clock.
472 */
473 private Move getBestMove(long timeLeft){
474 numEvals = 0;
475 long startTime = System.currentTimeMillis();
476 long timeLimitDiff = 20000; //Quit at 20 seconds
477 if (timeLeft > 0 && timeLeft < 225000){
478 timeLimitDiff = timeLeft / 15;
479 }
480 if (state.legalMoves().size() == 1 || timeLimitDiff < 100){
481 Move m = (new ArrayList<Move>(state.legalMoves())).get(0);
482 if(state.legalMoves().size() == 1)
483 output.println("Forced to make move " + m + ".");
484 else if(timeLimitDiff < 100)
485 output.println("Time low, making first move " + m + ".");
486 return m;
487 }
488
489 //Decay the experience heuristic over time.
490 for(Map.Entry<Move, Integer> e: experience.entrySet()) {
491 e.setValue(e.getValue() / 2);
492 }
493 long lowTimeLimitDiff = timeLimitDiff / 3;
494 output.println("Max time: " + ((timeLimitDiff) / 1000.0) + ".");
495 return getBestMoveID(lowTimeLimitDiff + startTime, timeLimitDiff + startTime);
496 }
497
498 /**
499 * Has the player make a series of moves. Used to get the AI player up to
500 * speed on a newly loaded game
501 *
502 * @param game
503 * the new game
504 *
505 * @modifies state
506 *
507 * @requires game not null
508 */
509 public void update(Game game)
510 {
511 state = new GameState();
512 for (int i = 0; i < game.getMoveHistory().size() - 1; i++)
513 {
514 state.makeMove(game.getMoveHistory().get(i));
515 }
516
517 if (this.getColor() != game.getCurrentPlayer().getColor())
518 {
519 state.makeMove(game.getMoveHistory().get(
520 game.getMoveHistory().size() - 1));
521 }
522 }
523 }