001 package ai; 002 003 import java.io.PrintStream; 004 import java.util.ArrayList; 005 import java.util.List; 006 import java.util.Map; 007 import java.util.HashMap; 008 import java.util.concurrent.locks.Lock; 009 import java.util.concurrent.locks.ReentrantLock; 010 011 import rules.GameState; 012 import core.Color; 013 import core.Game; 014 import core.Move; 015 import core.Piece; 016 import core.Player; 017 import core.Position; 018 019 /** 020 * An ai player that uses alpha-beta pruning for some heuristic. 021 * 022 * @specfield state : game state // the current state of the came according to 023 * the ai 024 */ 025 public class OptimizingAlphaBetaPlayer extends Player implements Runnable 026 { 027 028 private GameState state; 029 private final int NUM_THREADS; 030 private static final int INF = 1000000000; 031 private int numEvals; 032 private PrintStream output; 033 034 // Table of Variations from old states. 035 private LookupTable lookupTable; 036 037 // Experience heuristic for ordering moves. 038 private Map<Move, Integer> experience; 039 040 //Variables for threading. 041 private Lock threadLock; 042 private int curMove; 043 private int threadDepth; 044 private List<Move> movesToSearch; 045 private int curAlpha, curBeta; 046 private Variation threadBestVariation; 047 048 049 050 private Heuristic heuristic; 051 052 /** 053 * Creates a new OptimizingAlphaBetaPlayer player of the given color 054 * 055 * @param c 056 * color of the player 057 * @param h 058 * Static evaluator to use. 059 * 060 * @requires c not null, h not null 061 */ 062 public OptimizingAlphaBetaPlayer(Color c, Heuristic h) 063 { 064 this(c, h, 1, System.out, null); 065 } 066 067 /** 068 * Creates a new OptimizingAlphaBetaPlayer player of the given color 069 * 070 * @param c 071 * color of the player 072 * @param h 073 * Static evaluator to use. 074 * @param num_threads 075 * Maximum number of threads to use at a time 076 * @param output 077 * Place to spew debugging output. 078 * @param powerupDiffs 079 * Positions of powerups. 080 * 081 * @requires c not null, h not null, output not null, num_threads >= 1 082 */ 083 public OptimizingAlphaBetaPlayer(Color c, Heuristic h, int num_threads, 084 PrintStream output, 085 Map<Position, Piece> powerupDiffs) 086 { 087 super(c); 088 this.output = output; 089 if (powerupDiffs != null) 090 state = new GameState(powerupDiffs); 091 else 092 state = new GameState(); 093 heuristic = h; 094 NUM_THREADS = num_threads; 095 threadLock = new ReentrantLock(); 096 lookupTable = new LookupTable(); 097 experience = new HashMap<Move, Integer>(); 098 } 099 100 /** 101 * @see Player 102 */ 103 public Move askForMove(Move opponentsMove) { 104 return askForMove(opponentsMove, -1, -1); 105 } 106 107 /** 108 * @see Player 109 */ 110 public Move askForMove(Move opponentsMove, long myTime, long opponentsTime) { 111 if (opponentsMove == null) 112 output.println("Got null move"); 113 else 114 output.println("Got move " + opponentsMove); 115 output.println("Current move " + state.getMoveNum()); 116 output.println("My color " + this.getColor()); 117 // If this is not the game's very first move 118 if (state.getMoveNum() > 0 || this.getColor() == Color.BLACK) 119 { 120 // Update the board with the opponent's move 121 if (opponentsMove == null) { 122 output.println("WTF, ref? You gave me a null move!"); 123 opponentsMove = Move.fromString(""); 124 } 125 state.makeMove(opponentsMove); 126 } 127 Move m = getBestMove(myTime); 128 if (m == null) 129 output.println("ACK! Got null?"); 130 state.makeMove(m); 131 return m; 132 } 133 private static List<Move> sortList(List<Move> a, 134 List<Integer> vs){ 135 //In Python: [x[1] for x in sorted(zip(vs, a))]. Thanks, Java. 136 137 List<Integer> indices = new ArrayList<Integer>(); 138 int j = -1; 139 for(int v: vs){ 140 j++; 141 int i = 0; 142 for(; i < indices.size(); i++){ 143 if (v < vs.get(indices.get(i))) 144 break; 145 } 146 indices.add(i, j); 147 } 148 List<Move> lst = new ArrayList<Move>(); 149 for(int i = 0; i < indices.size(); i++){ 150 lst.add(a.get(indices.get(i))); 151 } 152 return lst; 153 } 154 155 /** 156 * Return the moves for a given state in the order to search them. 157 * 158 * @param firstMove A preference for the first move; if it is a possible 159 * first move, place it first. 160 */ 161 private List<Move> sortedMoves(GameState gameState, 162 Move firstMove){ 163 List<Move> answer = new ArrayList<Move>(gameState.legalMoves()); 164 // because firstMove often comes from our collision-ignoring 165 // hash table, it may not actually be a valid move. 166 167 List<Integer> values = new ArrayList<Integer>(); 168 for (Move m: answer){ 169 values.add(-getExperience(m)); 170 } 171 List<Move> ans = sortList(answer, values); 172 if (firstMove != null && ans.remove(firstMove)) 173 ans.add(0, firstMove); 174 return ans; 175 } 176 177 /** 178 * This method is called concurrently by several threads, all on 179 * the same threadState. They find the best variation for this 180 * state, communicating by the curMove, movesToSearch, curAlpha, 181 * curBeta, threadBestVariation variables. 182 */ 183 private void threadSearch(GameState threadState){ 184 while(true){ 185 Move m; 186 synchronized(threadLock) { 187 if (curMove == movesToSearch.size()) 188 break; 189 m = movesToSearch.get(curMove); 190 curMove++; 191 } 192 Map<Position, Piece> moveMap = m.toMap(threadState); 193 threadState.reversibleMakeMove(moveMap, true); 194 Variation v = getValue(threadState, threadDepth-1, -curBeta, -curAlpha); 195 threadState.reversibleMakeMove(moveMap, false); 196 if (Thread.currentThread().isInterrupted()) { 197 return; 198 } 199 v = v.cons(m); 200 201 synchronized(threadLock) { 202 if (v.getValue() > threadBestVariation.getValue()) { 203 threadBestVariation = v; 204 if (v.getValue() > curAlpha){ 205 curAlpha = v.getValue(); 206 } 207 } 208 if (threadBestVariation.getValue() >= curBeta) { 209 break; 210 } 211 } 212 } 213 } 214 215 /** 216 * Start a new threadSearch on the current state. 217 */ 218 public void run(){ 219 threadSearch(state.copy()); //Since searching mutates the state. 220 } 221 222 private synchronized void incrementCounter(){ 223 numEvals++; 224 } 225 226 /** 227 * Get the value of this move from the experience table, default 0. 228 * Thread-safe. 229 */ 230 private int getExperience(Move m){ 231 Integer v; 232 synchronized (experience) { 233 v = experience.get(m); 234 } 235 if (v == null) 236 return 0; 237 return v; 238 239 } 240 241 /** 242 * Update the experience table with a given good Variation. 243 * Thread-safe. 244 */ 245 private void updateExperience(Variation v){ 246 synchronized (experience) { 247 int val = getExperience(v.getMove()); 248 experience.put(v.getMove(), (1 << v.getDepth()) + val); 249 } 250 } 251 252 /** 253 * Returns any variation that the current player can force to have 254 * value > beta, or the best possible variation, given the 255 * current state. Static evaluate at a given depth. 256 */ 257 private Variation getValue(GameState gameState, int depth, int alpha, int beta){ 258 Variation lastTime = lookupTable.getTable(gameState); 259 if (lastTime != null && lastTime.getDepth() >= depth) { 260 return lastTime; //(probably) already seen. 261 } 262 Color c = gameState.getWinner(); 263 if (c != Color.NONE){ 264 incrementCounter(); 265 if (c == gameState.getCurrentColor()) 266 return new Variation(INF + depth); //Incentive to win early 267 else 268 return new Variation(-INF - depth); 269 } 270 if (depth == 0){ 271 incrementCounter(); 272 return new Variation(heuristic.heuristic(gameState)); 273 } 274 Variation myBestVariation = new Variation(-2*INF); 275 Move firstMove = null; 276 //If possible, do the best move from last time (at a smaller 277 //depth); this is probably a good move, so it will improve 278 //alpha and likely allow us to break out. 279 if (lastTime != null) 280 firstMove = lastTime.getMove(); 281 for(Move m: sortedMoves(gameState, firstMove)){ 282 if (myBestVariation.getValue() > alpha) 283 alpha = myBestVariation.getValue(); 284 Map<Position, Piece> moveMap = m.toMap(gameState); 285 gameState.reversibleMakeMove(moveMap, true); 286 Variation v = getValue(gameState, depth-1, -beta, -alpha); 287 gameState.reversibleMakeMove(moveMap, false); 288 if (Thread.currentThread().isInterrupted()) { 289 return null; 290 } 291 v = v.cons(m); //Also negates v.getValue 292 if (v.getValue() > myBestVariation.getValue()) { 293 myBestVariation = v; 294 if (v.getValue() >= beta) { 295 break; 296 } 297 } 298 } 299 updateExperience(myBestVariation); 300 lookupTable.update(gameState, myBestVariation); 301 return myBestVariation; 302 } 303 304 /** 305 * Use NUM_THREADS threads to compute the best variation from 306 * state to some depth. First estimate alpha and beta based off 307 * the most likely answer, taking the threads with you. Then 308 * split the rest of the possible moves up among the threads, and 309 * let them do each move with a single thread. 310 * 311 * @param principal The best variation for this position at the previous 312 * depth. 313 * @param depth The depth to run to. 314 * @param timeLimit The millisecond clock time to quit at. 315 * @param alpha, beta The alpha and beta values for this state. 316 */ 317 private Variation threadedGetBestMove(int depth, int alpha, int beta, 318 Variation principal, long timeLimit){ 319 Variation lastTime = lookupTable.getTable(state); 320 if (lastTime != null && lastTime.getDepth() >= depth) { 321 // Don't try to use the lookup table here; it isn't too costly to 322 // compute, and we want to avoid collisions. 323 //return lastTime; 324 } 325 Color c = state.getWinner(); 326 if (c != Color.NONE){ 327 incrementCounter(); 328 if (c == state.getCurrentColor()){ 329 return new Variation(INF + depth); //Incentive to win early 330 } else { 331 return new Variation(-INF - depth); 332 } 333 } 334 if (depth == 0){ 335 incrementCounter(); 336 return new Variation(heuristic.heuristic(state)); 337 } 338 if (principal == null) 339 principal = new Variation(0); 340 Move firstMove = principal.getMove(); 341 List<Move> moves = sortedMoves(state, firstMove); 342 firstMove = moves.get(0); // firstMove could have been null 343 moves.remove(firstMove); 344 Map<Position, Piece> moveMap = firstMove.toMap(state); 345 state.reversibleMakeMove(moveMap, true); 346 threadBestVariation = threadedGetBestMove(depth-1, -beta, -alpha, principal.cdr(), timeLimit); 347 state.reversibleMakeMove(moveMap, false); 348 if (System.currentTimeMillis() > timeLimit || 349 Thread.currentThread().isInterrupted()){ 350 return null; 351 } 352 threadBestVariation = threadBestVariation.cons(firstMove); 353 if (threadBestVariation.getValue() > alpha) 354 alpha = threadBestVariation.getValue(); 355 356 boolean killThreads = false; 357 if (moves.size() == 0) { 358 } else if (threadBestVariation.getValue() >= beta) { 359 } else { 360 movesToSearch = moves; 361 curMove = 0; 362 curAlpha = alpha; 363 curBeta = beta; 364 threadDepth = depth; 365 366 //output.println("Spawning threads " + NUM_THREADS); 367 //All set to spawn threads. 368 List<Thread> threads = new ArrayList<Thread>(); 369 int num_threads = Math.min(NUM_THREADS, moves.size()); 370 for (int i = 0; i < num_threads; i++) { 371 Thread t = new Thread(this); 372 t.start(); 373 threads.add(t); 374 } 375 for (Thread t: threads){ 376 if (killThreads){ 377 t.interrupt(); 378 continue; 379 } 380 while (t.isAlive()) { 381 if (System.currentTimeMillis() > timeLimit){ 382 t.interrupt(); 383 killThreads = true; 384 break; 385 } 386 try { 387 t.join(100); 388 } catch (InterruptedException e) { 389 output.println("Interrupted by GraphicUI."); 390 //Asked to die by GraphicUI 391 Thread.currentThread().interrupt(); 392 t.interrupt(); 393 killThreads = true; 394 break; 395 } 396 } 397 } 398 if (killThreads) { //Interrupted them, now need to rejoin them 399 for (Thread t: threads){ 400 try { 401 t.join(); 402 } catch (InterruptedException e) { 403 output.println("Interrupted by GraphicUI."); 404 Thread.currentThread().interrupt(); 405 try { 406 t.join(); 407 } catch (InterruptedException e2) { 408 output.println("Interrupted twice? Dying badly."); 409 } 410 } 411 } 412 } 413 } 414 //Only update if it succeeded, but return answer anyway 415 if (killThreads) { 416 Thread.currentThread().interrupt(); 417 } else { 418 updateExperience(threadBestVariation); 419 lookupTable.update(state, threadBestVariation); 420 } 421 return threadBestVariation; 422 } 423 424 /** Using iterated deepening, compute the best move until 425 * a time limit. 426 * @param lowTimeLimit The time after which you shouldn't 427 * start a new depth. 428 * @param timeLimit The time after which a search at a given depth 429 * should break out. 430 */ 431 private Move getBestMoveID(long lowTimeLimit, long timeLimit){ 432 Variation ans = new Variation(0); 433 long startTime = System.currentTimeMillis(); 434 Thread.interrupted(); //clear interrupted status 435 output.println("Current value " + heuristic.heuristic(state)); 436 for(int depth = 3; ; depth++){ 437 numEvals = 0; 438 Variation suggested = threadedGetBestMove(depth, -2*INF, 2*INF, ans, timeLimit); 439 if (suggested != null) 440 ans = suggested; 441 if (suggested == null || Thread.currentThread().isInterrupted()) { 442 output.println("Breaking out depth " + depth + " in " + 443 ((System.currentTimeMillis() - startTime) * 0.001) + 444 " seconds."); 445 break; 446 } 447 if (ans.getValue() >= INF){ 448 output.println("Breaking out to win."); 449 break; 450 } else if (ans.getValue() <= -INF){ 451 output.println("Breaking out to lose."); 452 break; 453 } 454 double timePassed = (System.currentTimeMillis() - startTime) * 0.001; 455 output.println("Searched to depth " + depth + " in " + numEvals + " evaluations and " + timePassed + " seconds, value " + suggested.getValue() + " move " + suggested.getMove() + "."); 456 if (System.currentTimeMillis() > lowTimeLimit || 457 timePassed > 5.0 && depth > 13) 458 break; 459 } 460 if (ans.getMove() == null) { 461 output.println("ACK!!!! NULL MOVE!!!"); 462 return (new ArrayList<Move>(state.legalMoves())).get(0); 463 } 464 return ans.getMove(); 465 } 466 467 /** 468 * Given the amount of player time left, compute the best move to 469 * make. 470 * 471 * @param timeLeft the number of milliseconds on this player's clock. 472 */ 473 private Move getBestMove(long timeLeft){ 474 numEvals = 0; 475 long startTime = System.currentTimeMillis(); 476 long timeLimitDiff = 20000; //Quit at 20 seconds 477 if (timeLeft > 0 && timeLeft < 225000){ 478 timeLimitDiff = timeLeft / 15; 479 } 480 if (state.legalMoves().size() == 1 || timeLimitDiff < 100){ 481 Move m = (new ArrayList<Move>(state.legalMoves())).get(0); 482 if(state.legalMoves().size() == 1) 483 output.println("Forced to make move " + m + "."); 484 else if(timeLimitDiff < 100) 485 output.println("Time low, making first move " + m + "."); 486 return m; 487 } 488 489 //Decay the experience heuristic over time. 490 for(Map.Entry<Move, Integer> e: experience.entrySet()) { 491 e.setValue(e.getValue() / 2); 492 } 493 long lowTimeLimitDiff = timeLimitDiff / 3; 494 output.println("Max time: " + ((timeLimitDiff) / 1000.0) + "."); 495 return getBestMoveID(lowTimeLimitDiff + startTime, timeLimitDiff + startTime); 496 } 497 498 /** 499 * Has the player make a series of moves. Used to get the AI player up to 500 * speed on a newly loaded game 501 * 502 * @param game 503 * the new game 504 * 505 * @modifies state 506 * 507 * @requires game not null 508 */ 509 public void update(Game game) 510 { 511 state = new GameState(); 512 for (int i = 0; i < game.getMoveHistory().size() - 1; i++) 513 { 514 state.makeMove(game.getMoveHistory().get(i)); 515 } 516 517 if (this.getColor() != game.getCurrentPlayer().getColor()) 518 { 519 state.makeMove(game.getMoveHistory().get( 520 game.getMoveHistory().size() - 1)); 521 } 522 } 523 }