diff --git a/README.md b/README.md index 28d6bdeef..0a56ddd51 100644 --- a/README.md +++ b/README.md @@ -9,24 +9,74 @@ The goal is to build a strong UCT chess AI following the same type of techniques We will need to do this with a distributed project, as it requires a huge amount of compute. -Please visit the LCZero forum to discuss: https://groups.google.com/forum/#!forum/lczero +Please visit the LCZero forum to discuss: https://groups.google.com/forum/#!forum/lczero, or the GitHub issues. # Contributing -The server is live at http://lczero.org/. Please download the client and give it a try: https://github.com/glinscott/leela-chess/releases. More information on getting started here: https://github.com/glinscott/leela-chess/wiki. +For precompiled binaries, see: +* [wiki](https://github.com/glinscott/leela-chess/wiki) +* [wiki/Getting-Started](https://github.com/glinscott/leela-chess/wiki/Getting-Started) +For live status: http://lczero.org + +The rest of this page is for users who want to compile the code themselves. Of course, we also appreciate code reviews, pull requests and Windows testers! -NOTE: The steps below are not required -- only for those that want to experiment with generating their own data. +# Compiling + +## Requirements + +* GCC, Clang or MSVC, any C++14 compiler +* boost 1.58.x or later (libboost-all-dev on Debian/Ubuntu) +* BLAS Library: OpenBLAS (libopenblas-dev) or (optionally) Intel MKL +* zlib library (zlib1g & zlib1g-dev on Debian/Ubuntu) +* Standard OpenCL C headers (opencl-headers on Debian/Ubuntu, or at + https://github.com/KhronosGroup/OpenCL-Headers/tree/master/opencl22/) +* OpenCL ICD loader (ocl-icd-libopencl1 on Debian/Ubuntu, or reference implementation at https://github.com/KhronosGroup/OpenCL-ICD-Loader) +* An OpenCL capable device, preferably a very, very fast GPU, with recent + drivers is strongly recommended (OpenCL 1.2 support should be enough, even + OpenCL 1.1 might work). 
If you do not have a GPU, modify config.h in the + source and remove the line that says `#define USE_OPENCL`. +* Tensorflow 1.4 or higher (for training) +* The program has been tested on Linux. + +## Example of compiling - Ubuntu 16.04 + + # Install dependencies + sudo apt install libboost-all-dev libopenblas-dev opencl-headers ocl-icd-libopencl1 ocl-icd-opencl-dev zlib1g-dev + + # Test for OpenCL support & compatibility + sudo apt install clinfo && clinfo + + # Clone github repo + git clone git@github.com:glinscott/leela-chess.git + cd leela-chess + git submodule update --init --recursive + mkdir build && cd build + + # Configure, build and run tests + cmake .. + make + ./tests + +# Compiling Client + +See https://github.com/glinscott/leela-chess/tree/master/go/src/client/README.md. +This client will produce self-play games and upload them to http://lczero.org. +A central server uses these self-play games as input to the training process. ## Weights -The weights from the distributed training are downloadable from http://lczero.org/networks, the best one is at the top. +The weights from the distributed training are downloadable from http://lczero.org/networks, the best one is the top network that has some Games played on it. Weights that we trained to prove the engine was solid are here https://github.com/glinscott/lczero-weights. Currently, the best weights were obtained through supervised learning on a human dataset with elo ratings > 2000. # Training a new net using self-play +Running the Training is not required to help the project, only the central server needs to do this. +The distributed part is running the client to create self-play games. Those games are uploaded to +http://lczero.org, and used as the input to the training process. + After compiling lczero (see below), try the following: ``` cd build @@ -94,47 +144,6 @@ automatically resume using the tensorflow checkpoint. You can use this to adjust learning rates, etc. 
-# Compiling - -## Requirements - -* GCC, Clang or MSVC, any C++14 compiler -* boost 1.58.x or later (libboost-all-dev on Debian/Ubuntu) -* BLAS Library: OpenBLAS (libopenblas-dev) or (optionally) Intel MKL -* zlib library (zlib1g & zlib1g-dev on Debian/Ubuntu) -* Standard OpenCL C headers (opencl-headers on Debian/Ubuntu, or at - https://github.com/KhronosGroup/OpenCL-Headers/tree/master/opencl22/) -* OpenCL ICD loader (ocl-icd-libopencl1 on Debian/Ubuntu, or reference implementation at https://github.com/KhronosGroup/OpenCL-ICD-Loader) -* An OpenCL capable device, preferably a very, very fast GPU, with recent - drivers is strongly recommended (OpenCL 1.2 support should be enough, even - OpenCL 1.1 might work). If you do not have a GPU, modify config.h in the - source and remove the line that says `#define USE_OPENCL`. -* Tensorflow 1.4 or higher (for training only) -* The program has been tested on Linux. - -## Example of compiling - Ubuntu 16.04 - - # Install dependencies - sudo apt install libboost-all-dev libopenblas-dev opencl-headers ocl-icd-libopencl1 ocl-icd-opencl-dev zlib1g-dev - - # Test for OpenCL support & compatibility - sudo apt install clinfo && clinfo - - # Clone github repo - git clone git@github.com:glinscott/leela-chess.git - cd leela-chess - git submodule update --init --recursive - mkdir build && cd build - - # Configure, build and run tests - cmake .. - make - ./tests - -# Compiling Client - -See https://github.com/glinscott/leela-chess/tree/master/go/src/client/README.md. 
- # Other projects * [mokemokechicken/reversi-alpha-zero](https://github.com/mokemokechicken/reversi-alpha-zero) diff --git a/go/src/client/main.go b/go/src/client/main.go index 25bea69e8..4093bd04c 100644 --- a/go/src/client/main.go +++ b/go/src/client/main.go @@ -28,7 +28,7 @@ var HOSTNAME = flag.String("hostname", "http://162.217.248.187", "Address of the var USER = flag.String("user", "", "Username") var PASSWORD = flag.String("password", "", "Password") var GPU = flag.Int("gpu", -1, "ID of the OpenCL device to use (-1 for default, or no GPU)") -var DEBUG = flag.Bool("debug", false, "Enable debug mode to see verbose output") +var DEBUG = flag.Bool("debug", false, "Enable debug mode to see verbose output and save logs") type Settings struct { User string @@ -76,11 +76,11 @@ func getExtraParams() map[string]string { return map[string]string{ "user": *USER, "password": *PASSWORD, - "version": "3", + "version": "4", } } -func uploadGame(httpClient *http.Client, path string, pgn string, nextGame client.NextGameResponse) error { +func uploadGame(httpClient *http.Client, path string, pgn string, nextGame client.NextGameResponse, retryCount uint) error { extraParams := getExtraParams() extraParams["training_id"] = strconv.Itoa(int(nextGame.TrainingId)) extraParams["network_id"] = strconv.Itoa(int(nextGame.NetworkId)) @@ -96,6 +96,8 @@ func uploadGame(httpClient *http.Client, path string, pgn string, nextGame clien body := &bytes.Buffer{} _, err = body.ReadFrom(resp.Body) if err != nil { + time.Sleep(time.Second * (2 << retryCount)) + err = uploadGame(httpClient, path, pgn, nextGame, retryCount+1) return err } resp.Body.Close() @@ -180,7 +182,7 @@ func (c *CmdWrapper) launch(networkPath string, args []string, input bool) { } } -func playMatch(baselinePath string, candidatePath string, params []string, flip bool) (int, string) { +func playMatch(baselinePath string, candidatePath string, params []string, flip bool) (int, string, error) { baseline := CmdWrapper{} 
baseline.launch(baselinePath, params, true) defer baseline.Input.Close() @@ -230,24 +232,29 @@ func playMatch(baselinePath string, candidatePath string, params []string, flip io.WriteString(p.Input, "position startpos"+move_history+"\n") io.WriteString(p.Input, "go\n") - best_move := <-p.BestMove - err := game.MoveStr(best_move) - if err != nil { - log.Println("Error decoding: " + best_move + " for game:\n" + game.String()) - log.Fatal(err) - } - if len(move_history) == 0 { - move_history = " moves" + select { + case best_move := <-p.BestMove: + err := game.MoveStr(best_move) + if err != nil { + log.Println("Error decoding: " + best_move + " for game:\n" + game.String()) + return 0, "", err + } + if len(move_history) == 0 { + move_history = " moves" + } + move_history += " " + best_move + turn += 1 + case <-time.After(60 * time.Second): + log.Println("Bestmove has timed out, aborting match") + return 0, "", errors.New("timeout") } - move_history += " " + best_move - turn += 1 } chess.UseNotation(chess.AlgebraicNotation{})(game) - return result, game.String() + return result, game.String(), nil } -func train(networkPath string, params []string) (string, string) { +func train(networkPath string, count int, params []string) (string, string) { // pid is intended for use in multi-threaded training pid := os.Getpid() @@ -268,6 +275,13 @@ func train(networkPath string, params []string) (string, string) { } } + if *DEBUG { + logs_dir := path.Join(dir, fmt.Sprintf("logs-%v", pid)) + os.MkdirAll(logs_dir, os.ModePerm) + logfile := path.Join(logs_dir, fmt.Sprintf("%s.log", time.Now().Format("20060102150405"))) + params = append(params, "-l"+logfile) + } + num_games := 1 train_cmd := fmt.Sprintf("--start=train %v %v", pid, num_games) params = append(params, train_cmd) @@ -280,7 +294,7 @@ func train(networkPath string, params []string) (string, string) { log.Fatal(err) } - return path.Join(train_dir, "training.0.gz"), c.Pgn + return path.Join(train_dir, 
"training."+fmt.Sprintf("%d", count)+".gz"), c.Pgn } func getNetwork(httpClient *http.Client, sha string, clearOld bool) (string, error) { @@ -307,7 +321,7 @@ func getNetwork(httpClient *http.Client, sha string, clearOld bool) (string, err return path, nil } -func nextGame(httpClient *http.Client) error { +func nextGame(httpClient *http.Client, count int) error { nextGame, err := client.NextGame(httpClient, *HOSTNAME, getExtraParams()) if err != nil { return err @@ -327,16 +341,19 @@ func nextGame(httpClient *http.Client) error { if err != nil { return err } - result, pgn := playMatch(networkPath, candidatePath, params, nextGame.Flip) - client.UploadMatchResult(httpClient, *HOSTNAME, nextGame.MatchGameId, result, pgn, getExtraParams()) + result, pgn, err := playMatch(networkPath, candidatePath, params, nextGame.Flip) + if err != nil { + return err + } + go client.UploadMatchResult(httpClient, *HOSTNAME, nextGame.MatchGameId, result, pgn, getExtraParams()) return nil } else if nextGame.Type == "train" { networkPath, err := getNetwork(httpClient, nextGame.Sha, true) if err != nil { return err } - trainFile, pgn := train(networkPath, params) - uploadGame(httpClient, trainFile, pgn, nextGame) + trainFile, pgn := train(networkPath, count, params) + go uploadGame(httpClient, trainFile, pgn, nextGame, 0) return nil } @@ -358,13 +375,16 @@ func main() { } httpClient := &http.Client{} - for { - err := nextGame(httpClient) + start := time.Now() + for i := 0; ; i++ { + err := nextGame(httpClient, i) if err != nil { log.Print(err) log.Print("Sleeping for 30 seconds...") time.Sleep(30 * time.Second) continue } + elapsed := time.Since(start) + log.Printf("Completed %d games in %s time", i, elapsed) } } diff --git a/src/Network.cpp b/src/Network.cpp index fc61df16e..cc2bbae08 100644 --- a/src/Network.cpp +++ b/src/Network.cpp @@ -48,7 +48,6 @@ #include "UCTNode.h" #endif -#include "Utils.h" #include "Random.h" #include "Network.h" #include "NNCache.h" @@ -874,8 +873,11 @@ T 
relative_difference(T a, T b) { return std::max(fabs((fa - fb) / fa), fabs((fa - fb) / fb)); } -void compare_net_outputs(std::vector& data, - std::vector& ref) { +bool compare_net_outputs(std::vector& data, + std::vector& ref, + bool display_only = false, + std::string info = "") { + auto almost_equal = true; // The idea is to allow an OpenCL error > 5% every SELFCHECK_MIN_EXPANSIONS // correct expansions. As the num_expansions increases between errors > 5%, // we'll allow more errors to occur (max 3) before crashing. As if it @@ -885,16 +887,20 @@ void compare_net_outputs(std::vector& data, static std::atomic num_expansions{min_correct_expansions}; num_expansions = std::min(num_expansions + 1, 3 * min_correct_expansions); - // We accept an error up to 5%, but output values + // We accept an error up to 10%, but output values // smaller than 1/1000th are "rounded up" for the comparison. - constexpr float relative_error = 5e-2f; + constexpr float relative_error = 10e-2f; for (auto idx = size_t{0}; idx < data.size(); ++idx) { auto err = relative_difference(data[idx], ref[idx]); - if (err > relative_error) { - printf("Error in OpenCL calculation: expected %f got %f (%lli" + if (display_only) { + myprintf("compare_net_outputs %s idx %d data %f ref %f err=%f\n", + info.c_str(), idx, data[idx], ref[idx], err); + } else if (err > relative_error) { + almost_equal = false; + myprintf("Error in OpenCL calculation: expected %f got %f (%lli" "(error=%f%%)\n", ref[idx], data[idx], num_expansions.load(), err * 100.0); if (num_expansions < min_correct_expansions) { - printf("Update your GPU drivers or reduce the amount of games " + myprintf_so("Update your GPU drivers or reduce the amount of games " "played simultaneously.\n"); throw std::runtime_error("OpenCL self-check mismatch."); } @@ -903,6 +909,7 @@ void compare_net_outputs(std::vector& data, } } } + return almost_equal; } #endif @@ -989,8 +996,25 @@ Network::Netresult Network::get_scored_moves_internal(const BoardHistory& 
pos, N auto cpu_policy_data = std::vector(policy_data.size()); auto cpu_value_data = std::vector(value_data.size()); forward_cpu(input_data, cpu_policy_data, cpu_value_data); - compare_net_outputs(policy_data, cpu_policy_data); - compare_net_outputs(value_data, cpu_value_data); + auto almost_equal = compare_net_outputs(policy_data, cpu_policy_data); + almost_equal &= compare_net_outputs(value_data, cpu_value_data); + if (!almost_equal) { + myprintf("PGN\n%s\nEND\n", pos.pgn().c_str()); + // Compare again but with debug info + compare_net_outputs(policy_data, cpu_policy_data, true, "orig policy"); + compare_net_outputs(value_data, cpu_value_data, true, "orig value"); + // Call opencl.forward again to see if the error is reproduceable. + std::vector value_data_retry(Network::NUM_VALUE_INPUT_PLANES * width * height); + std::vector policy_data_retry(Network::NUM_OUTPUT_POLICY); + opencl.forward(input_data, policy_data_retry, value_data_retry); + auto almost_equal_retry = compare_net_outputs(policy_data_retry, policy_data, true, "retry policy"); + almost_equal_retry &= compare_net_outputs(value_data_retry, value_data, true, "retry value"); + if (!almost_equal_retry) { + throw std::runtime_error("OpenCL retry self-check mismatch."); + } else { + myprintf("compare_net_outputs retry was ok\n"); + } + } } #endif diff --git a/src/Parameters.cpp b/src/Parameters.cpp index 1a3e2690b..8e0c6efd6 100644 --- a/src/Parameters.cpp +++ b/src/Parameters.cpp @@ -41,8 +41,10 @@ using namespace Utils; // Configuration flags bool cfg_allow_pondering; +int cfg_max_threads; int cfg_num_threads; int cfg_max_playouts; +int cfg_max_visits; int cfg_lagbuffer_cs; int cfg_resignpct; int cfg_noise; @@ -65,10 +67,11 @@ bool cfg_quiet; void Parameters::setup_default_parameters() { cfg_allow_pondering = true; int num_cpus = std::thread::hardware_concurrency(); - //cfg_num_threads = std::max(1, std::min(num_cpus, MAX_CPUS)); + cfg_max_threads = std::max(1, std::min(num_cpus, MAX_CPUS)); 
cfg_num_threads = 2; - cfg_max_playouts = 800; + cfg_max_playouts = MAXINT_DIV2; + cfg_max_visits = 800; cfg_lagbuffer_cs = 100; #ifdef USE_OPENCL cfg_gpus = { }; diff --git a/src/Parameters.h b/src/Parameters.h index 3a3ff7279..a5d318d6f 100644 --- a/src/Parameters.h +++ b/src/Parameters.h @@ -22,9 +22,12 @@ #include #include +constexpr int MAXINT_DIV2 = std::numeric_limits::max() / 2; extern bool cfg_allow_pondering; +extern int cfg_max_threads; extern int cfg_num_threads; extern int cfg_max_playouts; +extern int cfg_max_visits; extern int cfg_lagbuffer_cs; extern int cfg_resignpct; extern int cfg_noise; diff --git a/src/Position.cpp b/src/Position.cpp index 9c816bcd0..57d50bcbe 100644 --- a/src/Position.cpp +++ b/src/Position.cpp @@ -1126,7 +1126,7 @@ std::string Position::move_to_san(Move m) const { result += file; result += rank; } - } else if (type_of(pc) == PAWN && board[to] != NO_PIECE) { + } else if (type_of(pc) == PAWN && (board[to] != NO_PIECE || type_of(m) == ENPASSANT)) { result += file; } @@ -1417,6 +1417,13 @@ void BoardHistory::do_move(Move m) { positions.back().do_move(m, *states.back()); } +bool BoardHistory::undo_move() { + if (positions.size() == 1) return false; + states.pop_back(); + positions.pop_back(); + return true; +} + std::string BoardHistory::pgn() const { std::string result; for (int i = 0; i< static_cast(positions.size()) - 1; ++i) { diff --git a/src/Position.h b/src/Position.h index c5cf78472..99cc472e9 100644 --- a/src/Position.h +++ b/src/Position.h @@ -409,6 +409,7 @@ struct BoardHistory { void set(const std::string& fen); BoardHistory shallow_clone() const; void do_move(Move m); + bool undo_move(); std::string pgn() const; }; diff --git a/src/Tuner.cpp b/src/Tuner.cpp index d27911aa3..9d4b79a29 100644 --- a/src/Tuner.cpp +++ b/src/Tuner.cpp @@ -398,7 +398,7 @@ std::string Tuner::tune_sgemm(const int m, const int n, const int k, } } if (best_time == 0) { - printf("Failed to find a working configuration.\nCheck your OpenCL 
drivers.\n"); + myprintf_so("Failed to find a working configuration.\nCheck your OpenCL drivers.\n"); throw std::runtime_error("Tuner failed to find working configuration."); } return best_params; diff --git a/src/UCI.cpp b/src/UCI.cpp index bf7d8013a..e55ab00af 100644 --- a/src/UCI.cpp +++ b/src/UCI.cpp @@ -34,28 +34,7 @@ #include "Utils.h" using namespace std; - -enum SyncCout { IO_LOCK, IO_UNLOCK }; - -std::ostream& operator<<(std::ostream&, SyncCout); - -#define sync_cout std::cout << IO_LOCK -#define sync_endl std::endl << IO_UNLOCK - -/// Used to serialize access to std::cout to avoid multiple threads writing at -/// the same time. - -std::ostream& operator<<(std::ostream& os, SyncCout sc) { - static mutex m; - - if (sc == IO_LOCK) - m.lock(); - - if (sc == IO_UNLOCK) - m.unlock(); - - return os; -} +using namespace Utils; namespace { @@ -107,7 +86,7 @@ namespace { while (is >> token) value += string(" ", value.empty() ? 0 : 1) + token; - sync_cout << "No such option: " << name << sync_endl; + myprintf_so("No such option: %s\n", name.c_str()); } @@ -115,7 +94,7 @@ namespace { // the thinking time and other parameters from the input string, then starts // the search. - void go(BoardHistory& bh, istringstream& is) { + void go(UCTSearch& search, BoardHistory& bh, istringstream& is) { Limits = LimitsType(); string token; @@ -132,10 +111,9 @@ namespace { // TODO(gary): This just does the search on the UI thread... 
- auto search = std::make_unique(bh.shallow_clone()); - Move move = search->think(); + Move move = search.think(bh.shallow_clone()); bh.do_move(move); - printf("bestmove %s\n", UCI::move(move).c_str()); + myprintf_so("bestmove %s\n", UCI::move(move).c_str()); } // called when receiving the 'perft Depth' command @@ -145,12 +123,13 @@ namespace { Depth depth = Depth(d); uint64_t total = UCI::perft(bh, depth); - sync_cout << "Total: " << total << sync_endl; + myprintf_so("Total: %lld\n", total); } // Return the score from the self-play game int play_one_game(BoardHistory& bh) { + auto search = std::make_unique(bh.shallow_clone()); for (int game_ply = 0; game_ply < 450; ++game_ply) { if (bh.cur().is_draw()) { return 0; @@ -166,8 +145,7 @@ int play_one_game(BoardHistory& bh) { } } Limits.startTime = now(); - auto search = std::make_unique(bh.shallow_clone()); - Move move = search->think(); + Move move = search->think(bh.shallow_clone()); bh.do_move(move); } @@ -183,8 +161,8 @@ int play_one_game() { Training::clear_training(); int game_score = play_one_game(bh); - printf("PGN\n%s\nEND\n", bh.pgn().c_str()); - printf("Score: %d\n", game_score); + myprintf_so("PGN\n%s\nEND\n", bh.pgn().c_str()); + myprintf_so("Score: %d\n", game_score); return game_score; } @@ -201,7 +179,7 @@ void generate_training_games(istringstream& is) { fs::path dir("data-" + suffix); if (!fs::exists(dir)) { fs::create_directories(dir); - printf("Created dirs %s\n", dir.string().c_str()); + myprintf_so("Created dirs %s\n", dir.string().c_str()); } auto chunker = OutputChunker{dir.string() + "/training", true}; for (int64_t i = 0; i < num_games; i++) { @@ -234,7 +212,7 @@ Bg3 15. f4 d6 16. cxd6+ Ke8 17. Kg1 Bd7 18. a4 Rd8 {0.50s} 19. a5 Ra8 {0.54s} PGNParser parser(ss); auto game = parser.parse(); - printf("%s\n", game->bh.cur().fen().c_str()); + myprintf_so("%s\n", game->bh.cur().fen().c_str()); /* Network::DebugRawData debug_data; @@ -245,9 +223,9 @@ Bg3 15. f4 d6 16. cxd6+ Ke8 17. Kg1 Bd7 18. 
a4 Rd8 {0.50s} 19. a5 Ra8 {0.54s} fclose(f); */ - auto search = std::make_unique(std::move(game->bh)); + auto search = std::make_unique(game->bh.shallow_clone()); search->set_quiet(false); - search->think(); + search->think(game->bh.shallow_clone()); } } // namespace @@ -271,8 +249,9 @@ uint64_t UCI::perft(BoardHistory& bh, Depth depth) { nodes += cnt; bh.cur().undo_move(m); } - if (Root) - sync_cout << UCI::move(m) << ": " << cnt << sync_endl; + if (Root) { + myprintf_so("%s: %lld\n", UCI::move(m).c_str(), cnt); + } } return nodes; } @@ -285,21 +264,22 @@ uint64_t UCI::perft(BoardHistory& bh, Depth depth) { /// In addition to the UCI ones, also some additional debug commands are supported. void UCI::loop(const std::string& start) { - string token, cmd = start; - BoardHistory bh; bh.set(Position::StartFEN); + auto search = std::make_unique(bh.shallow_clone()); do { if (start.empty() && !getline(cin, cmd)) // Block here waiting for input or EOF cmd = "quit"; + log_input(cmd); istringstream is(cmd); - token.clear(); // Avoid a stale if getline() returns empty or blank line is >> skipws >> token; + if (token == "quit" || token == "exit") break; + /* // The GUI sends 'ponderhit' to tell us the user has played the expected move. 
// So 'ponderhit' will be sent if we were told to ponder on the same move the @@ -314,26 +294,66 @@ void UCI::loop(const std::string& start) { Threads.ponder = false; // Switch to normal search */ - if (token == "uci") - sync_cout << "id name lczero\n" - << "uciok" << sync_endl; - + if (token == "uci") { + myprintf_so("id name lczero\nuciok\n"); + } else if (token == "setoption") setoption(is); - else if (token == "go") go(bh, is); + else if (token == "go") go(*search, bh, is); else if (token == "perft") uci_perft(bh, is); else if (token == "position") position(bh, is); // else if (token == "ucinewgame") Search::clear(); - else if (token == "isready") sync_cout << "readyok" << sync_endl; - + else if (token == "isready") { + myprintf_so("readyok\n"); + } // Additional custom non-UCI commands, mainly for debugging else if (token == "train") generate_training_games(is); else if (token == "bench") bench(); - //else if (token == "d") sync_cout << pos << sync_endl; - //else if (token == "eval") sync_cout << Eval::trace(pos) << sync_endl; - else - sync_cout << "Unknown command: " << token << " " << cmd << sync_endl; - - } while (token != "quit" && start.empty()); // Command line args are one-shot + else if (token == "d" || token == "showboard") { + std::stringstream ss; + ss << bh.cur(); + myprintf_so("%s\n", ss.str().c_str()); + } + else if (token == "showfen") { + std::stringstream ss; + ss << bh.cur().fen(); + myprintf_so("%s\n", ss.str().c_str()); + } + else if (token == "showgame") { + std::string result; + for (const auto &p : bh.positions) { + if (result == "") { + result = " "; // first position has no move + } else { + result += UCI::move(p.get_move()) + " "; + } + } + myprintf_so("position startpos%s\n", result.c_str()); + } + else if (token == "showpgn") myprintf_so("%s\n", bh.pgn().c_str()); + else if (token == "undo") myprintf_so(bh.undo_move() ? 
"Undone\n" : "At first move\n"); + else if (token == "usermove" || token == "play") { + std::string ms; is >> ms; + Move m = UCI::to_move(bh.cur(), ms); + if (m == MOVE_NONE) m = bh.cur().san_to_move(ms); + if (m != MOVE_NONE) { + bh.do_move(m); + myprintf_so("usermove %s\n", UCI::move(m).c_str()); + } + else { + myprintf_so("Illegal move: %s\n", ms.c_str()); + } + } + else if (UCI::to_move(bh.cur(), token) != MOVE_NONE) { + Move m = UCI::to_move(bh.cur(), token); + bh.do_move(m); + myprintf_so("usermove %s\n", UCI::move(m).c_str()); + } + //else if (token == "eval") sync_cout << Eval::trace(pos) << sync_endl; + else if (token != "quit") { + myprintf_so("Unknown command: %s\n", cmd.c_str()); + } + + } while (start.empty()); // Command line args are one-shot } /// UCI::square() converts a Square to a string in algebraic notation (g1, a7, etc.) diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index d6a1f5b4c..f08e04bf5 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -31,10 +31,12 @@ #include #include #include +#include #include "Position.h" #include "Parameters.h" #include "Movegen.h" +#include "UCI.h" #include "UCTNode.h" #include "UCTSearch.h" #include "Utils.h" @@ -337,6 +339,17 @@ UCTNode& UCTNode::get_best_root_child(Color color) { NodeComp(color))->get()); } +size_t UCTNode::count_nodes() const { + auto nodecount = size_t{0}; + if (m_has_children) { + nodecount += m_children.size(); + for (auto& child : m_children) { + nodecount += child->count_nodes(); + } + } + return nodecount; +} + UCTNode* UCTNode::get_first_child() const { if (m_children.empty()) { return nullptr; @@ -347,3 +360,42 @@ UCTNode* UCTNode::get_first_child() const { const std::vector& UCTNode::get_children() const { return m_children; } + +// Search the new_bh backwards and see if we can find the prevroot_full_key. +// May not find it if e.g. the user asked to evaluate some different position. 
+UCTNode::node_ptr_t UCTNode::find_new_root(Key prevroot_full_key, BoardHistory& new_bh) { + UCTNode::node_ptr_t new_root = nullptr; + std::vector moves; + for (auto pos : boost::adaptors::reverse(new_bh.positions)) { + if (pos.full_key() == prevroot_full_key) { + new_root = find_path(moves); + break; + } + moves.push_back(pos.get_move()); + } + return new_root; +} + +// Take the moves found by find_new_root and try to find the new root node. +// May not find it if the search didn't include that node. +UCTNode::node_ptr_t UCTNode::find_path(std::vector& moves) { + if (moves.size() == 0) { + // TODO this means the current root is actually a match. + // This only happens if you e.g. undo a move. + // For now just ignore this case. + return nullptr; + } + auto move = moves.back(); + moves.pop_back(); + for (auto& node : m_children) { + if (node->get_move() == move) { + if (moves.size() > 0) { + // Keep going recursively through the move list. + return node->find_path(moves); + } else { + return std::move(node); + } + } + } + return nullptr; +} diff --git a/src/UCTNode.h b/src/UCTNode.h index 5af39c998..7fd7c0bbd 100644 --- a/src/UCTNode.h +++ b/src/UCTNode.h @@ -42,6 +42,7 @@ class UCTNode { explicit UCTNode(Move move, float score, float init_eval); UCTNode() = delete; ~UCTNode(); + size_t count_nodes() const; bool first_visit() const; bool has_children() const; bool create_children(std::atomic & nodecount, const BoardHistory& state, float& eval); @@ -66,6 +67,8 @@ class UCTNode { void sort_root_children(Color color); UCTNode& get_best_root_child(Color color); + UCTNode::node_ptr_t find_new_root(Key prevroot_full_key, BoardHistory& new_bh); + UCTNode::node_ptr_t find_path(std::vector& moves); private: void link_nodelist(std::atomic& nodecount, std::vector& nodelist, float init_eval); diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index ab00872e9..d3535c527 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -50,10 +50,12 @@ LimitsType Limits; 
UCTSearch::UCTSearch(BoardHistory&& bh) : bh_(std::move(bh)) { set_playout_limit(cfg_max_playouts); + set_visit_limit(cfg_max_visits); + m_root = std::make_unique(MOVE_NONE, 0.0f, 0.5f); } void UCTSearch::set_quiet(bool quiet) { - quiet_ = quiet; + quiet_ = quiet; } SearchResult UCTSearch::play_simulation(BoardHistory& bh, UCTNode* const node) { @@ -102,7 +104,7 @@ void UCTSearch::dump_stats(BoardHistory& state, UCTNode& parent) { const Color color = state.cur().side_to_move(); // sort children, put best move on top - m_root.sort_root_children(color); + m_root->sort_root_children(color); if (parent.get_first_child()->first_visit()) { return; @@ -113,10 +115,10 @@ void UCTSearch::dump_stats(BoardHistory& state, UCTNode& parent) { std::string pvstring(tmp); myprintf("%4s -> %7d (V: %5.2f%%) (N: %5.2f%%) PV: ", - tmp.c_str(), - node->get_visits(), - node->get_eval(color)*100.0f, - node->get_score() * 100.0f); + tmp.c_str(), + node->get_visits(), + node->get_eval(color)*100.0f, + node->get_score() * 100.0f); StateInfo si; state.cur().do_move(node->get_move(), si); @@ -132,25 +134,25 @@ Move UCTSearch::get_best_move() { Color color = bh_.cur().side_to_move(); // Make sure best is first - m_root.sort_root_children(color); + m_root->sort_root_children(color); // Check whether to randomize the best move proportional // to the playout counts. if (cfg_randomize) { - m_root.randomize_first_proportionally(); + m_root->randomize_first_proportionally(); } - Move bestmove = m_root.get_first_child()->get_move(); + Move bestmove = m_root->get_first_child()->get_move(); // do we have statistics on the moves? - if (m_root.get_first_child()->first_visit()) { + if (m_root->get_first_child()->first_visit()) { return bestmove; } // should we consider resigning? 
/* - float bestscore = m_root.get_first_child()->get_eval(color); - int visits = m_root.get_visits(); + float bestscore = m_root->get_first_child()->get_eval(color); + int visits = m_root->get_visits(); // bad score and visited enough if (bestscore < ((float)cfg_resignpct / 100.0f) && visits > 500 @@ -191,18 +193,24 @@ void UCTSearch::dump_analysis(int64_t elapsed, bool force_output) { auto bh = bh_.shallow_clone(); Color color = bh.cur().side_to_move(); - std::string pvstring = get_pv(bh, m_root); - float feval = m_root.get_eval(color); + std::string pvstring = get_pv(bh, *m_root); + float feval = m_root->get_eval(color); float winrate = 100.0f * feval; // UCI-like output wants a depth and a cp. - // convert winrate to a cp estimate ... assume winrate = 1 / (1 + exp(-cp / 650)) - // (650 can be tuned to have an output more or less matching e.g. SF, once both have similar strength) - int cp = -650 * log(1 / feval - 1); + // convert winrate to a cp estimate ... assume winrate = 1 / (1 + exp(-cp / 91)) + // (91 can be tuned to have an output more or less matching e.g. SF, once both have similar strength) + int cp = -91 * log(1 / feval - 1); // same for nodes to depth, assume nodes = 1.8 ^ depth. int depth = log(float(m_nodes)) / log(1.8); - auto visits = m_root.get_visits(); - printf("info depth %d nodes %d nps %.0f score cp %d winrate %5.2f%% time %lld pv %s\n", - depth, visits, 1000.0 * visits / (elapsed + 1), + // To report nodes, use visits. + // - Only includes expanded nodes. + // - Includes nodes carried over from tree reuse. + auto visits = m_root->get_visits(); + // To report nps, use m_playouts to exclude nodes added by tree reuse, + // which is similar to a ponder hit. The user will expect to know how + // fast nodes are being added, not how big the ponder hit was. 
+ myprintf_so("info depth %d nodes %d nps %0.f score cp %d winrate %5.2f%% time %lld pv %s\n", + depth, visits, 1000.0 * m_playouts / (elapsed + 1), cp, winrate, elapsed, pvstring.c_str()); } @@ -210,8 +218,9 @@ bool UCTSearch::is_running() const { return m_run && m_nodes < MAX_TREE_SIZE; } -bool UCTSearch::playout_limit_reached() const { - return m_playouts >= m_maxplayouts; +bool UCTSearch::pv_limit_reached() const { + return m_playouts >= m_maxplayouts + || m_root->get_visits() >= m_maxvisits; } void UCTWorker::operator()() { @@ -228,9 +237,31 @@ void UCTSearch::increment_playouts() { m_playouts++; } -Move UCTSearch::think() { - assert(m_playouts == 0); - assert(m_nodes == 0); +Move UCTSearch::think(BoardHistory&& new_bh) { +#ifndef NDEBUG + auto start_nodes = m_root->count_nodes(); +#endif + + // See if the position is in our previous search tree. + // If not, construct a new m_root. + m_root = m_root->find_new_root(m_prevroot_full_key, new_bh); + if (!m_root) { + m_root = std::make_unique(new_bh.cur().get_move(), 0.0f, 0.5f); + } + + m_playouts = 0; + m_nodes = m_root->count_nodes(); + // TODO: Both UCI and the next line do shallow_clone. + // Could optimize this. + bh_ = new_bh.shallow_clone(); + m_prevroot_full_key = new_bh.cur().full_key(); + +#ifndef NDEBUG + myprintf("update_root, %d -> %d expanded nodes (%.1f%% reused)\n", + start_nodes, + m_nodes.load(), + m_nodes > 0 ? 
100.0 * m_nodes.load() / start_nodes : 0); +#endif // set up timing info @@ -238,25 +269,29 @@ Move UCTSearch::think() { m_target_time = get_search_time(); m_start_time = Limits.timeStarted(); - // create a sorted list of legal moves (make sure we play something legal and decent even in time trouble) - float root_eval; - m_root.create_children(m_nodes, bh_, root_eval); + // create a sorted list of legal moves (make sure we + // play something legal and decent even in time trouble) + if (!m_root->has_children()) { + float root_eval; + m_root->create_children(m_nodes, bh_, root_eval); + m_root->update(root_eval); + } if (cfg_noise) { - m_root.dirichlet_noise(0.25f, 0.3f); + m_root->dirichlet_noise(0.25f, 0.3f); } m_run = true; int cpus = cfg_num_threads; ThreadGroup tg(thread_pool); for (int i = 1; i < cpus; i++) { - tg.add_task(UCTWorker(bh_, this, &m_root)); + tg.add_task(UCTWorker(bh_, this, m_root.get())); } bool keeprunning = true; int last_update = 0; do { auto currstate = bh_.shallow_clone(); - auto result = play_simulation(currstate, &m_root); + auto result = play_simulation(currstate, m_root.get()); if (result.valid()) { increment_playouts(); } @@ -277,13 +312,13 @@ Move UCTSearch::think() { // stop the search m_run = false; tg.wait_all(); - if (!m_root.has_children()) { + if (!m_root->has_children()) { return MOVE_NONE; } // display search info - dump_stats(bh_, m_root); - Training::record(bh_, m_root); + dump_stats(bh_, *m_root); + Training::record(bh_, *m_root); int64_t milliseconds_elapsed = now() - m_start_time; if (milliseconds_elapsed > 0) { @@ -301,11 +336,11 @@ void UCTSearch::ponder() { int cpus = cfg_num_threads; ThreadGroup tg(thread_pool); for (int i = 1; i < cpus; i++) { - tg.add_task(UCTWorker(bh_, this, &m_root)); + tg.add_task(UCTWorker(bh_, this, m_root.get())); } do { auto bh = bh_.shallow_clone(); - auto result = play_simulation(bh, &m_root); + auto result = play_simulation(bh, m_root.get()); if (result.valid()) { increment_playouts(); } 
@@ -316,9 +351,9 @@ void UCTSearch::ponder() { tg.wait_all(); // display search info myprintf("\n"); - dump_stats(bh_, m_root); + dump_stats(bh_, *m_root); - myprintf("\n%d visits, %d nodes\n\n", m_root.get_visits(), (int)m_nodes); + myprintf("\n%d visits, %d expanded nodes\n\n", m_root->get_visits(), (int)m_nodes); } // Returns the amount of time to use for a turn in milliseconds @@ -332,17 +367,28 @@ int UCTSearch::get_search_time() { // Used to check if we've run out of time or reached our playout limit bool UCTSearch::halt_search() { - return m_target_time < 0 ? playout_limit_reached() : m_target_time - 50 < now() - m_start_time; + return m_target_time < 0 ? pv_limit_reached() : m_target_time - 50 < now() - m_start_time; } void UCTSearch::set_playout_limit(int playouts) { static_assert(std::is_convertible::value, "Inconsistent types for playout amount."); if (playouts == 0) { - m_maxplayouts = std::numeric_limits::max(); + // Divide max by 2 to prevent overflow when multithreading. + m_maxplayouts = MAXINT_DIV2; } else { m_maxplayouts = playouts; } } +void UCTSearch::set_visit_limit(int visits) { + static_assert(std::is_convertible::value, "Inconsistent types for visits amount."); + if (visits == 0) { + // Divide max by 2 to prevent overflow when multithreading. 
+ m_maxvisits = MAXINT_DIV2; + } else { + m_maxvisits = visits; + } +} + diff --git a/src/UCTSearch.h b/src/UCTSearch.h index 392ca6d64..b8dac2870 100644 --- a/src/UCTSearch.h +++ b/src/UCTSearch.h @@ -67,13 +67,14 @@ class UCTSearch { static constexpr auto MAX_TREE_SIZE = 40'000'000; UCTSearch(BoardHistory&& bh); - Move think(); + Move think(BoardHistory&& bh); void set_playout_limit(int playouts); + void set_visit_limit(int visits); void set_analyzing(bool flag); void set_quiet(bool flag); void ponder(); bool is_running() const; - bool playout_limit_reached() const; + bool pv_limit_reached() const; void increment_playouts(); bool halt_search(); SearchResult play_simulation(BoardHistory& bh, UCTNode* const node); @@ -85,13 +86,15 @@ class UCTSearch { Move get_best_move(); BoardHistory bh_; - UCTNode m_root{MOVE_NONE, 0.0f, 0.5f}; + Key m_prevroot_full_key{0}; + std::unique_ptr m_root; std::atomic m_nodes{0}; std::atomic m_playouts{0}; std::atomic m_target_time{0}; std::atomic m_start_time{0}; std::atomic m_run{false}; int m_maxplayouts; + int m_maxvisits; bool quiet_ = true; diff --git a/src/Utils.cpp b/src/Utils.cpp index d1a89ba92..399921a76 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -19,6 +19,7 @@ #include "config.h" #include "Utils.h" +#include #include #include #include @@ -94,6 +95,20 @@ void Utils::myprintf(const char *fmt, ...) { } } +void Utils::myprintf_so(const char *fmt, ...) 
{ + va_list ap; + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + va_end(ap); + + if (cfg_logfile_handle) { + std::lock_guard lock(IOmutex); + va_start(ap, fmt); + vfprintf(cfg_logfile_handle, fmt, ap); + va_end(ap); + } +} + static void gtp_fprintf(FILE* file, const std::string& prefix, const char *fmt, va_list ap) { fprintf(file, "%s ", prefix.c_str()); diff --git a/src/Utils.h b/src/Utils.h index b6dfd66f5..69ab4cbe7 100644 --- a/src/Utils.h +++ b/src/Utils.h @@ -33,6 +33,7 @@ extern Utils::ThreadPool thread_pool; namespace Utils { void myprintf(const char *fmt, ...); + void myprintf_so(const char *fmt, ...); void gtp_printf(int id, const char *fmt, ...); void gtp_fail_printf(int id, const char *fmt, ...); void log_input(const std::string& input); diff --git a/src/config.h b/src/config.h index e0aad976b..679b9e272 100644 --- a/src/config.h +++ b/src/config.h @@ -31,8 +31,10 @@ #define USE_OPENBLAS #endif //#define USE_MKL +#ifndef FEATURE_USE_CPU_ONLY #define USE_OPENCL #define USE_OPENCL_SELFCHECK +#endif static constexpr int SELFCHECK_PROBABILITY = 2000; static constexpr int SELFCHECK_MIN_EXPANSIONS = 2'000'000; //#define USE_TUNER diff --git a/src/main.cpp b/src/main.cpp index 2afe5f0fc..91dc8b1c8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -40,7 +40,7 @@ using namespace Utils; static void license_blurb() { - printf( + myprintf_so( "LCZero Copyright (C) 2017 Gary Linscott\n" "Based on:" "Leela Chess Copyright (C) 2017 benediamond\n" @@ -59,11 +59,13 @@ static std::string parse_commandline(int argc, char *argv[]) { v_desc.add_options() ("help,h", "Show commandline options.") ("threads,t", po::value()->default_value - (std::min(2, cfg_num_threads)), + (std::min(cfg_num_threads, cfg_max_threads)), "Number of threads to use.") ("playouts,p", po::value(), "Weaken engine by limiting the number of playouts. 
" "Requires --noponder.") + ("visits,v", po::value(), + "Weaken engine by limiting the number of visits.") ("resignpct,r", po::value()->default_value(cfg_resignpct), "Resign when winrate is less than x%.") ("noise,n", "Apply dirichlet noise to root.") @@ -157,12 +159,14 @@ static std::string parse_commandline(int argc, char *argv[]) { if (vm.count("threads")) { int num_threads = vm["threads"].as(); - if (num_threads > cfg_num_threads) { - myprintf("Clamping threads to maximum = %d\n", cfg_num_threads); - } else if (num_threads != cfg_num_threads) { + if (num_threads > cfg_max_threads) { + myprintf("Clamping threads to maximum = %d\n", cfg_max_threads); + cfg_num_threads = cfg_max_threads; + } else { myprintf("Using %d thread(s).\n", num_threads); cfg_num_threads = num_threads; } + } if (vm.count("seed")) { @@ -205,6 +209,15 @@ static std::string parse_commandline(int argc, char *argv[]) { "Add --noponder if you want a weakened engine.\n"); exit(EXIT_FAILURE); } + if (!vm.count("visits")) { + // If the user specifies playouts they probably + // do not want the default 800 visits. 
+ cfg_max_visits = MAXINT_DIV2; + } + } + + if (vm.count("visits")) { + cfg_max_visits = vm["visits"].as(); } if (vm.count("resignpct")) { @@ -282,7 +295,7 @@ void generate_supervised_data(const std::string& filename) { fs::path dir("supervise-" + fp.stem().string()); if (!fs::exists(dir)) { fs::create_directories(dir); - printf("Created dirs %s\n", dir.string().c_str()); + myprintf_so("Created dirs %s\n", dir.string().c_str()); } auto chunker = OutputChunker{dir.string() + "/training", true, 15000}; @@ -295,10 +308,10 @@ void generate_supervised_data(const std::string& filename) { Training::clear_training(); auto game = parser.parse(); if (game == nullptr) { - printf("Invalid game in %s\n", filename.c_str()); + myprintf_so("Invalid game in %s\n", filename.c_str()); break; } - printf("\rProcessed %d games", ++games); + myprintf_so("\rProcessed %d games", ++games); BoardHistory bh; bh.set(Position::StartFEN); for (int i = 0; i < static_cast(game->bh.positions.size()) - 1; ++i) { diff --git a/src/tests/position_test.cpp b/src/tests/position_test.cpp index f3e365bdb..06ba3e5d3 100644 --- a/src/tests/position_test.cpp +++ b/src/tests/position_test.cpp @@ -12,6 +12,10 @@ class PositionTest: public ::testing::Test { } }; +void mock_shallow_repetitions(BoardHistory&& bh, int rep_cnt) { + EXPECT_EQ(bh.cur().repetitions_count(), rep_cnt); +} + TEST_F(PositionTest, IsDrawStartPosition) { Position pos; StateInfo si; @@ -152,4 +156,47 @@ TEST_F(PositionTest, KeyTest) { EXPECT_EQ(bh_.cur().rule50_count(), 8); EXPECT_TRUE(key == bh_.cur().key()); EXPECT_FALSE(full_key == bh_.cur().full_key()); + + // Longer repetition test with shallow_clone + bh_.set(Position::StartFEN); + bh_.do_move(UCI::to_move(bh_.cur(), "a2a4")); bh_.do_move(UCI::to_move(bh_.cur(), "h7h5")); + bh_.do_move(UCI::to_move(bh_.cur(), "a1a2")); bh_.do_move(UCI::to_move(bh_.cur(), "h8h6")); + bh_.do_move(UCI::to_move(bh_.cur(), "a2a3")); bh_.do_move(UCI::to_move(bh_.cur(), "h6a6")); + 
bh_.do_move(UCI::to_move(bh_.cur(), "a3b3")); bh_.do_move(UCI::to_move(bh_.cur(), "a6b6")); + bh_.do_move(UCI::to_move(bh_.cur(), "b3c3")); bh_.do_move(UCI::to_move(bh_.cur(), "b6c6")); + bh_.do_move(UCI::to_move(bh_.cur(), "c3d3")); bh_.do_move(UCI::to_move(bh_.cur(), "c6d6")); + bh_.do_move(UCI::to_move(bh_.cur(), "d3e3")); bh_.do_move(UCI::to_move(bh_.cur(), "d6e6")); + bh_.do_move(UCI::to_move(bh_.cur(), "e3f3")); bh_.do_move(UCI::to_move(bh_.cur(), "e6f6")); + bh_.do_move(UCI::to_move(bh_.cur(), "f3g3")); bh_.do_move(UCI::to_move(bh_.cur(), "f6g6")); + bh_.do_move(UCI::to_move(bh_.cur(), "g3h3")); + EXPECT_EQ(bh_.cur().repetitions_count(), 0); + mock_shallow_repetitions(bh_.shallow_clone(), 0); + bh_.do_move(UCI::to_move(bh_.cur(), "g6h6")); + EXPECT_EQ(bh_.cur().repetitions_count(), 0); + mock_shallow_repetitions(bh_.shallow_clone(), 0); + bh_.do_move(UCI::to_move(bh_.cur(), "h3a3")); + EXPECT_EQ(bh_.cur().repetitions_count(), 1); + mock_shallow_repetitions(bh_.shallow_clone(), 1); + bh_.do_move(UCI::to_move(bh_.cur(), "h6a6")); + EXPECT_EQ(bh_.cur().repetitions_count(), 1); + mock_shallow_repetitions(bh_.shallow_clone(), 1); +} + +TEST_F(PositionTest, PGNTest) { + BoardHistory bh_; + bh_.set(Position::StartFEN); + bh_.do_move(UCI::to_move(bh_.cur(), "f2f4")); + bh_.do_move(UCI::to_move(bh_.cur(), "a7a6")); + bh_.do_move(UCI::to_move(bh_.cur(), "f4f5")); + bh_.do_move(UCI::to_move(bh_.cur(), "e7e6")); + bh_.do_move(UCI::to_move(bh_.cur(), "f5e6")); + EXPECT_EQ(bh_.pgn(), "1. f4 a6 2. f5 e6 3. fxe6 "); + + bh_.set(Position::StartFEN); + bh_.do_move(UCI::to_move(bh_.cur(), "f2f4")); + bh_.do_move(UCI::to_move(bh_.cur(), "a7a6")); + bh_.do_move(UCI::to_move(bh_.cur(), "f4f5")); + bh_.do_move(UCI::to_move(bh_.cur(), "e7e5")); + bh_.do_move(UCI::to_move(bh_.cur(), "f5e6")); + EXPECT_EQ(bh_.pgn(), "1. f4 a6 2. f5 e5 3. 
fxe6 "); } diff --git a/training/tf/chunkparser.py b/training/tf/chunkparser.py index 64a71f492..53bf194f5 100644 --- a/training/tf/chunkparser.py +++ b/training/tf/chunkparser.py @@ -170,7 +170,7 @@ def convert_v1_to_v2(self, text_item): them_oo = int(text_item[115]) stm = int(text_item[116]) rule50_count = min(int(text_item[117]), 255) - move_count = min(int(text_item[118]), 255) + move_count = 0 # Load the probabilities. probabilities = np.array(text_item[119].split()).astype(np.float32) @@ -208,6 +208,8 @@ def convert_v2_to_tuple(self, content): uint8 planes (120 * 8 * 8 bytes) """ (ver, probs, planes, us_ooo, us_oo, them_ooo, them_oo, stm, rule50_count, move_count, winner) = self.v2_struct.unpack(content) + # Enforce move_count to 0 + move_count = 0 # Unpack planes. planes = np.unpackbits(np.frombuffer(planes, dtype=np.uint8)) planes = planes.tobytes() + self.flat_planes[us_ooo] + self.flat_planes[us_oo] + self.flat_planes[them_ooo] + self.flat_planes[them_oo] + self.flat_planes[stm] + self.flat_planes[rule50_count] + self.flat_planes[move_count] + self.flat_planes[0] diff --git a/training/tf/net_to_model.py b/training/tf/net_to_model.py index b3801aeeb..16cc5b3f8 100755 --- a/training/tf/net_to_model.py +++ b/training/tf/net_to_model.py @@ -3,12 +3,42 @@ import os import sys import yaml +import textwrap from tfprocess import TFProcess -cfg = yaml.safe_load(open(sys.argv[1], 'r').read()) -print(yaml.dump(cfg, default_flow_style=False)) -with open(sys.argv[2], 'r') as f: +YAMLCFG = """ +%YAML 1.2 +--- +name: 'online-64x6' +gpu: 0 + +dataset: + num_chunks: 200000 + train_ratio: 0.90 + +training: + batch_size: 2048 + total_steps: 60000 + shuffle_size: 1048576 + lr_values: + - 0.04 + - 0.002 + lr_boundaries: + - 35000 + policy_loss_weight: 1.0 + value_loss_weight: 1.0 + path: /dev/null + +model: + filters: 64 + residual_blocks: 6 +... 
+""" +YAMLCFG = textwrap.dedent(YAMLCFG).strip() +cfg = yaml.safe_load(YAMLCFG) + +with open(sys.argv[1], 'r') as f: weights = [] for e, line in enumerate(f): if e == 0: @@ -19,22 +49,27 @@ else: weights.append(list(map(float, line.split(' ')))) if e == 2: - channels = len(line.split(' ')) - print("Channels", channels) + filters = len(line.split(' ')) + print("Channels", filters) blocks = e - (4 + 14) if blocks % 8 != 0: raise ValueError("Inconsistent number of weights in the file") blocks //= 8 print("Blocks", blocks) +cfg['model']['filters'] = filters +cfg['model']['residual_blocks'] = blocks +print(yaml.dump(cfg, default_flow_style=False)) + +x = [ + tf.placeholder(tf.float32, [None, 120, 8*8]), + tf.placeholder(tf.float32, [None, 1924]), + tf.placeholder(tf.float32, [None, 1]) + ] + tfprocess = TFProcess(cfg) -tfprocess.init(1) -if tfprocess.RESIDUAL_BLOCKS != blocks: - raise ValueError("Number of blocks in tensorflow model doesn't match "\ - "number of blocks in input network") -if tfprocess.RESIDUAL_FILTERS != channels: - raise ValueError("Number of filters in tensorflow model doesn't match "\ - "number of filters in input network") +tfprocess.init_net(x) tfprocess.replace_weights(weights) path = os.path.join(os.getcwd(), cfg['name']) save_path = tfprocess.saver.save(tfprocess.session, path, global_step=0) +print("Writted model to {}".format(path)) diff --git a/training/tf/test_v2.py b/training/tf/test_v2.py new file mode 100755 index 000000000..8cf78edaf --- /dev/null +++ b/training/tf/test_v2.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 + +import sys +import chunkparser +import numpy as np +import gzip + +with gzip.open(sys.argv[1], 'rb') as f: + v1 = f.read() + +with gzip.open(sys.argv[2], 'rb') as f: + v2 = f.read() + +parser = chunkparser.ChunkParser(chunkparser.ChunkDataSrc([v1, v2]), workers=1) +gen1 = parser.convert_chunkdata_to_v2(v1) +gen2 = parser.convert_chunkdata_to_v2(v2) + +for t1 in gen1: + t2 = next(gen2) + t1 = parser.convert_v2_to_tuple(t1) 
+ t2 = parser.convert_v2_to_tuple(t2) + p1 = np.frombuffer(t1[1], dtype=np.float32, count=1924) + p2 = np.frombuffer(t2[1], dtype=np.float32, count=1924) + pl1 = np.frombuffer(t1[0], dtype=np.uint8, count=120*8*8) + pl2 = np.frombuffer(t2[0], dtype=np.uint8, count=120*8*8) + assert((pl1[0] == pl2[0]).all()) + assert(t1[2] == t2[2]) + a = np.argsort(p1[p1>0]) + b = np.argsort(p2[p2>0]) + assert((a == b).all()) + +# drain the parser +for _ in parser.parse(): + pass diff --git a/training/tf/tfprocess.py b/training/tf/tfprocess.py index 5a722fca9..1bc6a57d9 100644 --- a/training/tf/tfprocess.py +++ b/training/tf/tfprocess.py @@ -82,8 +82,8 @@ def init(self, dataset, train_iterator, test_iterator): self.init_net(self.next_batch) def init_net(self, next_batch): - self.x = next_batch[0] # tf.placeholder(tf.float32, [None, 18, 19 * 19]) - self.y_ = next_batch[1] # tf.placeholder(tf.float32, [None, 362]) + self.x = next_batch[0] # tf.placeholder(tf.float32, [None, 120, 8*8]) + self.y_ = next_batch[1] # tf.placeholder(tf.float32, [None, 1924]) self.z_ = next_batch[2] # tf.placeholder(tf.float32, [None, 1]) self.batch_norm_count = 0 self.y_conv, self.z_conv = self.construct_net(self.x) @@ -201,7 +201,7 @@ def process(self, batch_size, test_batches): # Determine learning rate lr_values = self.cfg['training']['lr_values'] lr_boundaries = self.cfg['training']['lr_boundaries'] - steps_total = steps % self.cfg['training']['total_steps'] + steps_total = (steps-1) % self.cfg['training']['total_steps'] self.lr = lr_values[bisect.bisect_right(lr_boundaries, steps_total)] # Keep running averages diff --git a/training/tf/train.py b/training/tf/train.py index 6280a5ea9..aa9364ae6 100755 --- a/training/tf/train.py +++ b/training/tf/train.py @@ -17,8 +17,11 @@ # along with Leela Zero. If not, see <http://www.gnu.org/licenses/>. 
import binascii +import argparse import os import yaml +import requests +import hashlib import sys import glob import gzip @@ -63,6 +66,11 @@ def get_latest_chunks(path, num_chunks): return chunks +def upload(url, data, filename): + files = {'file': open(filename, 'rb')} + r = requests.post(url, data=data, files=files) + + class FileDataSrc: """ data source yielding chunkdata from chunk files. @@ -86,47 +94,16 @@ def next(self): print("failed to parse {}".format(filename)) -def benchmark(parser): - """ - Benchmark for parser - """ - gen = parser.parse() - batch=100 - while True: - start = time.time() - for _ in range(batch): - next(gen) - end = time.time() - print("{} pos/sec {} secs".format( ChunkParser.BATCH_SIZE * batch / (end - start), (end - start))) - - -def benchmark1(t): - """ - Benchmark for full input pipeline, including tensorflow conversion - """ - batch=100 - while True: - start = time.time() - for _ in range(batch): - t.session.run([t.next_batch], - feed_dict={t.training: True, t.learning_rate: 0.01, t.handle: t.train_handle}) - - end = time.time() - print("{} pos/sec {} secs".format( ChunkParser.BATCH_SIZE * batch / (end - start), (end - start))) - - -def main(): - if len(sys.argv) != 2: - print("Usage: {} config.yaml".format(sys.argv[0])) - return 1 - cfg = yaml.safe_load(open(sys.argv[1], 'r').read()) +def main(cmd): + cfg = yaml.safe_load(cmd.cfg.read()) print(yaml.dump(cfg, default_flow_style=False)) num_chunks = cfg['dataset']['num_chunks'] chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks) - num_train = int(num_chunks*cfg['dataset']['train_ratio']) + train_ratio = cfg['dataset']['train_ratio'] + num_train = int(num_chunks*train_ratio) shuffle_size = cfg['training']['shuffle_size'] ChunkParser.BATCH_SIZE = cfg['training']['batch_size'] @@ -134,19 +111,17 @@ def main(): if not os.path.exists(root_dir): os.makedirs(root_dir) - #bench_parser = ChunkParser(FileDataSrc(chunks[:1000]), shuffle_size=1<<14, sample=SKIP, 
batch_size=ChunkParser.BATCH_SIZE) - #benchmark(bench_parser) - train_parser = ChunkParser(FileDataSrc(chunks[:num_train]), shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE) - #benchmark(train_parser) dataset = tf.data.Dataset.from_generator( train_parser.parse, output_types=(tf.string, tf.string, tf.string)) dataset = dataset.map(ChunkParser.parse_function) dataset = dataset.prefetch(4) train_iterator = dataset.make_one_shot_iterator() - test_parser = ChunkParser(FileDataSrc(chunks[num_train:]), batch_size=ChunkParser.BATCH_SIZE) + shuffle_size = int(shuffle_size*(1.0-train_ratio)) + test_parser = ChunkParser(FileDataSrc(chunks[num_train:]), + shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE) dataset = tf.data.Dataset.from_generator( test_parser.parse, output_types=(tf.string, tf.string, tf.string)) dataset = dataset.map(ChunkParser.parse_function) @@ -161,15 +136,42 @@ def main(): tfprocess.restore(cp) # Sweeps through all test chunks statistically - num_evals = int(round(((num_chunks-num_train) * (200 / SKIP)) / ChunkParser.BATCH_SIZE)) + num_evals = (num_chunks-num_train)*10 // ChunkParser.BATCH_SIZE print("Using {} evaluation batches".format(num_evals)) - # while True: for _ in range(cfg['training']['total_steps']): tfprocess.process(ChunkParser.BATCH_SIZE, num_evals) + tfprocess.save_leelaz_weights('/tmp/weights.txt') + + with open('/tmp/weights.txt', 'rb') as f: + m = hashlib.sha256() + w = f.read() + m.update(w) + digest = m.hexdigest() + + filename = '/tmp/{}.gz'.format(digest) + with gzip.open(filename, 'wb') as f: + f.write(w) + + if cmd.upload: + metadata = {'training_id':'1', 'layers':cfg['model']['residual_blocks'], + 'filters':cfg['model']['filters']} + print("\nUploading `{}'...".format(digest[:8]), end='') + upload(cmd.upload, metadata, filename) + print("[done]\n") + else: + print("\nStored `{}'\n".format(filename)) + if __name__ == "__main__": + argparser = argparse.ArgumentParser(description=\ + 
'Tensorflow pipeline for training Leela Chess.') + argparser.add_argument('--cfg', type=argparse.FileType('r'), + help='yaml configuration with training parameters') + argparser.add_argument('--upload', type=str, default="", + help='url to upload gzipped nets to') + mp.set_start_method('spawn') - main() + main(argparser.parse_args()) mp.freeze_support()