Craftimizer/Solver/Crafty/Solver.cs

using Craftimizer.Simulator;
using Craftimizer.Simulator.Actions;
using System.Diagnostics.Contracts;
using System.Numerics;
using System.Runtime.CompilerServices;
using Node = Craftimizer.Solver.Crafty.ArenaNode<Craftimizer.Solver.Crafty.SimulationNode>;

namespace Craftimizer.Solver.Crafty;

// https://github.com/alostsock/crafty/blob/cffbd0cad8bab3cef9f52a3e3d5da4f5e3781842/crafty/src/simulator.rs
public sealed class Solver
{
    /// <summary>
    /// Solver settings. This class reads MaxStepCount, ExplorationConstant, MaxScoreWeightingConstant,
    /// ScoreStorageThreshold, Iterations and ThreadCount from it.
    /// </summary>
    public SolverConfig Config;
    /// <summary>Root of the search tree; created by the constructor from the initial simulation state.</summary>
    public Node RootNode;

    /// <summary>
    /// Random source taken from the initial state's input; used to pick actions during expansion and rollout.
    /// </summary>
    public Random Random;

    /// <summary>
    /// Creates a solver whose search tree is rooted at <paramref name="state"/>.
    /// </summary>
    /// <param name="config">Solver settings; <c>MaxStepCount</c> is handed to the simulator.</param>
    /// <param name="state">Simulation state that becomes the root of the tree.</param>
    /// <param name="strict">Forwarded to <c>AvailableActionsHeuristic</c> when computing the root's candidate actions.</param>
    public Solver(SolverConfig config, SimulationState state, bool strict)
    {
        Config = config;

        // A throwaway simulator is only needed to derive the root's completion state and candidate actions.
        var rootSimulator = new Simulator(state, config.MaxStepCount);
        var rootCompletion = rootSimulator.CompletionState;
        var rootActions = rootSimulator.AvailableActionsHeuristic(strict);

        // The root has no action leading to it, hence the null action.
        RootNode = new Node(new(state, null, rootCompletion, rootActions));

        Random = state.Input.Random;
    }

    /// <summary>
    /// Creates a solver from a <see cref="SimulationInput"/> by wrapping it in a new
    /// <see cref="SimulationState"/> and delegating to the state-based constructor.
    /// </summary>
    /// <param name="config">Solver settings.</param>
    /// <param name="input">Input used to build the initial simulation state.</param>
    /// <param name="strict">Forwarded unchanged to the state-based constructor.</param>
    public Solver(SolverConfig config, SimulationInput input, bool strict)
        : this(config, new SimulationState(input), strict)
    {
    }

    /// <summary>
    /// Applies <paramref name="action"/> to <paramref name="state"/> and packages the outcome as a tree node payload.
    /// </summary>
    /// <param name="simulator">Simulator used to execute the action; its completion state and action heuristic are read afterwards.</param>
    /// <param name="state">State the action is applied to.</param>
    /// <param name="action">Action to execute; recorded on the returned node.</param>
    /// <param name="strict">Forwarded to <c>AvailableActionsHeuristic</c>.</param>
    /// <returns>A node payload holding the resulting state, the action taken, and the follow-up information from the simulator.</returns>
    private static SimulationNode Execute(Simulator simulator, SimulationState state, ActionType action, bool strict)
    {
        var (_, resultingState) = simulator.Execute(state, action);

        // Both values are read after Execute, so they describe the post-action situation.
        var completion = simulator.CompletionState;
        var followUps = simulator.AvailableActionsHeuristic(strict);

        return new SimulationNode(resultingState, action, completion, followUps);
    }

    /// <summary>
    /// Replays <paramref name="actions"/> in order starting at <paramref name="startNode"/>, adding one child
    /// node to the tree per executed action.
    /// </summary>
    /// <param name="simulator">Simulator used to execute each action.</param>
    /// <param name="startNode">Node to start from.</param>
    /// <param name="actions">Actions to apply, in order.</param>
    /// <param name="strict">Forwarded to the action heuristic when building each new node.</param>
    /// <returns>
    /// The last node reached together with its completion state. Replay stops early when the current node is
    /// already complete, or with <see cref="CompletionState.InvalidAction"/> when an action is not among the
    /// current node's available actions.
    /// </returns>
    public static (Node EndNode, CompletionState State) ExecuteActions(Simulator simulator, Node startNode, ReadOnlySpan<ActionType> actions, bool strict = false)
    {
        foreach (var action in actions)
        {
            // Take the node's state by reference (as ExpandAndRollout does) so that removing the action
            // below updates the tree node itself rather than a temporary copy of its state.
            ref var state = ref startNode.State;
            if (state.IsComplete)
                return (startNode, state.CompletionState);

            if (!state.AvailableActions.HasAction(action))
                return (startNode, CompletionState.InvalidAction);

            // The action is about to become a child of this node, so it must no longer be offered for expansion.
            state.AvailableActions.RemoveAction(action);

            startNode = startNode.Add(Execute(simulator, state.State, action, strict));
        }

        return (startNode, startNode.State.CompletionState);
    }

    /// <summary>
    /// Returns the child with the highest recorded <c>MaxScore</c>.
    /// </summary>
    /// <param name="children">Child buffer to scan; its data is read in chunks of <c>Vector&lt;float&gt;.Count</c> entries.</param>
    /// <returns>
    /// The best-scoring child. On ties the later child wins because the comparison is <c>&gt;=</c>.
    /// If no score reaches zero or above, the entry at chunk 0, slot 0 is returned.
    /// </returns>
    [Pure]
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Node ChildMaxScore(ref Node.ChildBuffer children)
    {
        var remaining = children.Count;
        var lanes = Vector<float>.Count;

        Span<float> laneScores = stackalloc float[lanes];

        var bestChunk = 0;
        var bestLane = 0;
        var bestScore = 0f;

        var chunkIndex = 0;
        while (remaining > 0)
        {
            // The final chunk may be only partially filled.
            var used = Math.Min(lanes, remaining);

            ref var chunk = ref children.Data[chunkIndex];
            for (var lane = 0; lane < used; ++lane)
                laneScores[lane] = chunk[lane].State.Scores.MaxScore;

            // Only the first `used` lanes are passed as relevant; stale lanes from a previous chunk may remain in the buffer.
            var winner = Intrinsics.HMaxIndex(new Vector<float>(laneScores), used);
            var candidate = laneScores[winner];

            if (candidate >= bestScore)
            {
                bestChunk = chunkIndex;
                bestLane = winner;
                bestScore = candidate;
            }

            remaining -= used;
            ++chunkIndex;
        }

        return children.Data[bestChunk][bestLane];
    }

    /// <summary>
    /// Picks the child to descend into during selection by combining an exploitation term and an exploration term.
    /// </summary>
    /// <remarks>
    /// Per child: exploitation = (1 - w) * (ScoreSum / visits) + w * MaxScore, where w is
    /// <c>Config.MaxScoreWeightingConstant</c>; exploration = sqrt(<c>Config.ExplorationConstant</c> * ln(parentVisits))
    /// multiplied by <c>Intrinsics.ReciprocalSqrt(visits)</c>. Visit counts are clamped to at least 1.
    /// Empty child slots are evaluated with default scores.
    /// </remarks>
    /// <param name="parentVisits">Visit count of the parent node.</param>
    /// <param name="children">Child buffer of the parent node.</param>
    /// <returns>The highest-rated child, or <c>null</c> when <paramref name="parentVisits"/> is zero.</returns>
    [Pure]
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private Node? EvalBestChild(int parentVisits, ref Node.ChildBuffer children)
    {
        // Without any parent visits there is nothing to rank the children by.
        if (parentVisits == 0)
            return null;

        var remaining = children.Count;
        var lanes = Vector<float>.Count;

        var explorationScale = MathF.Sqrt(Config.ExplorationConstant * MathF.Log(parentVisits));
        var maxWeight = Config.MaxScoreWeightingConstant;
        var meanWeight = 1f - maxWeight;
        var explorationVector = new Vector<float>(explorationScale);

        Span<float> laneScoreSums = stackalloc float[lanes];
        Span<int> laneVisits = stackalloc int[lanes];
        Span<float> laneMaxScores = stackalloc float[lanes];

        var bestChunk = 0;
        var bestLane = 0;
        var bestRating = 0f;

        var chunkIndex = 0;
        while (remaining > 0)
        {
            // The final chunk may be only partially filled.
            var used = Math.Min(lanes, remaining);

            ref var chunk = ref children.Data[chunkIndex];
            for (var lane = 0; lane < used; ++lane)
            {
                // A missing child contributes default (empty) statistics.
                var stats = chunk[lane]?.State.Scores ?? new();
                laneScoreSums[lane] = stats.ScoreSum;
                laneVisits[lane] = stats.Visits;
                laneMaxScores[lane] = stats.MaxScore;
            }

            var sums = new Vector<float>(laneScoreSums);
            var maxima = new Vector<float>(laneMaxScores);

            // Clamp to one visit so the division below never divides by zero.
            var clampedVisits = Vector.Max(new Vector<int>(laneVisits), Vector<int>.One);
            var visitCounts = Vector.ConvertToSingle(clampedVisits);

            var exploitation = (meanWeight * (sums / visitCounts)) + (maxWeight * maxima);
            var exploration = explorationVector * Intrinsics.ReciprocalSqrt(visitCounts);
            var ratings = exploitation + exploration;

            var winner = Intrinsics.HMaxIndex(ratings, used);
            var candidate = ratings[winner];

            if (candidate >= bestRating)
            {
                bestChunk = chunkIndex;
                bestLane = winner;
                bestRating = candidate;
            }

            remaining -= used;
            ++chunkIndex;
        }

        return children.Data[bestChunk][bestLane];
    }

    /// <summary>
    /// Walks down from the root, following the best-rated child at each level, until it reaches a node that
    /// either still has untried actions or has no children.
    /// </summary>
    /// <returns>The node at which the walk stopped.</returns>
    [Pure]
    public Node Select()
    {
        var current = RootNode;

        // Keep descending while the node is fully expanded (no untried actions) and has children to follow.
        while (current.State.AvailableActions.IsEmpty && current.Children.Count != 0)
        {
            // EvalBestChild yields null when the current node has no recorded visits yet
            // (not backpropagated so far); in that case restart the walk from the root.
            current = EvalBestChild(current.State.Scores.Visits, ref current.Children) ?? RootNode;
        }

        return current;
    }

    /// <summary>
    /// Expands <paramref name="initialNode"/> by one randomly chosen untried action, then plays random actions
    /// from the new node until the simulation is no longer incomplete, and scores the outcome.
    /// </summary>
    /// <param name="simulator">Simulator used for the expansion and the rollout.</param>
    /// <param name="initialNode">Node to expand.</param>
    /// <returns>
    /// <c>null</c> when no action could be popped from the node. Otherwise the node to backpropagate from and the
    /// score: the node itself if it was already complete; the terminal node of the replayed rollout when the
    /// rollout completed progress and its score meets both the storage threshold and the root's current max score;
    /// the freshly expanded node in all other cases.
    /// </returns>
    public (Node ExpandedNode, float Score)? ExpandAndRollout(Simulator simulator, Node initialNode)
    {
        ref var origin = ref initialNode.State;

        // A finished node cannot be expanded; just report its score.
        if (origin.IsComplete)
            return (initialNode, origin.CalculateScore(Config.MaxStepCount) ?? 0);

        // Expansion: move one untried action out of the node and turn it into a child.
        var expansion = origin.AvailableActions.PopRandom(Random);
        if (!expansion.HasValue)
            return null;
        var expandedNode = initialNode.Add(Execute(simulator, origin.State, expansion.Value, true));

        // Rollout: work on local copies so the tree is untouched while playing out.
        var rolloutState = expandedNode.State.State;
        var rolloutCompletion = expandedNode.State.SimulationCompletionState;
        var rolloutActions = expandedNode.State.AvailableActions;

        Span<ActionType> played = stackalloc ActionType[Config.MaxStepCount];
        byte playedCount = 0;
        while (SimulationNode.GetCompletionState(rolloutCompletion, rolloutActions) == CompletionState.Incomplete)
        {
            var pick = rolloutActions.SelectRandom(Random);
            played[playedCount++] = pick;
            (_, rolloutState) = simulator.Execute(rolloutState, pick);
            rolloutCompletion = simulator.CompletionState;
            rolloutActions = simulator.AvailableActionsHeuristic(true);
        }

        var score = SimulationNode.CalculateScoreForState(rolloutState, rolloutCompletion, Config.MaxStepCount) ?? 0;

        // Only rollouts that completed progress and match or beat the best score so far are stored in the tree.
        var worthStoring = rolloutCompletion == CompletionState.ProgressComplete
            && score >= Config.ScoreStorageThreshold
            && score >= RootNode.State.Scores.MaxScore;
        if (!worthStoring)
            return (expandedNode, score);

        var (terminalNode, _) = ExecuteActions(simulator, expandedNode, played[..playedCount], true);
        return (terminalNode, score);
    }

    /// <summary>
    /// Records <paramref name="score"/> as a visit on <paramref name="startNode"/> and on each of its ancestors,
    /// up to and including the root node.
    /// </summary>
    /// <param name="startNode">Node to start from; expected to be the root or one of its descendants.</param>
    /// <param name="score">Score to record on every node along the path.</param>
    public void Backpropagate(Node startNode, float score)
    {
        for (var current = startNode; ; current = current.Parent!)
        {
            current.State.Scores.Visit(score);

            // The root is visited too, and then the walk ends.
            if (current == RootNode)
                return;
        }
    }

    /// <summary>
    /// Runs the select / expand-and-rollout / backpropagate loop with its own simulator until
    /// <c>Config.Iterations</c> iterations have produced a result or cancellation is requested.
    /// </summary>
    /// <param name="token">Checked before every attempt; a cancellation request ends the loop.</param>
    public void SearchThread(CancellationToken token)
    {
        var simulator = new Simulator(RootNode.State.State, Config.MaxStepCount);

        var completed = 0;
        while (completed < Config.Iterations)
        {
            if (token.IsCancellationRequested)
                break;

            var rollout = ExpandAndRollout(simulator, Select());

            // A null result means nothing could be expanded; retry without counting the attempt.
            if (rollout is not { } outcome)
                continue;

            Backpropagate(outcome.ExpandedNode, outcome.Score);
            ++completed;
        }
    }

    /// <summary>
    /// Runs <c>Config.ThreadCount</c> concurrent <see cref="SearchThread"/> workers against the shared tree
    /// and blocks until all of them have finished.
    /// </summary>
    /// <param name="token">
    /// Passed to every worker, which polls it and stops early when cancellation is requested.
    /// Cancellation therefore makes this method return early rather than throw.
    /// </param>
    /// <exception cref="AggregateException">A worker threw an exception.</exception>
    public void Search(CancellationToken token)
    {
        var tasks = new Task[Config.ThreadCount];
        for (var i = 0; i < tasks.Length; ++i)
        {
            // The token is deliberately NOT handed to Task.Run: a task that has not started yet when
            // cancellation is requested would be put into the Canceled state, making WaitAll throw.
            // SearchThread already stops cooperatively by polling the token.
            tasks[i] = Task.Run(() => SearchThread(token));
        }

        Task.WaitAll(tasks, CancellationToken.None);
    }

    /// <summary>
    /// Extracts the best line found so far by following, from the root, the child with the highest max score
    /// until a node without children is reached.
    /// </summary>
    /// <returns>The actions along that path and the payload of the node at its end.</returns>
    [Pure]
    public (List<ActionType> Actions, SimulationNode Node) Solution()
    {
        List<ActionType> path = new();

        var current = RootNode;
        while (current.Children.Count != 0)
        {
            current = ChildMaxScore(ref current.Children);

            // Nodes without an associated action contribute nothing to the path.
            var taken = current.State.Action;
            if (taken != null)
                path.Add(taken.Value);
        }

        return (path, current.State);
    }

    /// <summary>
    /// Convenience overload of the stepwise search that builds the initial <see cref="SimulationState"/>
    /// from <paramref name="input"/>.
    /// </summary>
    /// <param name="config">Solver settings.</param>
    /// <param name="input">Input used to build the initial simulation state.</param>
    /// <param name="actionCallback">Optional callback forwarded to the state-based overload.</param>
    /// <param name="token">Cancellation token forwarded to the state-based overload.</param>
    /// <returns>Whatever the state-based overload returns.</returns>
    public static (List<ActionType> Actions, SimulationState State) SearchStepwise(SolverConfig config, SimulationInput input, Action<ActionType>? actionCallback, CancellationToken token = default)
    {
        var initialState = new SimulationState(input);
        return SearchStepwise(config, initialState, actionCallback, token);
    }

    /// <summary>
    /// Solves step by step: runs a full search from the current state, commits only the first action of the
    /// best line found, then searches again from the resulting state.
    /// </summary>
    /// <param name="config">Solver settings.</param>
    /// <param name="state">State to start from.</param>
    /// <param name="actionCallback">Optional callback invoked with each committed action.</param>
    /// <param name="token">Checked before every search; also forwarded to the search itself.</param>
    /// <returns>
    /// The committed actions and the state reached. When a search finds a line whose max score is at least 1.0,
    /// that whole line is appended and its end state returned immediately (the callback is not invoked for
    /// those actions). The loop also ends when the simulator reports completion, when cancellation is
    /// requested, or when a search yields no action at all.
    /// </returns>
    public static (List<ActionType> Actions, SimulationState State) SearchStepwise(SolverConfig config, SimulationState state, Action<ActionType>? actionCallback, CancellationToken token = default)
    {
        var actions = new List<ActionType>();
        Simulator sim = new(state, config.MaxStepCount);
        while (!sim.IsComplete)
        {
            if (token.IsCancellationRequested)
                break;

            // A fresh tree is built for every step, rooted at the current state.
            var solver = new Solver(config, state, true);
            solver.Search(token);
            var (solutionActions, solutionNode) = solver.Solution();

            if (solutionNode.Scores.MaxScore >= 1.0)
            {
                actions.AddRange(solutionActions);
                return (actions, solutionNode.State);
            }

            // The search can come back empty (e.g. it was cancelled before expanding anything);
            // there is then no action to commit, so stop with what has been decided so far.
            if (solutionActions.Count == 0)
                break;

            var chosenAction = solutionActions[0];
            (_, state) = sim.Execute(state, chosenAction);
            actions.Add(chosenAction);

            actionCallback?.Invoke(chosenAction);
        }

        return (actions, state);
    }

    /// <summary>
    /// Convenience overload of the one-shot search that builds the initial <see cref="SimulationState"/>
    /// from <paramref name="input"/>.
    /// </summary>
    /// <param name="config">Solver settings.</param>
    /// <param name="input">Input used to build the initial simulation state.</param>
    /// <param name="token">Cancellation token forwarded to the state-based overload.</param>
    /// <returns>Whatever the state-based overload returns.</returns>
    public static (List<ActionType> Actions, SimulationState State) SearchOneshot(SolverConfig config, SimulationInput input, CancellationToken token = default)
    {
        var initialState = new SimulationState(input);
        return SearchOneshot(config, initialState, token);
    }

    /// <summary>
    /// Runs a single search from <paramref name="state"/> and returns the best line found.
    /// </summary>
    /// <param name="config">Solver settings.</param>
    /// <param name="state">State to search from.</param>
    /// <param name="token">Cancellation token forwarded to the search.</param>
    /// <returns>The actions of the best line and the simulation state at its end.</returns>
    public static (List<ActionType> Actions, SimulationState State) SearchOneshot(SolverConfig config, SimulationState state, CancellationToken token = default)
    {
        // Unlike the stepwise search, the one-shot solver is created with strict = false.
        Solver solver = new(config, state, false);
        solver.Search(token);

        var best = solver.Solution();
        return (best.Actions, best.Node.State);
    }
}