318 lines
11 KiB
C#
318 lines
11 KiB
C#
using Craftimizer.Simulator;
|
|
using Craftimizer.Simulator.Actions;
|
|
using System.Diagnostics.Contracts;
|
|
using System.Numerics;
|
|
using System.Runtime.CompilerServices;
|
|
using Node = Craftimizer.Solver.Crafty.ArenaNode<Craftimizer.Solver.Crafty.SimulationNode>;
|
|
|
|
namespace Craftimizer.Solver.Crafty;
|
|
|
|
// https://github.com/alostsock/crafty/blob/cffbd0cad8bab3cef9f52a3e3d5da4f5e3781842/crafty/src/simulator.rs
|
|
public sealed class Solver
|
|
{
|
|
/// <summary>
/// Search settings. The methods of this class read its exploration constant, max-score weighting,
/// step limit, score storage threshold, iteration count and thread count.
/// </summary>
public SolverConfig Config;
|
|
/// <summary>Root of the search tree; created in the constructor from the starting state.</summary>
public Node RootNode;
|
|
|
|
/// <summary>
/// Random source taken from the starting state's input; used to pick actions during expansion and rollout.
/// </summary>
public Random Random;
|
|
|
|
/// <summary>
/// Creates a solver whose search tree is rooted at <paramref name="state"/>.
/// </summary>
/// <param name="config">Search settings; also supplies the step limit for the temporary simulator.</param>
/// <param name="state">State the root node represents.</param>
/// <param name="strict">Forwarded to the simulator's action heuristic when listing the root's candidate actions.</param>
public Solver(SolverConfig config, SimulationState state, bool strict)
{
    Config = config;
    Random = state.Input.Random;

    // A throwaway simulator is only needed to describe the root: its completion state and candidate actions.
    var rootSimulator = new Simulator(state, config.MaxStepCount);
    var rootCompletion = rootSimulator.CompletionState;
    var rootActions = rootSimulator.AvailableActionsHeuristic(strict);

    // The root has no action leading into it, hence the null.
    RootNode = new(new(state, null, rootCompletion, rootActions));
}
|
|
|
|
/// <summary>
/// Creates a solver rooted at a fresh <see cref="SimulationState"/> built from <paramref name="input"/>.
/// </summary>
/// <param name="config">Search settings.</param>
/// <param name="input">Input used to construct the starting state.</param>
/// <param name="strict">Forwarded to the state-based constructor.</param>
public Solver(SolverConfig config, SimulationInput input, bool strict)
    : this(config, new SimulationState(input), strict) { }
|
|
|
|
/// <summary>
/// Applies <paramref name="action"/> to <paramref name="state"/> and packages the outcome as a new node payload.
/// </summary>
/// <param name="simulator">Simulator used to execute the action; its completion state and action heuristic are read afterwards.</param>
/// <param name="state">State to execute the action from.</param>
/// <param name="action">Action to execute; recorded on the resulting node.</param>
/// <param name="strict">Forwarded to the simulator's action heuristic.</param>
/// <returns>A node payload holding the resulting state, the action, the completion state and the candidate actions.</returns>
private static SimulationNode Execute(Simulator simulator, SimulationState state, ActionType action, bool strict)
{
    var (_, resultingState) = simulator.Execute(state, action);

    // Both of these are read after Execute, so they describe the resulting state.
    var completion = simulator.CompletionState;
    var candidates = simulator.AvailableActionsHeuristic(strict);

    return new(resultingState, action, completion, candidates);
}
|
|
|
|
/// <summary>
/// Replays <paramref name="actions"/> from <paramref name="startNode"/>, adding one child node per action.
/// </summary>
/// <param name="simulator">Simulator used to execute each action.</param>
/// <param name="startNode">Node to start from.</param>
/// <param name="actions">Actions to apply, in order.</param>
/// <param name="strict">Forwarded to the action heuristic of every node that gets created.</param>
/// <returns>
/// The last node reached and its completion state. Replay stops early at a complete node, or returns
/// <see cref="CompletionState.InvalidAction"/> when the next action is not among the current node's available actions.
/// </returns>
public static (Node EndNode, CompletionState State) ExecuteActions(Simulator simulator, Node startNode, ReadOnlySpan<ActionType> actions, bool strict = false)
{
    foreach (var action in actions)
    {
        // Bind by reference (as ExpandAndRollout does) so that RemoveAction below updates the node's
        // own action set rather than a local copy of the node payload.
        ref var state = ref startNode.State;
        if (state.IsComplete)
            return (startNode, state.CompletionState);

        if (!state.AvailableActions.HasAction(action))
            return (startNode, CompletionState.InvalidAction);

        // Consume the action so that it is not expanded again from this node.
        state.AvailableActions.RemoveAction(action);

        startNode = startNode.Add(Execute(simulator, state.State, action, strict));
    }

    return (startNode, startNode.State.CompletionState);
}
|
|
|
|
/// <summary>
/// Returns the child whose recorded max score is highest.
/// </summary>
/// <remarks>
/// Children are scanned one chunk of <c>Vector&lt;float&gt;.Count</c> entries at a time. Ties go to the later
/// child because the comparison is inclusive. With no children, the entry at chunk 0, slot 0 is returned,
/// so callers should check the count first.
/// </remarks>
/// <param name="children">Child buffer to scan.</param>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Node ChildMaxScore(ref Node.ChildBuffer children)
{
    var lanes = Vector<float>.Count;
    Span<float> laneScores = stackalloc float[lanes];

    var bestChunk = 0;
    var bestLane = 0;
    var bestScore = 0f;

    var remaining = children.Count;
    var chunkIdx = 0;
    while (remaining > 0)
    {
        // The last chunk may be only partially filled.
        var filled = Math.Min(lanes, remaining);

        ref var chunk = ref children.Data[chunkIdx];
        for (var lane = 0; lane < filled; ++lane)
            laneScores[lane] = chunk[lane].State.Scores.MaxScore;

        // NOTE(review): HMaxIndex presumably returns the index of the largest of the first `filled` lanes - confirm.
        var winner = Intrinsics.HMaxIndex(new Vector<float>(laneScores), filled);
        var winnerScore = laneScores[winner];
        if (winnerScore >= bestScore)
        {
            bestChunk = chunkIdx;
            bestLane = winner;
            bestScore = winnerScore;
        }

        remaining -= filled;
        ++chunkIdx;
    }

    return children.Data[bestChunk][bestLane];
}
|
|
|
|
/// <summary>
/// Picks the child with the highest selection score, combining an exploitation and an exploration term.
/// </summary>
/// <remarks>
/// Per child: exploitation is <c>(1 - w) * (scoreSum / visits) + w * maxScore</c>, where <c>w</c> is the
/// configured max-score weighting constant; exploration is
/// <c>sqrt(explorationConstant * ln(parentVisits))</c> multiplied by <c>Intrinsics.ReciprocalSqrt(visits)</c>.
/// Visit counts are clamped to at least 1, and slots whose node is still null are scored with default scores.
/// Ties go to the later child.
/// </remarks>
/// <param name="parentVisits">Visit count of the parent node.</param>
/// <param name="children">Child buffer to evaluate.</param>
/// <returns>The selected child, or <c>null</c> when <paramref name="parentVisits"/> is zero.</returns>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private Node? EvalBestChild(int parentVisits, ref Node.ChildBuffer children)
{
    if (parentVisits == 0)
        return null;

    var lanes = Vector<float>.Count;

    var explorationScale = MathF.Sqrt(Config.ExplorationConstant * MathF.Log(parentVisits));
    var maxWeight = Config.MaxScoreWeightingConstant;
    var meanWeight = 1f - maxWeight;
    var explorationVector = new Vector<float>(explorationScale);

    Span<float> laneScoreSums = stackalloc float[lanes];
    Span<int> laneVisits = stackalloc int[lanes];
    Span<float> laneMaxScores = stackalloc float[lanes];

    var bestChunk = 0;
    var bestLane = 0;
    var bestScore = 0f;

    var remaining = children.Count;
    var chunkIdx = 0;
    while (remaining > 0)
    {
        // The last chunk may be only partially filled.
        var filled = Math.Min(lanes, remaining);

        ref var chunk = ref children.Data[chunkIdx];
        for (var lane = 0; lane < filled; ++lane)
        {
            // A slot can be counted before its node is stored; treat it as having default scores.
            var scores = chunk[lane]?.State.Scores ?? new();
            laneScoreSums[lane] = scores.ScoreSum;
            laneVisits[lane] = scores.Visits;
            laneMaxScores[lane] = scores.MaxScore;
        }

        var sums = new Vector<float>(laneScoreSums);
        var maxes = new Vector<float>(laneMaxScores);

        // Clamp to one visit so that the division below never divides by zero.
        var visitCounts = Vector.ConvertToSingle(Vector.Max(new Vector<int>(laneVisits), Vector<int>.One));

        var exploitation = (meanWeight * (sums / visitCounts)) + (maxWeight * maxes);
        var exploration = explorationVector * Intrinsics.ReciprocalSqrt(visitCounts);
        var combined = exploitation + exploration;

        // NOTE(review): HMaxIndex presumably returns the index of the largest of the first `filled` lanes - confirm.
        var winner = Intrinsics.HMaxIndex(combined, filled);
        var winnerScore = combined[winner];
        if (winnerScore >= bestScore)
        {
            bestChunk = chunkIdx;
            bestLane = winner;
            bestScore = winnerScore;
        }

        remaining -= filled;
        ++chunkIdx;
    }

    return children.Data[bestChunk][bestLane];
}
|
|
|
|
/// <summary>
/// Walks down from the root, following the best-scoring child at each level, until it reaches a node
/// that still has untried actions or that has no children.
/// </summary>
/// <returns>The node to expand next.</returns>
[Pure]
public Node Select()
{
    var current = RootNode;
    for (; ; )
    {
        var hasUntriedActions = !current.State.AvailableActions.IsEmpty;
        var hasNoChildren = current.Children.Count == 0;
        if (!hasUntriedActions && !hasNoChildren)
        {
            // Descend into the highest-scoring child. A null result means this node has no visits
            // recorded yet (not backpropagated), so the walk restarts from the root.
            current = EvalBestChild(current.State.Scores.Visits, ref current.Children) ?? RootNode;
            continue;
        }

        return current;
    }
}
|
|
|
|
/// <summary>
/// Expands <paramref name="initialNode"/> by one randomly chosen untried action, then plays random
/// actions from the new node until the simulation is no longer incomplete.
/// </summary>
/// <param name="simulator">Simulator used for the expansion and the rollout.</param>
/// <param name="initialNode">Node chosen by selection.</param>
/// <returns>
/// <c>null</c> when no action could be popped from the node; otherwise the node to backpropagate from and
/// the rollout score. If the node is already complete, it is returned with its own score. If the rollout
/// ends with completed progress and a score that meets both the storage threshold and the root's current
/// max score, the rollout's actions are added to the tree and the resulting terminal node is returned.
/// </returns>
public (Node ExpandedNode, float Score)? ExpandAndRollout(Simulator simulator, Node initialNode)
{
    ref var selected = ref initialNode.State;

    // Nothing to expand on a finished node; just report its score.
    if (selected.IsComplete)
        return (initialNode, selected.CalculateScore(Config.MaxStepCount) ?? 0);

    // Expansion: take one untried action out of the node and add the resulting child.
    if (selected.AvailableActions.PopRandom(Random) is not { } expansionAction)
        return null;
    var expandedNode = initialNode.Add(Execute(simulator, selected.State, expansionAction, true));

    // Rollout: play random actions until a terminal state, remembering them in case they are worth storing.
    var rolloutState = expandedNode.State.State;
    var rolloutCompletion = expandedNode.State.SimulationCompletionState;
    var rolloutActions = expandedNode.State.AvailableActions;

    byte playedCount = 0;
    Span<ActionType> played = stackalloc ActionType[Config.MaxStepCount];
    while (SimulationNode.GetCompletionState(rolloutCompletion, rolloutActions) == CompletionState.Incomplete)
    {
        var picked = rolloutActions.SelectRandom(Random);
        played[playedCount++] = picked;

        (_, rolloutState) = simulator.Execute(rolloutState, picked);
        rolloutCompletion = simulator.CompletionState;
        rolloutActions = simulator.AvailableActionsHeuristic(true);
    }

    var score = SimulationNode.CalculateScoreForState(rolloutState, rolloutCompletion, Config.MaxStepCount) ?? 0;

    // Only rollouts that completed progress and match or beat the best known score are stored in the tree.
    var worthStoring =
        rolloutCompletion == CompletionState.ProgressComplete
        && score >= Config.ScoreStorageThreshold
        && score >= RootNode.State.Scores.MaxScore;
    if (!worthStoring)
        return (expandedNode, score);

    var (terminalNode, _) = ExecuteActions(simulator, expandedNode, played[..playedCount], true);
    return (terminalNode, score);
}
|
|
|
|
/// <summary>
/// Records <paramref name="score"/> as a visit on <paramref name="startNode"/> and on each of its
/// ancestors, up to and including the root.
/// </summary>
/// <param name="startNode">Node the rollout finished at.</param>
/// <param name="score">Score to record.</param>
public void Backpropagate(Node startNode, float score)
{
    for (var current = startNode; ; current = current.Parent!)
    {
        current.State.Scores.Visit(score);

        // The root is the last node to receive the visit.
        if (current == RootNode)
            return;
    }
}
|
|
|
|
/// <summary>
/// Runs select / expand-and-rollout / backpropagate rounds until the configured number of iterations
/// has completed or cancellation is requested.
/// </summary>
/// <param name="token">Checked at the start of every round.</param>
public void SearchThread(CancellationToken token)
{
    var simulator = new Simulator(RootNode.State.State, Config.MaxStepCount);

    var completed = 0;
    while (completed < Config.Iterations)
    {
        if (token.IsCancellationRequested)
            return;

        var selected = Select();

        // A round without a result does not count towards the iteration budget; try again.
        if (ExpandAndRollout(simulator, selected) is not { } outcome)
            continue;

        Backpropagate(outcome.ExpandedNode, outcome.Score);
        completed++;
    }
}
|
|
|
|
/// <summary>
/// Runs <see cref="SearchThread"/> on the configured number of worker tasks and blocks until all of them finish.
/// </summary>
/// <param name="token">Cooperative cancellation; every worker checks it between iterations and stops early.</param>
public void Search(CancellationToken token)
{
    var tasks = new Task[Config.ThreadCount];
    for (var i = 0; i < tasks.Length; ++i)
    {
        // The token is deliberately not handed to Task.Run. A worker that had not started when
        // cancellation was requested would otherwise end up Canceled and make WaitAll throw an
        // AggregateException, even though SearchThread already handles cancellation by returning.
        tasks[i] = Task.Run(() => SearchThread(token));
    }

    Task.WaitAll(tasks, CancellationToken.None);
}
|
|
|
|
/// <summary>
/// Reads the best line found so far by following the max-score child from the root until a node
/// without children is reached.
/// </summary>
/// <returns>The actions along that path, and the payload of the node the path ends at.</returns>
[Pure]
public (List<ActionType> Actions, SimulationNode Node) Solution()
{
    var path = new List<ActionType>();

    var current = RootNode;
    while (current.Children.Count != 0)
    {
        current = ChildMaxScore(ref current.Children);

        if (current.State.Action is { } step)
            path.Add(step);
    }

    return (path, current.State);
}
|
|
|
|
/// <summary>
/// Runs the stepwise search starting from a fresh <see cref="SimulationState"/> built from <paramref name="input"/>.
/// </summary>
/// <param name="config">Search settings.</param>
/// <param name="input">Input used to construct the starting state.</param>
/// <param name="actionCallback">Optional callback forwarded to the state-based overload.</param>
/// <param name="token">Cancellation token forwarded to the state-based overload.</param>
/// <returns>The result of the state-based overload.</returns>
public static (List<ActionType> Actions, SimulationState State) SearchStepwise(SolverConfig config, SimulationInput input, Action<ActionType>? actionCallback, CancellationToken token = default)
{
    var initialState = new SimulationState(input);
    return SearchStepwise(config, initialState, actionCallback, token);
}
|
|
|
|
/// <summary>
/// Solves one action at a time: runs a full search from the current state, commits the first action of
/// the best line found, and repeats from the resulting state.
/// </summary>
/// <param name="config">Search settings used for every per-step search.</param>
/// <param name="state">State to start from.</param>
/// <param name="actionCallback">Invoked with each action as soon as it is committed; may be null.</param>
/// <param name="token">Stops the process early. The actions committed so far are still returned.</param>
/// <returns>
/// The committed actions and the state they lead to. If a search finds a line whose max score is at
/// least 1.0, the whole line is appended and its end state is returned immediately.
/// </returns>
public static (List<ActionType> Actions, SimulationState State) SearchStepwise(SolverConfig config, SimulationState state, Action<ActionType>? actionCallback, CancellationToken token = default)
{
    var actions = new List<ActionType>();
    Simulator sim = new(state, config.MaxStepCount);
    while (!sim.IsComplete)
    {
        if (token.IsCancellationRequested)
            break;

        // A fresh tree per step. Building it here, rather than at the end of the previous round,
        // avoids constructing a solver that is thrown away when the loop exits.
        var solver = new Solver(config, state, true);
        solver.Search(token);
        var (solutionActions, solutionNode) = solver.Solution();

        if (solutionNode.Scores.MaxScore >= 1.0)
        {
            actions.AddRange(solutionActions);
            return (actions, solutionNode.State);
        }

        // The search may yield nothing at all (for example when it is cancelled before completing
        // a single iteration); there is then no action to commit, so stop instead of indexing
        // into an empty list.
        if (solutionActions.Count == 0)
            break;

        var chosenAction = solutionActions[0];
        (_, state) = sim.Execute(state, chosenAction);
        actions.Add(chosenAction);

        actionCallback?.Invoke(chosenAction);
    }

    return (actions, state);
}
|
|
|
|
/// <summary>
/// Runs a single search starting from a fresh <see cref="SimulationState"/> built from <paramref name="input"/>.
/// </summary>
/// <param name="config">Search settings.</param>
/// <param name="input">Input used to construct the starting state.</param>
/// <param name="token">Cancellation token forwarded to the state-based overload.</param>
/// <returns>The result of the state-based overload.</returns>
public static (List<ActionType> Actions, SimulationState State) SearchOneshot(SolverConfig config, SimulationInput input, CancellationToken token = default)
{
    var initialState = new SimulationState(input);
    return SearchOneshot(config, initialState, token);
}
|
|
|
|
/// <summary>
/// Runs one search from <paramref name="state"/> and returns the best line it found.
/// </summary>
/// <param name="config">Search settings.</param>
/// <param name="state">State to search from.</param>
/// <param name="token">Stops the search early.</param>
/// <returns>The actions of the best line and the state of the node that line ends at.</returns>
public static (List<ActionType> Actions, SimulationState State) SearchOneshot(SolverConfig config, SimulationState state, CancellationToken token = default)
{
    // The one-shot search builds its tree with the non-strict action heuristic.
    Solver solver = new(config, state, false);
    solver.Search(token);

    var best = solver.Solution();
    return (best.Actions, best.Node.State);
}
|
|
}
|