- Use tokenizer for URL parsing

- Regex-based split for letter selector
This commit is contained in:
Infi
2024-05-21 06:36:57 +02:00
parent 759aca4ce0
commit 140343e654
7 changed files with 106 additions and 85 deletions
+19
View File
@@ -70,6 +70,11 @@ public class TextChunk : Chunk
[Key(6)] [Key(6)]
public string Content { get; set; } public string Content { get; set; }
/// <summary>
/// Creates a text chunk that takes its source and link from an existing chunk.
/// </summary>
/// <param name="chunk">Chunk whose <c>Source</c> and <c>Link</c> are forwarded to the base constructor.</param>
/// <param name="content">Text stored in <see cref="Content"/>.</param>
// NOTE(review): the internal constructor carries a comment about null content having
// broken rendering in the past; no null handling happens here, so confirm callers
// never pass null.
private TextChunk(Chunk chunk, string content) : base(chunk.Source, chunk.Link)
    => Content = content;
internal TextChunk(ChunkSource source, Payload? link, string content) : base(source, link) internal TextChunk(ChunkSource source, Payload? link, string content) : base(source, link)
{ {
// This has been null in the past, and it broke rendering code. // This has been null in the past, and it broke rendering code.
@@ -102,6 +107,20 @@ public class TextChunk : Chunk
Italic = Italic, Italic = Italic,
}; };
} }
/// <summary>
/// Builds a new <see cref="TextChunk"/> that carries the same styling as this one.
/// </summary>
/// <param name="chunk">Chunk that supplies the source and link of the new chunk.</param>
/// <param name="content">Text content of the new chunk.</param>
/// <returns>
/// A new chunk whose <c>FallbackColour</c>, <c>Foreground</c>, <c>Glow</c> and
/// <c>Italic</c> values are copied from this instance.
/// </returns>
public TextChunk NewWithStyle(Chunk chunk, string content) => new TextChunk(chunk, content)
{
    FallbackColour = FallbackColour,
    Foreground = Foreground,
    Glow = Glow,
    Italic = Italic,
};
} }
[MessagePackObject] [MessagePackObject]
+30 -63
View File
@@ -153,47 +153,10 @@ internal partial class Message
newChunks.Add(chunk); newChunks.Add(chunk);
} }
// Splits the given content on URLRegex and adds one chunk per piece: plain pieces keep
// the link of the original chunk, URL pieces get a link resolved from the URL text.
void AddContentAfterURLCheck(string content, TextChunk text, Chunk chunk)
{
    // Regex.Split keeps the text captured by the named group as separate entries.
    // A match at the very start of the string, or two adjacent matches, produce an
    // empty string entry between them.
    var pieces = URLRegex.Split(content);
    if (pieces.Length == 1)
    {
        // Nothing matched, the content is passed through unchanged.
        AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, content));
        return;
    }

    var expectUrl = false;
    foreach (var piece in pieces)
    {
        // A piece is handled as a URL only when a match is expected and it is not empty.
        var isUrl = expectUrl && piece != "";
        expectUrl = !isUrl;

        if (!isUrl)
        {
            AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, piece));
            continue;
        }

        try
        {
            // Attach a link resolved from the URL text to the new chunk.
            var link = UriPayload.ResolveURI(piece);
            AddChunkWithMessage(text.NewWithStyle(chunk.Source, link, piece));
        }
        catch (UriFormatException)
        {
            // Fall back to a plain chunk with the original link and log the rejected text.
            AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, piece));
            Plugin.Log.Debug($"Invalid URL accepted by Regex but failed URI parsing: '{piece}'");
        }
    }
}
var checkForEmotes = (Code.IsPlayerMessage() || extraChatChannel != Guid.Empty) && Plugin.Config.ShowEmotes; var checkForEmotes = (Code.IsPlayerMessage() || extraChatChannel != Guid.Empty) && Plugin.Config.ShowEmotes;
foreach (var chunk in oldChunks) foreach (var chunk in oldChunks)
{ {
// Use as is if it's not a text chunk or it already has a payload. // Use as is if it's not a text chunk, or it already has a payload.
if (chunk is not TextChunk text || chunk.Link != null) if (chunk is not TextChunk text || chunk.Link != null)
{ {
// No need to call AddChunkWithMessage here since the chunk // No need to call AddChunkWithMessage here since the chunk
@@ -215,28 +178,49 @@ internal partial class Message
var word = wordBuilder.ToString(); var word = wordBuilder.ToString();
wordBuilder.Clear(); wordBuilder.Clear();
var wordUsed = false;
var tokenUsed = false;
if (checkForEmotes && EmoteCache.Exists(word) && !Plugin.Config.BlockedEmotes.Contains(word)) if (checkForEmotes && EmoteCache.Exists(word) && !Plugin.Config.BlockedEmotes.Contains(word))
{ {
// Add the previous sentence before the emote // Add the previous sentence before adding the emote
AddContentAfterURLCheck(sentenceBuilder.ToString(), text, chunk); AddChunkWithMessage(text.NewWithStyle(chunk, sentenceBuilder.ToString()));
AddChunkWithMessage(new TextChunk(chunk.Source, EmotePayload.ResolveEmote(word), word) { FallbackColour = text.FallbackColour }); AddChunkWithMessage(new TextChunk(chunk.Source, EmotePayload.ResolveEmote(word), word) { FallbackColour = text.FallbackColour });
// Append the current punctuation symbol wordUsed = true;
sentenceBuilder.Clear();
}
if (token.TokenType == Tokenizer.TokenType.UrlString)
{
// Add the previous sentence before adding the url
AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, sentenceBuilder.Append(!wordUsed ? word : "").ToString()));
try
{
AddChunkWithMessage(text.NewWithStyle(chunk.Source, UriPayload.ResolveURI(token.Value), token.Value));
}
catch (UriFormatException)
{
AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, token.Value));
Plugin.Log.Debug($"Invalid URL accepted by Regex but failed URI parsing: '{token.Value}'");
}
wordUsed = true;
tokenUsed = true;
sentenceBuilder.Clear(); sentenceBuilder.Clear();
sentenceBuilder.Append(token.Value);
continue;
} }
// Append match if we haven't reached end of string yet // Append match if we haven't reached end of string yet
if (token.TokenType != Tokenizer.TokenType.SequenceTerminator) if (token.TokenType != Tokenizer.TokenType.SequenceTerminator)
{ {
sentenceBuilder.Append(word); sentenceBuilder.Append(!wordUsed ? word : "");
sentenceBuilder.Append(token.Value); sentenceBuilder.Append(!tokenUsed ? token.Value : "");
continue; continue;
} }
// End of string reached, we add our leftover // End of string reached, we add our leftover
AddContentAfterURLCheck(sentenceBuilder.Append(word).ToString(), text, chunk); AddChunkWithMessage(text.NewWithStyle(chunk, sentenceBuilder.Append(!wordUsed ? word : "").ToString()));
} }
} }
@@ -340,23 +324,6 @@ internal partial class Message
Content = newChunks; Content = newChunks;
} }
/// <summary>
/// Compiled, case-insensitive regex that matches URLs like:
/// - https://example.com
/// - http://example.com
/// - www.example.com
/// - https://sub.example.com
/// - example.com
/// - sub.example.com
///
/// It matches URLs with a www. or http(s):// prefix, and also matches URLs
/// without a prefix on specific TLDs (com, net, org, co, io, app).
/// An optional port (1 to 5 digits) and an optional path may follow the host.
/// The path may be a bare trailing slash, so "https://example.com/" is matched
/// including the slash. The whole match is exposed through the named group "URL".
/// </summary>
private static readonly Regex URLRegex = new(
    @"(?<URL>((https?:\/\/|www\.)[a-z0-9-]+(\.[a-z0-9-]+)*|([a-z0-9-]+(\.[a-z0-9-]+)*\.(com|net|org|co|io|app)))(:[\d]{1,5})?(\/[^\s]*)?)",
    RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture
);
[GeneratedRegex("(<item>|<flag>)")] [GeneratedRegex("(<item>|<flag>)")]
private static partial Regex TextParamRegex(); private static partial Regex TextParamRegex();
} }
+2 -2
View File
@@ -2409,7 +2409,7 @@ namespace ChatTwo.Resources {
} }
/// <summary> /// <summary>
/// Looks up a localized string similar to Text Preview:. /// Looks up a localized string similar to Message Preview:.
/// </summary> /// </summary>
internal static string Options_Preview_Header { internal static string Options_Preview_Header {
get { get {
@@ -2427,7 +2427,7 @@ namespace ChatTwo.Resources {
} }
/// <summary> /// <summary>
/// Looks up a localized string similar to Text preview. /// Looks up a localized string similar to Message preview.
/// </summary> /// </summary>
internal static string Options_Preview_Name { internal static string Options_Preview_Name {
get { get {
+2 -2
View File
@@ -1055,7 +1055,7 @@
<value>Displays a preview with special parameter evaluated, like emotes and &lt;item&gt;</value> <value>Displays a preview with special parameter evaluated, like emotes and &lt;item&gt;</value>
</data> </data>
<data name="Options_Preview_Name" xml:space="preserve"> <data name="Options_Preview_Name" xml:space="preserve">
<value>Text preview</value> <value>Message preview</value>
</data> </data>
<data name="Options_Preview_None" xml:space="preserve"> <data name="Options_Preview_None" xml:space="preserve">
<value>None</value> <value>None</value>
@@ -1070,7 +1070,7 @@
<value>Bottom</value> <value>Bottom</value>
</data> </data>
<data name="Options_Preview_Header" xml:space="preserve"> <data name="Options_Preview_Header" xml:space="preserve">
<value>Text Preview:</value> <value>Message Preview:</value>
</data> </data>
<data name="Options_Preview_Tooltip" xml:space="preserve"> <data name="Options_Preview_Tooltip" xml:space="preserve">
<value>Tooltip</value> <value>Tooltip</value>
+3 -3
View File
@@ -49,7 +49,7 @@ public sealed class ChatLogWindow : Window
} }
} }
internal bool KeepFocusedThroughPreview; internal bool FocusedPreview;
internal bool Activate; internal bool Activate;
private int ActivatePos = -1; private int ActivatePos = -1;
internal string Chat = string.Empty; internal string Chat = string.Empty;
@@ -686,9 +686,9 @@ public sealed class ChatLogWindow : Window
var push = inputColour != null; var push = inputColour != null;
using (ImRaii.PushColor(ImGuiCol.Text, push ? ColourUtil.RgbaToAbgr(inputColour!.Value) : 0, push)) using (ImRaii.PushColor(ImGuiCol.Text, push ? ColourUtil.RgbaToAbgr(inputColour!.Value) : 0, push))
{ {
if (Activate || KeepFocusedThroughPreview) if (Activate || FocusedPreview)
{ {
KeepFocusedThroughPreview = false; FocusedPreview = false;
ImGui.SetKeyboardFocusHere(); ImGui.SetKeyboardFocusHere();
} }
+11 -11
View File
@@ -1,6 +1,7 @@
using System.Numerics; using System.Numerics;
using System.Reflection; using System.Reflection;
using System.Text; using System.Text;
using System.Text.RegularExpressions;
using ChatTwo.Code; using ChatTwo.Code;
using ChatTwo.Resources; using ChatTwo.Resources;
using ChatTwo.Util; using ChatTwo.Util;
@@ -14,7 +15,7 @@ using ImGuiNET;
namespace ChatTwo.Ui; namespace ChatTwo.Ui;
public class InputPreview : Window public partial class InputPreview : Window
{ {
private ChatLogWindow LogWindow { get; } private ChatLogWindow LogWindow { get; }
@@ -231,24 +232,20 @@ public class InputPreview : Window
CursorPosition += "<flag>".Length; CursorPosition += "<flag>".Length;
else if (text.Link is EmotePayload emote) else if (text.Link is EmotePayload emote)
CursorPosition += emote.Code.Length; CursorPosition += emote.Code.Length;
else if (text.Link is UriPayload)
CursorPosition += text.Content.Length;
ImGuiUtil.WrapText(text.Content, chunk, handler, LogWindow.DefaultText, lineWidth); ImGuiUtil.WrapText(text.Content, chunk, handler, LogWindow.DefaultText, lineWidth);
return; return;
} }
var splits = text.Content.Split(" "); foreach (var word in WhitespaceRegex().Split(text.Content).Where(s => s != string.Empty))
for (var i = 0; i < splits.Length; i++)
{ {
// The last character should never be an empty string var wordSize = ImGui.CalcTextSize(word);
// Sorting this out because it leads to double whitespaces
if (i + 1 == splits.Length && splits[i] == "")
break;
var wordSize = ImGui.CalcTextSize(splits[i]);
if (ImGui.GetContentRegionAvail().X < wordSize.X) if (ImGui.GetContentRegionAvail().X < wordSize.X)
ImGui.NewLine(); ImGui.NewLine();
foreach (var letter in $"{splits[i]} ") foreach (var letter in word)
{ {
var letterSize = ImGui.CalcTextSize(letter.ToString()); var letterSize = ImGui.CalcTextSize(letter.ToString());
@@ -256,11 +253,14 @@ public class InputPreview : Window
if (ImGui.Selectable($"{letter}##{CursorPosition + unique}", false, ImGuiSelectableFlags.None, letterSize)) if (ImGui.Selectable($"{letter}##{CursorPosition + unique}", false, ImGuiSelectableFlags.None, letterSize))
{ {
SelectedCursorPos = CursorPosition; SelectedCursorPos = CursorPosition;
LogWindow.KeepFocusedThroughPreview = true; LogWindow.FocusedPreview = true;
} }
ImGui.SameLine(); ImGui.SameLine();
} }
} }
ImGui.NewLine(); ImGui.NewLine();
} }
/// <summary>
/// Source-generated regex that matches a single whitespace character.
/// The pattern wraps <c>\s</c> in a capturing group, so <c>Regex.Split</c> includes
/// each matched whitespace character in its result instead of discarding it.
/// </summary>
[GeneratedRegex(@"(\s)")]
private static partial Regex WhitespaceRegex();
} }
+39 -4
View File
@@ -16,6 +16,7 @@ public static class Tokenizer
Whitespace, Whitespace,
Equals, Equals,
OpenParenthesis, OpenParenthesis,
UrlString,
StringValue, StringValue,
Leftover, Leftover,
SequenceTerminator SequenceTerminator
@@ -46,6 +47,7 @@ public static class Tokenizer
new(TokenType.Whitespace, "\\s", 1), new(TokenType.Whitespace, "\\s", 1),
new(TokenType.Equals, "=", 1), new(TokenType.Equals, "=", 1),
new(TokenType.OpenParenthesis, "\\(", 1), new(TokenType.OpenParenthesis, "\\(", 1),
new(TokenType.UrlString, URLRegex, 1),
new(TokenType.StringValue, "\\p{IsBasicLatin}", 2), new(TokenType.StringValue, "\\p{IsBasicLatin}", 2),
new(TokenType.Leftover, ".", 3) new(TokenType.Leftover, ".", 3)
}; };
@@ -85,9 +87,25 @@ public static class Tokenizer
} }
} }
private class TokenDefinition(TokenType returnsToken, string regexPattern, int precedence) private class TokenDefinition
{ {
private readonly Regex Regex = new(regexPattern, RegexOptions.IgnoreCase|RegexOptions.Compiled); private readonly TokenType Type;
private readonly int Precedence;
private readonly Regex Regex;
/// <summary>
/// Creates a token definition from a pattern string.
/// The pattern is compiled with <c>RegexOptions.IgnoreCase</c> and <c>RegexOptions.Compiled</c>.
/// </summary>
/// <param name="returnsToken">Token type stored for this definition.</param>
/// <param name="regexPattern">Regular expression pattern text.</param>
/// <param name="precedence">Precedence value stored for this definition.</param>
public TokenDefinition(TokenType returnsToken, string regexPattern, int precedence)
    : this(returnsToken, new Regex(regexPattern, RegexOptions.IgnoreCase|RegexOptions.Compiled), precedence)
{
}
/// <summary>
/// Creates a token definition from an already constructed regex, which is stored as given.
/// </summary>
/// <param name="returnsToken">Token type stored for this definition.</param>
/// <param name="regex">Regex instance used by this definition.</param>
/// <param name="precedence">Precedence value stored for this definition.</param>
public TokenDefinition(TokenType returnsToken, Regex regex, int precedence)
{
    (Type, Precedence, Regex) = (returnsToken, precedence, regex);
}
public IEnumerable<TokenMatch> FindMatches(string inputString) public IEnumerable<TokenMatch> FindMatches(string inputString)
{ {
@@ -98,9 +116,9 @@ public static class Tokenizer
{ {
StartIndex = matches[i].Index, StartIndex = matches[i].Index,
EndIndex = matches[i].Index + matches[i].Length, EndIndex = matches[i].Index + matches[i].Length,
TokenType = returnsToken, TokenType = Type,
Value = matches[i].Value, Value = matches[i].Value,
Precedence = precedence Precedence = Precedence
}; };
} }
} }
@@ -114,4 +132,21 @@ public static class Tokenizer
public int EndIndex { get; set; } public int EndIndex { get; set; }
public int Precedence { get; set; } public int Precedence { get; set; }
} }
/// <summary>
/// Compiled, case-insensitive regex that matches URLs like:
/// - https://example.com
/// - http://example.com
/// - www.example.com
/// - https://sub.example.com
/// - example.com
/// - sub.example.com
///
/// It matches URLs with a www. or http(s):// prefix, and also matches URLs
/// without a prefix on specific TLDs (com, net, org, co, io, app).
/// An optional port (1 to 5 digits) and an optional path may follow the host;
/// the path may be a bare trailing slash. The whole match is exposed through
/// the named group "URL".
/// </summary>
// NOTE(review): static field initializers run in textual order. If the token
// definition list that passes URLRegex is itself a static field initialized
// earlier in this class, it would observe null here - confirm the declaration order.
private static readonly Regex URLRegex = new(
    @"(?<URL>((https?:\/\/|www\.)[a-z0-9-]+(\.[a-z0-9-]+)*|([a-z0-9-]+(\.[a-z0-9-]+)*\.(com|net|org|co|io|app)))(:[\d]{1,5})?(\/[^\s]*)?)",
    RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture
);
} }