From 140343e6540b30263c936060140b0452e05066df Mon Sep 17 00:00:00 2001 From: Infi Date: Tue, 21 May 2024 06:36:57 +0200 Subject: [PATCH] - Use tokenizer for URL parsing - Regex based split for letter selector --- ChatTwo/Chunk.cs | 19 ++++++ ChatTwo/Message.cs | 93 +++++++++----------------- ChatTwo/Resources/Language.Designer.cs | 4 +- ChatTwo/Resources/Language.resx | 4 +- ChatTwo/Ui/ChatLogWindow.cs | 6 +- ChatTwo/Ui/InputPreview.cs | 22 +++--- ChatTwo/Util/Tokenizer.cs | 43 ++++++++++-- 7 files changed, 106 insertions(+), 85 deletions(-) diff --git a/ChatTwo/Chunk.cs b/ChatTwo/Chunk.cs index f30b248..7b44562 100755 --- a/ChatTwo/Chunk.cs +++ b/ChatTwo/Chunk.cs @@ -70,6 +70,11 @@ public class TextChunk : Chunk [Key(6)] public string Content { get; set; } + private TextChunk(Chunk chunk, string content) : base(chunk.Source, chunk.Link) + { + Content = content; + } + internal TextChunk(ChunkSource source, Payload? link, string content) : base(source, link) { // This has been null in the past, and it broke rendering code. @@ -102,6 +107,20 @@ public class TextChunk : Chunk Italic = Italic, }; } + + /// + /// Creates a new TextChunk with identical styling to this one. 
+ /// + public TextChunk NewWithStyle(Chunk chunk, string content) + { + return new TextChunk(chunk, content) + { + FallbackColour = FallbackColour, + Foreground = Foreground, + Glow = Glow, + Italic = Italic, + }; + } } [MessagePackObject] diff --git a/ChatTwo/Message.cs b/ChatTwo/Message.cs index 155e116..42209c6 100755 --- a/ChatTwo/Message.cs +++ b/ChatTwo/Message.cs @@ -153,47 +153,10 @@ internal partial class Message newChunks.Add(chunk); } - void AddContentAfterURLCheck(string content, TextChunk text, Chunk chunk) - { - // This works because c# will split regex string, while keeping named groups as separated splits - // If the match is the first content of a string, the array will start with a "" - // Same if 2 matches are next to each other, they will be split with a "" - var splits = URLRegex.Split(content); - if (splits.Length == 1) - { - AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, content)); - return; - } - - var nextIsMatch = false; - foreach (var split in splits) - { - if (split == "" || !nextIsMatch) - { - nextIsMatch = true; - AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, split)); - continue; - } - - // Create a new TextChunk with a URIPayload for the URL text. - nextIsMatch = false; - try - { - var link = UriPayload.ResolveURI(split); - AddChunkWithMessage(text.NewWithStyle(chunk.Source, link, split)); - } - catch (UriFormatException) - { - AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, split)); - Plugin.Log.Debug($"Invalid URL accepted by Regex but failed URI parsing: '{split}'"); - } - } - } - var checkForEmotes = (Code.IsPlayerMessage() || extraChatChannel != Guid.Empty) && Plugin.Config.ShowEmotes; foreach (var chunk in oldChunks) { - // Use as is if it's not a text chunk or it already has a payload. + // Use as is if it's not a text chunk, or it already has a payload. 
if (chunk is not TextChunk text || chunk.Link != null) { // No need to call AddChunkWithMessage here since the chunk @@ -215,28 +178,49 @@ internal partial class Message var word = wordBuilder.ToString(); wordBuilder.Clear(); + + var wordUsed = false; + var tokenUsed = false; + if (checkForEmotes && EmoteCache.Exists(word) && !Plugin.Config.BlockedEmotes.Contains(word)) { - // Add the previous sentence before the emote - AddContentAfterURLCheck(sentenceBuilder.ToString(), text, chunk); + // Add the previous sentence before adding the emote + AddChunkWithMessage(text.NewWithStyle(chunk, sentenceBuilder.ToString())); AddChunkWithMessage(new TextChunk(chunk.Source, EmotePayload.ResolveEmote(word), word) { FallbackColour = text.FallbackColour }); - // Append the current punctuation symbol + wordUsed = true; + sentenceBuilder.Clear(); + } + + if (token.TokenType == Tokenizer.TokenType.UrlString) + { + // Add the previous sentence before adding the url + AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, sentenceBuilder.Append(!wordUsed ? word : "").ToString())); + try + { + AddChunkWithMessage(text.NewWithStyle(chunk.Source, UriPayload.ResolveURI(token.Value), token.Value)); + } + catch (UriFormatException) + { + AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, token.Value)); + Plugin.Log.Debug($"Invalid URL accepted by Regex but failed URI parsing: '{token.Value}'"); + } + + wordUsed = true; + tokenUsed = true; sentenceBuilder.Clear(); - sentenceBuilder.Append(token.Value); - continue; } // Append match if we haven't reached end of string yet if (token.TokenType != Tokenizer.TokenType.SequenceTerminator) { - sentenceBuilder.Append(word); - sentenceBuilder.Append(token.Value); + sentenceBuilder.Append(!wordUsed ? word : ""); + sentenceBuilder.Append(!tokenUsed ? 
token.Value : ""); continue; } // End of string reached, we add our leftover - AddContentAfterURLCheck(sentenceBuilder.Append(word).ToString(), text, chunk); + AddChunkWithMessage(text.NewWithStyle(chunk, sentenceBuilder.Append(!wordUsed ? word : "").ToString())); } } @@ -340,23 +324,6 @@ internal partial class Message Content = newChunks; } - /// - /// URLRegex returns a regex object that matches URLs like: - /// - https://example.com - /// - http://example.com - /// - www.example.com - /// - https://sub.example.com - /// - example.com - /// - sub.example.com - /// - /// It matches URLs with www. or https:// prefix, and also matches URLs - /// without a prefix on specific TLDs. - /// - private static Regex URLRegex = new( - @"(?((https?:\/\/|www\.)[a-z0-9-]+(\.[a-z0-9-]+)*|([a-z0-9-]+(\.[a-z0-9-]+)*\.(com|net|org|co|io|app)))(:[\d]{1,5})?(\/[^\s]+)?)", - RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture - ); - [GeneratedRegex("(|)")] private static partial Regex TextParamRegex(); } diff --git a/ChatTwo/Resources/Language.Designer.cs b/ChatTwo/Resources/Language.Designer.cs index 48a01d2..e668d90 100755 --- a/ChatTwo/Resources/Language.Designer.cs +++ b/ChatTwo/Resources/Language.Designer.cs @@ -2409,7 +2409,7 @@ namespace ChatTwo.Resources { } /// - /// Looks up a localized string similar to Text Preview:. + /// Looks up a localized string similar to Message Preview:. /// internal static string Options_Preview_Header { get { @@ -2427,7 +2427,7 @@ namespace ChatTwo.Resources { } /// - /// Looks up a localized string similar to Text preview. + /// Looks up a localized string similar to Message preview. 
/// internal static string Options_Preview_Name { get { diff --git a/ChatTwo/Resources/Language.resx b/ChatTwo/Resources/Language.resx index d91b1cf..2f450a8 100644 --- a/ChatTwo/Resources/Language.resx +++ b/ChatTwo/Resources/Language.resx @@ -1055,7 +1055,7 @@ Displays a preview with special parameter evaluated, like emotes and <item> - Text preview + Message preview None @@ -1070,7 +1070,7 @@ Bottom - Text Preview: + Message Preview: Tooltip diff --git a/ChatTwo/Ui/ChatLogWindow.cs b/ChatTwo/Ui/ChatLogWindow.cs index 7886d75..3d00dc2 100644 --- a/ChatTwo/Ui/ChatLogWindow.cs +++ b/ChatTwo/Ui/ChatLogWindow.cs @@ -49,7 +49,7 @@ public sealed class ChatLogWindow : Window } } - internal bool KeepFocusedThroughPreview; + internal bool FocusedPreview; internal bool Activate; private int ActivatePos = -1; internal string Chat = string.Empty; @@ -686,9 +686,9 @@ public sealed class ChatLogWindow : Window var push = inputColour != null; using (ImRaii.PushColor(ImGuiCol.Text, push ? ColourUtil.RgbaToAbgr(inputColour!.Value) : 0, push)) { - if (Activate || KeepFocusedThroughPreview) + if (Activate || FocusedPreview) { - KeepFocusedThroughPreview = false; + FocusedPreview = false; ImGui.SetKeyboardFocusHere(); } diff --git a/ChatTwo/Ui/InputPreview.cs b/ChatTwo/Ui/InputPreview.cs index 727707c..de92e1a 100644 --- a/ChatTwo/Ui/InputPreview.cs +++ b/ChatTwo/Ui/InputPreview.cs @@ -1,6 +1,7 @@ using System.Numerics; using System.Reflection; using System.Text; +using System.Text.RegularExpressions; using ChatTwo.Code; using ChatTwo.Resources; using ChatTwo.Util; @@ -14,7 +15,7 @@ using ImGuiNET; namespace ChatTwo.Ui; -public class InputPreview : Window +public partial class InputPreview : Window { private ChatLogWindow LogWindow { get; } @@ -231,24 +232,20 @@ public class InputPreview : Window CursorPosition += "".Length; else if (text.Link is EmotePayload emote) CursorPosition += emote.Code.Length; + else if (text.Link is UriPayload) + CursorPosition += text.Content.Length; 
ImGuiUtil.WrapText(text.Content, chunk, handler, LogWindow.DefaultText, lineWidth); return; } - var splits = text.Content.Split(" "); - for (var i = 0; i < splits.Length; i++) + foreach (var word in WhitespaceRegex().Split(text.Content).Where(s => s != string.Empty)) { - // The last character should never be an empty string - // Sorting this out because it leads to double whitespaces - if (i + 1 == splits.Length && splits[i] == "") - break; - - var wordSize = ImGui.CalcTextSize(splits[i]); + var wordSize = ImGui.CalcTextSize(word); if (ImGui.GetContentRegionAvail().X < wordSize.X) ImGui.NewLine(); - foreach (var letter in $"{splits[i]} ") + foreach (var letter in word) { var letterSize = ImGui.CalcTextSize(letter.ToString()); @@ -256,11 +253,14 @@ public class InputPreview : Window if (ImGui.Selectable($"{letter}##{CursorPosition + unique}", false, ImGuiSelectableFlags.None, letterSize)) { SelectedCursorPos = CursorPosition; - LogWindow.KeepFocusedThroughPreview = true; + LogWindow.FocusedPreview = true; } ImGui.SameLine(); } } ImGui.NewLine(); } + + [GeneratedRegex(@"(\s)")] + private static partial Regex WhitespaceRegex(); } diff --git a/ChatTwo/Util/Tokenizer.cs b/ChatTwo/Util/Tokenizer.cs index 0b5ac1c..9092c7e 100644 --- a/ChatTwo/Util/Tokenizer.cs +++ b/ChatTwo/Util/Tokenizer.cs @@ -16,6 +16,7 @@ public static class Tokenizer Whitespace, Equals, OpenParenthesis, + UrlString, StringValue, Leftover, SequenceTerminator @@ -46,6 +47,7 @@ public static class Tokenizer new(TokenType.Whitespace, "\\s", 1), new(TokenType.Equals, "=", 1), new(TokenType.OpenParenthesis, "\\(", 1), + new(TokenType.UrlString, URLRegex, 1), new(TokenType.StringValue, "\\p{IsBasicLatin}", 2), new(TokenType.Leftover, ".", 3) }; @@ -85,9 +87,25 @@ public static class Tokenizer } } - private class TokenDefinition(TokenType returnsToken, string regexPattern, int precedence) + private class TokenDefinition { - private readonly Regex Regex = new(regexPattern, 
RegexOptions.IgnoreCase|RegexOptions.Compiled); + private readonly TokenType Type; + private readonly int Precedence; + private readonly Regex Regex; + + public TokenDefinition(TokenType returnsToken, string regexPattern, int precedence) + { + Type = returnsToken; + Precedence = precedence; + Regex = new Regex(regexPattern, RegexOptions.IgnoreCase|RegexOptions.Compiled); + } + + public TokenDefinition(TokenType returnsToken, Regex regex, int precedence) + { + Type = returnsToken; + Precedence = precedence; + Regex = regex; + } public IEnumerable FindMatches(string inputString) { @@ -98,9 +116,9 @@ public static class Tokenizer { StartIndex = matches[i].Index, EndIndex = matches[i].Index + matches[i].Length, - TokenType = returnsToken, + TokenType = Type, Value = matches[i].Value, - Precedence = precedence + Precedence = Precedence }; } } @@ -114,4 +132,21 @@ public static class Tokenizer public int EndIndex { get; set; } public int Precedence { get; set; } } + + /// + /// URLRegex returns a regex object that matches URLs like: + /// - https://example.com + /// - http://example.com + /// - www.example.com + /// - https://sub.example.com + /// - example.com + /// - sub.example.com + /// + /// It matches URLs with www. or https:// prefix, and also matches URLs + /// without a prefix on specific TLDs. + /// + private static Regex URLRegex = new( + @"(?((https?:\/\/|www\.)[a-z0-9-]+(\.[a-z0-9-]+)*|([a-z0-9-]+(\.[a-z0-9-]+)*\.(com|net|org|co|io|app)))(:[\d]{1,5})?(\/[^\s]*)?)", + RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture + ); } \ No newline at end of file