- Use tokenizer for URL parsing

- Regex based split for letter selector
This commit is contained in:
Infi
2024-05-21 06:36:57 +02:00
parent 759aca4ce0
commit 140343e654
7 changed files with 106 additions and 85 deletions
+19
View File
@@ -70,6 +70,11 @@ public class TextChunk : Chunk
[Key(6)]
public string Content { get; set; }
/// <summary>
/// Creates a text chunk that takes its source and link from an existing chunk.
/// </summary>
/// <param name="chunk">Chunk whose <c>Source</c> and <c>Link</c> are passed to the base constructor.</param>
/// <param name="content">Text stored in <see cref="Content"/>.</param>
private TextChunk(Chunk chunk, string content) : base(chunk.Source, chunk.Link) => Content = content;
internal TextChunk(ChunkSource source, Payload? link, string content) : base(source, link)
{
// This has been null in the past, and it broke rendering code.
@@ -102,6 +107,20 @@ public class TextChunk : Chunk
Italic = Italic,
};
}
/// <summary>
/// Builds a new <see cref="TextChunk"/> from <paramref name="chunk"/> and
/// <paramref name="content"/> that carries the same styling as this instance.
/// </summary>
/// <param name="chunk">Chunk handed to the constructor of the new text chunk.</param>
/// <param name="content">Text content of the new chunk.</param>
/// <returns>
/// A new chunk whose FallbackColour, Foreground, Glow and Italic are copied from this one.
/// </returns>
public TextChunk NewWithStyle(Chunk chunk, string content) => new(chunk, content)
{
    FallbackColour = FallbackColour,
    Foreground = Foreground,
    Glow = Glow,
    Italic = Italic,
};
}
[MessagePackObject]
+30 -63
View File
@@ -153,47 +153,10 @@ internal partial class Message
newChunks.Add(chunk);
}
// Splits `content` on URLRegex and adds one chunk per resulting piece via
// AddChunkWithMessage. Pieces treated as URL matches get a link resolved by
// UriPayload.ResolveURI; all other pieces keep the source and link of `chunk`.
// Every chunk is created through text.NewWithStyle.
void AddContentAfterURLCheck(string content, TextChunk text, Chunk chunk)
{
// Regex.Split includes the text of captured groups in the returned array, so each
// match of the named URL group appears as its own element between the surrounding text.
// If the match is the first content of the string, the array will start with a "".
// Likewise, two matches right next to each other are separated by a "".
var splits = URLRegex.Split(content);
// A single element means nothing matched: add the content unchanged.
if (splits.Length == 1)
{
AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, content));
return;
}
// Elements alternate between plain text and URL match, starting with plain text.
var nextIsMatch = false;
foreach (var split in splits)
{
// Plain-text position (or an empty element): add it with the original link
// and expect the following element to be a URL match.
if (split == "" || !nextIsMatch)
{
nextIsMatch = true;
AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, split));
continue;
}
// Create a new TextChunk with a URIPayload for the URL text.
nextIsMatch = false;
try
{
var link = UriPayload.ResolveURI(split);
AddChunkWithMessage(text.NewWithStyle(chunk.Source, link, split));
}
catch (UriFormatException)
{
// The regex accepted the text but URI resolution rejected it:
// fall back to a chunk with the original link and log the value.
AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, split));
Plugin.Log.Debug($"Invalid URL accepted by Regex but failed URI parsing: '{split}'");
}
}
}
var checkForEmotes = (Code.IsPlayerMessage() || extraChatChannel != Guid.Empty) && Plugin.Config.ShowEmotes;
foreach (var chunk in oldChunks)
{
// Use as is if it's not a text chunk or it already has a payload.
// Use as is if it's not a text chunk, or it already has a payload.
if (chunk is not TextChunk text || chunk.Link != null)
{
// No need to call AddChunkWithMessage here since the chunk
@@ -215,28 +178,49 @@ internal partial class Message
var word = wordBuilder.ToString();
wordBuilder.Clear();
var wordUsed = false;
var tokenUsed = false;
if (checkForEmotes && EmoteCache.Exists(word) && !Plugin.Config.BlockedEmotes.Contains(word))
{
// Add the previous sentence before the emote
AddContentAfterURLCheck(sentenceBuilder.ToString(), text, chunk);
// Add the previous sentence before adding the emote
AddChunkWithMessage(text.NewWithStyle(chunk, sentenceBuilder.ToString()));
AddChunkWithMessage(new TextChunk(chunk.Source, EmotePayload.ResolveEmote(word), word) { FallbackColour = text.FallbackColour });
// Append the current punctuation symbol
wordUsed = true;
sentenceBuilder.Clear();
}
if (token.TokenType == Tokenizer.TokenType.UrlString)
{
// Add the previous sentence before adding the url
AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, sentenceBuilder.Append(!wordUsed ? word : "").ToString()));
try
{
AddChunkWithMessage(text.NewWithStyle(chunk.Source, UriPayload.ResolveURI(token.Value), token.Value));
}
catch (UriFormatException)
{
AddChunkWithMessage(text.NewWithStyle(chunk.Source, chunk.Link, token.Value));
Plugin.Log.Debug($"Invalid URL accepted by Regex but failed URI parsing: '{token.Value}'");
}
wordUsed = true;
tokenUsed = true;
sentenceBuilder.Clear();
sentenceBuilder.Append(token.Value);
continue;
}
// Append match if we haven't reached end of string yet
if (token.TokenType != Tokenizer.TokenType.SequenceTerminator)
{
sentenceBuilder.Append(word);
sentenceBuilder.Append(token.Value);
sentenceBuilder.Append(!wordUsed ? word : "");
sentenceBuilder.Append(!tokenUsed ? token.Value : "");
continue;
}
// End of string reached, we add our leftover
AddContentAfterURLCheck(sentenceBuilder.Append(word).ToString(), text, chunk);
AddChunkWithMessage(text.NewWithStyle(chunk, sentenceBuilder.Append(!wordUsed ? word : "").ToString()));
}
}
@@ -340,23 +324,6 @@ internal partial class Message
Content = newChunks;
}
/// <summary>
/// Regex object that matches URLs like:
/// - https://example.com
/// - http://example.com
/// - www.example.com
/// - https://sub.example.com
/// - example.com
/// - sub.example.com
///
/// It matches URLs with a www. or http(s):// prefix, and also matches URLs
/// without a prefix when the host ends in one of the TLDs com, net, org,
/// co, io or app.
///
/// The host may be followed by an optional port (a colon and 1 to 5 digits)
/// and an optional path (a slash followed by at least one non-whitespace
/// character). Matching is case-insensitive, and because of ExplicitCapture
/// only the named group URL, which spans the whole match, is captured.
/// </summary>
private static Regex URLRegex = new(
@"(?<URL>((https?:\/\/|www\.)[a-z0-9-]+(\.[a-z0-9-]+)*|([a-z0-9-]+(\.[a-z0-9-]+)*\.(com|net|org|co|io|app)))(:[\d]{1,5})?(\/[^\s]+)?)",
RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture
);
/// <summary>
/// Source-generated regex that matches the literal placeholders
/// <c>&lt;item&gt;</c> and <c>&lt;flag&gt;</c>. The alternatives sit inside a
/// capturing group, so Regex.Split keeps the matched placeholder in its result.
/// </summary>
[GeneratedRegex("(<item>|<flag>)")]
private static partial Regex TextParamRegex();
}
+2 -2
View File
@@ -2409,7 +2409,7 @@ namespace ChatTwo.Resources {
}
/// <summary>
/// Looks up a localized string similar to Text Preview:.
/// Looks up a localized string similar to Message Preview:.
/// </summary>
internal static string Options_Preview_Header {
get {
@@ -2427,7 +2427,7 @@ namespace ChatTwo.Resources {
}
/// <summary>
/// Looks up a localized string similar to Text preview.
/// Looks up a localized string similar to Message preview.
/// </summary>
internal static string Options_Preview_Name {
get {
+2 -2
View File
@@ -1055,7 +1055,7 @@
<value>Displays a preview with special parameter evaluated, like emotes and &lt;item&gt;</value>
</data>
<data name="Options_Preview_Name" xml:space="preserve">
<value>Text preview</value>
<value>Message preview</value>
</data>
<data name="Options_Preview_None" xml:space="preserve">
<value>None</value>
@@ -1070,7 +1070,7 @@
<value>Bottom</value>
</data>
<data name="Options_Preview_Header" xml:space="preserve">
<value>Text Preview:</value>
<value>Message Preview:</value>
</data>
<data name="Options_Preview_Tooltip" xml:space="preserve">
<value>Tooltip</value>
+3 -3
View File
@@ -49,7 +49,7 @@ public sealed class ChatLogWindow : Window
}
}
internal bool KeepFocusedThroughPreview;
internal bool FocusedPreview;
internal bool Activate;
private int ActivatePos = -1;
internal string Chat = string.Empty;
@@ -686,9 +686,9 @@ public sealed class ChatLogWindow : Window
var push = inputColour != null;
using (ImRaii.PushColor(ImGuiCol.Text, push ? ColourUtil.RgbaToAbgr(inputColour!.Value) : 0, push))
{
if (Activate || KeepFocusedThroughPreview)
if (Activate || FocusedPreview)
{
KeepFocusedThroughPreview = false;
FocusedPreview = false;
ImGui.SetKeyboardFocusHere();
}
+11 -11
View File
@@ -1,6 +1,7 @@
using System.Numerics;
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using ChatTwo.Code;
using ChatTwo.Resources;
using ChatTwo.Util;
@@ -14,7 +15,7 @@ using ImGuiNET;
namespace ChatTwo.Ui;
public class InputPreview : Window
public partial class InputPreview : Window
{
private ChatLogWindow LogWindow { get; }
@@ -231,24 +232,20 @@ public class InputPreview : Window
CursorPosition += "<flag>".Length;
else if (text.Link is EmotePayload emote)
CursorPosition += emote.Code.Length;
else if (text.Link is UriPayload)
CursorPosition += text.Content.Length;
ImGuiUtil.WrapText(text.Content, chunk, handler, LogWindow.DefaultText, lineWidth);
return;
}
var splits = text.Content.Split(" ");
for (var i = 0; i < splits.Length; i++)
foreach (var word in WhitespaceRegex().Split(text.Content).Where(s => s != string.Empty))
{
// The last character should never be an empty string
// Sorting this out because it leads to double whitespaces
if (i + 1 == splits.Length && splits[i] == "")
break;
var wordSize = ImGui.CalcTextSize(splits[i]);
var wordSize = ImGui.CalcTextSize(word);
if (ImGui.GetContentRegionAvail().X < wordSize.X)
ImGui.NewLine();
foreach (var letter in $"{splits[i]} ")
foreach (var letter in word)
{
var letterSize = ImGui.CalcTextSize(letter.ToString());
@@ -256,11 +253,14 @@ public class InputPreview : Window
if (ImGui.Selectable($"{letter}##{CursorPosition + unique}", false, ImGuiSelectableFlags.None, letterSize))
{
SelectedCursorPos = CursorPosition;
LogWindow.KeepFocusedThroughPreview = true;
LogWindow.FocusedPreview = true;
}
ImGui.SameLine();
}
}
ImGui.NewLine();
}
/// <summary>
/// Source-generated regex that matches a single whitespace character.
/// The capturing group makes Regex.Split return each whitespace character
/// as its own element instead of dropping it.
/// </summary>
[GeneratedRegex(@"(\s)")]
private static partial Regex WhitespaceRegex();
}
+39 -4
View File
@@ -16,6 +16,7 @@ public static class Tokenizer
Whitespace,
Equals,
OpenParenthesis,
UrlString,
StringValue,
Leftover,
SequenceTerminator
@@ -46,6 +47,7 @@ public static class Tokenizer
new(TokenType.Whitespace, "\\s", 1),
new(TokenType.Equals, "=", 1),
new(TokenType.OpenParenthesis, "\\(", 1),
new(TokenType.UrlString, URLRegex, 1),
new(TokenType.StringValue, "\\p{IsBasicLatin}", 2),
new(TokenType.Leftover, ".", 3)
};
@@ -85,9 +87,25 @@ public static class Tokenizer
}
}
private class TokenDefinition(TokenType returnsToken, string regexPattern, int precedence)
private class TokenDefinition
{
private readonly Regex Regex = new(regexPattern, RegexOptions.IgnoreCase|RegexOptions.Compiled);
private readonly TokenType Type;
private readonly int Precedence;
private readonly Regex Regex;
/// <summary>
/// Creates a token definition from a pattern string. The pattern is turned
/// into a case-insensitive, compiled regex.
/// </summary>
/// <param name="returnsToken">Token type reported for every match.</param>
/// <param name="regexPattern">Regular expression pattern to compile.</param>
/// <param name="precedence">Precedence value attached to every match.</param>
public TokenDefinition(TokenType returnsToken, string regexPattern, int precedence)
    : this(returnsToken, new Regex(regexPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled), precedence)
{
}
/// <summary>
/// Creates a token definition around an already constructed regex, which is
/// stored as given.
/// </summary>
/// <param name="returnsToken">Token type reported for every match.</param>
/// <param name="regex">Regex used to find matches.</param>
/// <param name="precedence">Precedence value attached to every match.</param>
public TokenDefinition(TokenType returnsToken, Regex regex, int precedence)
    => (Type, Precedence, Regex) = (returnsToken, precedence, regex);
public IEnumerable<TokenMatch> FindMatches(string inputString)
{
@@ -98,9 +116,9 @@ public static class Tokenizer
{
StartIndex = matches[i].Index,
EndIndex = matches[i].Index + matches[i].Length,
TokenType = returnsToken,
TokenType = Type,
Value = matches[i].Value,
Precedence = precedence
Precedence = Precedence
};
}
}
@@ -114,4 +132,21 @@ public static class Tokenizer
public int EndIndex { get; set; }
public int Precedence { get; set; }
}
/// <summary>
/// Regex object that matches URLs like:
/// - https://example.com
/// - http://example.com
/// - www.example.com
/// - https://sub.example.com
/// - example.com
/// - sub.example.com
///
/// It matches URLs with a www. or http(s):// prefix, and also matches URLs
/// without a prefix when the host ends in one of the TLDs com, net, org,
/// co, io or app.
///
/// The host may be followed by an optional port (a colon and 1 to 5 digits)
/// and an optional path (a slash followed by any number of non-whitespace
/// characters, so a bare trailing slash is included). Matching is
/// case-insensitive, and because of ExplicitCapture only the named group
/// URL, which spans the whole match, is captured.
/// </summary>
// NOTE(review): this field is declared after the token definition list that
// passes URLRegex to a TokenDefinition. If that list is a static field
// initializer, static fields are initialised in textual order and URLRegex
// would still be null when the list is built — confirm the declaration order.
private static Regex URLRegex = new(
@"(?<URL>((https?:\/\/|www\.)[a-z0-9-]+(\.[a-z0-9-]+)*|([a-z0-9-]+(\.[a-z0-9-]+)*\.(com|net|org|co|io|app)))(:[\d]{1,5})?(\/[^\s]*)?)",
RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture
);
}