feat(messagestore): add FullTextSearch + LoadByGuids with MATCH-syntax escape

Two new public query methods plus an internal EscapeFtsTerm helper:
- FullTextSearch(term, limit) runs MATCH against messages_fts and returns
  hex-encoded GUIDs sorted by FTS5 rank. Empty/whitespace short-circuits
  to an empty list so callers can fall back to the local page filter.
- LoadByGuids(hexIds) resolves the hex GUIDs back to Message rows via
  WHERE Id IN (...). Chunked at 500 to stay below SQLite's 999-parameter
  cap (the default in older SQLite builds; newer builds allow more), and
  the BLOB-PK autoindex means the lookup is O(log n) per id.
- EscapeFtsTerm wraps user input in double-quotes so multi-word queries
  match as a phrase, not as per-word AND. Users opt into raw MATCH
  syntax by writing their own quotes.

Plus _readLock serialises every Connection-touching internal method
(UpsertMessage, MessageCount, all readers, retention writers, etc.).
The DbViewer filter worker now runs FullTextSearch on a Task.Run thread
while the PendingMessageThread keeps calling UpsertMessage; SqliteConnection
is not safe for concurrent use, so this single lock is the minimal
architecture change that closes the race. The lazy-enumerator methods
(StreamForExport, GetMostRecentMessages, GetDateRange, GetPagedDateRange)
hold the lock only through command setup + ExecuteReader; v1.4.8 documents
this caveat in code comments, and the v1.5.x DI cycle is expected to address
it with a snapshot-to-list or connection pool.

RebuildFtsIndex stays outside the lock -- it owns its own SqliteConnection
via OpenSecondaryConnection.
This commit is contained in:
2026-05-13 21:27:17 +02:00
parent d26c4701fa
commit b2a0f3a77c
+158 -6
View File
@@ -189,6 +189,14 @@ internal class MessageStore : IDisposable
private volatile bool _ftsReady;
public bool IsFtsIndexBuilt => _ftsReady;
// Serialises read/write access to the primary Connection so the DbViewer
// filter-worker (Task.Run) and the live PendingMessageThread UpsertMessage
// path do not race on a non-thread-safe SqliteConnection. Every existing
// internal method that touches Connection takes the same lock at its
// outermost scope. RebuildFtsIndex stays outside the lock -- it owns its
// own SqliteConnection via OpenSecondaryConnection.
//
// Despite the name, this lock guards writers as well as readers
// (UpsertMessage, DeleteMessage and the cleanup methods all take it).
// It is re-entered on the same thread: ClearMessages calls
// PerformMaintenance while already holding it, which works because the
// C# lock statement on an object is re-entrant per thread.
//
// Methods that return a MessageEnumerator release the lock before the
// caller iterates, so row reads from those enumerators are not covered.
private readonly object _readLock = new();
internal MessageStore(string dbPath, IPlatformUtil platformUtil, IPluginLogProxy logger)
{
DbPath = dbPath;
@@ -408,14 +416,19 @@ internal class MessageStore : IDisposable
}
// Removes every row from the messages table and then runs the maintenance
// pass. Both steps happen under _readLock so no other Connection user can
// interleave between the DELETE and the maintenance statements.
internal void ClearMessages()
{
    const string deleteAllSql = "DELETE FROM messages;";

    lock (_readLock)
    {
        Connection.Execute(deleteAllSql);

        // PerformMaintenance takes _readLock again on this same thread.
        PerformMaintenance();
    }
}
// Returns a (ChatType, count) snapshot over non-deleted messages.
// Used by the Privacy tab to preview retroactive cleanup impact.
internal Dictionary<int, long> GetMessageCountsByChatType()
{
lock (_readLock)
{
var result = new Dictionary<int, long>();
using var cmd = Connection.CreateCommand();
@@ -431,6 +444,7 @@ internal class MessageStore : IDisposable
}
return result;
}
}
// Deletes messages older than the per-channel retention window, with a global
// default for unmapped channels. Runs VACUUM only if rows were removed.
@@ -457,6 +471,8 @@ internal class MessageStore : IDisposable
if (chatTypeDaysMap.Count == 0 && defaultDays <= 0)
return 0;
lock (_readLock)
{
long deleted;
using (var cmd = Connection.CreateCommand())
{
@@ -500,6 +516,7 @@ internal class MessageStore : IDisposable
PerformMaintenance();
return deleted;
}
}
// Hard-deletes every message whose ChatType is not in the allowlist,
// then VACUUMs. Returns the number of rows deleted.
@@ -510,6 +527,8 @@ internal class MessageStore : IDisposable
"CleanupRetainOnly requires at least one allowed ChatType. Use ClearMessages for a full wipe."
);
lock (_readLock)
{
long deleted;
using (var cmd = Connection.CreateCommand())
{
@@ -521,8 +540,11 @@ internal class MessageStore : IDisposable
PerformMaintenance();
return deleted;
}
}
internal void PerformMaintenance()
{
lock (_readLock)
{
Connection.Execute(
@"
@@ -532,6 +554,7 @@ internal class MessageStore : IDisposable
"
);
}
}
private string LogPath => DbPath + "-wal";
@@ -540,11 +563,14 @@ internal class MessageStore : IDisposable
// Returns the size in bytes of the write-ahead log next to the database
// (LogPath), or 0 when that file does not exist.
internal long DatabaseLogSize()
{
    // Use one FileInfo snapshot for both the existence check and the length.
    // A separate File.Exists followed by a fresh FileInfo.Length can throw
    // FileNotFoundException if the -wal file is removed between the two calls.
    var walInfo = new FileInfo(LogPath);
    return walInfo.Exists ? walInfo.Length : 0;
}
// Returns the number of rows in the messages table. The query has no WHERE
// clause, so soft-deleted rows are counted as well.
internal int MessageCount()
{
    lock (_readLock)
    {
        using var countCommand = Connection.CreateCommand();
        countCommand.CommandText = "SELECT COUNT(*) FROM messages;";

        var scalar = countCommand.ExecuteScalar();
        return Convert.ToInt32(scalar);
    }
}
// Schema probe for the v1.4.8 FTS5 virtual table. Used by the Build-Suite
// tests to verify Migrate4's CREATE VIRTUAL TABLE actually landed without
@@ -705,6 +731,84 @@ internal class MessageStore : IDisposable
return total;
}
// FTS5 full-text search across the entire messages_fts index.
//
// term:  user-entered search text. It is passed through EscapeFtsTerm
//        before being bound to MATCH.
// limit: maximum number of hits to return (default 1000).
//
// Returns the message_guid column of the matching rows, ordered by FTS5
// rank. These are hex-encoded GUIDs; the caller resolves them to Message
// objects via LoadByGuids.
//
// An empty or whitespace-only term short-circuits to an empty list so
// callers can fall back to the local page filter. A non-positive limit also
// returns an empty list: SQLite treats a negative LIMIT as "no upper bound",
// which would silently defeat the cap.
//
// NOTE(review): a syntactically invalid raw MATCH expression presumably
// surfaces as a provider exception from ExecuteReader -- confirm that the
// caller handles it.
public IReadOnlyList<string> FullTextSearch(string term, int limit = 1000)
{
    if (limit <= 0 || string.IsNullOrWhiteSpace(term))
        return Array.Empty<string>();

    lock (_readLock)
    {
        var hexIds = new List<string>(capacity: 256);

        using var cmd = Connection.CreateCommand();
        cmd.CommandText = """
            SELECT message_guid FROM messages_fts
            WHERE messages_fts MATCH $term
            ORDER BY rank
            LIMIT $limit;
            """;
        cmd.Parameters.AddWithValue("$term", EscapeFtsTerm(term));
        cmd.Parameters.AddWithValue("$limit", limit);

        using var reader = cmd.ExecuteReader();
        while (reader.Read())
            hexIds.Add(reader.GetString(0));

        return hexIds;
    }
}
// Resolves hex-encoded GUIDs (as returned by FullTextSearch) to Message rows.
//
// Each hex string is decoded to bytes and bound as one parameter of an
// Id IN (...) lookup; rows flagged Deleted are excluded. The ids are
// processed in chunks of 500 so a single statement stays below SQLite's
// bound-parameter limit (999 by default in older SQLite builds) -- a
// 1000-hit FTS query never explodes the SELECT.
//
// Result ordering is not guaranteed; callers re-sort (e.g. DbViewer sorts
// by Date descending in Sub-Task 4.4). Ids with no matching live row are
// simply absent from the result. The whole operation, across all chunks,
// runs under _readLock.
public IReadOnlyList<Message> LoadByGuids(IReadOnlyList<string> hexIds)
{
    if (hexIds.Count == 0)
        return Array.Empty<Message>();

    lock (_readLock)
    {
        const int chunkSize = 500;
        var messages = new List<Message>(hexIds.Count);

        for (var start = 0; start < hexIds.Count; start += chunkSize)
        {
            var batchLength = Math.Min(chunkSize, hexIds.Count - start);

            using var cmd = Connection.CreateCommand();

            // Bind one $idN parameter per id in this chunk and remember the
            // names so the IN (...) list can be generated from them.
            var parameterNames = new string[batchLength];
            for (var i = 0; i < batchLength; i++)
            {
                parameterNames[i] = $"$id{i}";
                cmd.Parameters.AddWithValue(
                    parameterNames[i],
                    Convert.FromHexString(hexIds[start + i])
                );
            }

            var placeholders = string.Join(",", parameterNames);
            cmd.CommandText = $"""
                SELECT Id, Receiver, ContentId, Date, ChatType, SourceKind, TargetKind,
                Sender, Content, SenderSource, ContentSource, ExtraChatChannel
                FROM messages
                WHERE Id IN ({placeholders}) AND Deleted = false;
                """;

            using var rows = cmd.ExecuteReader();
            while (rows.Read())
                messages.Add(ReadMessageRow(rows));
        }

        return messages;
    }
}
// Turns user-entered search text into a safe FTS5 MATCH expression.
//
// Outside a quoted string FTS5 only accepts barewords and its own operators,
// so characters such as ", *, ^, - or : are interpreted as query syntax.
// Wrapping the term in double quotes makes the search "what you see is what
// you get" -- a multi-word query matches as a phrase, not as per-word AND.
//
// Power users can opt into raw MATCH syntax by writing their own quotes:
// a term whose double quotes are balanced (even, non-zero count) is passed
// through unchanged. A term with an odd number of quotes can never be a
// valid MATCH expression (it ends inside an unterminated string), so it is
// treated as literal text instead: embedded quotes are doubled, which is
// the FTS5 escape, and the whole term is phrase-quoted.
internal static string EscapeFtsTerm(string term)
{
    var quoteCount = 0;
    foreach (var ch in term)
    {
        if (ch == '"')
            quoteCount++;
    }

    // Balanced quotes: treat as deliberate raw MATCH syntax.
    if (quoteCount > 0 && quoteCount % 2 == 0)
        return term;

    var escaped = term.Replace("\"", "\"\"");
    return "\"" + escaped + "\"";
}
internal void UpsertMessage(Message message)
{
// Privacy filter -- drop disallowed ChatTypes before they reach storage.
@@ -714,6 +818,8 @@ internal class MessageStore : IDisposable
return;
}
lock (_readLock)
{
using var cmd = Connection.CreateCommand();
cmd.CommandText =
@"
@@ -766,15 +872,24 @@ internal class MessageStore : IDisposable
cmd.ExecuteNonQuery();
}
}
// Streams messages for export, sorted ascending by Date, excluding soft-deleted rows.
// Optional filters: chatTypes, from/to inclusive date range.
// Caller is responsible for disposing the enumerator.
// Lock caveat: the lock guards only command setup and ExecuteReader; the
// returned MessageEnumerator is iterated lazily by the caller outside the
// lock, so row reads can still overlap with UpsertMessage on the primary
// Connection. Accepted for v1.4.8 -- DbViewer iterates on its filter-worker
// Task, such overlap is expected to be rare, and it relies on SQLite's own
// connection-level lock rather than on _readLock. The v1.5.x DI cycle should
// address this with a snapshot-to-list or connection pool.
internal MessageEnumerator StreamForExport(
IReadOnlyCollection<int>? chatTypes,
DateTimeOffset? from,
DateTimeOffset? to
)
{
lock (_readLock)
{
var cmd = Connection.CreateCommand();
@@ -805,11 +920,13 @@ internal class MessageStore : IDisposable
return new MessageEnumerator(cmd.ExecuteReader(), _logger);
}
}
// Returns the most recent messages, oldest-first.
// receiver: filter by receiver ContentId (null = no filter)
// since: only include messages after this date (null = no filter)
// count: max rows to return, defaults to 10,000
// Lock caveat: same lazy-enumerator note as StreamForExport.
internal MessageEnumerator GetMostRecentMessages(
ulong? receiver = null,
DateTimeOffset? since = null,
@@ -824,6 +941,8 @@ internal class MessageStore : IDisposable
var whereClause = "WHERE " + string.Join(" AND ", whereClauses);
lock (_readLock)
{
var cmd = Connection.CreateCommand();
// Select last N by date DESC, then reverse to ascending order.
cmd.CommandText =
@@ -853,6 +972,7 @@ internal class MessageStore : IDisposable
return new MessageEnumerator(cmd.ExecuteReader(), _logger);
}
}
// Returns up to limit tells exchanged with the named player, oldest-first.
// SQL narrows by Receiver + ChatType (indexed); client does the final
@@ -869,6 +989,8 @@ internal class MessageStore : IDisposable
if (limit <= 0)
return [];
lock (_readLock)
{
using var cmd = Connection.CreateCommand();
cmd.CommandText =
@"
@@ -904,15 +1026,19 @@ internal class MessageStore : IDisposable
collected.Reverse();
return collected;
}
}
// Soft-deletes a message: the row stays in the table but is flagged
// Deleted = true, so queries that filter on Deleted no longer return it.
// Runs under _readLock like every other user of the primary Connection.
//
// NOTE(review): the Guid is bound directly here, whereas LoadByGuids binds
// raw bytes against the same Id column -- confirm that the provider
// serialises a Guid parameter in the form the Id column actually stores.
internal void DeleteMessage(Guid id)
{
    lock (_readLock)
    {
        using var softDelete = Connection.CreateCommand();
        softDelete.CommandText = "UPDATE messages SET Deleted = true WHERE Id = $Id;";
        softDelete.Parameters.AddWithValue("$Id", id);
        softDelete.ExecuteNonQuery();
    }
}
internal long CountDateRange(
DateTime after,
@@ -920,6 +1046,8 @@ internal class MessageStore : IDisposable
IEnumerable<byte> channels,
ulong? receiver = null
)
{
lock (_readLock)
{
using var cmd = Connection.CreateCommand();
@@ -928,7 +1056,9 @@ internal class MessageStore : IDisposable
whereClauses.Add("Receiver = $Receiver");
whereClauses.Add("Date BETWEEN $After AND $Before");
whereClauses.Add($"ChatType IN ({BindIntList(cmd, "cdr", channels.Select(c => (int)c))})");
whereClauses.Add(
$"ChatType IN ({BindIntList(cmd, "cdr", channels.Select(c => (int)c))})"
);
var whereClause = "WHERE " + string.Join(" AND ", whereClauses);
@@ -942,18 +1072,25 @@ internal class MessageStore : IDisposable
cmd.Parameters.AddWithValue("$Receiver", receiver);
cmd.Parameters.AddWithValue("$After", ((DateTimeOffset)after).ToUnixTimeMilliseconds());
cmd.Parameters.AddWithValue("$Before", ((DateTimeOffset)before).ToUnixTimeMilliseconds());
cmd.Parameters.AddWithValue(
"$Before",
((DateTimeOffset)before).ToUnixTimeMilliseconds()
);
cmd.CommandTimeout = 120;
return (long)cmd.ExecuteScalar()!;
}
}
// Lock caveat: same lazy-enumerator note as StreamForExport.
internal MessageEnumerator GetDateRange(
DateTime after,
DateTime before,
IEnumerable<byte> channels,
ulong? receiver = null
)
{
lock (_readLock)
{
var cmd = Connection.CreateCommand();
@@ -962,7 +1099,9 @@ internal class MessageStore : IDisposable
whereClauses.Add("Receiver = $Receiver");
whereClauses.Add("Date BETWEEN $After AND $Before");
whereClauses.Add($"ChatType IN ({BindIntList(cmd, "gdr", channels.Select(c => (int)c))})");
whereClauses.Add(
$"ChatType IN ({BindIntList(cmd, "gdr", channels.Select(c => (int)c))})"
);
var whereClause = $"WHERE {string.Join(" AND ", whereClauses)}";
@@ -979,11 +1118,16 @@ internal class MessageStore : IDisposable
cmd.Parameters.AddWithValue("$Receiver", receiver);
cmd.Parameters.AddWithValue("$After", ((DateTimeOffset)after).ToUnixTimeMilliseconds());
cmd.Parameters.AddWithValue("$Before", ((DateTimeOffset)before).ToUnixTimeMilliseconds());
cmd.Parameters.AddWithValue(
"$Before",
((DateTimeOffset)before).ToUnixTimeMilliseconds()
);
return new MessageEnumerator(cmd.ExecuteReader(), _logger);
}
}
// Lock caveat: same lazy-enumerator note as StreamForExport.
internal MessageEnumerator GetPagedDateRange(
DateTime after,
DateTime before,
@@ -991,6 +1135,8 @@ internal class MessageStore : IDisposable
ulong? receiver = null,
int page = 0
)
{
lock (_readLock)
{
var cmd = Connection.CreateCommand();
@@ -999,7 +1145,9 @@ internal class MessageStore : IDisposable
whereClauses.Add("Receiver = $Receiver");
whereClauses.Add("Date BETWEEN $After AND $Before");
whereClauses.Add($"ChatType IN ({BindIntList(cmd, "pdr", channels.Select(c => (int)c))})");
whereClauses.Add(
$"ChatType IN ({BindIntList(cmd, "pdr", channels.Select(c => (int)c))})"
);
var whereClause = $"WHERE {string.Join(" AND ", whereClauses)}";
@@ -1021,12 +1169,16 @@ internal class MessageStore : IDisposable
cmd.Parameters.AddWithValue("$Receiver", receiver);
cmd.Parameters.AddWithValue("$After", ((DateTimeOffset)after).ToUnixTimeMilliseconds());
cmd.Parameters.AddWithValue("$Before", ((DateTimeOffset)before).ToUnixTimeMilliseconds());
cmd.Parameters.AddWithValue(
"$Before",
((DateTimeOffset)before).ToUnixTimeMilliseconds()
);
cmd.Parameters.AddWithValue("$Offset", DbViewer.RowPerPage * page);
cmd.Parameters.AddWithValue("$OffsetCount", DbViewer.RowPerPage);
return new MessageEnumerator(cmd.ExecuteReader(), _logger);
}
}
// Builds a "$prefix0,$prefix1,..." placeholder list and binds values to the command.
// SQLite has no native array parameter, so placeholders are generated per entry.