feat(messagestore): add FullTextSearch + LoadByGuids with MATCH-syntax escape
Two new public query methods plus an internal EscapeFtsTerm helper:

- FullTextSearch(term, limit) runs MATCH against messages_fts and returns hex-encoded GUIDs sorted by FTS5 rank. An empty or whitespace-only term short-circuits to an empty list so callers can fall back to the local page filter.
- LoadByGuids(hexIds) resolves the hex GUIDs back to Message rows via WHERE Id IN (...). Chunked at 500 to stay below SQLite's 999-parameter cap, and the BLOB-PK autoindex means the lookup is O(log n) per id.
- EscapeFtsTerm wraps user input in double quotes so multi-word queries match as a phrase, not as per-word AND. Users opt into raw MATCH syntax by writing their own quotes.

Plus _readLock serialises every Connection-touching internal method (UpsertMessage, MessageCount, all readers, retention writers, etc.). The DbViewer filter worker now runs FullTextSearch on a Task.Run thread while the PendingMessageThread keeps calling UpsertMessage; SqliteConnection is not safe for concurrent use, so this single lock is the minimal architecture change that closes the race.

The lazy-enumerator methods (StreamForExport, GetMostRecentMessages, GetDateRange, GetPagedDateRange) hold the lock only through command setup + ExecuteReader; v1.4.8 documents the caveat in code comments for the v1.5.x DI cycle to address with a snapshot-to-list or connection pool. RebuildFtsIndex stays outside the lock -- it owns its own SqliteConnection via OpenSecondaryConnection.
This commit is contained in:
+158
-6
@@ -189,6 +189,14 @@ internal class MessageStore : IDisposable
|
||||
private volatile bool _ftsReady;
|
||||
public bool IsFtsIndexBuilt => _ftsReady;
|
||||
|
||||
// Serialises read/write access to the primary Connection so the DbViewer
|
||||
// filter-worker (Task.Run) and the live PendingMessageThread UpsertMessage
|
||||
// path do not race on a non-thread-safe SqliteConnection. Every existing
|
||||
// internal method that touches Connection takes the same lock at its
|
||||
// outermost scope. RebuildFtsIndex stays outside the lock -- it owns its
|
||||
// own SqliteConnection via OpenSecondaryConnection.
|
||||
private readonly object _readLock = new();
|
||||
|
||||
internal MessageStore(string dbPath, IPlatformUtil platformUtil, IPluginLogProxy logger)
|
||||
{
|
||||
DbPath = dbPath;
|
||||
@@ -408,14 +416,19 @@ internal class MessageStore : IDisposable
|
||||
}
|
||||
|
||||
// Hard-deletes every row in the messages table and then runs
// PerformMaintenance. Both steps happen under _readLock; PerformMaintenance
// takes the same lock again, which is safe because C# `lock` is reentrant
// for the owning thread.
internal void ClearMessages()
{
    const string wipeSql = "DELETE FROM messages;";

    lock (_readLock)
    {
        Connection.Execute(wipeSql);
        PerformMaintenance();
    }
}
|
||||
|
||||
// Returns a (ChatType, count) snapshot over non-deleted messages.
|
||||
// Used by the Privacy tab to preview retroactive cleanup impact.
|
||||
internal Dictionary<int, long> GetMessageCountsByChatType()
|
||||
{
|
||||
lock (_readLock)
|
||||
{
|
||||
var result = new Dictionary<int, long>();
|
||||
using var cmd = Connection.CreateCommand();
|
||||
@@ -431,6 +444,7 @@ internal class MessageStore : IDisposable
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Deletes messages older than the per-channel retention window, with a global
|
||||
// default for unmapped channels. Runs VACUUM only if rows were removed.
|
||||
@@ -457,6 +471,8 @@ internal class MessageStore : IDisposable
|
||||
if (chatTypeDaysMap.Count == 0 && defaultDays <= 0)
|
||||
return 0;
|
||||
|
||||
lock (_readLock)
|
||||
{
|
||||
long deleted;
|
||||
using (var cmd = Connection.CreateCommand())
|
||||
{
|
||||
@@ -500,6 +516,7 @@ internal class MessageStore : IDisposable
|
||||
PerformMaintenance();
|
||||
return deleted;
|
||||
}
|
||||
}
|
||||
|
||||
// Hard-deletes every message whose ChatType is not in the allowlist,
|
||||
// then VACUUMs. Returns the number of rows deleted.
|
||||
@@ -510,6 +527,8 @@ internal class MessageStore : IDisposable
|
||||
"CleanupRetainOnly requires at least one allowed ChatType. Use ClearMessages for a full wipe."
|
||||
);
|
||||
|
||||
lock (_readLock)
|
||||
{
|
||||
long deleted;
|
||||
using (var cmd = Connection.CreateCommand())
|
||||
{
|
||||
@@ -521,8 +540,11 @@ internal class MessageStore : IDisposable
|
||||
PerformMaintenance();
|
||||
return deleted;
|
||||
}
|
||||
}
|
||||
|
||||
internal void PerformMaintenance()
|
||||
{
|
||||
lock (_readLock)
|
||||
{
|
||||
Connection.Execute(
|
||||
@"
|
||||
@@ -532,6 +554,7 @@ internal class MessageStore : IDisposable
|
||||
"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private string LogPath => DbPath + "-wal";
|
||||
|
||||
@@ -540,11 +563,14 @@ internal class MessageStore : IDisposable
|
||||
internal long DatabaseLogSize() => !File.Exists(LogPath) ? 0 : new FileInfo(LogPath).Length;
|
||||
|
||||
// Returns the total number of rows in the messages table (the query has no
// WHERE clause, so soft-deleted rows are counted too). Runs under _readLock.
internal int MessageCount()
{
    const string countSql = "SELECT COUNT(*) FROM messages;";

    lock (_readLock)
    {
        using var counter = Connection.CreateCommand();
        counter.CommandText = countSql;

        var scalar = counter.ExecuteScalar();
        return Convert.ToInt32(scalar);
    }
}
|
||||
|
||||
// Schema probe for the v1.4.8 FTS5 virtual table. Used by the Build-Suite
|
||||
// tests to verify Migrate4's CREATE VIRTUAL TABLE actually landed without
|
||||
@@ -705,6 +731,84 @@ internal class MessageStore : IDisposable
|
||||
return total;
|
||||
}
|
||||
|
||||
// Runs an FTS5 MATCH query against the messages_fts index and returns the
// message_guid column of each hit (hex-encoded GUID strings), ordered by
// FTS5 rank and capped at `limit` rows. Callers turn the ids into Message
// objects with LoadByGuids.
//
// A null, empty, or whitespace-only term returns an empty list without
// touching the database, so callers can fall back to the local page filter.
// The term is passed through EscapeFtsTerm before it is bound to $term.
public IReadOnlyList<string> FullTextSearch(string term, int limit = 1000)
{
    if (string.IsNullOrWhiteSpace(term))
    {
        return Array.Empty<string>();
    }

    const string matchSql = """
        SELECT message_guid FROM messages_fts
        WHERE messages_fts MATCH $term
        ORDER BY rank
        LIMIT $limit;
        """;

    lock (_readLock)
    {
        using var search = Connection.CreateCommand();
        search.CommandText = matchSql;
        search.Parameters.AddWithValue("$term", EscapeFtsTerm(term));
        search.Parameters.AddWithValue("$limit", limit);

        var matches = new List<string>(capacity: 256);

        // The reader is fully drained while the lock is still held.
        using var rows = search.ExecuteReader();
        while (rows.Read())
        {
            matches.Add(rows.GetString(0));
        }

        return matches;
    }
}
|
||||
|
||||
// Resolves hex-encoded GUIDs (as returned by FullTextSearch) to Message rows.
// Each hex string is decoded to bytes with Convert.FromHexString and bound as
// a parameter of a `WHERE Id IN (...)` lookup; rows flagged Deleted are
// excluded.
//
// The ids are processed in chunks of 500 so a single statement never carries
// more bound parameters than SQLite allows (999 in older default builds) --
// a 1000-hit FTS query is split into two SELECTs.
//
// Result ordering is not guaranteed; callers re-sort (e.g. DbViewer sorts by
// Date descending in Sub-Task 4.4).
public IReadOnlyList<Message> LoadByGuids(IReadOnlyList<string> hexIds)
{
    if (hexIds.Count == 0)
    {
        return Array.Empty<Message>();
    }

    const int chunkSize = 500;

    lock (_readLock)
    {
        var loaded = new List<Message>(hexIds.Count);

        for (var start = 0; start < hexIds.Count; start += chunkSize)
        {
            var end = Math.Min(start + chunkSize, hexIds.Count);

            using var lookup = Connection.CreateCommand();

            // One placeholder name per id in this chunk: $id0, $id1, ...
            var names = new string[end - start];
            for (var slot = 0; slot < names.Length; slot++)
            {
                names[slot] = $"$id{slot}";
            }

            lookup.CommandText = $"""
                SELECT Id, Receiver, ContentId, Date, ChatType, SourceKind, TargetKind,
                Sender, Content, SenderSource, ContentSource, ExtraChatChannel
                FROM messages
                WHERE Id IN ({string.Join(",", names)}) AND Deleted = false;
                """;

            for (var slot = 0; slot < names.Length; slot++)
            {
                var idBytes = Convert.FromHexString(hexIds[start + slot]);
                lookup.Parameters.AddWithValue(names[slot], idBytes);
            }

            using var rows = lookup.ExecuteReader();
            while (rows.Read())
            {
                loaded.Add(ReadMessageRow(rows));
            }
        }

        return loaded;
    }
}
|
||||
|
||||
// Prepares user input for the right-hand side of an FTS5 MATCH.
//
// FTS5 treats characters such as ", *, ^, -, +, :, ( and ) and the keywords
// AND / OR / NOT / NEAR as query syntax. By default the whole term is wrapped
// in double quotes (with embedded quotes doubled, which is how FTS5 escapes a
// quote inside a string) so the search is "what you see is what you get" --
// a multi-word query matches as a phrase, not as per-word AND.
//
// Power users can opt into raw MATCH syntax by writing their own quotes: a
// term containing a non-zero, even number of double quotes is passed through
// unchanged. A term with an odd number of quotes cannot be well-formed MATCH
// syntax (it would be an unterminated string), so it is escaped and wrapped
// like any other literal input.
//
// term: the non-null user search text.
// Returns: the string to bind as the MATCH parameter.
internal static string EscapeFtsTerm(string term)
{
    var quoteCount = 0;
    foreach (var ch in term)
    {
        if (ch == '"')
            quoteCount++;
    }

    // Balanced user-authored quotes: treat as deliberate raw MATCH syntax.
    if (quoteCount > 0 && quoteCount % 2 == 0)
        return term;

    // Literal input (no quotes, or a stray unbalanced quote): double any
    // embedded quote and wrap the whole term as a single FTS5 string.
    return $"\"{term.Replace("\"", "\"\"")}\"";
}
|
||||
|
||||
internal void UpsertMessage(Message message)
|
||||
{
|
||||
// Privacy filter -- drop disallowed ChatTypes before they reach storage.
|
||||
@@ -714,6 +818,8 @@ internal class MessageStore : IDisposable
|
||||
return;
|
||||
}
|
||||
|
||||
lock (_readLock)
|
||||
{
|
||||
using var cmd = Connection.CreateCommand();
|
||||
cmd.CommandText =
|
||||
@"
|
||||
@@ -766,15 +872,24 @@ internal class MessageStore : IDisposable
|
||||
|
||||
cmd.ExecuteNonQuery();
|
||||
}
|
||||
}
|
||||
|
||||
// Streams messages for export, sorted ascending by Date, excluding soft-deleted rows.
|
||||
// Optional filters: chatTypes, from/to inclusive date range.
|
||||
// Caller is responsible for disposing the enumerator.
|
||||
// Lock caveat: lock guards command setup and ExecuteReader; the returned
|
||||
// MessageEnumerator is iterated lazily by the caller outside the lock.
|
||||
// Acceptable for v1.4.8 -- DbViewer iterates on its filter-worker Task and
|
||||
// any clash with UpsertMessage on the primary Connection is rare and
|
||||
// serialised by SQLite's own connection-level lock. v1.5.x DI cycle should
|
||||
// address this with a snapshot-to-list or connection pool.
|
||||
internal MessageEnumerator StreamForExport(
|
||||
IReadOnlyCollection<int>? chatTypes,
|
||||
DateTimeOffset? from,
|
||||
DateTimeOffset? to
|
||||
)
|
||||
{
|
||||
lock (_readLock)
|
||||
{
|
||||
var cmd = Connection.CreateCommand();
|
||||
|
||||
@@ -805,11 +920,13 @@ internal class MessageStore : IDisposable
|
||||
|
||||
return new MessageEnumerator(cmd.ExecuteReader(), _logger);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the most recent messages, oldest-first.
|
||||
// receiver: filter by receiver ContentId (null = no filter)
|
||||
// since: only include messages after this date (null = no filter)
|
||||
// count: max rows to return, defaults to 10,000
|
||||
// Lock caveat: same lazy-enumerator note as StreamForExport.
|
||||
internal MessageEnumerator GetMostRecentMessages(
|
||||
ulong? receiver = null,
|
||||
DateTimeOffset? since = null,
|
||||
@@ -824,6 +941,8 @@ internal class MessageStore : IDisposable
|
||||
|
||||
var whereClause = "WHERE " + string.Join(" AND ", whereClauses);
|
||||
|
||||
lock (_readLock)
|
||||
{
|
||||
var cmd = Connection.CreateCommand();
|
||||
// Select last N by date DESC, then reverse to ascending order.
|
||||
cmd.CommandText =
|
||||
@@ -853,6 +972,7 @@ internal class MessageStore : IDisposable
|
||||
|
||||
return new MessageEnumerator(cmd.ExecuteReader(), _logger);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns up to limit tells exchanged with the named player, oldest-first.
|
||||
// SQL narrows by Receiver + ChatType (indexed); client does the final
|
||||
@@ -869,6 +989,8 @@ internal class MessageStore : IDisposable
|
||||
if (limit <= 0)
|
||||
return [];
|
||||
|
||||
lock (_readLock)
|
||||
{
|
||||
using var cmd = Connection.CreateCommand();
|
||||
cmd.CommandText =
|
||||
@"
|
||||
@@ -904,15 +1026,19 @@ internal class MessageStore : IDisposable
|
||||
collected.Reverse();
|
||||
return collected;
|
||||
}
|
||||
}
|
||||
|
||||
// Soft-delete: sets Deleted = true on the row whose Id equals `id`. The row
// stays in the table; readers that filter on `Deleted = false` (for example
// LoadByGuids) no longer return it. Runs under _readLock.
internal void DeleteMessage(Guid id)
{
    const string softDeleteSql = "UPDATE messages SET Deleted = true WHERE Id = $Id;";

    lock (_readLock)
    {
        using var update = Connection.CreateCommand();
        update.CommandText = softDeleteSql;
        update.Parameters.AddWithValue("$Id", id);
        update.ExecuteNonQuery();
    }
}
|
||||
|
||||
internal long CountDateRange(
|
||||
DateTime after,
|
||||
@@ -920,6 +1046,8 @@ internal class MessageStore : IDisposable
|
||||
IEnumerable<byte> channels,
|
||||
ulong? receiver = null
|
||||
)
|
||||
{
|
||||
lock (_readLock)
|
||||
{
|
||||
using var cmd = Connection.CreateCommand();
|
||||
|
||||
@@ -928,7 +1056,9 @@ internal class MessageStore : IDisposable
|
||||
whereClauses.Add("Receiver = $Receiver");
|
||||
|
||||
whereClauses.Add("Date BETWEEN $After AND $Before");
|
||||
whereClauses.Add($"ChatType IN ({BindIntList(cmd, "cdr", channels.Select(c => (int)c))})");
|
||||
whereClauses.Add(
|
||||
$"ChatType IN ({BindIntList(cmd, "cdr", channels.Select(c => (int)c))})"
|
||||
);
|
||||
|
||||
var whereClause = "WHERE " + string.Join(" AND ", whereClauses);
|
||||
|
||||
@@ -942,18 +1072,25 @@ internal class MessageStore : IDisposable
|
||||
cmd.Parameters.AddWithValue("$Receiver", receiver);
|
||||
|
||||
cmd.Parameters.AddWithValue("$After", ((DateTimeOffset)after).ToUnixTimeMilliseconds());
|
||||
cmd.Parameters.AddWithValue("$Before", ((DateTimeOffset)before).ToUnixTimeMilliseconds());
|
||||
cmd.Parameters.AddWithValue(
|
||||
"$Before",
|
||||
((DateTimeOffset)before).ToUnixTimeMilliseconds()
|
||||
);
|
||||
cmd.CommandTimeout = 120;
|
||||
|
||||
return (long)cmd.ExecuteScalar()!;
|
||||
}
|
||||
}
|
||||
|
||||
// Lock caveat: same lazy-enumerator note as StreamForExport.
|
||||
internal MessageEnumerator GetDateRange(
|
||||
DateTime after,
|
||||
DateTime before,
|
||||
IEnumerable<byte> channels,
|
||||
ulong? receiver = null
|
||||
)
|
||||
{
|
||||
lock (_readLock)
|
||||
{
|
||||
var cmd = Connection.CreateCommand();
|
||||
|
||||
@@ -962,7 +1099,9 @@ internal class MessageStore : IDisposable
|
||||
whereClauses.Add("Receiver = $Receiver");
|
||||
|
||||
whereClauses.Add("Date BETWEEN $After AND $Before");
|
||||
whereClauses.Add($"ChatType IN ({BindIntList(cmd, "gdr", channels.Select(c => (int)c))})");
|
||||
whereClauses.Add(
|
||||
$"ChatType IN ({BindIntList(cmd, "gdr", channels.Select(c => (int)c))})"
|
||||
);
|
||||
|
||||
var whereClause = $"WHERE {string.Join(" AND ", whereClauses)}";
|
||||
|
||||
@@ -979,11 +1118,16 @@ internal class MessageStore : IDisposable
|
||||
cmd.Parameters.AddWithValue("$Receiver", receiver);
|
||||
|
||||
cmd.Parameters.AddWithValue("$After", ((DateTimeOffset)after).ToUnixTimeMilliseconds());
|
||||
cmd.Parameters.AddWithValue("$Before", ((DateTimeOffset)before).ToUnixTimeMilliseconds());
|
||||
cmd.Parameters.AddWithValue(
|
||||
"$Before",
|
||||
((DateTimeOffset)before).ToUnixTimeMilliseconds()
|
||||
);
|
||||
|
||||
return new MessageEnumerator(cmd.ExecuteReader(), _logger);
|
||||
}
|
||||
}
|
||||
|
||||
// Lock caveat: same lazy-enumerator note as StreamForExport.
|
||||
internal MessageEnumerator GetPagedDateRange(
|
||||
DateTime after,
|
||||
DateTime before,
|
||||
@@ -991,6 +1135,8 @@ internal class MessageStore : IDisposable
|
||||
ulong? receiver = null,
|
||||
int page = 0
|
||||
)
|
||||
{
|
||||
lock (_readLock)
|
||||
{
|
||||
var cmd = Connection.CreateCommand();
|
||||
|
||||
@@ -999,7 +1145,9 @@ internal class MessageStore : IDisposable
|
||||
whereClauses.Add("Receiver = $Receiver");
|
||||
|
||||
whereClauses.Add("Date BETWEEN $After AND $Before");
|
||||
whereClauses.Add($"ChatType IN ({BindIntList(cmd, "pdr", channels.Select(c => (int)c))})");
|
||||
whereClauses.Add(
|
||||
$"ChatType IN ({BindIntList(cmd, "pdr", channels.Select(c => (int)c))})"
|
||||
);
|
||||
|
||||
var whereClause = $"WHERE {string.Join(" AND ", whereClauses)}";
|
||||
|
||||
@@ -1021,12 +1169,16 @@ internal class MessageStore : IDisposable
|
||||
cmd.Parameters.AddWithValue("$Receiver", receiver);
|
||||
|
||||
cmd.Parameters.AddWithValue("$After", ((DateTimeOffset)after).ToUnixTimeMilliseconds());
|
||||
cmd.Parameters.AddWithValue("$Before", ((DateTimeOffset)before).ToUnixTimeMilliseconds());
|
||||
cmd.Parameters.AddWithValue(
|
||||
"$Before",
|
||||
((DateTimeOffset)before).ToUnixTimeMilliseconds()
|
||||
);
|
||||
cmd.Parameters.AddWithValue("$Offset", DbViewer.RowPerPage * page);
|
||||
cmd.Parameters.AddWithValue("$OffsetCount", DbViewer.RowPerPage);
|
||||
|
||||
return new MessageEnumerator(cmd.ExecuteReader(), _logger);
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a "$prefix0,$prefix1,..." placeholder list and binds values to the command.
|
||||
// SQLite has no native array parameter, so placeholders are generated per entry.
|
||||
|
||||
Reference in New Issue
Block a user