This commit is contained in:
Aiden
2026-05-11 19:58:14 +10:00
parent 205c90e52e
commit 1629dbc77a
8 changed files with 204 additions and 7 deletions

View File

@@ -18,12 +18,20 @@ PersistenceWriter::~PersistenceWriter()
StopAndFlush(); StopAndFlush();
} }
bool PersistenceWriter::WriteSnapshot(const PersistenceSnapshot& snapshot, std::string& error) const void PersistenceWriter::SetResultCallback(ResultCallback callback)
{
std::lock_guard<std::mutex> lock(mMutex);
mResultCallback = std::move(callback);
}
bool PersistenceWriter::WriteSnapshot(const PersistenceSnapshot& snapshot, std::string& error)
{ {
if (!ValidateSnapshot(snapshot, error)) if (!ValidateSnapshot(snapshot, error))
return false; return false;
return WriteSnapshotThroughSink(snapshot, error); const bool succeeded = WriteSnapshotThroughSink(snapshot, error);
PublishWriteResult(snapshot, succeeded, error, false);
return succeeded;
} }
bool PersistenceWriter::EnqueueSnapshot(const PersistenceSnapshot& snapshot, std::string& error) bool PersistenceWriter::EnqueueSnapshot(const PersistenceSnapshot& snapshot, std::string& error)
@@ -137,6 +145,27 @@ bool PersistenceWriter::WriteSnapshotThroughSink(const PersistenceSnapshot& snap
return true; return true;
} }
void PersistenceWriter::PublishWriteResult(const PersistenceSnapshot& snapshot, bool succeeded, const std::string& errorMessage, bool newerRequestPending)
{
ResultCallback callback;
{
std::lock_guard<std::mutex> lock(mMutex);
callback = mResultCallback;
}
if (!callback)
return;
PersistenceWriteResult result;
result.targetKind = snapshot.targetKind;
result.targetPath = snapshot.targetPath.string();
result.reason = snapshot.reason;
result.succeeded = succeeded;
result.errorMessage = errorMessage;
result.newerRequestPending = newerRequestPending;
callback(result);
}
void PersistenceWriter::StartWorkerLocked() void PersistenceWriter::StartWorkerLocked()
{ {
if (mWorkerRunning) if (mWorkerRunning)
@@ -201,13 +230,16 @@ void PersistenceWriter::WorkerMain()
std::string error; std::string error;
const bool succeeded = WriteSnapshotThroughSink(snapshot, error); const bool succeeded = WriteSnapshotThroughSink(snapshot, error);
bool newerRequestPending = false;
{ {
std::lock_guard<std::mutex> lock(mMutex); std::lock_guard<std::mutex> lock(mMutex);
if (succeeded) if (succeeded)
++mWrittenCount; ++mWrittenCount;
else else
++mFailedCount; ++mFailedCount;
newerRequestPending = PendingCountLocked() > 0;
} }
PublishWriteResult(snapshot, succeeded, error, newerRequestPending);
} }
} }

View File

@@ -21,17 +21,29 @@ struct PersistenceWriterMetrics
uint64_t failedCount = 0; uint64_t failedCount = 0;
}; };
// Outcome of a single snapshot write, as delivered to PersistenceWriter::ResultCallback.
struct PersistenceWriteResult
{
    // Kind of persistence target the written snapshot was aimed at.
    PersistenceTargetKind targetKind{PersistenceTargetKind::RuntimeState};
    // Target path of the snapshot, converted to a string.
    std::string targetPath;
    // Reason string carried by the snapshot.
    std::string reason;
    // True when the write succeeded.
    bool succeeded{false};
    // Error text reported by the write.
    std::string errorMessage;
    // True when more queued work was pending at the time the result was produced.
    bool newerRequestPending{false};
};
class PersistenceWriter class PersistenceWriter
{ {
public: public:
using SnapshotSink = std::function<bool(const PersistenceSnapshot&, std::string&)>; using SnapshotSink = std::function<bool(const PersistenceSnapshot&, std::string&)>;
using ResultCallback = std::function<void(const PersistenceWriteResult&)>;
explicit PersistenceWriter( explicit PersistenceWriter(
std::chrono::milliseconds debounceDelay = std::chrono::milliseconds(50), std::chrono::milliseconds debounceDelay = std::chrono::milliseconds(50),
SnapshotSink sink = SnapshotSink()); SnapshotSink sink = SnapshotSink());
~PersistenceWriter(); ~PersistenceWriter();
bool WriteSnapshot(const PersistenceSnapshot& snapshot, std::string& error) const; void SetResultCallback(ResultCallback callback);
bool WriteSnapshot(const PersistenceSnapshot& snapshot, std::string& error);
bool EnqueueSnapshot(const PersistenceSnapshot& snapshot, std::string& error); bool EnqueueSnapshot(const PersistenceSnapshot& snapshot, std::string& error);
void StopAndFlush(); void StopAndFlush();
PersistenceWriterMetrics GetMetrics() const; PersistenceWriterMetrics GetMetrics() const;
@@ -45,12 +57,14 @@ private:
bool ValidateSnapshot(const PersistenceSnapshot& snapshot, std::string& error) const; bool ValidateSnapshot(const PersistenceSnapshot& snapshot, std::string& error) const;
bool WriteSnapshotThroughSink(const PersistenceSnapshot& snapshot, std::string& error) const; bool WriteSnapshotThroughSink(const PersistenceSnapshot& snapshot, std::string& error) const;
void PublishWriteResult(const PersistenceSnapshot& snapshot, bool succeeded, const std::string& errorMessage, bool newerRequestPending);
void StartWorkerLocked(); void StartWorkerLocked();
void WorkerMain(); void WorkerMain();
std::size_t PendingCountLocked() const; std::size_t PendingCountLocked() const;
std::chrono::milliseconds mDebounceDelay; std::chrono::milliseconds mDebounceDelay;
SnapshotSink mSink; SnapshotSink mSink;
ResultCallback mResultCallback;
mutable std::mutex mMutex; mutable std::mutex mMutex;
std::condition_variable mCondition; std::condition_variable mCondition;
std::thread mWorker; std::thread mWorker;

View File

@@ -24,6 +24,21 @@ double GenerateStartupRandom()
return distribution(randomDevice); return distribution(randomDevice);
} }
// Returns the textual label for a persistence target kind.
// Any value without a dedicated case is reported as "unknown".
std::string PersistenceTargetKindName(PersistenceTargetKind targetKind)
{
    const char* label = "unknown";
    switch (targetKind)
    {
    case PersistenceTargetKind::RuntimeState:
        label = "runtime-state";
        break;
    case PersistenceTargetKind::StackPreset:
        label = "stack-preset";
        break;
    case PersistenceTargetKind::RuntimeConfig:
        label = "runtime-config";
        break;
    default:
        break;
    }
    return label;
}
} }
RuntimeStore::RuntimeStore() : RuntimeStore::RuntimeStore() :
@@ -37,6 +52,15 @@ RuntimeStore::RuntimeStore() :
mStartTime(std::chrono::steady_clock::now()), mStartTime(std::chrono::steady_clock::now()),
mLastScanTime((std::chrono::steady_clock::time_point::min)()) mLastScanTime((std::chrono::steady_clock::time_point::min)())
{ {
// Forward every write result published by the persistence writer into health
// telemetry, translating the target kind enum into its string label.
// NOTE(review): the lambda captures `this` and is stored inside mPersistenceWriter,
// which may invoke it from its worker thread. Confirm the writer is stopped before
// mHealthTelemetry is destroyed so no late callback touches a dead object.
mPersistenceWriter.SetResultCallback([this](const PersistenceWriteResult& result) {
mHealthTelemetry.RecordPersistenceWriteResult(
result.succeeded,
PersistenceTargetKindName(result.targetKind),
result.targetPath,
result.reason,
result.errorMessage,
result.newerRequestPending);
});
} }
HealthTelemetry& RuntimeStore::GetHealthTelemetry() HealthTelemetry& RuntimeStore::GetHealthTelemetry()

View File

@@ -169,6 +169,44 @@ bool HealthTelemetry::TryRecordRuntimeEventDispatchStats(std::size_t dispatchedE
return true; return true;
} }
// Records the outcome of one persistence write in the persistence health state.
//
// Increments the success or failure counter and overwrites the "last write"
// fields with the supplied values. unsavedChanges is set unless the write
// succeeded and no newer request is pending. Waits for mMutex.
void HealthTelemetry::RecordPersistenceWriteResult(bool succeeded, const std::string& targetKind, const std::string& targetPath,
    const std::string& reason, const std::string& errorMessage, bool newerRequestPending)
{
    std::lock_guard<std::mutex> guard(mMutex);

    auto& tally = succeeded ? mPersistence.writeSuccessCount : mPersistence.writeFailureCount;
    ++tally;

    const bool fullySaved = succeeded && !newerRequestPending;
    mPersistence.lastWriteSucceeded = succeeded;
    mPersistence.unsavedChanges = !fullySaved;
    mPersistence.newerRequestPending = newerRequestPending;
    mPersistence.lastTargetKind = targetKind;
    mPersistence.lastTargetPath = targetPath;
    mPersistence.lastReason = reason;
    mPersistence.lastErrorMessage = errorMessage;
}
// Non-blocking variant of RecordPersistenceWriteResult.
//
// Attempts to take mMutex without waiting. Returns false and records nothing
// when the lock is not acquired; otherwise applies the same counter and
// "last write" updates and returns true.
bool HealthTelemetry::TryRecordPersistenceWriteResult(bool succeeded, const std::string& targetKind, const std::string& targetPath,
    const std::string& reason, const std::string& errorMessage, bool newerRequestPending)
{
    std::unique_lock<std::mutex> guard(mMutex, std::try_to_lock);
    if (!guard)
        return false;

    auto& tally = succeeded ? mPersistence.writeSuccessCount : mPersistence.writeFailureCount;
    ++tally;

    const bool fullySaved = succeeded && !newerRequestPending;
    mPersistence.lastWriteSucceeded = succeeded;
    mPersistence.unsavedChanges = !fullySaved;
    mPersistence.newerRequestPending = newerRequestPending;
    mPersistence.lastTargetKind = targetKind;
    mPersistence.lastTargetPath = targetPath;
    mPersistence.lastReason = reason;
    mPersistence.lastErrorMessage = errorMessage;
    return true;
}
HealthTelemetry::SignalStatusSnapshot HealthTelemetry::GetSignalStatusSnapshot() const HealthTelemetry::SignalStatusSnapshot HealthTelemetry::GetSignalStatusSnapshot() const
{ {
std::lock_guard<std::mutex> lock(mMutex); std::lock_guard<std::mutex> lock(mMutex);
@@ -193,6 +231,12 @@ HealthTelemetry::RuntimeEventMetricsSnapshot HealthTelemetry::GetRuntimeEventMet
return mRuntimeEvents; return mRuntimeEvents;
} }
// Returns a copy of the persistence health state, taken while holding mMutex.
HealthTelemetry::PersistenceSnapshot HealthTelemetry::GetPersistenceSnapshot() const
{
    std::lock_guard<std::mutex> guard(mMutex);
    PersistenceSnapshot copy = mPersistence;
    return copy;
}
HealthTelemetry::Snapshot HealthTelemetry::GetSnapshot() const HealthTelemetry::Snapshot HealthTelemetry::GetSnapshot() const
{ {
std::lock_guard<std::mutex> lock(mMutex); std::lock_guard<std::mutex> lock(mMutex);
@@ -202,5 +246,6 @@ HealthTelemetry::Snapshot HealthTelemetry::GetSnapshot() const
snapshot.videoIO = mVideoIOStatus; snapshot.videoIO = mVideoIOStatus;
snapshot.performance = mPerformance; snapshot.performance = mPerformance;
snapshot.runtimeEvents = mRuntimeEvents; snapshot.runtimeEvents = mRuntimeEvents;
snapshot.persistence = mPersistence;
return snapshot; return snapshot;
} }

View File

@@ -69,12 +69,26 @@ public:
RuntimeEventDispatchSnapshot dispatch; RuntimeEventDispatchSnapshot dispatch;
}; };
// Persistence health state: running write counters plus details of the most
// recently recorded write.
struct PersistenceSnapshot
{
    // Number of recorded writes that succeeded / failed.
    uint64_t writeSuccessCount{0};
    uint64_t writeFailureCount{0};
    // Outcome of the most recently recorded write; starts out true.
    bool lastWriteSucceeded{true};
    // Set when the last recorded write failed or a newer request was still pending.
    bool unsavedChanges{false};
    // Whether a newer request was pending when the last write was recorded.
    bool newerRequestPending{false};
    // Target kind label, target path, reason and error text of the last recorded write.
    std::string lastTargetKind;
    std::string lastTargetPath;
    std::string lastReason;
    std::string lastErrorMessage;
};
struct Snapshot struct Snapshot
{ {
SignalStatusSnapshot signal; SignalStatusSnapshot signal;
VideoIOStatusSnapshot videoIO; VideoIOStatusSnapshot videoIO;
PerformanceSnapshot performance; PerformanceSnapshot performance;
RuntimeEventMetricsSnapshot runtimeEvents; RuntimeEventMetricsSnapshot runtimeEvents;
PersistenceSnapshot persistence;
}; };
HealthTelemetry() = default; HealthTelemetry() = default;
@@ -107,10 +121,16 @@ public:
bool TryRecordRuntimeEventDispatchStats(std::size_t dispatchedEvents, std::size_t handlerInvocations, bool TryRecordRuntimeEventDispatchStats(std::size_t dispatchedEvents, std::size_t handlerInvocations,
std::size_t handlerFailures, double dispatchDurationMilliseconds); std::size_t handlerFailures, double dispatchDurationMilliseconds);
// Records the outcome of one persistence write: bumps the success or failure
// counter and stores the target kind, target path, reason, error message and
// pending-newer-request flag as the latest persistence state. Waits for the
// telemetry mutex.
void RecordPersistenceWriteResult(bool succeeded, const std::string& targetKind, const std::string& targetPath,
const std::string& reason, const std::string& errorMessage, bool newerRequestPending);
// Same update as RecordPersistenceWriteResult, but only tries to take the
// telemetry mutex. Returns false without recording anything when the mutex is
// not acquired, true once the result has been recorded.
bool TryRecordPersistenceWriteResult(bool succeeded, const std::string& targetKind, const std::string& targetPath,
const std::string& reason, const std::string& errorMessage, bool newerRequestPending);
SignalStatusSnapshot GetSignalStatusSnapshot() const; SignalStatusSnapshot GetSignalStatusSnapshot() const;
VideoIOStatusSnapshot GetVideoIOStatusSnapshot() const; VideoIOStatusSnapshot GetVideoIOStatusSnapshot() const;
PerformanceSnapshot GetPerformanceSnapshot() const; PerformanceSnapshot GetPerformanceSnapshot() const;
RuntimeEventMetricsSnapshot GetRuntimeEventMetricsSnapshot() const; RuntimeEventMetricsSnapshot GetRuntimeEventMetricsSnapshot() const;
PersistenceSnapshot GetPersistenceSnapshot() const;
Snapshot GetSnapshot() const; Snapshot GetSnapshot() const;
private: private:
@@ -119,4 +139,5 @@ private:
VideoIOStatusSnapshot mVideoIOStatus; VideoIOStatusSnapshot mVideoIOStatus;
PerformanceSnapshot mPerformance; PerformanceSnapshot mPerformance;
RuntimeEventMetricsSnapshot mRuntimeEvents; RuntimeEventMetricsSnapshot mRuntimeEvents;
PersistenceSnapshot mPersistence;
}; };

View File

@@ -7,7 +7,7 @@ Phases 1-5 separate durable state, coordination policy, render-facing snapshots,
## Status ## Status
- Phase 6 design package: proposed. - Phase 6 design package: proposed.
- Phase 6 implementation: Step 3 complete. - Phase 6 implementation: Step 4 complete.
- Current alignment: `RuntimeStore` owns durable serialization, config, package metadata, preset IO, and persistence requests; `CommittedLiveState` owns the current committed/session layer state; and `RuntimeCoordinator` publishes typed persistence requests for persisted mutations. The remaining issue is that actual disk writes are still synchronous store work rather than queued, debounced, atomic background writes. - Current alignment: `RuntimeStore` owns durable serialization, config, package metadata, preset IO, and persistence requests; `CommittedLiveState` owns the current committed/session layer state; and `RuntimeCoordinator` publishes typed persistence requests for persisted mutations. The remaining issue is that actual disk writes are still synchronous store work rather than queued, debounced, atomic background writes.
Current persistence footholds: Current persistence footholds:
@@ -235,9 +235,16 @@ Make disk writes safer and observable.
Initial target: Initial target:
- temp-file then replace - [x] temp-file then replace
- failure returned/published with structured reason - [x] failure returned/published with structured reason
- `HealthTelemetry` receives persistence warning state - [x] `HealthTelemetry` receives persistence warning state
Current implementation:
- `PersistenceWriter::WriteSnapshot(...)` and worker writes use temp-file then `MoveFileExA(..., MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)`.
- `PersistenceWriteResult` reports target kind, target path, reason, success/failure, error message, and whether newer work was pending.
- `RuntimeStore` wires persistence write results into `HealthTelemetry`.
- `HealthTelemetry` records persistence success/failure counts, last target/reason/error, pending-newer-request state, and unsaved-change state.
### Step 5. Wire Coordinator/Event Requests To Writer ### Step 5. Wire Coordinator/Event Requests To Writer

View File

@@ -53,6 +53,29 @@ void TestRuntimeEventTryRecord()
Expect(metrics.queue.oldestEventAgeMilliseconds == 0.0, "queue age is clamped to non-negative values"); Expect(metrics.queue.oldestEventAgeMilliseconds == 0.0, "queue age is clamped to non-negative values");
Expect(metrics.dispatch.lastDispatchDurationMilliseconds == 0.0, "dispatch duration is clamped to non-negative values"); Expect(metrics.dispatch.lastDispatchDurationMilliseconds == 0.0, "dispatch duration is clamped to non-negative values");
} }
void TestPersistenceWriteHealth()
{
HealthTelemetry telemetry;
telemetry.RecordPersistenceWriteResult(false, "runtime-state", "runtime/runtime_state.json", "UpdateLayerParameter",
"disk full", true);
HealthTelemetry::PersistenceSnapshot persistence = telemetry.GetPersistenceSnapshot();
Expect(persistence.writeFailureCount == 1, "persistence health counts write failures");
Expect(!persistence.lastWriteSucceeded, "persistence health records failed write state");
Expect(persistence.unsavedChanges, "persistence health reports unsaved changes after failure");
Expect(persistence.newerRequestPending, "persistence health records pending newer request");
Expect(persistence.lastTargetKind == "runtime-state", "persistence health records target kind");
Expect(persistence.lastReason == "UpdateLayerParameter", "persistence health records reason");
Expect(persistence.lastErrorMessage == "disk full", "persistence health records error message");
Expect(telemetry.TryRecordPersistenceWriteResult(true, "runtime-state", "runtime/runtime_state.json", "flush", "", false),
"try persistence health succeeds when uncontended");
persistence = telemetry.GetPersistenceSnapshot();
Expect(persistence.writeSuccessCount == 1, "persistence health counts write successes");
Expect(persistence.lastWriteSucceeded, "persistence health records successful write state");
Expect(!persistence.unsavedChanges, "persistence health clears unsaved changes after latest successful write with no pending request");
}
} }
int main() int main()
@@ -60,6 +83,7 @@ int main()
TestRuntimeEventQueueMetrics(); TestRuntimeEventQueueMetrics();
TestRuntimeEventDispatchStats(); TestRuntimeEventDispatchStats();
TestRuntimeEventTryRecord(); TestRuntimeEventTryRecord();
TestPersistenceWriteHealth();
if (gFailures != 0) if (gFailures != 0)
{ {

View File

@@ -94,12 +94,42 @@ void TestImmediateRequestsAreNotCoalesced()
"immediate snapshots preserve order"); "immediate snapshots preserve order");
} }
} }
// Verifies that a failing sink write is reported through the result callback
// as exactly one structured result carrying the snapshot's target kind, target
// path and reason plus the sink's error message, and that the failure is
// reflected in the writer metrics.
void TestWriteFailureReportsStructuredResult()
{
    std::vector<PersistenceWriteResult> results;

    // Sink that always fails with a fixed error message.
    PersistenceWriter writer(
        std::chrono::milliseconds(1),
        [](const PersistenceSnapshot&, std::string& error) {
            error = "simulated failure";
            return false;
        });
    writer.SetResultCallback([&results](const PersistenceWriteResult& result) {
        results.push_back(result);
    });

    PersistenceSnapshot snapshot = MakeRuntimeSnapshot("payload");
    snapshot.debounceAllowed = false;
    snapshot.reason = "failure-test";

    std::string error;
    Expect(writer.EnqueueSnapshot(snapshot, error), "failing snapshot still enqueues");
    // `results` is only inspected after StopAndFlush() returns.
    // NOTE(review): assumes StopAndFlush() waits for the worker to finish - confirm.
    writer.StopAndFlush();

    Expect(results.size() == 1, "writer reports one failure result");
    Expect(!results.empty() && !results[0].succeeded, "writer result records failure");
    Expect(!results.empty() && results[0].targetKind == snapshot.targetKind, "writer result preserves target kind");
    Expect(!results.empty() && results[0].targetPath == snapshot.targetPath.string(), "writer result preserves target path");
    Expect(!results.empty() && results[0].reason == "failure-test", "writer result preserves reason");
    Expect(!results.empty() && results[0].errorMessage == "simulated failure", "writer result preserves error message");
    Expect(!results.empty() && !results[0].newerRequestPending, "writer result reports no newer pending request");
    Expect(writer.GetMetrics().failedCount == 1, "writer metrics count failed writes");
}
} }
int main() int main()
{ {
TestDebouncedRequestsCoalesceToNewestSnapshot(); TestDebouncedRequestsCoalesceToNewestSnapshot();
TestImmediateRequestsAreNotCoalesced(); TestImmediateRequestsAreNotCoalesced();
TestWriteFailureReportsStructuredResult();
if (gFailures != 0) if (gFailures != 0)
{ {