Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion libsql-ffi/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,16 @@ pub fn build_bundled(out_dir: &str, out_path: &Path) {
// Collect the sqlean C translation units matched by each glob pattern.
// Paths that fail to read during globbing are silently dropped
// (`filter_map(Result::ok)`), matching the best-effort bundling style
// used elsewhere in this build script.
let mut sqlean_sources = Vec::new();
for pattern in sqlean_patterns {
    let full_pattern = format!("{BUNDLED_DIR}/sqlean/{}", pattern);
    sqlean_sources.extend(
        glob(&full_pattern)
            .unwrap()
            .filter_map(Result::ok)
            // Headers are glob'd in as a side effect but must not
            // be passed to `cc::Build::files()`: on clang/macOS
            // that turns them into precompiled-header .o files
            // which fail to link.
            .filter(|p| p.extension().map_or(false, |ext| ext == "c")),
    );
}

if cfg!(feature = "sqlean-extension-regexp") {
Expand Down
83 changes: 83 additions & 0 deletions libsql-server/src/http/admin/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,14 @@ where
"/v1/namespaces/:namespace/checkpoint",
post(handle_checkpoint),
)
.route(
"/v1/namespaces/:namespace/reset-replication",
post(handle_reset_replication),
)
.route(
"/v1/namespaces/:namespace/integrity-check",
post(handle_integrity_check),
)
.route("/v1/namespaces/:namespace", delete(handle_delete_namespace))
.route("/v1/namespaces/:namespace/stats", get(stats::handle_stats))
.route(
Expand Down Expand Up @@ -550,6 +558,81 @@ async fn handle_checkpoint<C>(
Ok(())
}

/// Handler for `POST /v1/namespaces/:namespace/reset-replication`.
///
/// Rebuild the replication log for a namespace from its live DB file
/// without touching other namespaces on this pod.
///
/// Use when the replication artifacts (wallog, snapshots/, to_compact/)
/// are corrupt but the live `data` file is intact (verify first with
/// `PRAGMA quick_check`).
///
/// Side effects:
/// - new `log_id` is minted
/// - connected replicas see `LogIncompatible` and must re-bootstrap
/// - live DB data is preserved
/// - metastore config (jwt_key, block_writes, etc.) is preserved
///
/// Other namespaces on this pod are completely unaffected.
async fn handle_reset_replication<C>(
    State(app_state): State<Arc<AppState<C>>>,
    Path(namespace): Path<NamespaceName>,
) -> crate::Result<axum::Json<ResetReplicationResp>> {
    let elapsed_ms = app_state.namespaces.reset_replication(namespace).await?;
    Ok(axum::Json(ResetReplicationResp { elapsed_ms }))
}

/// Response body for `POST /v1/namespaces/:namespace/reset-replication`.
#[derive(serde::Serialize)]
struct ResetReplicationResp {
    /// Wall-clock duration of the reset, for operator-visible metrics.
    elapsed_ms: u64,
}

/// Request body for `POST /v1/namespaces/:namespace/integrity-check`.
///
/// The body is optional at the HTTP layer; an absent body behaves the
/// same as `{ "full": false }` (see the handler, which defaults to a
/// quick check).
#[derive(serde::Deserialize, Default)]
struct IntegrityCheckReq {
    /// If true, run full `PRAGMA integrity_check` (O(DB size), thorough).
    /// Default is `PRAGMA quick_check` which is fast and catches the
    /// critical corruption classes.
    #[serde(default)]
    full: bool,
}

/// Response body for `POST /v1/namespaces/:namespace/integrity-check`.
#[derive(serde::Serialize)]
struct IntegrityCheckResp {
    /// True when SQLite reported exactly `"ok"` (after trimming
    /// surrounding whitespace from the diagnostic text).
    ok: bool,
    /// Raw SQLite diagnostic text. `"ok"` on success, otherwise one or
    /// more messages describing integrity issues.
    message: String,
    /// "quick" or "full", mirrors the `full` request field.
    check: &'static str,
}

/// Run `PRAGMA quick_check` (default) or `PRAGMA integrity_check` on a
/// namespace's live data file without touching other namespaces.
///
/// Use this to classify the failure mode before recovery:
/// - `ok` → live DB is fine, any corruption is in wallog/snapshots (Mode A)
/// → caller should use `POST /v1/namespaces/:ns/reset-replication`.
/// - non-"ok" → live DB itself is corrupt (Mode B)
/// → caller should restore from backup, not reset-replication.
///
/// Cheap: ~10ms for quick_check on small-to-medium namespaces.
async fn handle_integrity_check<C>(
State(app_state): State<Arc<AppState<C>>>,
Path(namespace): Path<NamespaceName>,
payload: Option<Json<IntegrityCheckReq>>,
) -> crate::Result<Json<IntegrityCheckResp>> {
let full = payload.map(|p| p.0.full).unwrap_or(false);
let message = app_state
.namespaces
.integrity_check(namespace, full)
.await?;
let ok = message.trim() == "ok";
Ok(Json(IntegrityCheckResp {
ok,
message,
check: if full { "full" } else { "quick" },
}))
}

#[derive(serde::Deserialize)]
struct EnableHeapProfileRequest {
#[serde(default)]
Expand Down
75 changes: 73 additions & 2 deletions libsql-server/src/namespace/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ impl Namespace {
&self.name
}

/// On-disk path of this namespace's files (data, wallog, snapshots/,
/// to_compact/, .sentinel).
///
/// Exposed as `&Arc<Path>` so callers that need to retain the path can
/// clone the shared handle cheaply instead of copying the path itself.
pub(crate) fn path(&self) -> &Arc<Path> {
    &self.path
}

async fn destroy(mut self) -> anyhow::Result<()> {
self.tasks.shutdown().await;
self.db.destroy();
Expand All @@ -85,14 +91,79 @@ impl Namespace {
Ok(())
}

/// Run `PRAGMA quick_check` (or `integrity_check` if `full=true`) on
/// the namespace's live DB file and return the result string.
///
/// A healthy DB yields `"ok"`; anything else is an integrity
/// diagnostic from SQLite.
///
/// Failures that occur *before* the PRAGMA produces rows — including
/// failing to open a connection at all, or "malformed database schema"
/// raised while the statement parses the schema — are themselves
/// integrity signals. They are normalized into the same `Ok(String)`
/// return path so callers get a uniform classification result rather
/// than a server error.
async fn integrity_check(&self, full: bool) -> anyhow::Result<String> {
    // A badly corrupt DB can fail right here; report that as a
    // diagnostic string, not as an Err that would surface as a 500.
    let conn = match self.db.connection_maker().create().await {
        Ok(conn) => conn,
        Err(e) => return Ok(format!("connection failed: {e}")),
    };
    let pragma = if full { "integrity_check" } else { "quick_check" };
    let outcome = conn.with_raw(move |raw| -> rusqlite::Result<Vec<String>> {
        let mut stmt = raw.prepare(&format!("PRAGMA {pragma}"))?;
        let rows = stmt.query([])?;
        // Each row of the PRAGMA output is a single text column.
        rows.mapped(|row| row.get(0)).collect()
    });
    match outcome {
        Ok(lines) => Ok(lines.join("\n")),
        // SQLite surfaces some integrity failures as prepare/query
        // errors rather than PRAGMA rows; fold those into the same
        // diagnostic string.
        Err(e) => Ok(e.to_string()),
    }
}

async fn shutdown(mut self, should_checkpoint: bool) -> anyhow::Result<()> {
self.tasks.shutdown().await;
if should_checkpoint {
self.checkpoint().await?;
}
self.db.shutdown().await?;
if let Err(e) = tokio::fs::remove_file(self.path.join(".sentinel")).await {
tracing::error!("unable to remove .sentinel file: {}", e);
// Historically `.sentinel` was removed unconditionally on graceful
// shutdown. This makes the documented `touch .sentinel + kubectl
// delete pod` operator recovery path silently ineffective, because
// kubectl sends SIGTERM first which invokes this graceful shutdown
// and removes the sentinel before the pod actually stops.
//
// Guard the removal behind `LIBSQL_PRESERVE_SENTINEL_ON_SHUTDOWN`.
// When set, the sentinel survives graceful shutdown, so the next
// namespace init will correctly trigger dirty-recovery from the
// live `data` file.
//
// Default remains: remove (preserves existing behavior for the
// 99% of deployments that don't need this recovery path, now that
// `POST /v1/namespaces/:ns/reset-replication` is the primary
// recovery primitive).
let preserve_sentinel =
std::env::var("LIBSQL_PRESERVE_SENTINEL_ON_SHUTDOWN").is_ok();
if !preserve_sentinel {
if let Err(e) = tokio::fs::remove_file(self.path.join(".sentinel")).await {
tracing::error!("unable to remove .sentinel file: {}", e);
}
} else {
tracing::info!(
"LIBSQL_PRESERVE_SENTINEL_ON_SHUTDOWN set; keeping .sentinel for recovery"
);
}
Ok(())
}
Expand Down
Loading
Loading