commit e1b6b7ddd75ab6ddef210e57db7ebfe1de6a6eef Author: Rene Nochebuena Date: Wed Mar 18 14:06:17 2026 -0600 feat(health): initial stable release v0.9.0 HTTP health check handler with parallel goroutine-per-check execution, 5 s request-derived timeout, and two-level criticality (LevelCritical → 503, LevelDegraded → 200). What's included: - `Checkable` interface (HealthCheck / Name / Priority) and `Level` type with LevelCritical and LevelDegraded constants - `NewHandler(logger, checks...)` returning http.Handler; runs all checks concurrently via buffered channel, returns JSON with per-component status and latency - `ComponentStatus` and `Response` types for the JSON response body Tested-via: todo-api POC integration Reviewed-against: docs/adr/ diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..54f5aae --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,26 @@ +{ + "name": "Go", + "image": "mcr.microsoft.com/devcontainers/go:2-1.25-trixie", + "features": { + "ghcr.io/devcontainers-extra/features/claude-code:1": {} + }, + "forwardPorts": [], + "postCreateCommand": "go version", + "customizations": { + "vscode": { + "settings": { + "files.autoSave": "afterDelay", + "files.autoSaveDelay": 1000, + "explorer.compactFolders": false, + "explorer.showEmptyFolders": true + }, + "extensions": [ + "golang.go", + "eamodio.golang-postfix-completion", + "quicktype.quicktype", + "usernamehw.errorlens" + ] + } + }, + "remoteUser": "vscode" +} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..221da82 --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +# Binaries +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with go test -c +*.test + +# Output of go build +*.out + +# Dependency directory +vendor/ + +# Go workspace file +go.work +go.work.sum + +# Environment files +.env +.env.* + +# Editor / IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# OS +.DS_Store 
+Thumbs.db + +# VCS files +COMMIT.md +RELEASE.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..685eaf1 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,32 @@ +# Changelog + +All notable changes to this module will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this module adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.9.0] - 2026-03-18 + +### Added + +- `Level` type — `int` representing component criticality; zero value is `LevelCritical` +- `LevelCritical` constant (value `0`) — a failing critical component sets overall status to `DOWN` and returns HTTP 503 +- `LevelDegraded` constant (value `1`) — a failing degraded component sets overall status to `DEGRADED` and returns HTTP 200 +- `Checkable` interface — `HealthCheck(ctx context.Context) error`, `Name() string`, `Priority() Level`; implemented by infrastructure components +- `Logger` interface — duck-typed minimal logger (`Debug`, `Info`, `Warn`, `Error`, `WithContext`) satisfied by `logz.Logger` without importing logz +- `ComponentStatus` struct — JSON-serialisable per-component result with fields `status string`, `latency string` (omitempty), and `error string` (omitempty) +- `Response` struct — JSON-serialisable overall response with fields `status string` and `components map[string]ComponentStatus` +- `NewHandler(logger Logger, checks ...Checkable) http.Handler` — constructs an `http.Handler` that runs all registered checks concurrently, collects results via a buffered channel, and writes a JSON `Response` +- Parallel check execution: each `Checkable.HealthCheck` call runs in its own goroutine; a buffered channel sized to the check count prevents goroutine leaks +- Per-request 5-second deadline derived from `context.WithTimeout(r.Context(), 5*time.Second)` and propagated to all check goroutines +- Overall status aggregation: `UP` if all checks pass; `DEGRADED` (HTTP 200) if at 
least one degraded component fails and no critical component fails; `DOWN` (HTTP 503) if any critical component fails +- Check latency measurement: each goroutine records `time.Since(start)` and includes it as a string in `ComponentStatus` +- `Content-Type: application/json` response header set on every response + +### Design Notes + +- All checks run in parallel goroutines and report through a buffered channel; the buffer is sized exactly to the number of registered checks at construction time, so the handler is guaranteed to drain all results without blocking even if it returns early. +- The two-level criticality model (`LevelCritical` / `LevelDegraded`) gives orchestrators and load balancers a clean binary HTTP signal (200 vs 503) while still surfacing partial degradation in the JSON body for monitoring systems. +- The `Logger` and `Checkable` interfaces are defined entirely within this package using duck typing — no micro-lib module is imported, keeping `health` a pure stdlib package (Tier 0/1) that infra packages can satisfy without a circular dependency. + +[0.9.0]: https://code.nochebuena.dev/go/health/releases/tag/v0.9.0 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..1f1eee5 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,54 @@ +# health + +HTTP health check handler with parallel checks, timeouts, and two-level criticality. + +## Purpose + +Provides a single `http.Handler` that interrogates any number of `Checkable` infrastructure components concurrently and returns a JSON response with per-component status and an overall service status. Designed to be mounted at `/health` and consumed by load balancers and orchestrators. + +## Tier & Dependencies + +**Tier 1** — depends only on Go stdlib (`context`, `encoding/json`, `net/http`, `time`). No external or internal module imports. + +## Key Design Decisions + +- **Parallel checks** (ADR-001): every registered component is checked in its own goroutine. 
A `context.WithTimeout(r.Context(), 5*time.Second)` provides the deadline for the entire request. The handler blocks until all goroutines report via a buffered channel. +- **Two-level criticality** (ADR-002): `LevelCritical` (value `0`) → `DOWN` + HTTP 503 on failure; `LevelDegraded` (value `1`) → `DEGRADED` + HTTP 200 on failure. The zero-value default is `LevelCritical`. +- **`Checkable` interface** (ADR-003): infrastructure components implement `HealthCheck(ctx) error`, `Name() string`, and `Priority() Level`. The health package defines the interface; infra packages satisfy it — not the reverse. +- **Duck-typed `Logger`** (global ADR-001): the `Logger` interface is defined locally in this package. Any logger that matches the method set (including `logz.Logger`) is accepted without an import of `logz`. + +## Patterns + +Construct the handler at app bootstrap and pass all `Checkable` components: + +```go +db := postgres.New(logger, cfg) // satisfies health.Checkable +rdb := redis.New(logger, cfg) // satisfies health.Checkable + +h := health.NewHandler(logger, db, rdb) +router.Get("/health", h) +``` + +Implement `Checkable` on a custom type: + +```go +type myService struct{} + +func (s *myService) HealthCheck(ctx context.Context) error { return s.ping(ctx) } +func (s *myService) Name() string { return "my-service" } +func (s *myService) Priority() health.Level { return health.LevelDegraded } +``` + +## What to Avoid + +- Do not add a global/package-level handler or registry. `NewHandler` is the only constructor; use dependency injection. +- Do not call `NewHandler` with nil logger — the handler will panic on the first request when it calls `logger.WithContext`. +- Do not import infra modules (postgres, mysql, etc.) from this package. The dependency must flow one way: infra → health. +- Do not remove the buffered channel (`make(chan result, len(h.checks))`). Making it unbuffered would leak goroutines if the handler returns before draining all results. 
+ +## Testing Notes + +- `health_test.go` covers: no checks (UP), all UP, critical DOWN (503), degraded DOWN (200), mixed DOWN+DEGRADED (503), parallel execution timing, JSON shape, and context timeout propagation. +- `compliance_test.go` contains compile-time interface satisfaction checks for `Logger` and `Checkable`. Run `go vet ./...` to verify them without executing any runtime code — note `go build ./...` does not compile `_test.go` files, so it cannot check them. +- Tests use an in-process `httptest.NewRecorder` — no real network or infrastructure required. +- The parallelism test uses a 100 ms delay per check and asserts total elapsed < 300 ms. It is timing-sensitive; extremely slow CI runners may produce false negatives. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0b33b48 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 NOCHEBUENADEV + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..976fe48 --- /dev/null +++ b/README.md @@ -0,0 +1,56 @@ +# health + +Stdlib `http.Handler` for service health checks. Runs all checks concurrently and returns a JSON summary. + +## Install + +``` +go get code.nochebuena.dev/go/health +``` + +## Usage + +```go +handler := health.NewHandler(logger, db, cache, queue) +r.Get("/health", handler) +``` + +### Response + +```json +{ + "status": "DEGRADED", + "components": { + "db": {"status": "UP", "latency": "1.2ms"}, + "cache": {"status": "DEGRADED", "latency": "5ms", "error": "connection refused"} + } +} +``` + +| Overall status | HTTP code | Condition | +|---|---|---| +| `UP` | 200 | All checks pass | +| `DEGRADED` | 200 | One or more `LevelDegraded` checks fail; no critical failures | +| `DOWN` | 503 | One or more `LevelCritical` checks fail | + +## Implementing `Checkable` + +```go +func (d *DB) HealthCheck(ctx context.Context) error { return d.pool.PingContext(ctx) } +func (d *DB) Name() string { return "postgres" } +func (d *DB) Priority() health.Level { return health.LevelCritical } +``` + +## Logger + +`health.Logger` is a duck-typed interface satisfied by `logz.Logger` without importing it. 
+ +```go +type Logger interface { + Debug(msg string, args ...any) + Info(msg string, args ...any) + Warn(msg string, args ...any) + Error(msg string, err error, args ...any) + WithContext(ctx context.Context) Logger +} +``` diff --git a/compliance_test.go b/compliance_test.go new file mode 100644 index 0000000..4cd0ec7 --- /dev/null +++ b/compliance_test.go @@ -0,0 +1,27 @@ +package health_test + +import ( + "context" + + "code.nochebuena.dev/go/health" +) + +type testLogger struct{} + +func (t *testLogger) Debug(msg string, args ...any) {} +func (t *testLogger) Info(msg string, args ...any) {} +func (t *testLogger) Warn(msg string, args ...any) {} +func (t *testLogger) Error(msg string, err error, args ...any) {} +func (t *testLogger) WithContext(ctx context.Context) health.Logger { return t } + +// Compile-time check: testLogger satisfies health.Logger. +var _ health.Logger = (*testLogger)(nil) + +type testCheck struct{} + +func (t *testCheck) HealthCheck(ctx context.Context) error { return nil } +func (t *testCheck) Name() string { return "test" } +func (t *testCheck) Priority() health.Level { return health.LevelCritical } + +// Compile-time check: testCheck satisfies health.Checkable. +var _ health.Checkable = (*testCheck)(nil) diff --git a/doc.go b/doc.go new file mode 100644 index 0000000..1512a47 --- /dev/null +++ b/doc.go @@ -0,0 +1,10 @@ +// Package health provides a stdlib http.Handler for service health checks. +// +// Register checkable components and mount the handler at a health endpoint: +// +// handler := health.NewHandler(logger, db, cache, queue) +// r.Get("/health", handler) +// +// The handler runs all checks concurrently with a 5-second timeout. +// It returns HTTP 200 (UP or DEGRADED) or HTTP 503 (DOWN). 
+package health diff --git a/docs/adr/ADR-001-parallel-checks-with-timeout.md b/docs/adr/ADR-001-parallel-checks-with-timeout.md new file mode 100644 index 0000000..89afe0c --- /dev/null +++ b/docs/adr/ADR-001-parallel-checks-with-timeout.md @@ -0,0 +1,26 @@ +# ADR-001: Parallel Checks with 5-Second Timeout + +**Status:** Accepted +**Date:** 2026-03-18 + +## Context + +A health endpoint must interrogate all registered components (database, cache, queue, etc.) and aggregate their results before responding to the caller. The naive approach — running checks sequentially — means the total response time is the sum of all individual check latencies. Under degraded conditions this can be several seconds, making the health endpoint itself a slow, unreliable probe. + +Additionally, health checks must be bounded. A component that hangs indefinitely must not cause the health handler to hang indefinitely. There must be a hard wall-clock limit. + +## Decision + +All registered `Checkable` components are checked concurrently using one goroutine per check. A `context.WithTimeout` of 5 seconds is derived from the incoming request context and passed to every goroutine. Results are collected from a buffered channel sized to the number of checks; the aggregation loop blocks until all goroutines have delivered exactly one result. + +The 5-second timeout is applied at the `ServeHTTP` level, not per check, so it is the ceiling for the entire health response including JSON encoding. + +The request's own context is used as the parent for the timeout derivation. If the caller cancels its request before 5 seconds (e.g., a probe with a 50 ms deadline), the context cancellation propagates to all running goroutines, and the handler returns before the 5-second ceiling. + +## Consequences + +- **Positive**: Total response time is bounded by the slowest single check (or 5 s), not the sum of all checks. A test with three 100 ms checks completes in ~100 ms, not ~300 ms. 
+- **Positive**: Hanging checks do not cause the handler to hang indefinitely, provided each check honors context cancellation; a check that ignores `ctx` will still block the aggregation loop, since the handler waits for every result before responding. +- **Positive**: Caller-side timeouts are respected via context propagation. +- **Negative**: All checks consume resources simultaneously; there is no back-pressure or concurrency limit. For large numbers of checks this could be a concern, but typical services have a small, bounded number of infrastructure components. +- **Note**: The buffered channel of size `len(h.checks)` ensures check goroutines can never leak by blocking on send: even if the aggregation loop were to return early (e.g., due to a panic), goroutines can still write to the channel without blocking. diff --git a/docs/adr/ADR-002-critical-vs-warning-levels.md b/docs/adr/ADR-002-critical-vs-warning-levels.md new file mode 100644 index 0000000..a80665e --- /dev/null +++ b/docs/adr/ADR-002-critical-vs-warning-levels.md @@ -0,0 +1,33 @@ +# ADR-002: Critical vs Warning Levels + +**Status:** Accepted +**Date:** 2026-03-18 + +## Context + +Not all infrastructure components are equally essential. A relational database that stores primary application state is existentially required; if it is down, the service cannot function and callers should stop sending traffic. A read-through cache or a non-essential third-party integration may be important for performance or full feature availability, but the service can still handle requests without them. + +A health endpoint that returns 503 whenever any non-critical dependency is unavailable will cause load balancers and orchestrators to pull healthy service instances out of rotation unnecessarily, amplifying an outage. + +Conversely, a health endpoint that always returns 200 regardless of component state provides no useful signal to the infrastructure. + +## Decision + +Two levels are defined as a typed integer `Level`: + +- **`LevelCritical` (0)**: The component is essential. If it reports an error, the overall status is `DOWN` and the HTTP response is `503 Service Unavailable`. 
The constant `LevelCritical` is the zero value of the `Level` type, so it is the default when constructing a struct without explicitly setting the field. +- **`LevelDegraded` (1)**: The component is non-essential. If it reports an error, its per-component status is `DEGRADED` and the overall status is `DEGRADED`, but the HTTP response is `200 OK`. + +Aggregation rules: +1. Start with overall status `UP` and HTTP `200`. +2. Any `DOWN` component flips overall to `DOWN` and HTTP to `503`. This state cannot be overridden by a `DEGRADED` result. +3. Any `DEGRADED` component, if the overall is still `UP`, flips it to `DEGRADED` (200 is preserved). + +The per-component status strings (`UP`, `DEGRADED`, `DOWN`) are included in the JSON response regardless of level, allowing monitoring dashboards to distinguish the states of individual components. + +## Consequences + +- **Positive**: Infrastructure (load balancers, Kubernetes readiness probes) gets an honest `503` only when the service is genuinely non-functional. +- **Positive**: Degraded state is surfaced in the response body for observability without triggering traffic removal. +- **Positive**: Infra modules (postgres, mysql, etc.) can declare their own priority by implementing `Priority() Level` — typically `LevelCritical`. +- **Negative**: The binary two-level model does not support finer-grained priorities (e.g., "warn but do not degrade"). Additional levels can be added in future ADRs without breaking existing implementations. diff --git a/docs/adr/ADR-003-checkable-interface.md b/docs/adr/ADR-003-checkable-interface.md new file mode 100644 index 0000000..1a549cc --- /dev/null +++ b/docs/adr/ADR-003-checkable-interface.md @@ -0,0 +1,39 @@ +# ADR-003: Checkable Interface + +**Status:** Accepted +**Date:** 2026-03-18 + +## Context + +The health handler needs to interrogate arbitrary infrastructure components without knowing their concrete types. The options were: + +1. 
Pass `func(ctx context.Context) error` callbacks directly. +2. Require a shared `Checkable` interface that infrastructure modules must implement. +3. Accept an external registry where components register themselves by name. + +The health module also needs a way to know what to call a component in the JSON output (`name`) and how to treat its failure (`priority`). Without these pieces of metadata, every caller would have to pass them as separate arguments alongside the check function. + +## Decision + +A `Checkable` interface is defined in the `health` package with three methods: + +```go +type Checkable interface { + HealthCheck(ctx context.Context) error + Name() string + Priority() Level +} +``` + +Infrastructure modules (`postgres`, `mysql`, etc.) embed `health.Checkable` in their own `Component` interface and implement all three methods. The `health` package does not import any infrastructure module — the dependency flows inward only: infra → health. + +`Name()` returns a stable string used as the JSON key in the `components` map. `Priority()` returns the `Level` value that governs the HTTP status code logic (ADR-002). `HealthCheck(ctx)` performs the actual probe (e.g., `pool.Ping(ctx)`). + +The handler accepts `...Checkable` as a variadic parameter, so callers can register zero or more components at construction time. No dynamic registration or remove-after-register is supported. + +## Consequences + +- **Positive**: Infrastructure components carry their own health metadata — no out-of-band registration with name strings and level constants at the call site. +- **Positive**: Compile-time safety: if a component does not implement all three methods, the assignment `var _ health.Checkable = myComponent{}` fails. +- **Positive**: The interface is minimal (three methods) and stable; adding a fourth method would be a breaking change and should be versioned. 
+- **Negative**: Any new type that wants to participate in health checking must implement three methods, not just a single function. For trivial cases (one-off checks) this is more boilerplate than a bare function callback. However, the named interface is preferred because metadata (`Name`, `Priority`) cannot be forgotten. diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..8fa281a --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module code.nochebuena.dev/go/health + +go 1.25 diff --git a/health.go b/health.go new file mode 100644 index 0000000..2eb6e56 --- /dev/null +++ b/health.go @@ -0,0 +1,132 @@ +package health + +import ( + "context" + "encoding/json" + "net/http" + "time" +) + +// Level represents the criticality of a component to the overall application health. +type Level int + +const ( + // LevelCritical indicates a component essential for the application. + // If any critical component is DOWN, the overall status is DOWN (503). + LevelCritical Level = iota + // LevelDegraded indicates a component that is important but not essential. + // If a degraded component is DOWN, the overall status is DEGRADED (200). + LevelDegraded +) + +// Checkable is the interface that infrastructure components implement. +type Checkable interface { + HealthCheck(ctx context.Context) error + Name() string + Priority() Level +} + +// Logger is the minimal interface health needs — satisfied by logz.Logger via duck typing. +type Logger interface { + Debug(msg string, args ...any) + Info(msg string, args ...any) + Warn(msg string, args ...any) + Error(msg string, err error, args ...any) + WithContext(ctx context.Context) Logger +} + +// ComponentStatus represents the health state of an individual component. +type ComponentStatus struct { + Status string `json:"status"` + Latency string `json:"latency,omitempty"` + Error string `json:"error,omitempty"` +} + +// Response is the JSON body returned by the health handler. 
+type Response struct { + Status string `json:"status"` + Components map[string]ComponentStatus `json:"components"` +} + +type handler struct { + logger Logger + checks []Checkable +} + +// NewHandler returns an http.Handler for the health endpoint. +// Runs all checks concurrently with a 5-second timeout. +// Returns 200 (UP/DEGRADED) or 503 (DOWN). +func NewHandler(logger Logger, checks ...Checkable) http.Handler { + return &handler{logger: logger, checks: checks} +} + +func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + logger := h.logger.WithContext(r.Context()) + logger.Debug("health: running checks") + + ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second) + defer cancel() + + type result struct { + name string + status ComponentStatus + priority Level + } + + resChan := make(chan result, len(h.checks)) + + for _, check := range h.checks { + go func(chk Checkable) { + start := time.Now() + err := chk.HealthCheck(ctx) + latency := time.Since(start).String() + + status := "UP" + errMsg := "" + if err != nil { + errMsg = err.Error() + if chk.Priority() == LevelDegraded { + status = "DEGRADED" + } else { + status = "DOWN" + } + } + + resChan <- result{ + name: chk.Name(), + priority: chk.Priority(), + status: ComponentStatus{ + Status: status, + Latency: latency, + Error: errMsg, + }, + } + }(check) + } + + overallStatus := "UP" + httpStatus := http.StatusOK + components := make(map[string]ComponentStatus) + + for range h.checks { + res := <-resChan + components[res.name] = res.status + + switch res.status.Status { + case "DOWN": + overallStatus = "DOWN" + httpStatus = http.StatusServiceUnavailable + case "DEGRADED": + if overallStatus == "UP" { + overallStatus = "DEGRADED" + } + } + } + + resp := Response{Status: overallStatus, Components: components} + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(httpStatus) + if err := json.NewEncoder(w).Encode(resp); err != nil { + logger.Error("health: failed to encode 
response", err) + } +} diff --git a/health_test.go b/health_test.go new file mode 100644 index 0000000..caff1db --- /dev/null +++ b/health_test.go @@ -0,0 +1,188 @@ +package health + +import ( + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +// --- mock helpers --- + +type mockCheck struct { + name string + priority Level + err error + delay time.Duration +} + +func (m *mockCheck) HealthCheck(ctx context.Context) error { + if m.delay > 0 { + select { + case <-time.After(m.delay): + case <-ctx.Done(): + return ctx.Err() + } + } + return m.err +} +func (m *mockCheck) Name() string { return m.name } +func (m *mockCheck) Priority() Level { return m.priority } + +type noopLogger struct{} + +func (n *noopLogger) Debug(msg string, args ...any) {} +func (n *noopLogger) Info(msg string, args ...any) {} +func (n *noopLogger) Warn(msg string, args ...any) {} +func (n *noopLogger) Error(msg string, err error, args ...any) {} +func (n *noopLogger) WithContext(ctx context.Context) Logger { return n } + +func doRequest(t *testing.T, h http.Handler) (int, Response) { + t.Helper() + req := httptest.NewRequest(http.MethodGet, "/health", nil) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + + var resp Response + if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil { + t.Fatalf("decode response: %v", err) + } + return rec.Code, resp +} + +// --- tests --- + +func TestHandler_NoChecks(t *testing.T) { + h := NewHandler(&noopLogger{}) + code, resp := doRequest(t, h) + if code != http.StatusOK { + t.Errorf("want 200, got %d", code) + } + if resp.Status != "UP" { + t.Errorf("want UP, got %s", resp.Status) + } +} + +func TestHandler_AllUp(t *testing.T) { + h := NewHandler(&noopLogger{}, + &mockCheck{name: "db", priority: LevelCritical}, + &mockCheck{name: "cache", priority: LevelDegraded}, + ) + code, resp := doRequest(t, h) + if code != http.StatusOK { + t.Errorf("want 200, got %d", code) + } + if resp.Status != "UP" { 
+ t.Errorf("want UP, got %s", resp.Status) + } + if resp.Components["db"].Status != "UP" { + t.Errorf("db: want UP, got %s", resp.Components["db"].Status) + } +} + +func TestHandler_CriticalDown(t *testing.T) { + h := NewHandler(&noopLogger{}, + &mockCheck{name: "db", priority: LevelCritical, err: errors.New("connection refused")}, + ) + code, resp := doRequest(t, h) + if code != http.StatusServiceUnavailable { + t.Errorf("want 503, got %d", code) + } + if resp.Status != "DOWN" { + t.Errorf("want DOWN, got %s", resp.Status) + } + if resp.Components["db"].Status != "DOWN" { + t.Errorf("db: want DOWN, got %s", resp.Components["db"].Status) + } +} + +func TestHandler_DegradedDown(t *testing.T) { + h := NewHandler(&noopLogger{}, + &mockCheck{name: "cache", priority: LevelDegraded, err: errors.New("timeout")}, + ) + code, resp := doRequest(t, h) + if code != http.StatusOK { + t.Errorf("want 200, got %d", code) + } + if resp.Status != "DEGRADED" { + t.Errorf("want DEGRADED, got %s", resp.Status) + } + if resp.Components["cache"].Status != "DEGRADED" { + t.Errorf("cache: want DEGRADED, got %s", resp.Components["cache"].Status) + } +} + +func TestHandler_MixedDown(t *testing.T) { + h := NewHandler(&noopLogger{}, + &mockCheck{name: "db", priority: LevelCritical, err: errors.New("down")}, + &mockCheck{name: "cache", priority: LevelDegraded, err: errors.New("down")}, + ) + code, resp := doRequest(t, h) + if code != http.StatusServiceUnavailable { + t.Errorf("want 503, got %d", code) + } + if resp.Status != "DOWN" { + t.Errorf("want DOWN, got %s", resp.Status) + } +} + +func TestHandler_ChecksParallel(t *testing.T) { + delay := 100 * time.Millisecond + h := NewHandler(&noopLogger{}, + &mockCheck{name: "a", priority: LevelCritical, delay: delay}, + &mockCheck{name: "b", priority: LevelCritical, delay: delay}, + &mockCheck{name: "c", priority: LevelCritical, delay: delay}, + ) + start := time.Now() + doRequest(t, h) + elapsed := time.Since(start) + + // parallel: should complete 
in ~delay, not 3*delay + if elapsed > 3*delay { + t.Errorf("checks do not appear to run in parallel: elapsed %v", elapsed) + } +} + +func TestHandler_JSON_Shape(t *testing.T) { + h := NewHandler(&noopLogger{}, + &mockCheck{name: "db", priority: LevelCritical}, + ) + req := httptest.NewRequest(http.MethodGet, "/health", nil) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + + if ct := rec.Header().Get("Content-Type"); ct != "application/json" { + t.Errorf("Content-Type: want application/json, got %s", ct) + } + + var resp Response + if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil { + t.Fatalf("body is not valid JSON: %v", err) + } + if _, ok := resp.Components["db"]; !ok { + t.Error("components map missing 'db' key") + } +} + +func TestHandler_ContextTimeout(t *testing.T) { + // Check that times out faster than the 5s global timeout when client cancels. + h := NewHandler(&noopLogger{}, + &mockCheck{name: "slow", priority: LevelCritical, delay: 10 * time.Second}, + ) + req := httptest.NewRequest(http.MethodGet, "/health", nil) + ctx, cancel := context.WithTimeout(req.Context(), 50*time.Millisecond) + defer cancel() + req = req.WithContext(ctx) + + rec := httptest.NewRecorder() + start := time.Now() + h.ServeHTTP(rec, req) + elapsed := time.Since(start) + + if elapsed > time.Second { + t.Errorf("handler did not respect context timeout: elapsed %v", elapsed) + } +}