feat(telemetry): initial stable release v0.9.0
Single-call OTel SDK bootstrap setting all three global providers (traces → Tempo, metrics → Mimir, logs → Loki) over OTLP gRPC. What's included: - New(ctx, Config): bootstraps TracerProvider, MeterProvider, and LoggerProvider with OTLP gRPC exporters; sets OTel globals - W3C TraceContext + Baggage propagation set globally - Resource tagging: service.name, service.version, deployment.environment merged with SDK defaults - OTLPInsecure bool for development environments without TLS - Sequential rollback on partial initialization failure — no dangling exporters on error - Returns shutdown func(context.Context) error; caller defers in main or wires into launcher BeforeStop - Tier 5 module: must be imported only by application main packages; zero micro-lib dependencies Tested-via: todo-api POC integration Reviewed-against: docs/adr/
This commit is contained in:
26
.devcontainer/devcontainer.json
Normal file
26
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"name": "Go",
|
||||||
|
"image": "mcr.microsoft.com/devcontainers/go:2-1.25-trixie",
|
||||||
|
"features": {
|
||||||
|
"ghcr.io/devcontainers-extra/features/claude-code:1": {}
|
||||||
|
},
|
||||||
|
"forwardPorts": [],
|
||||||
|
"postCreateCommand": "go version",
|
||||||
|
"customizations": {
|
||||||
|
"vscode": {
|
||||||
|
"settings": {
|
||||||
|
"files.autoSave": "afterDelay",
|
||||||
|
"files.autoSaveDelay": 1000,
|
||||||
|
"explorer.compactFolders": false,
|
||||||
|
"explorer.showEmptyFolders": true
|
||||||
|
},
|
||||||
|
"extensions": [
|
||||||
|
"golang.go",
|
||||||
|
"eamodio.golang-postfix-completion",
|
||||||
|
"quicktype.quicktype",
|
||||||
|
"usernamehw.errorlens"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"remoteUser": "vscode"
|
||||||
|
}
|
||||||
38
.gitignore
vendored
Normal file
38
.gitignore
vendored
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# Binaries
|
||||||
|
*.exe
|
||||||
|
*.exe~
|
||||||
|
*.dll
|
||||||
|
*.so
|
||||||
|
*.dylib
|
||||||
|
|
||||||
|
# Test binary, built with go test -c
|
||||||
|
*.test
|
||||||
|
|
||||||
|
# Output of go build
|
||||||
|
*.out
|
||||||
|
|
||||||
|
# Dependency directory
|
||||||
|
vendor/
|
||||||
|
|
||||||
|
# Go workspace file
|
||||||
|
go.work
|
||||||
|
go.work.sum
|
||||||
|
|
||||||
|
# Environment files
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
|
||||||
|
# Editor / IDE
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# VCS files
|
||||||
|
COMMIT.md
|
||||||
|
RELEASE.md
|
||||||
24
CHANGELOG.md
Normal file
24
CHANGELOG.md
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# Changelog
|
||||||
|
|
||||||
|
All notable changes to this module will be documented in this file.
|
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
|
and this module adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [0.9.0] - 2026-03-18
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- `Config` struct — holds OTel bootstrap configuration with env-tag support: `ServiceName` (`OTEL_SERVICE_NAME`, required), `ServiceVersion` (`OTEL_SERVICE_VERSION`, default `unknown`), `Environment` (`OTEL_ENVIRONMENT`, default `development`), `OTLPEndpoint` (`OTEL_EXPORTER_OTLP_ENDPOINT`, required), `OTLPInsecure` (`OTEL_EXPORTER_OTLP_INSECURE`, default `false`)
|
||||||
|
- `New(ctx context.Context, cfg Config) (func(context.Context) error, error)` — bootstraps the full OpenTelemetry SDK by creating three OTLP gRPC exporters and their corresponding SDK providers: `TracerProvider` (traces → Grafana Alloy → Tempo), `MeterProvider` (metrics → Grafana Alloy → Mimir), and `LoggerProvider` (logs → Grafana Alloy → Loki); sets all three OTel globals (`otel.SetTracerProvider`, `otel.SetMeterProvider`, `global.SetLoggerProvider`) and installs W3C TraceContext and Baggage as the global text map propagator; returns `(shutdown, nil)` on success or `(nil, error)` on failure with sequential rollback of any already-created providers
|
||||||
|
- Shutdown function `func(context.Context) error` — returned by `New`; flushes and shuts down all three providers using `errors.Join`, allowing deferred call in `main` or wiring into a launcher `BeforeStop` hook
|
||||||
|
- OTel resource construction — every signal is tagged with `service.name`, `service.version`, and `deployment.environment` attributes, merged with the OTel SDK default resource (SDK version, process info, etc.) using `resource.Merge`
|
||||||
|
- Sequential error rollback — if the metric exporter fails after the trace exporter is created, the trace provider is shut down before returning the error; if the log exporter fails, both the trace and metric providers are shut down; the process never runs with a partial telemetry state
|
||||||
|
|
||||||
|
### Design Notes
|
||||||
|
|
||||||
|
- This module is Tier 5 (application bootstrap only) and must never be imported by framework libraries; those libraries use only the OTel API, which defaults to no-ops until `New` is called and sets the global providers
|
||||||
|
- All three signals share a single OTLP gRPC endpoint, matching the standard Grafana LGTM stack topology where Grafana Alloy receives all signals and fans them out to Tempo, Mimir, and Loki
|
||||||
|
- The module intentionally does not implement `launcher.Component`; the returned shutdown function is deferred directly in `main`, keeping the dependency graph free of `launcher` and the interface as simple as a single function call
|
||||||
|
|
||||||
|
[0.9.0]: https://code.nochebuena.dev/go/telemetry/releases/tag/v0.9.0
|
||||||
86
CLAUDE.md
Normal file
86
CLAUDE.md
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
# telemetry
|
||||||
|
|
||||||
|
Bootstraps the full OpenTelemetry SDK (traces, metrics, logs) with OTLP gRPC exporters targeting Grafana Alloy.
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Sets the three OTel global providers so that all micro-libs using the OTel global API auto-instrument without any code changes. Returns a shutdown function that flushes all exporters on process exit. This module is the single place in an application where the OTel SDK is wired up.
|
||||||
|
|
||||||
|
## Tier & Dependencies
|
||||||
|
|
||||||
|
**Tier 5** (application bootstrap only). Must never be imported by framework libraries (Tier 0–4).
|
||||||
|
|
||||||
|
Depends on:
|
||||||
|
- `go.opentelemetry.io/otel` and sub-packages — API and SDK
|
||||||
|
- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc`
|
||||||
|
- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc`
|
||||||
|
- `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc`
|
||||||
|
- `go.opentelemetry.io/otel/sdk/trace`, `.../metric`, `.../log`
|
||||||
|
|
||||||
|
No micro-lib dependencies. No `launcher` dependency — telemetry has no Component lifecycle.
|
||||||
|
|
||||||
|
## Key Design Decisions
|
||||||
|
|
||||||
|
- **Tier 5 / app-only** (ADR-001): Libraries use only the OTel API (no-op default). This module activates the real SDK. Importing it from a library is a mistake.
|
||||||
|
- **Three-signal OTLP bootstrap** (ADR-002): `New(ctx, cfg)` sets up traces → Tempo, metrics → Mimir, logs → Loki, all over a single OTLP gRPC endpoint. W3C TraceContext + Baggage propagation is set globally.
|
||||||
|
- **Global provider strategy** (ADR-003): Libraries call `otel.Tracer(...)` / `otel.Meter(...)` / `global.Logger(...)`. After `telemetry.New`, those calls route to the real SDK with no library changes required.
|
||||||
|
- **No `launcher.Component`**: Telemetry is not a lifecycle component. The caller defers the returned shutdown function directly in `main`. This keeps the module dependency graph minimal and the interface simple.
|
||||||
|
- **Sequential error rollback**: If any exporter fails to initialize, all previously created providers are shut down before the error is returned. The process never runs with a partial telemetry state.
|
||||||
|
|
||||||
|
## Patterns
|
||||||
|
|
||||||
|
**Standard application usage:**
|
||||||
|
|
||||||
|
```go
|
||||||
|
func main() {
|
||||||
|
ctx := context.Background()
|
||||||
|
shutdown, err := telemetry.New(ctx, telemetry.Config{
|
||||||
|
ServiceName: "order-service",
|
||||||
|
ServiceVersion: "1.4.2",
|
||||||
|
Environment: "production",
|
||||||
|
OTLPEndpoint: "alloy:4317",
|
||||||
|
OTLPInsecure: false,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("telemetry: %v", err)
|
||||||
|
}
|
||||||
|
defer shutdown(ctx)
|
||||||
|
|
||||||
|
// Rest of application wiring...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**With launcher (wire shutdown into lifecycle):**
|
||||||
|
|
||||||
|
```go
|
||||||
|
shutdown, err := telemetry.New(ctx, cfg)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
lc.BeforeStop(func() error { return shutdown(ctx) })
|
||||||
|
```
|
||||||
|
|
||||||
|
**Config env vars:**
|
||||||
|
|
||||||
|
| Variable | Required | Default | Description |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `OTEL_SERVICE_NAME` | yes | — | Service name in all signals |
|
||||||
|
| `OTEL_SERVICE_VERSION` | no | `unknown` | Deployed version |
|
||||||
|
| `OTEL_ENVIRONMENT` | no | `development` | Deployment environment |
|
||||||
|
| `OTEL_EXPORTER_OTLP_ENDPOINT` | yes | — | OTLP gRPC collector address (e.g. `alloy:4317`) |
|
||||||
|
| `OTEL_EXPORTER_OTLP_INSECURE` | no | `false` | Disable TLS (set `true` for local dev) |
|
||||||
|
|
||||||
|
## What to Avoid
|
||||||
|
|
||||||
|
- Do not import this module from any non-`main` package. Libraries must use only OTel API packages.
|
||||||
|
- Do not call `telemetry.New` more than once per process. Each call overwrites the global providers.
|
||||||
|
- Do not omit the `defer shutdown(ctx)`. Without it, buffered spans, metrics, and log records are lost on exit.
|
||||||
|
- Do not use a zero-value `Config`. Both `ServiceName` and `OTLPEndpoint` are required; `New` will return an error if the OTLP connection cannot be established.
|
||||||
|
- Do not wrap this in a `launcher.Component`. The shutdown function pattern is simpler and avoids adding a `launcher` dependency to this module.
|
||||||
|
|
||||||
|
## Testing Notes
|
||||||
|
|
||||||
|
- The test file (`telemetry_test.go`) uses a `fakeCollector` that opens a TCP listener but speaks no gRPC protocol. This is sufficient to test that `New` succeeds and returns a callable shutdown function — the fake server accepts connections so the gRPC dial does not get connection-refused.
|
||||||
|
- Tests that verify global provider replacement (`TestNew_SetsGlobalTracerProvider`, `TestNew_SetsGlobalMeterProvider`) must call `shutdown` in a `t.Cleanup` to restore state for subsequent tests. The short shutdown timeout (200ms) is intentional — the fake server cannot complete a gRPC flush, so errors from `shutdown(ctx)` are expected and ignored.
|
||||||
|
- `newResource` is tested separately (`TestNewResource_Fields`, `TestNewResource_MergesWithDefault`) as a pure function with no I/O.
|
||||||
|
- Do not test against a real Alloy or Tempo instance in unit tests. Use the fake collector pattern.
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 NOCHEBUENADEV
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
70
README.md
Normal file
70
README.md
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# telemetry
|
||||||
|
|
||||||
|
OTel SDK bootstrap — wires up TracerProvider, MeterProvider, and LoggerProvider with OTLP gRPC exporters pointed at Grafana Alloy (→ Tempo / Mimir / Loki).
|
||||||
|
|
||||||
|
**Tier 5 — import only from application `main` packages. Never import from framework libraries.**
|
||||||
|
|
||||||
|
## Why Tier 5
|
||||||
|
|
||||||
|
Micro-libs use only the OTel API (`go.opentelemetry.io/otel`) which ships a zero-overhead no-op default. This module activates the real SDK and sets the three OTel globals, so all micro-libs using the global API auto-instrument without knowing about this module.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```
|
||||||
|
require code.nochebuena.dev/go/telemetry v0.9.0
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```go
|
||||||
|
func main() {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
// Bootstrap telemetry before any other component.
|
||||||
|
shutdown, err := telemetry.New(ctx, telemetry.Config{
|
||||||
|
ServiceName: "order-service",
|
||||||
|
OTLPEndpoint: "alloy:4317",
|
||||||
|
OTLPInsecure: true, // dev only
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("telemetry: %v", err)
|
||||||
|
}
|
||||||
|
defer shutdown(ctx)
|
||||||
|
|
||||||
|
// Start the rest of the application…
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Config
|
||||||
|
|
||||||
|
| Field | Env var | Required | Default | Description |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| `ServiceName` | `OTEL_SERVICE_NAME` | ✓ | — | Service name in traces/metrics/logs |
|
||||||
|
| `ServiceVersion` | `OTEL_SERVICE_VERSION` | | `unknown` | Deployed version |
|
||||||
|
| `Environment` | `OTEL_ENVIRONMENT` | | `development` | Deployment environment |
|
||||||
|
| `OTLPEndpoint` | `OTEL_EXPORTER_OTLP_ENDPOINT` | ✓ | — | OTLP gRPC address (e.g. `alloy:4317`) |
|
||||||
|
| `OTLPInsecure` | `OTEL_EXPORTER_OTLP_INSECURE` | | `false` | Disable TLS (dev only) |
|
||||||
|
|
||||||
|
## What `New` sets up
|
||||||
|
|
||||||
|
| Signal | SDK | Exporter | Backend |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Traces | `sdktrace.TracerProvider` | OTLP gRPC | Grafana Alloy → Tempo |
|
||||||
|
| Metrics | `sdkmetric.MeterProvider` | OTLP gRPC | Grafana Alloy → Mimir |
|
||||||
|
| Logs | `sdklog.LoggerProvider` | OTLP gRPC | Grafana Alloy → Loki |
|
||||||
|
|
||||||
|
Also sets `otel.SetTextMapPropagator` with W3C TraceContext + Baggage.
|
||||||
|
|
||||||
|
## Shutdown
|
||||||
|
|
||||||
|
The returned `func(context.Context) error` flushes all pending telemetry and shuts down the three providers. Always call it before process exit.
|
||||||
|
|
||||||
|
```go
|
||||||
|
defer func() {
|
||||||
|
shutCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
if err := shutdown(shutCtx); err != nil {
|
||||||
|
log.Printf("telemetry shutdown: %v", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
```
|
||||||
19
doc.go
Normal file
19
doc.go
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
// Package telemetry bootstraps the OpenTelemetry SDK with OTLP gRPC exporters.
|
||||||
|
//
|
||||||
|
// It is a Tier-5 module — imported only by application main packages, never by
|
||||||
|
// framework libraries. Micro-libs use only the OTel API (zero-overhead no-op
|
||||||
|
// default). This module activates the real SDK with OTLP exporters so all
|
||||||
|
// micro-libs using the OTel global API auto-instrument without importing telemetry.
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
//
|
||||||
|
// shutdown, err := telemetry.New(ctx, telemetry.Config{
|
||||||
|
// ServiceName: "order-service",
|
||||||
|
// OTLPEndpoint: "alloy:4317",
|
||||||
|
// OTLPInsecure: true,
|
||||||
|
// })
|
||||||
|
// if err != nil {
|
||||||
|
// log.Fatalf("telemetry: %v", err)
|
||||||
|
// }
|
||||||
|
// defer shutdown(ctx)
|
||||||
|
package telemetry
|
||||||
33
docs/adr/ADR-001-tier5-app-only.md
Normal file
33
docs/adr/ADR-001-tier5-app-only.md
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# ADR-001: Tier 5 — Application Bootstrap Only
|
||||||
|
|
||||||
|
**Status:** Accepted
|
||||||
|
**Date:** 2026-03-18
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
OpenTelemetry is structured around a separation between the **API** (stable, zero-cost when no SDK is wired) and the **SDK** (the real implementation, with exporters, batch processors, and gRPC connections). Any package can import the OTel API and call `otel.Tracer(...)`, `otel.Meter(...)`, etc. at zero runtime cost — these calls are no-ops until the corresponding SDK provider (`TracerProvider`, `MeterProvider`, or `LoggerProvider`) is set as the global.
|
||||||
|
|
||||||
|
The question is where in the module tier hierarchy the SDK bootstrap belongs. The options are:
|
||||||
|
|
||||||
|
1. Include telemetry bootstrap in each micro-lib that produces signals (e.g., httpserver starts its own SDK).
|
||||||
|
2. Provide a standalone bootstrap module imported only by application `main` packages.
|
||||||
|
|
||||||
|
Option 1 would cause multiple SDK initializations, competing global registrations, and make it impossible for the application to control the exporter endpoint or sampling strategy. It would also force all micro-libs to carry the heavy OTel SDK as a dependency even when the application does not use telemetry.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
The `telemetry` module is **Tier 5** — the same tier as application bootstrap entry points. It must only be imported by application `main` packages (or equivalent wiring code). It must never be imported by:
|
||||||
|
|
||||||
|
- Framework libraries (Tier 0–3)
|
||||||
|
- Transport modules (Tier 4)
|
||||||
|
- Other Tier 5 modules that are not themselves `main`
|
||||||
|
|
||||||
|
Micro-libs use only the OTel API packages (`go.opentelemetry.io/otel`, `go.opentelemetry.io/otel/metric`, `go.opentelemetry.io/otel/log`) which default to no-op providers. When an application imports `telemetry` and calls `telemetry.New(...)`, the three global providers are replaced with real SDK providers, and all micro-libs that use the global API automatically emit signals without any change to their code.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
- No micro-lib needs to import or configure `telemetry`. The OTel no-op default means libraries compile and run correctly in unit tests without any collector present.
|
||||||
|
- Applications that do not call `telemetry.New(...)` produce no signals. This is correct — telemetry is opt-in at the application level.
|
||||||
|
- The `telemetry` module carries heavy SDK dependencies (OTLP gRPC exporters, batch processors). These do not appear in any library's dependency graph.
|
||||||
|
- Code review must reject any PR that imports `telemetry` from a non-`main` package. This rule is currently enforced by convention only; no build tool or linter checks it.
|
||||||
|
- There is no `launcher.Component` wrapper for telemetry. The caller is responsible for deferring the shutdown function, which flushes all exporters before process exit.
|
||||||
40
docs/adr/ADR-002-three-signal-otlp-bootstrap.md
Normal file
40
docs/adr/ADR-002-three-signal-otlp-bootstrap.md
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# ADR-002: Three-Signal OTLP gRPC Bootstrap
|
||||||
|
|
||||||
|
**Status:** Accepted
|
||||||
|
**Date:** 2026-03-18
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
OpenTelemetry defines three observability signals:
|
||||||
|
- **Traces** — distributed trace spans (latency, call graphs)
|
||||||
|
- **Metrics** — counters, gauges, histograms
|
||||||
|
- **Logs** — structured log records correlated with trace context
|
||||||
|
|
||||||
|
The target observability stack is the Grafana LGTM stack: **Loki** (logs), **Grafana** (dashboards), **Tempo** (traces), **Mimir** (metrics), fronted by **Grafana Alloy** as the OTLP collector/router.
|
||||||
|
|
||||||
|
The question is what to bootstrap and how to transport signals to the collector. Options include:
|
||||||
|
- Bootstrap only traces (the most common starting point), add others later.
|
||||||
|
- Bootstrap all three signals in one call, using a shared OTLP gRPC endpoint.
|
||||||
|
- Use per-signal configuration with separate endpoints.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
`telemetry.New(ctx, cfg)` bootstraps all three signals in a single call using a shared OTLP gRPC endpoint (`cfg.OTLPEndpoint`, e.g. `"alloy:4317"`):
|
||||||
|
|
||||||
|
1. **TracerProvider** — `sdktrace.NewTracerProvider` with an OTLP gRPC batch exporter; W3C TraceContext + Baggage propagation set globally via `otel.SetTextMapPropagator`.
|
||||||
|
2. **MeterProvider** — `sdkmetric.NewMeterProvider` with an OTLP gRPC periodic reader.
|
||||||
|
3. **LoggerProvider** — `sdklog.NewLoggerProvider` with an OTLP gRPC batch processor.
|
||||||
|
|
||||||
|
All three providers share one `*resource.Resource` built from `cfg.ServiceName`, `cfg.ServiceVersion`, and `cfg.Environment` (merged with the OTel default resource which contributes `service.instance.id` and SDK metadata).
|
||||||
|
|
||||||
|
Error handling during bootstrap is sequential and rolls back already-created providers: if metric exporter creation fails, the trace provider is shut down before returning the error; if log exporter creation fails, both trace and metric providers are shut down.
|
||||||
|
|
||||||
|
The returned `shutdown` function joins the shutdown of all three providers with `errors.Join`, so a single `defer shutdown(ctx)` flushes and closes all exporters.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
- One `Config` struct covers all three signals. Per-signal endpoint overrides are not supported in the current design. If per-signal routing is needed, Grafana Alloy handles that at the collector level.
|
||||||
|
- `OTLPInsecure: true` disables TLS on all three signal connections simultaneously. This is the expected setting for local development (Alloy runs on localhost or in the same Docker network).
|
||||||
|
- Failing to initialize any one of the three exporters aborts the entire bootstrap. A partially initialized telemetry state (e.g., traces but no metrics) is considered more dangerous than failing fast.
|
||||||
|
- The W3C TraceContext propagator is set globally. Applications that need custom propagators (e.g., B3) must call `otel.SetTextMapPropagator` after `telemetry.New` to override.
|
||||||
|
- All three providers use batch/periodic export. Synchronous export is not available through this bootstrap path.
|
||||||
43
docs/adr/ADR-003-otel-api-vs-sdk-separation.md
Normal file
43
docs/adr/ADR-003-otel-api-vs-sdk-separation.md
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
# ADR-003: OTel API vs SDK Separation — Global Provider Strategy
|
||||||
|
|
||||||
|
**Status:** Accepted
|
||||||
|
**Date:** 2026-03-18
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
OpenTelemetry Go has a two-package model:
|
||||||
|
- **API packages** (`go.opentelemetry.io/otel`, `.../otel/metric`, `.../otel/log`) — stable, backward-compatible interfaces. When called with no SDK registered, all operations are no-ops with zero allocation.
|
||||||
|
- **SDK packages** (`go.opentelemetry.io/otel/sdk/...`) — concrete implementations with exporters, processors, samplers. These have real runtime cost and external dependencies.
|
||||||
|
|
||||||
|
Micro-libs (httpserver, httpmw, logz, etc.) need to emit spans, metrics, or log records. They must not carry SDK dependencies. The question is how to connect API calls in libraries to the real SDK without importing SDK packages from libraries.
|
||||||
|
|
||||||
|
The two main strategies are:
|
||||||
|
1. **Explicit injection** — each library accepts a `TracerProvider`, `MeterProvider`, or `LoggerProvider` as a constructor argument, and the application injects the real SDK provider.
|
||||||
|
2. **Global provider** — libraries call `otel.Tracer(...)` / `otel.Meter(...)` / `global.Logger(...)` which consult the process-wide global provider. The application sets that global once at startup.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
Use the **OTel global provider** strategy. Micro-libs obtain tracers, meters, and loggers from the OTel global API. `telemetry.New(...)` sets all three globals:
|
||||||
|
|
||||||
|
```go
|
||||||
|
otel.SetTracerProvider(tp) // traces
|
||||||
|
otel.SetMeterProvider(mp) // metrics
|
||||||
|
global.SetLoggerProvider(lp) // logs (go.opentelemetry.io/otel/log/global)
|
||||||
|
```
|
||||||
|
|
||||||
|
This means:
|
||||||
|
- Libraries have zero SDK dependency. They only import `go.opentelemetry.io/otel` (and sub-packages for metric/log API).
|
||||||
|
- Before `telemetry.New` is called, all OTel calls in libraries are no-ops — correct behavior in unit tests and in applications that don't use telemetry.
|
||||||
|
- After `telemetry.New` is called, all OTel calls in libraries automatically route to the real OTLP exporters with no code change required in the libraries.
|
||||||
|
|
||||||
|
Explicit injection was considered but rejected because:
|
||||||
|
- It forces every library constructor to accept provider arguments even when the application doesn't use telemetry.
|
||||||
|
- It makes the calling code more verbose (every `New(logger, cfg, tracerProvider, meterProvider, ...)`) without clear benefit in a single-process application.
|
||||||
|
- The global approach is the design intent of the OTel Go project for application-level bootstrap.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
- The global providers are process-global mutable state. Tests that call `telemetry.New` will affect other tests running in the same process if tests run in parallel. The test suite uses a fake collector and short shutdown timeouts to mitigate this.
|
||||||
|
- If a library is used in a context where the global provider has not been set (e.g., a library test), all OTel calls are no-ops. This is correct and expected.
|
||||||
|
- Applications that call `telemetry.New` more than once (e.g., from a misconfigured init) will overwrite the globals on each call. Only one call to `telemetry.New` should occur per process.
|
||||||
|
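- The `go.opentelemetry.io/otel/log/global` package is a separate import from `go.opentelemetry.io/otel` because the log signal API is versioned separately and has not yet reached v1 (this module pins `go.opentelemetry.io/otel/log` at v0.18.0). Libraries using the log API must import the `log/global` sub-package to obtain loggers via `global.Logger(...)`; only `telemetry.New` calls `global.SetLoggerProvider`.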
|
||||||
35
go.mod
Normal file
35
go.mod
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
module code.nochebuena.dev/go/telemetry
|
||||||
|
|
||||||
|
go 1.25.0
|
||||||
|
|
||||||
|
require (
|
||||||
|
go.opentelemetry.io/otel v1.42.0
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.18.0
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.42.0
|
||||||
|
go.opentelemetry.io/otel/log v0.18.0
|
||||||
|
go.opentelemetry.io/otel/sdk v1.42.0
|
||||||
|
go.opentelemetry.io/otel/sdk/log v0.18.0
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.42.0
|
||||||
|
)
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
|
||||||
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||||
|
github.com/go-logr/logr v1.4.3 // indirect
|
||||||
|
github.com/go-logr/stdr v1.2.2 // indirect
|
||||||
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
|
||||||
|
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.42.0 // indirect
|
||||||
|
go.opentelemetry.io/otel/metric v1.42.0 // indirect
|
||||||
|
go.opentelemetry.io/otel/trace v1.42.0 // indirect
|
||||||
|
go.opentelemetry.io/proto/otlp v1.9.0 // indirect
|
||||||
|
golang.org/x/net v0.51.0 // indirect
|
||||||
|
golang.org/x/sys v0.41.0 // indirect
|
||||||
|
golang.org/x/text v0.34.0 // indirect
|
||||||
|
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 // indirect
|
||||||
|
google.golang.org/grpc v1.79.2 // indirect
|
||||||
|
google.golang.org/protobuf v1.36.11 // indirect
|
||||||
|
)
|
||||||
71
go.sum
Normal file
71
go.sum
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
|
||||||
|
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
|
||||||
|
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||||
|
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||||
|
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
||||||
|
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||||
|
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||||
|
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||||
|
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||||
|
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||||
|
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||||
|
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||||
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||||
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs=
|
||||||
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
|
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
|
||||||
|
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
||||||
|
go.opentelemetry.io/otel v1.42.0 h1:lSQGzTgVR3+sgJDAU/7/ZMjN9Z+vUip7leaqBKy4sho=
|
||||||
|
go.opentelemetry.io/otel v1.42.0/go.mod h1:lJNsdRMxCUIWuMlVJWzecSMuNjE7dOYyWlqOXWkdqCc=
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.18.0 h1:deI9UQMoGFgrg5iLPgzueqFPHevDl+28YKfSpPTI6rY=
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.18.0/go.mod h1:PFx9NgpNUKXdf7J4Q3agRxMs3Y07QhTCVipKmLsMKnU=
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0 h1:MdKucPl/HbzckWWEisiNqMPhRrAOQX8r4jTuGr636gk=
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0/go.mod h1:RolT8tWtfHcjajEH5wFIZ4Dgh5jpPdFXYV9pTAk/qjc=
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.42.0 h1:THuZiwpQZuHPul65w4WcwEnkX2QIuMT+UFoOrygtoJw=
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.42.0/go.mod h1:J2pvYM5NGHofZ2/Ru6zw/TNWnEQp5crgyDeSrYpXkAw=
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.42.0 h1:zWWrB1U6nqhS/k6zYB74CjRpuiitRtLLi68VcgmOEto=
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.42.0/go.mod h1:2qXPNBX1OVRC0IwOnfo1ljoid+RD0QK3443EaqVlsOU=
|
||||||
|
go.opentelemetry.io/otel/log v0.18.0 h1:XgeQIIBjZZrliksMEbcwMZefoOSMI1hdjiLEiiB0bAg=
|
||||||
|
go.opentelemetry.io/otel/log v0.18.0/go.mod h1:KEV1kad0NofR3ycsiDH4Yjcoj0+8206I6Ox2QYFSNgI=
|
||||||
|
go.opentelemetry.io/otel/metric v1.42.0 h1:2jXG+3oZLNXEPfNmnpxKDeZsFI5o4J+nz6xUlaFdF/4=
|
||||||
|
go.opentelemetry.io/otel/metric v1.42.0/go.mod h1:RlUN/7vTU7Ao/diDkEpQpnz3/92J9ko05BIwxYa2SSI=
|
||||||
|
go.opentelemetry.io/otel/sdk v1.42.0 h1:LyC8+jqk6UJwdrI/8VydAq/hvkFKNHZVIWuslJXYsDo=
|
||||||
|
go.opentelemetry.io/otel/sdk v1.42.0/go.mod h1:rGHCAxd9DAph0joO4W6OPwxjNTYWghRWmkHuGbayMts=
|
||||||
|
go.opentelemetry.io/otel/sdk/log v0.18.0 h1:n8OyZr7t7otkeTnPTbDNom6rW16TBYGtvyy2Gk6buQw=
|
||||||
|
go.opentelemetry.io/otel/sdk/log v0.18.0/go.mod h1:C0+wxkTwKpOCZLrlJ3pewPiiQwpzycPI/u6W0Z9fuYk=
|
||||||
|
go.opentelemetry.io/otel/sdk/log/logtest v0.18.0 h1:l3mYuPsuBx6UKE47BVcPrZoZ0q/KER57vbj2qkgDLXA=
|
||||||
|
go.opentelemetry.io/otel/sdk/log/logtest v0.18.0/go.mod h1:7cHtiVJpZebB3wybTa4NG+FUo5NPe3PROz1FqB0+qdw=
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.42.0 h1:D/1QR46Clz6ajyZ3G8SgNlTJKBdGp84q9RKCAZ3YGuA=
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.42.0/go.mod h1:Ua6AAlDKdZ7tdvaQKfSmnFTdHx37+J4ba8MwVCYM5hc=
|
||||||
|
go.opentelemetry.io/otel/trace v1.42.0 h1:OUCgIPt+mzOnaUTpOQcBiM/PLQ/Op7oq6g4LenLmOYY=
|
||||||
|
go.opentelemetry.io/otel/trace v1.42.0/go.mod h1:f3K9S+IFqnumBkKhRJMeaZeNk9epyhnCmQh/EysQCdc=
|
||||||
|
go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A=
|
||||||
|
go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4=
|
||||||
|
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
||||||
|
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
||||||
|
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
|
||||||
|
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
|
||||||
|
golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k=
|
||||||
|
golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||||
|
golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk=
|
||||||
|
golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA=
|
||||||
|
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
|
||||||
|
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
|
||||||
|
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 h1:JLQynH/LBHfCTSbDWl+py8C+Rg/k1OVH3xfcaiANuF0=
|
||||||
|
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:kSJwQxqmFXeo79zOmbrALdflXQeAYcUbgS7PbpMknCY=
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 h1:mWPCjDEyshlQYzBpMNHaEof6UX1PmHcaUODUywQ0uac=
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
|
||||||
|
google.golang.org/grpc v1.79.2 h1:fRMD94s2tITpyJGtBBn7MkMseNpOZU8ZxgC3MMBaXRU=
|
||||||
|
google.golang.org/grpc v1.79.2/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ=
|
||||||
|
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||||
|
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
129
telemetry.go
Normal file
129
telemetry.go
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
package telemetry
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
"go.opentelemetry.io/otel"
|
||||||
|
"go.opentelemetry.io/otel/attribute"
|
||||||
|
otlploggrpc "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
|
||||||
|
otlpmetricgrpc "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||||
|
otlptracegrpc "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
|
||||||
|
"go.opentelemetry.io/otel/log/global"
|
||||||
|
"go.opentelemetry.io/otel/propagation"
|
||||||
|
sdklog "go.opentelemetry.io/otel/sdk/log"
|
||||||
|
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||||
|
"go.opentelemetry.io/otel/sdk/resource"
|
||||||
|
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||||
|
semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Config holds OTel bootstrap configuration.
//
// The `env` / `envDefault` struct tags name the environment variable and the
// fallback value associated with each field. Nothing in this file reads those
// tags, so populating a Config from the environment is left to whichever
// config loader the caller uses.
type Config struct {
	// ServiceName identifies the service in traces, metrics, and logs.
	// It becomes the service.name resource attribute.
	ServiceName string `env:"OTEL_SERVICE_NAME,required"`
	// ServiceVersion is the deployed version (e.g. "1.4.2").
	// It becomes the service.version resource attribute.
	ServiceVersion string `env:"OTEL_SERVICE_VERSION" envDefault:"unknown"`
	// Environment is the deployment environment (e.g. "production", "staging").
	// It becomes the deployment.environment resource attribute.
	Environment string `env:"OTEL_ENVIRONMENT" envDefault:"development"`
	// OTLPEndpoint is the OTLP gRPC collector address (e.g. "alloy:4317").
	// The same value is handed to the trace, metric, and log exporters through
	// their WithEndpoint option.
	// NOTE(review): presumably a bare host:port without a URL scheme, as in the
	// example above — confirm against the exporter documentation.
	OTLPEndpoint string `env:"OTEL_EXPORTER_OTLP_ENDPOINT,required"`
	// OTLPInsecure disables TLS for the OTLP connection. Set true in development.
	// When true, New adds the WithInsecure option to all three exporters.
	OTLPInsecure bool `env:"OTEL_EXPORTER_OTLP_INSECURE" envDefault:"false"`
}
|
||||||
|
|
||||||
|
// New bootstraps the full OTel SDK:
|
||||||
|
// - TracerProvider → OTLP gRPC → Grafana Alloy → Tempo
|
||||||
|
// - MeterProvider → OTLP gRPC → Grafana Alloy → Mimir
|
||||||
|
// - LoggerProvider → OTLP gRPC → Grafana Alloy → Loki
|
||||||
|
//
|
||||||
|
// Sets the three OTel globals so all micro-libs using the global API
|
||||||
|
// auto-instrument without importing this module.
|
||||||
|
//
|
||||||
|
// The returned shutdown function flushes all exporters and must be called
|
||||||
|
// before process exit (defer it in main or wire it into the launcher).
|
||||||
|
// Returns (shutdown, nil) on success, (nil, err) on failure.
|
||||||
|
func New(ctx context.Context, cfg Config) (func(context.Context) error, error) {
|
||||||
|
res := newResource(cfg)
|
||||||
|
|
||||||
|
// --- TracerProvider (traces → Tempo) ---
|
||||||
|
traceOpts := []otlptracegrpc.Option{
|
||||||
|
otlptracegrpc.WithEndpoint(cfg.OTLPEndpoint),
|
||||||
|
}
|
||||||
|
if cfg.OTLPInsecure {
|
||||||
|
traceOpts = append(traceOpts, otlptracegrpc.WithInsecure())
|
||||||
|
}
|
||||||
|
traceExporter, err := otlptracegrpc.New(ctx, traceOpts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
tp := sdktrace.NewTracerProvider(
|
||||||
|
sdktrace.WithBatcher(traceExporter),
|
||||||
|
sdktrace.WithResource(res),
|
||||||
|
)
|
||||||
|
otel.SetTracerProvider(tp)
|
||||||
|
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
|
||||||
|
propagation.TraceContext{},
|
||||||
|
propagation.Baggage{},
|
||||||
|
))
|
||||||
|
|
||||||
|
// --- MeterProvider (metrics → Mimir) ---
|
||||||
|
metricOpts := []otlpmetricgrpc.Option{
|
||||||
|
otlpmetricgrpc.WithEndpoint(cfg.OTLPEndpoint),
|
||||||
|
}
|
||||||
|
if cfg.OTLPInsecure {
|
||||||
|
metricOpts = append(metricOpts, otlpmetricgrpc.WithInsecure())
|
||||||
|
}
|
||||||
|
metricExporter, err := otlpmetricgrpc.New(ctx, metricOpts...)
|
||||||
|
if err != nil {
|
||||||
|
_ = tp.Shutdown(ctx)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
mp := sdkmetric.NewMeterProvider(
|
||||||
|
sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExporter)),
|
||||||
|
sdkmetric.WithResource(res),
|
||||||
|
)
|
||||||
|
otel.SetMeterProvider(mp)
|
||||||
|
|
||||||
|
// --- LoggerProvider (logs → Loki) ---
|
||||||
|
logOpts := []otlploggrpc.Option{
|
||||||
|
otlploggrpc.WithEndpoint(cfg.OTLPEndpoint),
|
||||||
|
}
|
||||||
|
if cfg.OTLPInsecure {
|
||||||
|
logOpts = append(logOpts, otlploggrpc.WithInsecure())
|
||||||
|
}
|
||||||
|
logExporter, err := otlploggrpc.New(ctx, logOpts...)
|
||||||
|
if err != nil {
|
||||||
|
_ = tp.Shutdown(ctx)
|
||||||
|
_ = mp.Shutdown(ctx)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
lp := sdklog.NewLoggerProvider(
|
||||||
|
sdklog.WithProcessor(sdklog.NewBatchProcessor(logExporter)),
|
||||||
|
sdklog.WithResource(res),
|
||||||
|
)
|
||||||
|
global.SetLoggerProvider(lp)
|
||||||
|
|
||||||
|
shutdown := func(ctx context.Context) error {
|
||||||
|
return errors.Join(
|
||||||
|
tp.Shutdown(ctx),
|
||||||
|
mp.Shutdown(ctx),
|
||||||
|
lp.Shutdown(ctx),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return shutdown, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// newResource builds an OTel resource with service identity and environment attributes.
|
||||||
|
func newResource(cfg Config) *resource.Resource {
|
||||||
|
r, _ := resource.Merge(
|
||||||
|
resource.Default(),
|
||||||
|
resource.NewWithAttributes(
|
||||||
|
semconv.SchemaURL,
|
||||||
|
semconv.ServiceName(cfg.ServiceName),
|
||||||
|
semconv.ServiceVersion(cfg.ServiceVersion),
|
||||||
|
attribute.String("deployment.environment", cfg.Environment),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return r
|
||||||
|
}
|
||||||
152
telemetry_test.go
Normal file
152
telemetry_test.go
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
package telemetry
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"go.opentelemetry.io/otel"
|
||||||
|
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||||
|
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||||
|
"go.opentelemetry.io/otel/sdk/resource"
|
||||||
|
semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fakeCollector opens a TCP listener on an ephemeral loopback port and returns
// its address. Every accepted connection is closed immediately, so the
// listener speaks no protocol; it exists only so that dialing the address is
// not refused. The listener is closed through t.Cleanup, which also ends the
// background accept loop.
func fakeCollector(t *testing.T) string {
	t.Helper()

	listener, listenErr := net.Listen("tcp", "127.0.0.1:0")
	if listenErr != nil {
		t.Fatalf("fakeCollector: %v", listenErr)
	}
	t.Cleanup(func() { listener.Close() })

	// Accept and drop connections until the listener is closed.
	drain := func() {
		for {
			c, acceptErr := listener.Accept()
			if acceptErr != nil {
				return
			}
			c.Close()
		}
	}
	go drain()

	return listener.Addr().String()
}
|
||||||
|
|
||||||
|
func cfgWith(endpoint string) Config {
|
||||||
|
return Config{
|
||||||
|
ServiceName: "test-service",
|
||||||
|
ServiceVersion: "0.0.1",
|
||||||
|
Environment: "test",
|
||||||
|
OTLPEndpoint: endpoint,
|
||||||
|
OTLPInsecure: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNew_ShutdownCallable(t *testing.T) {
|
||||||
|
endpoint := fakeCollector(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
shutdown, err := New(ctx, cfgWith(endpoint))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("New: %v", err)
|
||||||
|
}
|
||||||
|
if shutdown == nil {
|
||||||
|
t.Fatal("shutdown func is nil")
|
||||||
|
}
|
||||||
|
// Use a short timeout: the fake server doesn't speak gRPC so flush will fail,
|
||||||
|
// but we only care that shutdown is callable and returns (not that export succeeds).
|
||||||
|
shutCtx, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
_ = shutdown(shutCtx) // export errors with fake server are expected
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNew_SetsGlobalTracerProvider(t *testing.T) {
|
||||||
|
endpoint := fakeCollector(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
shutdown, err := New(ctx, cfgWith(endpoint))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("New: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() {
|
||||||
|
shutCtx, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
_ = shutdown(shutCtx)
|
||||||
|
})
|
||||||
|
|
||||||
|
tp := otel.GetTracerProvider()
|
||||||
|
if _, ok := tp.(*sdktrace.TracerProvider); !ok {
|
||||||
|
t.Errorf("expected *sdktrace.TracerProvider, got %T", tp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNew_SetsGlobalMeterProvider(t *testing.T) {
|
||||||
|
endpoint := fakeCollector(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
shutdown, err := New(ctx, cfgWith(endpoint))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("New: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() {
|
||||||
|
shutCtx, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
_ = shutdown(shutCtx)
|
||||||
|
})
|
||||||
|
|
||||||
|
mp := otel.GetMeterProvider()
|
||||||
|
if _, ok := mp.(*sdkmetric.MeterProvider); !ok {
|
||||||
|
t.Errorf("expected *sdkmetric.MeterProvider, got %T", mp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewResource_Fields(t *testing.T) {
|
||||||
|
cfg := Config{
|
||||||
|
ServiceName: "my-service",
|
||||||
|
ServiceVersion: "2.0.0",
|
||||||
|
Environment: "staging",
|
||||||
|
}
|
||||||
|
res := newResource(cfg)
|
||||||
|
|
||||||
|
check := func(key, want string) {
|
||||||
|
t.Helper()
|
||||||
|
for _, kv := range res.Attributes() {
|
||||||
|
if string(kv.Key) == key {
|
||||||
|
if got := kv.Value.AsString(); got != want {
|
||||||
|
t.Errorf("resource[%s]: want %q, got %q", key, want, got)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.Errorf("resource attribute %q not found", key)
|
||||||
|
}
|
||||||
|
|
||||||
|
check(string(semconv.ServiceNameKey), "my-service")
|
||||||
|
check(string(semconv.ServiceVersionKey), "2.0.0")
|
||||||
|
check("deployment.environment", "staging")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewResource_MergesWithDefault(t *testing.T) {
|
||||||
|
cfg := Config{ServiceName: "svc"}
|
||||||
|
res := newResource(cfg)
|
||||||
|
if res == nil {
|
||||||
|
t.Fatal("newResource returned nil")
|
||||||
|
}
|
||||||
|
// Default resource contributes service.instance.id and telemetry.sdk.* fields.
|
||||||
|
if len(res.Attributes()) == 0 {
|
||||||
|
t.Error("resource has no attributes")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewResource_IsNotDefault(t *testing.T) {
|
||||||
|
cfg := Config{ServiceName: "unique-service-name"}
|
||||||
|
res := newResource(cfg)
|
||||||
|
def := resource.Default()
|
||||||
|
// Custom resource must differ from the bare default: it has extra attributes.
|
||||||
|
if len(res.Attributes()) <= len(def.Attributes()) {
|
||||||
|
t.Error("newResource added no attributes beyond the default")
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user