feat(telemetry): initial stable release v0.9.0

Single-call OTel SDK bootstrap setting all three global providers (traces → Tempo, metrics → Mimir, logs → Loki) over OTLP gRPC.

What's included:
- New(ctx, Config): bootstraps TracerProvider, MeterProvider, and LoggerProvider with OTLP gRPC exporters; sets OTel globals
- W3C TraceContext + Baggage propagation set globally
- Resource tagging: service.name, service.version, deployment.environment merged with SDK defaults
- OTLPInsecure bool for development environments without TLS
- Sequential rollback on partial initialization failure — no dangling exporters on error
- Returns a shutdown func(context.Context) error; the caller defers it in main or wires it into the launcher's BeforeStop
- Tier 5 module: must be imported only by application main packages; zero micro-lib dependencies

Tested-via: todo-api POC integration
Reviewed-against: docs/adr/
This commit is contained in:
2026-03-18 14:13:29 -06:00
commit ed4e9ef161
14 changed files with 787 additions and 0 deletions

152
telemetry_test.go Normal file
View File

@@ -0,0 +1,152 @@
package telemetry
import (
"context"
"net"
"testing"
"time"
"go.opentelemetry.io/otel"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
)
// fakeCollector starts a TCP listener on an ephemeral loopback port that
// accepts connections but speaks no protocol: every accepted connection is
// closed immediately.
//
// It returns the listener's address only. Teardown is registered on t via
// t.Cleanup, which closes the listener and waits for the accept goroutine to
// exit, so nothing outlives the calling test.
func fakeCollector(t *testing.T) string {
	t.Helper()

	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("fakeCollector: %v", err)
	}

	// Accept in background so gRPC dial doesn't get refused.
	done := make(chan struct{})
	go func() {
		defer close(done)
		for {
			conn, err := ln.Accept()
			if err != nil {
				// Accept fails once the listener is closed; that is the stop signal.
				return
			}
			_ = conn.Close() // best effort: the peer is never read from or written to
		}
	}()

	t.Cleanup(func() {
		_ = ln.Close() // best effort: closing unblocks Accept above
		<-done
	})

	return ln.Addr().String()
}
// cfgWith builds the Config shared by the tests in this file: fixed service
// identity fields, an insecure OTLP connection, and the supplied endpoint.
func cfgWith(endpoint string) Config {
	var cfg Config
	cfg.ServiceName = "test-service"
	cfg.ServiceVersion = "0.0.1"
	cfg.Environment = "test"
	cfg.OTLPEndpoint = endpoint
	cfg.OTLPInsecure = true
	return cfg
}
// TestNew_ShutdownCallable checks that New succeeds against the fake collector
// and hands back a non-nil shutdown function that can be invoked and returns.
func TestNew_ShutdownCallable(t *testing.T) {
	addr := fakeCollector(t)
	base := context.Background()

	stop, err := New(base, cfgWith(addr))
	switch {
	case err != nil:
		t.Fatalf("New: %v", err)
	case stop == nil:
		t.Fatal("shutdown func is nil")
	}

	// Bound the call with a short deadline. The fake server doesn't speak gRPC,
	// so the final flush is expected to fail; only "callable and returns" is
	// under test here, not export success.
	deadline, release := context.WithTimeout(base, 200*time.Millisecond)
	defer release()

	_ = stop(deadline) // export errors with fake server are expected
}
// TestNew_SetsGlobalTracerProvider checks that, after New returns, the global
// tracer provider reported by otel is an SDK *sdktrace.TracerProvider.
func TestNew_SetsGlobalTracerProvider(t *testing.T) {
	addr := fakeCollector(t)
	base := context.Background()

	stop, err := New(base, cfgWith(addr))
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	// Shut the providers down when the test ends; the result is ignored
	// because the fake collector cannot accept an export.
	t.Cleanup(func() {
		deadline, release := context.WithTimeout(base, 200*time.Millisecond)
		defer release()
		_ = stop(deadline)
	})

	switch got := otel.GetTracerProvider().(type) {
	case *sdktrace.TracerProvider:
		// Expected concrete type; nothing to report.
	default:
		t.Errorf("expected *sdktrace.TracerProvider, got %T", got)
	}
}
// TestNew_SetsGlobalMeterProvider checks that, after New returns, the global
// meter provider reported by otel is an SDK *sdkmetric.MeterProvider.
func TestNew_SetsGlobalMeterProvider(t *testing.T) {
	addr := fakeCollector(t)
	base := context.Background()

	stop, err := New(base, cfgWith(addr))
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	// Shut the providers down when the test ends; the result is ignored
	// because the fake collector cannot accept an export.
	t.Cleanup(func() {
		deadline, release := context.WithTimeout(base, 200*time.Millisecond)
		defer release()
		_ = stop(deadline)
	})

	switch got := otel.GetMeterProvider().(type) {
	case *sdkmetric.MeterProvider:
		// Expected concrete type; nothing to report.
	default:
		t.Errorf("expected *sdkmetric.MeterProvider, got %T", got)
	}
}
func TestNewResource_Fields(t *testing.T) {
cfg := Config{
ServiceName: "my-service",
ServiceVersion: "2.0.0",
Environment: "staging",
}
res := newResource(cfg)
check := func(key, want string) {
t.Helper()
for _, kv := range res.Attributes() {
if string(kv.Key) == key {
if got := kv.Value.AsString(); got != want {
t.Errorf("resource[%s]: want %q, got %q", key, want, got)
}
return
}
}
t.Errorf("resource attribute %q not found", key)
}
check(string(semconv.ServiceNameKey), "my-service")
check(string(semconv.ServiceVersionKey), "2.0.0")
check("deployment.environment", "staging")
}
func TestNewResource_MergesWithDefault(t *testing.T) {
cfg := Config{ServiceName: "svc"}
res := newResource(cfg)
if res == nil {
t.Fatal("newResource returned nil")
}
// Default resource contributes service.instance.id and telemetry.sdk.* fields.
if len(res.Attributes()) == 0 {
t.Error("resource has no attributes")
}
}
func TestNewResource_IsNotDefault(t *testing.T) {
cfg := Config{ServiceName: "unique-service-name"}
res := newResource(cfg)
def := resource.Default()
// Custom resource must differ from the bare default: it has extra attributes.
if len(res.Attributes()) <= len(def.Attributes()) {
t.Error("newResource added no attributes beyond the default")
}
}