feat(telemetry): initial implementation — OpenTelemetry traces, metrics, logs (v1.0.0)

Introduces code.nochebuena.dev/einherjar/telemetry — the observability bootstrap
starter for the Einherjar framework. Absorbs the telemetry package from micro-lib,
migrating from OpenCensus to OpenTelemetry SDK v1.42.

Bootstrap functions (not lifecycle.Component — telemetry must be initialized before
the launcher starts, and its shutdown must run after all components stop):
- New(ctx, cfg) (func(context.Context) error, error) — production mode; exports
  traces, metrics, and logs via OTLP over gRPC to the configured endpoint;
  returns a shutdown function to be deferred in main()
- NewConsole(ctx, logger, cfg) (func(context.Context) error, error) — development
  mode; writes structured telemetry to the provided logging.Logger; no network
  dependency; suitable for local development and CI

Config (EINHERJAR_OTEL_* env vars; default values in parentheses):
  ServiceName(required), ServiceVersion(unknown), Environment(development),
  OTLPEndpoint(required for New), OTLPInsecure(false)

ConsoleConfig (EINHERJAR_OTEL_* env vars; default values in parentheses):
  ServiceName(required), ServiceVersion(unknown), Environment(development)

- identifiable.go: package-level Module variable (observability.Identifiable) for version
  identification — telemetry bootstraps before the launcher; not registered as a lifecycle component
This commit is contained in:
2026-05-29 16:09:54 +00:00
commit b583af1973
16 changed files with 2177 additions and 0 deletions

139
telemetry.go Normal file
View File

@@ -0,0 +1,139 @@
package telemetry
import (
"context"
"errors"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
otlploggrpc "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
otlpmetricgrpc "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
otlptracegrpc "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/log/global"
"go.opentelemetry.io/otel/propagation"
sdklog "go.opentelemetry.io/otel/sdk/log"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
"code.nochebuena.dev/einherjar/core/xerrors"
)
// New bootstraps the full OTel SDK:
//   - TracerProvider → OTLP gRPC → Grafana Alloy → Tempo
//   - MeterProvider → OTLP gRPC → Grafana Alloy → Mimir
//   - LoggerProvider → OTLP gRPC → Grafana Alloy → Loki
//
// All three exporters target cfg.OTLPEndpoint; cfg.OTLPInsecure adds the
// WithInsecure option to each of them.
//
// On success it sets the three OTel globals (plus the TraceContext+Baggage
// text-map propagator) so all starters using the global API auto-instrument
// without importing this module. The globals are installed only after every
// provider has been built: a failed call never leaves the process-wide
// globals pointing at a provider that has already been shut down.
//
// The returned shutdown function shuts down the trace, metric and log
// providers in that order and joins any errors; it must be called before
// process exit (defer it in main or wire it into the launcher).
// Returns (shutdown, nil) on success, (nil, err) on failure.
func New(ctx context.Context, cfg Config) (func(context.Context) error, error) {
	res := newResource(cfg)

	// --- TracerProvider (traces → Tempo) ---
	traceOpts := []otlptracegrpc.Option{
		otlptracegrpc.WithEndpoint(cfg.OTLPEndpoint),
	}
	if cfg.OTLPInsecure {
		traceOpts = append(traceOpts, otlptracegrpc.WithInsecure())
	}
	traceExporter, err := otlptracegrpc.New(ctx, traceOpts...)
	if err != nil {
		return nil, xerrors.Internal("telemetry: trace exporter init failed").WithError(err)
	}
	tp := sdktrace.NewTracerProvider(
		sdktrace.WithBatcher(traceExporter),
		sdktrace.WithResource(res),
	)

	// --- MeterProvider (metrics → Mimir) ---
	metricOpts := []otlpmetricgrpc.Option{
		otlpmetricgrpc.WithEndpoint(cfg.OTLPEndpoint),
	}
	if cfg.OTLPInsecure {
		metricOpts = append(metricOpts, otlpmetricgrpc.WithInsecure())
	}
	metricExporter, err := otlpmetricgrpc.New(ctx, metricOpts...)
	if err != nil {
		// Best-effort cleanup of the provider built so far; the init error
		// is the one the caller needs, so a cleanup error is dropped.
		_ = tp.Shutdown(ctx)
		return nil, xerrors.Internal("telemetry: metric exporter init failed").WithError(err)
	}
	mp := sdkmetric.NewMeterProvider(
		sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExporter)),
		sdkmetric.WithResource(res),
	)

	// --- LoggerProvider (logs → Loki) ---
	logOpts := []otlploggrpc.Option{
		otlploggrpc.WithEndpoint(cfg.OTLPEndpoint),
	}
	if cfg.OTLPInsecure {
		logOpts = append(logOpts, otlploggrpc.WithInsecure())
	}
	logExporter, err := otlploggrpc.New(ctx, logOpts...)
	if err != nil {
		// Best-effort cleanup, same reasoning as above.
		_ = tp.Shutdown(ctx)
		_ = mp.Shutdown(ctx)
		return nil, xerrors.Internal("telemetry: log exporter init failed").WithError(err)
	}
	lp := sdklog.NewLoggerProvider(
		sdklog.WithProcessor(sdklog.NewBatchProcessor(logExporter)),
		sdklog.WithResource(res),
	)

	// Every provider exists and nothing below can fail, so it is now safe to
	// publish them through the process-wide globals.
	otel.SetTracerProvider(tp)
	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
		propagation.TraceContext{},
		propagation.Baggage{},
	))
	otel.SetMeterProvider(mp)
	global.SetLoggerProvider(lp)

	shutdown := func(ctx context.Context) error {
		// Attempt every shutdown even if an earlier one fails, labelling
		// each failure with its provider.
		var errs []error
		if err := tp.Shutdown(ctx); err != nil {
			errs = append(errs, &providerErr{"trace", err})
		}
		if err := mp.Shutdown(ctx); err != nil {
			errs = append(errs, &providerErr{"metric", err})
		}
		if err := lp.Shutdown(ctx); err != nil {
			errs = append(errs, &providerErr{"log", err})
		}
		// errors.Join returns nil when errs is empty.
		return errors.Join(errs...)
	}
	return shutdown, nil
}
// providerErr tags a provider shutdown error with the name of the provider
// that produced it.
type providerErr struct {
	provider string // provider label inserted into the message
	err      error  // underlying error, exposed through Unwrap
}

// Error renders the message as
// "telemetry: <provider> provider shutdown: <underlying error>".
func (p *providerErr) Error() string {
	const prefix = "telemetry: "
	cause := p.err.Error()
	return prefix + p.provider + " provider shutdown: " + cause
}

// Unwrap returns the wrapped error.
func (p *providerErr) Unwrap() error {
	return p.err
}
// newResource assembles the OTel resource describing this service: the SDK's
// default resource merged with the service name, service version and
// deployment environment taken from cfg. If the merge reports an error, the
// default resource alone is returned.
func newResource(cfg Config) *resource.Resource {
	base := resource.Default()

	identity := []attribute.KeyValue{
		semconv.ServiceName(cfg.ServiceName),
		semconv.ServiceVersion(cfg.ServiceVersion),
		attribute.String("deployment.environment", cfg.Environment),
	}
	// The schema URL is left empty on purpose so the merge does not conflict
	// with the schema URL carried by resource.Default().
	service := resource.NewWithAttributes("", identity...)

	merged, err := resource.Merge(base, service)
	if err == nil {
		return merged
	}
	return resource.Default()
}