package index import ( "bytes" "fmt" "go/ast" "go/doc" "go/parser" "go/printer" "go/token" "io/fs" "os" "path/filepath" "regexp" "sort" "strings" "time" ) // Build walks the Einherjar repository rooted at repoRoot, indexes every // sibling module (any immediate subdirectory containing a go.mod), and // returns an Index ready to be written to disk. // // The mcp module itself is skipped to avoid self-reference. func Build(repoRoot string) (*Index, error) { idx := &Index{ Schema: SchemaVersion, Framework: "einherjar", BuiltAt: time.Now().UTC(), } entries, err := os.ReadDir(repoRoot) if err != nil { return nil, fmt.Errorf("read repo root: %w", err) } for _, e := range entries { if !e.IsDir() { continue } name := e.Name() if strings.HasPrefix(name, ".") || name == "mcp" || name == "vendor" { continue } modDir := filepath.Join(repoRoot, name) if _, err := os.Stat(filepath.Join(modDir, "go.mod")); err != nil { continue } mod, err := buildModule(modDir, name) if err != nil { return nil, fmt.Errorf("module %s: %w", name, err) } idx.Modules = append(idx.Modules, *mod) } return idx, nil } func buildModule(modDir, name string) (*Module, error) { m := &Module{ Name: name, DependsOn: []string{}, Compliance: Compliance{ InterfaceAsserts: []InterfaceAssert{}, Tests: []ComplianceTest{}, }, } if data, err := os.ReadFile(filepath.Join(modDir, "go.mod")); err == nil { m.ImportPath = parseModulePath(data) m.GoVersion = parseGoVersion(data) m.DependsOn = parseDependsOn(data, name) } if data, err := os.ReadFile(filepath.Join(modDir, "README.md")); err == nil { m.Readme = string(data) m.Purpose = extractPurpose(string(data)) m.Examples = extractExamples(name, string(data)) } if data, err := os.ReadFile(filepath.Join(modDir, "CHANGELOG.md")); err == nil { m.Changelog = string(data) } m.Compliance = parseCompliance(name, modDir) adrDir := filepath.Join(modDir, "docs", "adr") if adrs, err := os.ReadDir(adrDir); err == nil { for _, a := range adrs { if a.IsDir() || !strings.HasPrefix(a.Name(), "ADR-") || !strings.HasSuffix(a.Name(), ".md") { continue } body, err := os.ReadFile(filepath.Join(adrDir, a.Name())) if err != nil { continue } id, title := parseADRHeader(a.Name(), body) m.ADRs = append(m.ADRs, ADR{Module: name, ID: id, Title: title, Body: string(body)}) } } if err := indexPackages(modDir, m); err != nil { return nil, err } return m, nil } func indexPackages(modDir string, m *Module) error { return filepath.WalkDir(modDir, func(path string, d fs.DirEntry, err error) error { if err != nil { return nil } if !d.IsDir() { return nil } base := d.Name() if base != filepath.Base(modDir) && (strings.HasPrefix(base, ".") || base == "vendor" || base == "testdata" || base == "docs") { return filepath.SkipDir } fset := token.NewFileSet() pkgs, err := parser.ParseDir(fset, path, func(fi os.FileInfo) bool { return !strings.HasSuffix(fi.Name(), "_test.go") }, parser.ParseComments) if err != nil || len(pkgs) == 0 { return nil } rel, _ := filepath.Rel(modDir, path) if rel == "." { rel = "" } for pkgName, pkg := range pkgs { if pkgName == "main" { continue } subName := pkgName if rel == "" { subName = "" } docPkg := doc.New(pkg, "./", doc.AllDecls) if rel == "" && m.Doc == "" && docPkg.Doc != "" { m.Doc = strings.TrimSpace(docPkg.Doc) } if rel != "" || docPkg.Doc != "" { m.SubPackages = append(m.SubPackages, SubPackage{ Name: subName, ImportPath: joinImport(m.ImportPath, rel), Doc: strings.TrimSpace(docPkg.Doc), }) } collectSymbols(m, subName, modDir, fset, docPkg) } return nil }) } func collectSymbols(m *Module, sub, modDir string, fset *token.FileSet, p *doc.Package) { for _, t := range p.Types { kind := "type" if isInterface(t.Decl) { kind = "interface" } m.Symbols = append(m.Symbols, newSymbol(m.Name, sub, kind, t.Name, t.Doc, t.Decl, fset, modDir)) for _, f := range t.Funcs { m.Symbols = append(m.Symbols, newSymbol(m.Name, sub, "func", f.Name, f.Doc, f.Decl, fset, modDir)) } for _, f := range t.Methods { m.Symbols = append(m.Symbols, newSymbol(m.Name, sub, "method", t.Name+"."+f.Name, f.Doc, f.Decl, fset, modDir)) } } for _, f := range p.Funcs { m.Symbols = append(m.Symbols, newSymbol(m.Name, sub, "func", f.Name, f.Doc, f.Decl, fset, modDir)) } for _, v := range p.Consts { for _, name := range v.Names { m.Symbols = append(m.Symbols, newSymbol(m.Name, sub, "const", name, v.Doc, v.Decl, fset, modDir)) } } for _, v := range p.Vars { for _, name := range v.Names { m.Symbols = append(m.Symbols, newSymbol(m.Name, sub, "var", name, v.Doc, v.Decl, fset, modDir)) } } } func newSymbol(mod, sub, kind, name, docStr string, decl ast.Node, fset *token.FileSet, modDir string) Symbol { pos := fset.Position(decl.Pos()) rel, _ := filepath.Rel(modDir, pos.Filename) return Symbol{ Module: mod, SubPackage: sub, Kind: kind, Name: name, Signature: formatNode(fset, decl), Doc: strings.TrimSpace(docStr), File: rel, Line: pos.Line, } } func formatNode(fset *token.FileSet, node ast.Node) string { var buf bytes.Buffer cfg := printer.Config{Mode: printer.UseSpaces, Tabwidth: 4} if err := cfg.Fprint(&buf, fset, node); err != nil { return "" } s := buf.String() if i := strings.Index(s, "{"); i > 0 && (strings.HasPrefix(s, "func") || strings.HasPrefix(s, "type")) { return strings.TrimSpace(s[:i]) } return strings.TrimSpace(s) } func isInterface(decl *ast.GenDecl) bool { if decl == nil { return false } for _, spec := range decl.Specs { ts, ok := spec.(*ast.TypeSpec) if !ok { continue } if _, ok := ts.Type.(*ast.InterfaceType); ok { return true } } return false } var ( modulePathRe = regexp.MustCompile(`(?m)^module\s+(\S+)`) goVersionRe = regexp.MustCompile(`(?m)^go\s+(\S+)`) adrNameRe = regexp.MustCompile(`^(ADR-\d+)-(.+)\.md$`) h1Re = regexp.MustCompile(`(?m)^#\s+(.+)$`) fenceRe = regexp.MustCompile("(?s)```([a-zA-Z0-9_+\\-]*)\\n(.*?)```") einherjarDepRe = regexp.MustCompile(`code\.nochebuena\.dev/einherjar/([a-zA-Z0-9_-]+)`) ) func parseModulePath(data []byte) string { if m := modulePathRe.FindSubmatch(data); m != nil { return string(m[1]) } return "" } func parseGoVersion(data []byte) string { if m := goVersionRe.FindSubmatch(data); m != nil { return string(m[1]) } return "" } // parseDependsOn extracts the set of einherjar modules referenced by go.mod's // require/replace lines. The module's own name is filtered out so a module // never lists itself as a dependency. func parseDependsOn(data []byte, self string) []string { seen := map[string]bool{} for _, m := range einherjarDepRe.FindAllSubmatch(data, -1) { name := string(m[1]) if name == self { continue } seen[name] = true } out := make([]string, 0, len(seen)) for k := range seen { out = append(out, k) } sort.Strings(out) return out } // parseCompliance parses compliance_test.go (when present) and returns its // interface assertions and test functions. Missing or unparseable files yield // an empty Compliance, not an error — the file is optional. func parseCompliance(modName, modDir string) Compliance { c := Compliance{ InterfaceAsserts: []InterfaceAssert{}, Tests: []ComplianceTest{}, } path := filepath.Join(modDir, "compliance_test.go") data, err := os.ReadFile(path) if err != nil { return c } fset := token.NewFileSet() file, err := parser.ParseFile(fset, path, data, parser.ParseComments) if err != nil { return c } rel, _ := filepath.Rel(modDir, path) for _, decl := range file.Decls { switch d := decl.(type) { case *ast.GenDecl: if d.Tok != token.VAR { continue } for _, spec := range d.Specs { vs, ok := spec.(*ast.ValueSpec) if !ok { continue } if len(vs.Names) != 1 || vs.Names[0].Name != "_" { continue } if vs.Type == nil || len(vs.Values) == 0 { continue } c.InterfaceAsserts = append(c.InterfaceAsserts, InterfaceAssert{ Module: modName, Interface: formatNode(fset, vs.Type), Impl: formatNode(fset, vs.Values[0]), File: rel, Line: fset.Position(vs.Pos()).Line, }) } case *ast.FuncDecl: if d.Recv != nil { continue } if !strings.HasPrefix(d.Name.Name, "Test") { continue } testDoc := "" if d.Doc != nil { testDoc = strings.TrimSpace(d.Doc.Text()) } c.Tests = append(c.Tests, ComplianceTest{ Module: modName, Name: d.Name.Name, Doc: testDoc, File: rel, Line: fset.Position(d.Pos()).Line, }) } } return c } func parseADRHeader(filename string, body []byte) (id, title string) { if m := adrNameRe.FindStringSubmatch(filename); m != nil { id = m[1] title = strings.ReplaceAll(m[2], "-", " ") } if m := h1Re.FindSubmatch(body); m != nil { title = strings.TrimSpace(string(m[1])) } return id, title } // extractPurpose returns the first non-empty, non-heading, non-badge paragraph // from the README — typically the blockquote tagline or opening sentence. func extractPurpose(readme string) string { for _, line := range strings.Split(readme, "\n") { t := strings.TrimSpace(line) if t == "" || strings.HasPrefix(t, "#") || strings.HasPrefix(t, "[!") || strings.HasPrefix(t, "[![") { continue } t = strings.TrimPrefix(t, "> ") t = strings.TrimPrefix(t, ">") if t == "" { continue } return t } return "" } // extractExamples lifts fenced code blocks from a README, attaching them to // the most recent H2/H3 heading as the example title and the best-guess // sub-package (the heading lowercased, matched against known sub-packages // later — or left blank). func extractExamples(module, readme string) []Example { var out []Example lines := strings.Split(readme, "\n") currentHeading := "" for _, l := range lines { t := strings.TrimSpace(l) if strings.HasPrefix(t, "## ") || strings.HasPrefix(t, "### ") { currentHeading = strings.TrimSpace(strings.TrimLeft(t, "# ")) } } _ = currentHeading // headings are walked again below to correlate blocks matches := fenceRe.FindAllStringSubmatchIndex(readme, -1) for _, m := range matches { lang := readme[m[2]:m[3]] code := readme[m[4]:m[5]] title := nearestHeading(readme, m[0]) out = append(out, Example{ Module: module, Title: title, Code: strings.TrimSpace(code), Language: lang, }) } return out } func nearestHeading(readme string, before int) string { prefix := readme[:before] lines := strings.Split(prefix, "\n") for i := len(lines) - 1; i >= 0; i-- { t := strings.TrimSpace(lines[i]) if strings.HasPrefix(t, "## ") || strings.HasPrefix(t, "### ") { return strings.TrimSpace(strings.TrimLeft(t, "# ")) } } return "" } func joinImport(base, rel string) string { if base == "" { return "" } if rel == "" || rel == "." { return base } return base + "/" + filepath.ToSlash(rel) }