Extend telemetry reporting and ingestion

Add extended telemetry functions and server-side support. misc/api.func gains helpers (categorize_error, an install timer, detect_gpu), new reporters for tools and addons, and an extended post_update_to_api that reports duration, GPU, and error_category. misc/data/service.go is updated to accept and validate the new fields (type: tool/addon, tool_name, parent_ct, gpu_vendor, gpu_passthrough, install_duration, error_category), expand the allowed enums, include the new fields in UpsertTelemetry and in the input-to-output mapping, and add input sanitization. The telemetry-ingest.exe binary is also added. Together, these changes enable richer telemetry (tool/addon events, GPU info, durations, and categorized errors) and server-side ingestion and validation of it.
This commit is contained in:
CanbiZ (MickLesk)
2026-02-10 08:14:45 +01:00
parent 6f747ed36d
commit 887a899f24
3 changed files with 460 additions and 27 deletions

View File

@@ -26,6 +26,15 @@ type DashboardData struct {
FailedApps []AppFailure `json:"failed_apps"`
RecentRecords []TelemetryRecord `json:"recent_records"`
DailyStats []DailyStat `json:"daily_stats"`
// Extended metrics
GPUStats []GPUCount `json:"gpu_stats"`
ErrorCategories []ErrorCatCount `json:"error_categories"`
TopTools []ToolCount `json:"top_tools"`
TopAddons []AddonCount `json:"top_addons"`
AvgInstallDuration float64 `json:"avg_install_duration"` // seconds
TotalTools int `json:"total_tools"`
TotalAddons int `json:"total_addons"`
}
type AppCount struct {
@@ -72,6 +81,29 @@ type DailyStat struct {
Failed int `json:"failed"`
}
// Extended metric types
// GPUCount is one entry of the dashboard's "gpu_stats" list: a count for a
// single GPU vendor / passthrough combination.
// NOTE(review): the aggregation map in FetchDashboardData is commented as
// keyed by "vendor|passthrough"; confirm that each key becomes one GPUCount.
type GPUCount struct {
Vendor string `json:"vendor"` // GPU vendor label for this bucket
Passthrough string `json:"passthrough"` // passthrough mode label for this bucket
Count int `json:"count"` // number of records in this bucket
}
// ErrorCatCount is one entry of the dashboard's "error_categories" list: a
// count for a single error category.
type ErrorCatCount struct {
Category string `json:"category"` // error category label
Count int `json:"count"` // number of records with this category
}
// ToolCount is one entry of the dashboard's "top_tools" list: a count for a
// single tool name.
type ToolCount struct {
Tool string `json:"tool"` // tool name
Count int `json:"count"` // number of records for this tool
}
// AddonCount is one entry of the dashboard's "top_addons" list: a count for a
// single addon.
// NOTE(review): the aggregation map in FetchDashboardData is commented as
// keyed by addon name only; how ParentCT is populated is not visible here —
// confirm against the code that builds TopAddons.
type AddonCount struct {
Addon string `json:"addon"` // addon name
ParentCT string `json:"parent_ct"` // presumably the parent container the addon was installed into; verify
Count int `json:"count"` // number of records for this addon
}
// FetchDashboardData retrieves aggregated data from PocketBase
func (p *PBClient) FetchDashboardData(ctx context.Context, days int) (*DashboardData, error) {
if err := p.ensureAuth(ctx); err != nil {
@@ -101,6 +133,13 @@ func (p *PBClient) FetchDashboardData(ctx context.Context, days int) (*Dashboard
dailySuccess := make(map[string]int)
dailyFailed := make(map[string]int)
// Extended metrics maps
gpuCounts := make(map[string]int) // "vendor|passthrough" -> count
errorCatCounts := make(map[string]int) // category -> count
toolCounts := make(map[string]int) // tool_name -> count
addonCounts := make(map[string]int) // addon_name -> count
var totalDuration, durationCount int
for _, r := range records {
data.TotalInstalls++

View File

@@ -62,7 +62,7 @@ type Config struct {
type TelemetryIn struct {
// Required
RandomID string `json:"random_id"` // Session UUID
Type string `json:"type"` // "lxc" or "vm"
Type string `json:"type"` // "lxc", "vm", "tool", "addon"
NSAPP string `json:"nsapp"` // Application name (e.g., "jellyfin")
Status string `json:"status"` // "installing", "success", "failed", "unknown"
@@ -81,6 +81,24 @@ type TelemetryIn struct {
Method string `json:"method,omitempty"` // "default", "advanced"
Error string `json:"error,omitempty"` // Error description (max 120 chars)
ExitCode int `json:"exit_code,omitempty"` // 0-255
// === NEW FIELDS ===
// Tool telemetry (type="tool")
ToolName string `json:"tool_name,omitempty"` // "microcode", "lxc-update", "post-pve-install", etc.
// Addon telemetry (type="addon")
ParentCT string `json:"parent_ct,omitempty"` // Parent container name (e.g., "jellyfin")
// GPU Passthrough stats
GPUVendor string `json:"gpu_vendor,omitempty"` // "intel", "amd", "nvidia"
GPUPassthrough string `json:"gpu_passthrough,omitempty"` // "igpu", "dgpu", "vgpu", "none"
// Performance metrics
InstallDuration int `json:"install_duration,omitempty"` // Seconds
// Error categorization
ErrorCategory string `json:"error_category,omitempty"` // "network", "storage", "dependency", "permission", "timeout", "unknown"
}
// TelemetryOut is sent to PocketBase (matches _dev_telemetry_data collection)
@@ -99,13 +117,25 @@ type TelemetryOut struct {
Method string `json:"method,omitempty"`
Error string `json:"error,omitempty"`
ExitCode int `json:"exit_code,omitempty"`
// Extended fields
ToolName string `json:"tool_name,omitempty"`
ParentCT string `json:"parent_ct,omitempty"`
GPUVendor string `json:"gpu_vendor,omitempty"`
GPUPassthrough string `json:"gpu_passthrough,omitempty"`
InstallDuration int `json:"install_duration,omitempty"`
ErrorCategory string `json:"error_category,omitempty"`
}
// TelemetryStatusUpdate contains only fields needed for status updates
type TelemetryStatusUpdate struct {
Status string `json:"status"`
Error string `json:"error,omitempty"`
ExitCode int `json:"exit_code"`
Status string `json:"status"`
Error string `json:"error,omitempty"`
ExitCode int `json:"exit_code"`
InstallDuration int `json:"install_duration,omitempty"`
ErrorCategory string `json:"error_category,omitempty"`
GPUVendor string `json:"gpu_vendor,omitempty"`
GPUPassthrough string `json:"gpu_passthrough,omitempty"`
}
type PBClient struct {
@@ -332,11 +362,15 @@ func (p *PBClient) UpsertTelemetry(ctx context.Context, payload TelemetryOut) er
return p.CreateTelemetry(ctx, payload)
}
// Update only status, error, and exit_code
// Update only status, error, exit_code, and new metrics fields
update := TelemetryStatusUpdate{
Status: payload.Status,
Error: payload.Error,
ExitCode: payload.ExitCode,
Status: payload.Status,
Error: payload.Error,
ExitCode: payload.ExitCode,
InstallDuration: payload.InstallDuration,
ErrorCategory: payload.ErrorCategory,
GPUVendor: payload.GPUVendor,
GPUPassthrough: payload.GPUPassthrough,
}
return p.UpdateTelemetryStatus(ctx, recordID, update)
}
@@ -491,7 +525,7 @@ func getClientIP(r *http.Request, pt *ProxyTrust) net.IP {
var (
// Allowed values for 'type' field
allowedType = map[string]bool{"lxc": true, "vm": true}
allowedType = map[string]bool{"lxc": true, "vm": true, "tool": true, "addon": true}
// Allowed values for 'status' field
allowedStatus = map[string]bool{"installing": true, "success": true, "failed": true, "unknown": true}
@@ -502,6 +536,18 @@ var (
"fedora": true, "rocky": true, "alma": true, "centos": true,
"opensuse": true, "gentoo": true, "openeuler": true,
}
// Allowed values for 'gpu_vendor' field
allowedGPUVendor = map[string]bool{"intel": true, "amd": true, "nvidia": true, "": true}
// Allowed values for 'gpu_passthrough' field
allowedGPUPassthrough = map[string]bool{"igpu": true, "dgpu": true, "vgpu": true, "none": true, "": true}
// Allowed values for 'error_category' field
allowedErrorCategory = map[string]bool{
"network": true, "storage": true, "dependency": true, "permission": true,
"timeout": true, "config": true, "resource": true, "unknown": true, "": true,
}
)
func sanitizeShort(s string, max int) string {
@@ -529,6 +575,13 @@ func validate(in *TelemetryIn) error {
in.PveVer = sanitizeShort(in.PveVer, 32)
in.Method = sanitizeShort(in.Method, 32)
// Sanitize new fields
in.ToolName = sanitizeShort(in.ToolName, 64)
in.ParentCT = sanitizeShort(in.ParentCT, 64)
in.GPUVendor = strings.ToLower(sanitizeShort(in.GPUVendor, 16))
in.GPUPassthrough = strings.ToLower(sanitizeShort(in.GPUPassthrough, 16))
in.ErrorCategory = strings.ToLower(sanitizeShort(in.ErrorCategory, 32))
// IMPORTANT: "error" must be short and not contain identifiers/logs
in.Error = sanitizeShort(in.Error, 120)
@@ -537,20 +590,36 @@ func validate(in *TelemetryIn) error {
return errors.New("missing required fields: random_id, type, nsapp, status")
}
// Normalize common typos for backwards compatibility
if in.Status == "sucess" {
in.Status = "success"
}
// Validate enums
if !allowedType[in.Type] {
return errors.New("invalid type (must be 'lxc' or 'vm')")
return errors.New("invalid type (must be 'lxc', 'vm', 'tool', or 'addon')")
}
if !allowedStatus[in.Status] {
return errors.New("invalid status")
}
// Validate new enum fields
if !allowedGPUVendor[in.GPUVendor] {
return errors.New("invalid gpu_vendor (must be 'intel', 'amd', 'nvidia', or empty)")
}
if !allowedGPUPassthrough[in.GPUPassthrough] {
return errors.New("invalid gpu_passthrough (must be 'igpu', 'dgpu', 'vgpu', 'none', or empty)")
}
if !allowedErrorCategory[in.ErrorCategory] {
return errors.New("invalid error_category")
}
// For status updates (not installing), skip numeric field validation
// These are only required for initial creation
isUpdate := in.Status != "installing"
// os_type is optional but if provided must be valid
if in.OsType != "" && !allowedOsType[in.OsType] {
// os_type is optional but if provided must be valid (only for lxc/vm)
if (in.Type == "lxc" || in.Type == "vm") && in.OsType != "" && !allowedOsType[in.OsType] {
return errors.New("invalid os_type")
}
@@ -558,7 +627,7 @@ func validate(in *TelemetryIn) error {
// Values like "default", "advanced", "mydefaults-global", "mydefaults-app" are all valid
// Validate numeric ranges (only strict for new records)
if !isUpdate {
if !isUpdate && (in.Type == "lxc" || in.Type == "vm") {
if in.CTType < 0 || in.CTType > 2 {
return errors.New("invalid ct_type (must be 0, 1, or 2)")
}
@@ -575,6 +644,9 @@ func validate(in *TelemetryIn) error {
if in.ExitCode < 0 || in.ExitCode > 255 {
return errors.New("invalid exit_code")
}
if in.InstallDuration < 0 || in.InstallDuration > 86400 {
return errors.New("invalid install_duration (max 24h)")
}
return nil
}
@@ -897,20 +969,26 @@ func main() {
// Map input to PocketBase schema
out := TelemetryOut{
RandomID: in.RandomID,
Type: in.Type,
NSAPP: in.NSAPP,
Status: in.Status,
CTType: in.CTType,
DiskSize: in.DiskSize,
CoreCount: in.CoreCount,
RAMSize: in.RAMSize,
OsType: in.OsType,
OsVersion: in.OsVersion,
PveVer: in.PveVer,
Method: in.Method,
Error: in.Error,
ExitCode: in.ExitCode,
RandomID: in.RandomID,
Type: in.Type,
NSAPP: in.NSAPP,
Status: in.Status,
CTType: in.CTType,
DiskSize: in.DiskSize,
CoreCount: in.CoreCount,
RAMSize: in.RAMSize,
OsType: in.OsType,
OsVersion: in.OsVersion,
PveVer: in.PveVer,
Method: in.Method,
Error: in.Error,
ExitCode: in.ExitCode,
ToolName: in.ToolName,
ParentCT: in.ParentCT,
GPUVendor: in.GPUVendor,
GPUPassthrough: in.GPUPassthrough,
InstallDuration: in.InstallDuration,
ErrorCategory: in.ErrorCategory,
}
_ = computeHash(out) // For future deduplication