feat(telemetry): instrument inference duration and frame age
This commit is contained in:
69
pkg/metrics/metrics.go
Normal file
69
pkg/metrics/metrics.go
Normal file
@ -0,0 +1,69 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
stdout "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/metric/global"
|
||||
"go.opentelemetry.io/otel/metric/unit"
|
||||
controller "go.opentelemetry.io/otel/sdk/metric/controller/basic"
|
||||
processor "go.opentelemetry.io/otel/sdk/metric/processor/basic"
|
||||
"go.opentelemetry.io/otel/sdk/metric/selector/simple"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
var (
|
||||
FrameAge metric.Int64Histogram
|
||||
InferenceDuration metric.Int64Histogram
|
||||
)
|
||||
|
||||
func initMeter(ctx context.Context) func() {
|
||||
zap.S().Info("init telemetry")
|
||||
exporter, err := stdout.New(
|
||||
stdout.WithPrettyPrint(),
|
||||
)
|
||||
if err != nil {
|
||||
zap.S().Panicf("failed to initialize prometheus exporter %v", err)
|
||||
}
|
||||
|
||||
pusher := controller.New(
|
||||
processor.NewFactory(
|
||||
simple.NewWithInexpensiveDistribution(),
|
||||
//simple.NewWithHistogramDistribution(
|
||||
// histogram.WithExplicitBoundaries(
|
||||
// []float64{.005, .5, 1, 2.5, 5, 10, 20, 50, 100},
|
||||
// ),
|
||||
//),
|
||||
exporter,
|
||||
),
|
||||
controller.WithExporter(exporter),
|
||||
)
|
||||
|
||||
if err = pusher.Start(ctx); err != nil {
|
||||
zap.S().Fatalf("starting push controller: %v", err)
|
||||
}
|
||||
global.SetMeterProvider(pusher)
|
||||
return func() {
|
||||
if err := pusher.Stop(ctx); err != nil {
|
||||
zap.S().Fatalf("stopping push controller: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func Init(ctx context.Context) func() {
|
||||
cleaner := initMeter(ctx)
|
||||
|
||||
meter := global.Meter("robocar/rc-steering")
|
||||
|
||||
FrameAge = metric.Must(meter).NewInt64Histogram(
|
||||
"robocar.frame_age",
|
||||
metric.WithUnit(unit.Milliseconds),
|
||||
metric.WithDescription("time before frame processing"))
|
||||
InferenceDuration = metric.Must(meter).NewInt64Histogram(
|
||||
"robocar.inference_duration",
|
||||
metric.WithUnit(unit.Milliseconds),
|
||||
metric.WithDescription("tensorflow inference duration"))
|
||||
|
||||
return cleaner
|
||||
}
|
@ -2,9 +2,11 @@ package steering
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/cyrilix/robocar-base/service"
|
||||
"github.com/cyrilix/robocar-protobuf/go/events"
|
||||
"github.com/cyrilix/robocar-steering-tflite-edgetpu/pkg/metrics"
|
||||
"github.com/disintegration/imaging"
|
||||
mqtt "github.com/eclipse/paho.mqtt.golang"
|
||||
"github.com/golang/protobuf/proto"
|
||||
@ -14,6 +16,7 @@ import (
|
||||
"image"
|
||||
_ "image/jpeg"
|
||||
"sort"
|
||||
"time"
|
||||
)
|
||||
|
||||
func NewPart(client mqtt.Client, modelPath, steeringTopic, cameraTopic string, edgeVerbosity int, imgWidth, imgHeight, horizon int) *Part {
|
||||
@ -130,6 +133,11 @@ func (p *Part) onFrame(_ mqtt.Client, message mqtt.Message) {
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now().UnixMilli()
|
||||
frameAge := now - msg.Id.CreatedAt.AsTime().UnixMilli()
|
||||
go metrics.FrameAge.Record(context.Background(), frameAge)
|
||||
|
||||
|
||||
img, _, err := image.Decode(bytes.NewReader(msg.GetFrame()))
|
||||
if err != nil {
|
||||
zap.L().Error("unable to decode frame, skip frame", zap.Error(err))
|
||||
@ -137,6 +145,9 @@ func (p *Part) onFrame(_ mqtt.Client, message mqtt.Message) {
|
||||
}
|
||||
|
||||
steering, confidence, err := p.Value(img)
|
||||
inferenceDuration := time.Now().UnixMilli() - now
|
||||
go metrics.InferenceDuration.Record(context.Background(), inferenceDuration)
|
||||
|
||||
if err != nil {
|
||||
zap.S().Errorw("unable to compute sterring",
|
||||
"frame", msg.GetId().GetId(),
|
||||
|
Reference in New Issue
Block a user