feat(train): add new command to interact with aws and train models
This commit is contained in:
@ -1,20 +1,24 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/cyrilix/robocar-base/cli"
|
||||
"github.com/cyrilix/robocar-tools/data"
|
||||
"github.com/cyrilix/robocar-tools/dkimpt"
|
||||
"github.com/cyrilix/robocar-tools/part"
|
||||
"github.com/cyrilix/robocar-tools/pkg/data"
|
||||
"github.com/cyrilix/robocar-tools/pkg/train"
|
||||
"github.com/cyrilix/robocar-tools/record"
|
||||
"github.com/cyrilix/robocar-tools/video"
|
||||
mqtt "github.com/eclipse/paho.mqtt.golang"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"go.uber.org/zap"
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
const (
|
||||
DefaultClientId = "robocar-tools"
|
||||
DefaultClientId = "robocar-tools"
|
||||
DefaultTrainSliceSize = 0
|
||||
)
|
||||
|
||||
@ -27,6 +31,8 @@ func main() {
|
||||
var recordsPath string
|
||||
var trainArchiveName string
|
||||
var trainSliceSize int
|
||||
var bucket, ociImage string
|
||||
var debug bool
|
||||
|
||||
mqttQos := cli.InitIntFlag("MQTT_QOS", 0)
|
||||
_, mqttRetain := os.LookupEnv("MQTT_RETAIN")
|
||||
@ -35,9 +41,20 @@ func main() {
|
||||
fmt.Printf("Usage of %s:\n", os.Args[0])
|
||||
fmt.Printf(" display\n \tDisplay events on live frames\n")
|
||||
fmt.Printf(" record \n \tRecord event for tensorflow training\n")
|
||||
fmt.Printf(" train-archive \n \tGenerate zip archive for training \n")
|
||||
fmt.Printf(" training \n \tManage training\n")
|
||||
fmt.Printf(" import-donkey-records \n \tCopy donkeycar records to new format\n")
|
||||
}
|
||||
|
||||
err := cli.SetIntDefaultValueFromEnv(&trainSliceSize, "RC_TRAIN_SLICE_SIZE", DefaultTrainSliceSize)
|
||||
if err != nil {
|
||||
log.Printf("unable to init TRAIN_SLICE_SIZE: %v", err)
|
||||
}
|
||||
cli.SetDefaultValueFromEnv(&ociImage, "TRAIN_OCI_IMAGE", "")
|
||||
cli.SetDefaultValueFromEnv(&bucket, "TRAIN_BUCKET", "")
|
||||
|
||||
|
||||
flag.BoolVar(&debug, "debug", false, "Display debug logs")
|
||||
|
||||
displayFlags := flag.NewFlagSet("display", flag.ExitOnError)
|
||||
cli.InitMqttFlagSet(displayFlags, DefaultClientId, &mqttBroker, &username, &password, &clientId, &mqttQos, &mqttRetain)
|
||||
displayFlags.StringVar(&frameTopic, "mqtt-topic-frame", os.Getenv("MQTT_TOPIC_FRAME"), "Mqtt topic that contains frame to display, use MQTT_TOPIC_FRAME if args not set")
|
||||
@ -55,17 +72,57 @@ func main() {
|
||||
recordFlags.StringVar(&recordTopic, "mqtt-topic-records", os.Getenv("MQTT_TOPIC_RECORDS"), "Mqtt topic that contains record data for training, use MQTT_TOPIC_RECORDS if args not set")
|
||||
recordFlags.StringVar(&recordsPath, "record-path", os.Getenv("RECORD_PATH"), "Path where to write records files, use RECORD_PATH if args not set")
|
||||
|
||||
trainArchiveFlags := flag.NewFlagSet("train-archive", flag.ExitOnError)
|
||||
err := cli.SetIntDefaultValueFromEnv(&trainSliceSize, "TRAIN_SLICE_SIZE", DefaultTrainSliceSize)
|
||||
if err != nil {
|
||||
log.Printf("unable to parse horizon value arg: %v", err)
|
||||
|
||||
var basedir, destdir string
|
||||
impdkFlags := flag.NewFlagSet("import-donkey-records", flag.ExitOnError)
|
||||
impdkFlags.StringVar(&basedir, "from", "", "source directory")
|
||||
impdkFlags.StringVar(&destdir, "to", "", "destination directory")
|
||||
|
||||
|
||||
trainingFlags := flag.NewFlagSet("training", flag.ExitOnError)
|
||||
trainingFlags.Usage = func(){
|
||||
fmt.Printf("Usage of %s %s:\n", os.Args[0], trainingFlags.Name())
|
||||
fmt.Printf(" list\n \tList existing training jobs\n")
|
||||
fmt.Printf(" archive\n \tBuild tar.gz archive for training\n")
|
||||
fmt.Printf(" run\n \tRun training job\n")
|
||||
}
|
||||
|
||||
var modelPath, roleArn, trainJobName string
|
||||
trainingRunFlags := flag.NewFlagSet("run", flag.ExitOnError)
|
||||
trainingRunFlags.StringVar(&bucket, "bucket", os.Getenv("RC_TRAIN_BUCKET"), "AWS bucket where store data required, use RC_TRAIN_BUCKET if arg not set")
|
||||
trainingRunFlags.StringVar(&recordsPath, "record-path", os.Getenv("RECORD_PATH"), "Input data path where records and img files are stored, use RECORD_PATH if arg not set")
|
||||
trainingRunFlags.StringVar(&modelPath, "output-model-path", "", "Path where to write output model archive")
|
||||
trainingRunFlags.IntVar(&trainSliceSize, "slice-size", trainSliceSize, "Number of record to shift with image, use RC_TRAIN_SLICE_SIZE if args not set")
|
||||
trainingRunFlags.StringVar(&ociImage, "oci-image", os.Getenv("RC_TRAIN_OCI_IMAGE"), "OCI image to run (required), use RC_TRAIN_OCI_IMAGE if args not set")
|
||||
trainingRunFlags.StringVar(&roleArn, "role-arn", os.Getenv("RC_TRAIN_ROLE"), "AWS ARN role to use to run training (required), use RC_TRAIN_ROLE if arg not set")
|
||||
trainingRunFlags.StringVar(&trainJobName, "job-name", "", "Training job name (required)")
|
||||
|
||||
trainingListJobFlags := flag.NewFlagSet("list", flag.ExitOnError)
|
||||
|
||||
trainArchiveFlags := flag.NewFlagSet("archive", flag.ExitOnError)
|
||||
trainArchiveFlags.StringVar(&recordsPath, "record-path", os.Getenv("RECORD_PATH"), "Path where records files are stored, use RECORD_PATH if args not set")
|
||||
trainArchiveFlags.StringVar(&trainArchiveName, "output", os.Getenv("TRAIN_ARCHIVE_NAME"), "Zip archive file name, use TRAIN_ARCHIVE_NAME if args not set")
|
||||
trainArchiveFlags.IntVar(&trainSliceSize, "slice-size", trainSliceSize, "Number of record to shift with image, use TRAIN_SLICE_SIZE if args not set")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
config := zap.NewDevelopmentConfig()
|
||||
if debug {
|
||||
config.Level = zap.NewAtomicLevelAt(zap.DebugLevel)
|
||||
} else {
|
||||
config.Level = zap.NewAtomicLevelAt(zap.InfoLevel)
|
||||
}
|
||||
lgr, err := config.Build()
|
||||
if err != nil {
|
||||
log.Fatalf("unable to init logger: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := lgr.Sync(); err != nil {
|
||||
log.Printf("unable to Sync logger: %v\n", err)
|
||||
}
|
||||
}()
|
||||
zap.ReplaceGlobals(lgr)
|
||||
|
||||
// Switch on the subcommand
|
||||
// Parse the flags for appropriate FlagSet
|
||||
// FlagSet.Parse() requires a set of arguments to parse as input
|
||||
@ -78,7 +135,7 @@ func main() {
|
||||
}
|
||||
client, err := cli.Connect(mqttBroker, username, password, clientId)
|
||||
if err != nil {
|
||||
log.Fatalf("unable to connect to mqtt bus: %v", err)
|
||||
zap.S().Fatalf("unable to connect to mqtt bus: %v", err)
|
||||
}
|
||||
defer client.Disconnect(50)
|
||||
runDisplay(client, framePath, frameTopic, fps, objectsTopic, roadTopic, withObjects, withRoad)
|
||||
@ -93,12 +150,43 @@ func main() {
|
||||
}
|
||||
defer client.Disconnect(50)
|
||||
runRecord(client, recordsPath, recordTopic)
|
||||
case trainArchiveFlags.Name():
|
||||
if err := trainArchiveFlags.Parse(os.Args[2:]); err == flag.ErrHelp {
|
||||
trainArchiveFlags.PrintDefaults()
|
||||
case impdkFlags.Name():
|
||||
if err := impdkFlags.Parse(os.Args[2:]); err == flag.ErrHelp {
|
||||
impdkFlags.PrintDefaults()
|
||||
os.Exit(0)
|
||||
}
|
||||
runTrainArchive(recordsPath, trainArchiveName, 2)
|
||||
runImportDonkeyRecords(basedir, destdir)
|
||||
case trainingFlags.Name():
|
||||
if err := trainingFlags.Parse(os.Args[2:]); err == flag.ErrHelp {
|
||||
trainingFlags.PrintDefaults()
|
||||
os.Exit(0)
|
||||
}
|
||||
switch trainingFlags.Arg(0) {
|
||||
case trainingListJobFlags.Name():
|
||||
if err:= trainingListJobFlags.Parse(os.Args[3:]); err == flag.ErrHelp {
|
||||
trainingListJobFlags.PrintDefaults()
|
||||
os.Exit(0)
|
||||
}
|
||||
runTrainList()
|
||||
case trainingRunFlags.Name():
|
||||
if err := trainingRunFlags.Parse(os.Args[3:]); err == flag.ErrHelp {
|
||||
trainingRunFlags.PrintDefaults()
|
||||
os.Exit(0)
|
||||
}
|
||||
runTraining(bucket, ociImage, roleArn, trainJobName, recordsPath, trainSliceSize, modelPath)
|
||||
case trainArchiveFlags.Name():
|
||||
if err := trainArchiveFlags.Parse(os.Args[3:]); err == flag.ErrHelp {
|
||||
trainArchiveFlags.PrintDefaults()
|
||||
os.Exit(0)
|
||||
}
|
||||
runTrainArchive(recordsPath, trainArchiveName, trainSliceSize)
|
||||
|
||||
|
||||
default:
|
||||
trainingFlags.PrintDefaults()
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
default:
|
||||
flag.PrintDefaults()
|
||||
os.Exit(1)
|
||||
@ -110,7 +198,7 @@ func runRecord(client mqtt.Client, recordsDir, recordTopic string) {
|
||||
|
||||
r, err := record.New(client, recordsDir, recordTopic)
|
||||
if err != nil {
|
||||
log.Fatalf("unable to init record part: %v", err)
|
||||
zap.S().Fatalf("unable to init record part: %v", err)
|
||||
}
|
||||
defer r.Stop()
|
||||
|
||||
@ -118,15 +206,25 @@ func runRecord(client mqtt.Client, recordsDir, recordTopic string) {
|
||||
|
||||
err = r.Start()
|
||||
if err != nil {
|
||||
log.Fatalf("unable to start service: %v", err)
|
||||
zap.S().Fatalf("unable to start service: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func runTrainArchive(basedir, archiveName string, sliceSize int) {
|
||||
|
||||
err := data.BuildArchive(basedir, archiveName, sliceSize)
|
||||
err := data.WriteArchive(basedir, archiveName, sliceSize)
|
||||
if err != nil {
|
||||
log.Fatalf("unable to build archive file %v: %v", archiveName, err)
|
||||
zap.S().Fatalf("unable to build archive file %v: %v", archiveName, err)
|
||||
}
|
||||
}
|
||||
|
||||
func runImportDonkeyRecords(basedir, destdir string) {
|
||||
if destdir == "" || basedir == "" {
|
||||
zap.S().Fatal("invalid arg")
|
||||
}
|
||||
err := dkimpt.ImportDonkeyRecords(basedir, destdir)
|
||||
if err != nil {
|
||||
zap.S().Fatalf("unable to import files from %v to %v: %v", basedir, destdir, err)
|
||||
}
|
||||
}
|
||||
|
||||
@ -152,6 +250,43 @@ func runDisplay(client mqtt.Client, framePath string, frameTopic string, fps int
|
||||
|
||||
err := p.Start()
|
||||
if err != nil {
|
||||
log.Fatalf("unable to start service: %v", err)
|
||||
zap.S().Fatalf("unable to start service: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func runTraining(bucketName string, ociImage string, roleArn string, jobName, dataDir string, sliceSize int, outputModel string) {
|
||||
l := zap.S()
|
||||
if bucketName == "" {
|
||||
l.Fatalf("no bucket define, see help")
|
||||
}
|
||||
if ociImage == "" {
|
||||
l.Fatalf("no oci image define, see help")
|
||||
}
|
||||
if jobName == "" {
|
||||
l.Fatalf("no job name define, see help")
|
||||
}
|
||||
if dataDir == "" {
|
||||
l.Fatalf("no training data define, see help")
|
||||
}
|
||||
if outputModel == "" {
|
||||
l.Fatalf("no output model path define, see help")
|
||||
}
|
||||
|
||||
if sliceSize != 0 && sliceSize != 2 {
|
||||
l.Fatalf("invalid value for sie-slice, only '0' or '2' are allowed")
|
||||
}
|
||||
|
||||
training := train.New(bucketName, ociImage, roleArn)
|
||||
err := training.TrainDir(context.Background(), jobName, dataDir, sliceSize, outputModel)
|
||||
|
||||
if err != nil {
|
||||
l.Fatalf("unable to run training: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func runTrainList() {
|
||||
err := train.ListJob(context.Background())
|
||||
if err != nil {
|
||||
zap.S().Fatalf("unable to list training jobs: %w", err)
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user