Compare commits
4 Commits
feat/text_... → v0.2.0

Author | SHA1 | Date
---|---|---
 | 3766531936 |
 | fe63597ba4 |
 | 52c3808d83 |
 | 33c16699ae |
Dockerfile (12 changes)

@@ -1,3 +1,12 @@
+FROM docker.io/library/python:3.9-slim AS model
+
+RUN python3 -m pip install blobconverter
+
+RUN mkdir -p /models
+
+RUN blobconverter --zoo-name mobile_object_localizer_192x192 --zoo-type depthai --shaves 6 --version 2021.4 --output-dir /models || echo ""
+RUN ls /models
+#######
 FROM docker.io/library/python:3.9-slim
 
 # Configure piwheels repo to use pre-compiled numpy wheels for arm
@@ -7,6 +16,9 @@ RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
 RUN pip3 install numpy
 
+RUN mkdir /models
+COPY --from=model /models/mobile_object_localizer_192x192_openvino_2021.4_6shave.blob /models/mobile_object_localizer_192x192_openvino_2021.4_6shave.blob
+
 ADD requirements.txt requirements.txt
 
 RUN pip3 install -r requirements.txt
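For reference, the blob that the new build stage bakes into the image can also be fetched from the DepthAI model zoo at runtime with the blobconverter Python API, which is how the removed main.py obtained its models. A minimal sketch, assuming network access and the blobconverter version pinned in requirements.txt:

```python
import blobconverter

# Fetch the same model the Dockerfile converts at build time; the
# zoo_type/shaves/version arguments mirror the CLI flags of the build stage.
blob_path = blobconverter.from_zoo(
    name="mobile_object_localizer_192x192",
    zoo_type="depthai",
    shaves=6,
    version="2021.4",
)
print(blob_path)  # path to the downloaded .blob file (local cache by default)
```

Baking the blob into the image instead avoids this download on every container start, which matters on a robot without reliable connectivity.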
@@ -1,19 +1,25 @@
 """
 Publish data from oak-lite device
 
-Usage: rc-oak-camera [-u USERNAME | --mqtt-username=USERNAME] [--mqtt-password=PASSWORD] [--mqtt-broker=HOSTNAME] \
-    [--mqtt-topic-robocar-oak-camera="TOPIC_CAMERA"] [--mqtt-client-id=CLIENT_ID] \
-    [-H IMG_HEIGHT | --image-height=IMG_HEIGHT] [-W IMG_WIDTH | --image-width=IMG_width]
+Usage: rc-oak-camera [-u USERNAME | --mqtt-username=USERNAME] [--mqtt-password=PASSWORD] \
+    [--mqtt-broker-host=HOSTNAME] [--mqtt-broker-port=PORT] \
+    [--mqtt-topic-robocar-oak-camera="TOPIC_CAMERA"] [--mqtt-topic-robocar-objects="TOPIC_OBJECTS"] \
+    [--mqtt-client-id=CLIENT_ID] \
+    [-H IMG_HEIGHT | --image-height=IMG_HEIGHT] [-W IMG_WIDTH | --image-width=IMG_width] \
+    [-t OBJECTS_THRESHOLD | --objects-threshold=OBJECTS_THRESHOLD]
 
 Options:
 -h --help  Show this screen.
 -u USERID --mqtt-username=USERNAME  MQTT user
 -p PASSWORD --mqtt-password=PASSWORD  MQTT password
--b HOSTNAME --mqtt-broker=HOSTNAME  MQTT broker host
+-b HOSTNAME --mqtt-broker-host=HOSTNAME  MQTT broker host
+-P HOSTNAME --mqtt-broker-port=PORT  MQTT broker port
 -C CLIENT_ID --mqtt-client-id=CLIENT_ID  MQTT client id
 -c TOPIC_CAMERA --mqtt-topic-robocar-oak-camera=TOPIC_CAMERA  MQTT topic where to publish robocar-oak-camera frames
+-o TOPIC_OBJECTS --mqtt-topic-robocar-objects=TOPIC_OBJECTS  MQTT topic where to publish objects detection results
 -H IMG_HEIGHT --image-height=IMG_HEIGHT  IMG_HEIGHT image height
 -W IMG_WIDTH --image-width=IMG_width  IMG_WIDTH image width
+-t OBJECTS_THRESHOLD --objects-threshold=OBJECTS_THRESHOLD  OBJECTS_THRESHOLD threshold to filter objects detected
 """
 import logging
 import os
@@ -27,13 +33,13 @@ logging.basicConfig(level=logging.INFO)
 default_client_id = "robocar-depthai"
 
 
-def init_mqtt_client(broker_host: str, user: str, password: str, client_id: str) -> mqtt.Client:
+def init_mqtt_client(broker_host: str, broker_port, user: str, password: str, client_id: str) -> mqtt.Client:
     logger.info("Start part.py-robocar-oak-camera")
     client = mqtt.Client(client_id=client_id, clean_session=True, userdata=None, protocol=mqtt.MQTTv311)
 
     client.username_pw_set(user, password)
     logger.info("Connect to mqtt broker "+ broker_host)
-    client.connect(host=broker_host, port=1883, keepalive=60)
+    client.connect(host=broker_host, port=broker_port, keepalive=60)
     logger.info("Connected to mqtt broker")
     return client
 
@@ -43,16 +49,21 @@ def execute_from_command_line():
 
     args = docopt(__doc__)
 
-    client = init_mqtt_client(broker_host=get_default_value(args["--mqtt-broker"], "MQTT_BROKER", "localhost"),
+    client = init_mqtt_client(broker_host=get_default_value(args["--mqtt-broker-host"], "MQTT_BROKER_HOST", "localhost"),
+                              broker_port=int(get_default_value(args["--mqtt-broker-port"], "MQTT_BROKER_PORT", "1883")),
                               user=get_default_value(args["--mqtt-username"], "MQTT_USERNAME", ""),
                               password=get_default_value(args["--mqtt-password"], "MQTT_PASSWORD", ""),
                               client_id=get_default_value(args["--mqtt-client-id"], "MQTT_CLIENT_ID",
                                                           default_client_id),
                               )
     frame_topic = get_default_value(args["--mqtt-topic-robocar-oak-camera"], "MQTT_TOPIC_CAMERA", "/oak/camera_rgb")
+    objects_topic = get_default_value(args["--mqtt-topic-robocar-objects"], "MQTT_TOPIC_OBJECTS", "/objects")
 
     frame_processor = cam.FramePublisher(mqtt_client=client,
                                          frame_topic=frame_topic,
+                                         objects_topic=objects_topic,
+                                         objects_threshold=float(get_default_value(args["--objects-threshold"],
+                                                                                   "OBJECTS_THRESHOLD", 0.2)),
                                          img_width=int(get_default_value(args["--image-width"], "IMAGE_WIDTH", 160)),
                                          img_height=int(get_default_value(args["--image-height"], "IMAGE_HEIGHT", 120)))
     frame_processor.run()
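A condensed sketch of how the new broker-port and objects-threshold options are parsed and fall back to environment variables. The program name, the simplified get_default_value helper and the final printout are illustrative stand-ins, not part of the repository:

```python
"""
Usage: rc-oak-camera-demo [--mqtt-broker-host=HOSTNAME] [--mqtt-broker-port=PORT] [--objects-threshold=THRESHOLD]

Options:
  -h --help                      Show this screen.
  --mqtt-broker-host=HOSTNAME    MQTT broker host
  --mqtt-broker-port=PORT        MQTT broker port
  --objects-threshold=THRESHOLD  threshold to filter detected objects
"""
import os

from docopt import docopt


def get_default_value(arg_value, env_var: str, default):
    # Simplified stand-in for the project's helper: CLI flag first,
    # then environment variable, then hard-coded default.
    if arg_value is not None:
        return arg_value
    return os.environ.get(env_var, default)


if __name__ == "__main__":
    args = docopt(__doc__)
    broker_host = get_default_value(args["--mqtt-broker-host"], "MQTT_BROKER_HOST", "localhost")
    broker_port = int(get_default_value(args["--mqtt-broker-port"], "MQTT_BROKER_PORT", "1883"))
    threshold = float(get_default_value(args["--objects-threshold"], "OBJECTS_THRESHOLD", 0.2))
    print(broker_host, broker_port, threshold)
```

The real entry point passes these values straight into init_mqtt_client and cam.FramePublisher, as shown in the hunk above.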
@@ -6,25 +6,22 @@ import events.events_pb2
 
 import depthai as dai
 import cv2
+import numpy as np
 
 logger = logging.getLogger(__name__)
 
-def to_tensor_result(packet):
-    return {
-        name: np.array(packet.getLayerFp16(name))
-        for name in [tensor.name for tensor in packet.getRaw().tensors]
-    }
-
-
-def to_planar(frame):
-    return frame.transpose(2, 0, 1).flatten()
+NN_PATH = "/models/mobile_object_localizer_192x192_openvino_2021.4_6shave.blob"
+NN_WIDTH = 192
+NN_HEIGHT = 192
 
 
 class FramePublisher:
-    def __init__(self, mqtt_client: mqtt.Client, frame_topic: str, img_width: int, img_height: int):
+    def __init__(self, mqtt_client: mqtt.Client, frame_topic: str, objects_topic: str, objects_threshold: float,
+                 img_width: int, img_height: int):
         self._mqtt_client = mqtt_client
         self._frame_topic = frame_topic
+        self._objects_topic = objects_topic
+        self._objects_threshold = objects_threshold
         self._img_width = img_width
         self._img_height = img_height
         self._pipeline = self._configure_pipeline()
@@ -33,77 +30,30 @@ class FramePublisher:
         logger.info("configure pipeline")
         pipeline = dai.Pipeline()
 
-        version = "2021.2"
-        pipeline.setOpenVINOVersion(version=dai.OpenVINO.Version.VERSION_2021_2)
-
-        # colorCam = pipeline.create(dai.node.ColorCamera)
-        # colorCam.setPreviewSize(256, 256)
-        # colorCam.setVideoSize(1024, 1024) # 4 times larger in both axis
-        # colorCam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
-        # colorCam.setInterleaved(False)
-        # colorCam.setBoardSocket(dai.CameraBoardSocket.RGB)
-        # colorCam.setFps(10)
-        #
-        # controlIn = pipeline.create(dai.node.XLinkIn)
-        # controlIn.setStreamName('control')
-        # controlIn.out.link(colorCam.inputControl)
-        #
-        # cam_xout = pipeline.create(dai.node.XLinkOut)
-        # cam_xout.setStreamName('video')
-        # colorCam.video.link(cam_xout.input)
-
-        # ---------------------------------------
-        # 1st stage NN - text-detection
-        # ---------------------------------------
-
-        nn = pipeline.create(dai.node.NeuralNetwork)
-        nn.setBlobPath(
-            blobconverter.from_zoo(name="east_text_detection_256x256", zoo_type="depthai", shaves=6, version=version))
-        colorCam.preview.link(nn.input)
-
-        nn_xout = pipeline.create(dai.node.XLinkOut)
-        nn_xout.setStreamName('detections')
-        nn.out.link(nn_xout.input)
-
-        # ---------------------------------------
-        # 2nd stage NN - text-recognition-0012
-        # ---------------------------------------
+        pipeline.setOpenVINOVersion(version=dai.OpenVINO.VERSION_2021_4)
+
+        # Define a neural network that will make predictions based on the source frames
+        detection_nn = pipeline.create(dai.node.NeuralNetwork)
+        detection_nn.setBlobPath(NN_PATH)
+        detection_nn.setNumPoolFrames(4)
+        detection_nn.input.setBlocking(False)
+        detection_nn.setNumInferenceThreads(2)
+
+        xout_nn = pipeline.create(dai.node.XLinkOut)
+        xout_nn.setStreamName("nn")
+        xout_nn.input.setBlocking(False)
 
+        # Resize image
         manip = pipeline.create(dai.node.ImageManip)
-        manip.setWaitForConfigInput(True)
-
-        manip_img = pipeline.create(dai.node.XLinkIn)
-        manip_img.setStreamName('manip_img')
-        manip_img.out.link(manip.inputImage)
-
-        manip_cfg = pipeline.create(dai.node.XLinkIn)
-        manip_cfg.setStreamName('manip_cfg')
-        manip_cfg.out.link(manip.inputConfig)
-
-        manip_xout = pipeline.create(dai.node.XLinkOut)
-        manip_xout.setStreamName('manip_out')
-
-        nn2 = pipeline.create(dai.node.NeuralNetwork)
-        nn2.setBlobPath(blobconverter.from_zoo(name="text-recognition-0012", shaves=6, version=version))
-        nn2.setNumInferenceThreads(2)
-        manip.out.link(nn2.input)
-        manip.out.link(manip_xout.input)
-
-        nn2_xout = pipeline.create(dai.node.XLinkOut)
-        nn2_xout.setStreamName("recognitions")
-        nn2.out.link(nn2_xout.input)
+        manip.initialConfig.setResize(NN_WIDTH, NN_HEIGHT)
+        manip.initialConfig.setFrameType(dai.ImgFrame.Type.RGB888p)
+        manip.initialConfig.setKeepAspectRatio(False)
 
         cam_rgb = pipeline.create(dai.node.ColorCamera)
         xout_rgb = pipeline.create(dai.node.XLinkOut)
 
         xout_rgb.setStreamName("rgb")
 
         # Properties
         cam_rgb.setBoardSocket(dai.CameraBoardSocket.RGB)
         cam_rgb.setPreviewSize(width=self._img_width, height=self._img_height)
@@ -111,156 +61,18 @@ class FramePublisher:
         cam_rgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB)
         cam_rgb.setFps(30)
 
-        # Linking
+        # Link preview to manip and manip to nn
+        cam_rgb.preview.link(manip.inputImage)
+        manip.out.link(detection_nn.input)
+
+        # Linking to output
         cam_rgb.preview.link(xout_rgb.input)
+        detection_nn.out.link(xout_nn.input)
 
         logger.info("pipeline configured")
         return pipeline
 
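The reworked _configure_pipeline() replaces the two-stage text-detection/recognition graph with a single object-localizer network fed through an ImageManip resize node. A standalone sketch of the same topology, assuming the blob path produced by the Dockerfile build stage (only calls that already appear in this diff are used; the preview size is illustrative):

```python
import depthai as dai

NN_PATH = "/models/mobile_object_localizer_192x192_openvino_2021.4_6shave.blob"

pipeline = dai.Pipeline()
pipeline.setOpenVINOVersion(version=dai.OpenVINO.VERSION_2021_4)

# Camera preview -> ImageManip (resize to 192x192 RGB) -> NeuralNetwork -> XLinkOut "nn"
cam_rgb = pipeline.create(dai.node.ColorCamera)
cam_rgb.setPreviewSize(width=160, height=120)  # FramePublisher uses img_width/img_height here

manip = pipeline.create(dai.node.ImageManip)
manip.initialConfig.setResize(192, 192)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.RGB888p)
manip.initialConfig.setKeepAspectRatio(False)

detection_nn = pipeline.create(dai.node.NeuralNetwork)
detection_nn.setBlobPath(NN_PATH)

xout_rgb = pipeline.create(dai.node.XLinkOut)
xout_rgb.setStreamName("rgb")   # raw frames for the camera topic
xout_nn = pipeline.create(dai.node.XLinkOut)
xout_nn.setStreamName("nn")     # localizer output for the objects topic

cam_rgb.preview.link(manip.inputImage)
cam_rgb.preview.link(xout_rgb.input)
manip.out.link(detection_nn.input)
detection_nn.out.link(xout_nn.input)
```

The stream names "rgb" and "nn" match the two output queues that run() reads further down.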
     def run(self):
with dai.Device(self._pipeline) as device:
|
|
||||||
q_vid = device.getOutputQueue("video", 4, blocking=False)
|
|
||||||
# This should be set to block, but would get to some extreme queuing/latency!
|
|
||||||
q_det = device.getOutputQueue("detections", 4, blocking=False)
|
|
||||||
|
|
||||||
q_rec = device.getOutputQueue("recognitions", 4, blocking=True)
|
|
||||||
|
|
||||||
q_manip_img = device.getInputQueue("manip_img")
|
|
||||||
q_manip_cfg = device.getInputQueue("manip_cfg")
|
|
||||||
q_manip_out = device.getOutputQueue("manip_out", 4, blocking=False)
|
|
||||||
|
|
||||||
controlQueue = device.getInputQueue('control')
|
|
||||||
|
|
||||||
frame = None
|
|
||||||
cropped_stacked = None
|
|
||||||
rotated_rectangles = []
|
|
||||||
rec_pushed = 0
|
|
||||||
rec_received = 0
|
|
||||||
host_sync = HostSeqSync()
|
|
||||||
|
|
||||||
characters = '0123456789abcdefghijklmnopqrstuvwxyz#'
|
|
||||||
codec = CTCCodec(characters)
|
|
||||||
|
|
||||||
ctrl = dai.CameraControl()
|
|
||||||
ctrl.setAutoFocusMode(dai.CameraControl.AutoFocusMode.CONTINUOUS_VIDEO)
|
|
||||||
ctrl.setAutoFocusTrigger()
|
|
||||||
controlQueue.send(ctrl)
|
|
||||||
|
|
||||||
while True:
|
|
||||||
vid_in = q_vid.tryGet()
|
|
||||||
if vid_in is not None:
|
|
||||||
host_sync.add_msg(vid_in)
|
|
||||||
|
|
||||||
# Multiple recognition results may be available, read until queue is empty
|
|
||||||
while True:
|
|
||||||
in_rec = q_rec.tryGet()
|
|
||||||
if in_rec is None:
|
|
||||||
break
|
|
||||||
rec_data = bboxes = np.array(in_rec.getFirstLayerFp16()).reshape(30, 1, 37)
|
|
||||||
decoded_text = codec.decode(rec_data)[0]
|
|
||||||
pos = rotated_rectangles[rec_received]
|
|
||||||
print("{:2}: {:20}".format(rec_received, decoded_text),
|
|
||||||
"center({:3},{:3}) size({:3},{:3}) angle{:5.1f} deg".format(
|
|
||||||
int(pos[0][0]), int(pos[0][1]), pos[1][0], pos[1][1], pos[2]))
|
|
||||||
# Draw the text on the right side of 'cropped_stacked' - placeholder
|
|
||||||
if cropped_stacked is not None:
|
|
||||||
cv2.putText(cropped_stacked, decoded_text,
|
|
||||||
(120 + 10, 32 * rec_received + 24),
|
|
||||||
cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
|
|
||||||
cv2.imshow('cropped_stacked', cropped_stacked)
|
|
||||||
rec_received += 1
|
|
||||||
|
|
||||||
if cv2.waitKey(1) == ord('q'):
|
|
||||||
break
|
|
||||||
|
|
||||||
if rec_received >= rec_pushed:
|
|
||||||
in_det = q_det.tryGet()
|
|
||||||
if in_det is not None:
|
|
||||||
frame = host_sync.get_msg(in_det.getSequenceNum()).getCvFrame().copy()
|
|
||||||
|
|
||||||
scores, geom1, geom2 = to_tensor_result(in_det).values()
|
|
||||||
scores = np.reshape(scores, (1, 1, 64, 64))
|
|
||||||
geom1 = np.reshape(geom1, (1, 4, 64, 64))
|
|
||||||
geom2 = np.reshape(geom2, (1, 1, 64, 64))
|
|
||||||
|
|
||||||
bboxes, confs, angles = east.decode_predictions(scores, geom1, geom2)
|
|
||||||
boxes, angles = east.non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles))
|
|
||||||
rotated_rectangles = [
|
|
||||||
east.get_cv_rotated_rect(bbox, angle * -1)
|
|
||||||
for (bbox, angle) in zip(boxes, angles)
|
|
||||||
]
|
|
||||||
|
|
||||||
rec_received = 0
|
|
||||||
rec_pushed = len(rotated_rectangles)
|
|
||||||
if rec_pushed:
|
|
||||||
print("====== Pushing for recognition, count:", rec_pushed)
|
|
||||||
cropped_stacked = None
|
|
||||||
for idx, rotated_rect in enumerate(rotated_rectangles):
|
|
||||||
# Detections are done on 256x256 frames, we are sending back 1024x1024
|
|
||||||
# That's why we multiply center and size values by 4
|
|
||||||
rotated_rect[0][0] = rotated_rect[0][0] * 4
|
|
||||||
rotated_rect[0][1] = rotated_rect[0][1] * 4
|
|
||||||
rotated_rect[1][0] = rotated_rect[1][0] * 4
|
|
||||||
rotated_rect[1][1] = rotated_rect[1][1] * 4
|
|
||||||
|
|
||||||
# Draw detection crop area on input frame
|
|
||||||
points = np.int0(cv2.boxPoints(rotated_rect))
|
|
||||||
print(rotated_rect)
|
|
||||||
cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1,
|
|
||||||
lineType=cv2.LINE_8)
|
|
||||||
|
|
||||||
# TODO make it work taking args like in OpenCV:
|
|
||||||
# rr = ((256, 256), (128, 64), 30)
|
|
||||||
rr = dai.RotatedRect()
|
|
||||||
rr.center.x = rotated_rect[0][0]
|
|
||||||
rr.center.y = rotated_rect[0][1]
|
|
||||||
rr.size.width = rotated_rect[1][0]
|
|
||||||
rr.size.height = rotated_rect[1][1]
|
|
||||||
rr.angle = rotated_rect[2]
|
|
||||||
cfg = dai.ImageManipConfig()
|
|
||||||
cfg.setCropRotatedRect(rr, False)
|
|
||||||
cfg.setResize(120, 32)
|
|
||||||
# Send frame and config to device
|
|
||||||
if idx == 0:
|
|
||||||
w, h, c = frame.shape
|
|
||||||
imgFrame = dai.ImgFrame()
|
|
||||||
imgFrame.setData(to_planar(frame))
|
|
||||||
imgFrame.setType(dai.ImgFrame.Type.BGR888p)
|
|
||||||
imgFrame.setWidth(w)
|
|
||||||
imgFrame.setHeight(h)
|
|
||||||
q_manip_img.send(imgFrame)
|
|
||||||
else:
|
|
||||||
cfg.setReusePreviousImage(True)
|
|
||||||
q_manip_cfg.send(cfg)
|
|
||||||
|
|
||||||
# Get manipulated image from the device
|
|
||||||
transformed = q_manip_out.get().getCvFrame()
|
|
||||||
|
|
||||||
rec_placeholder_img = np.zeros((32, 200, 3), np.uint8)
|
|
||||||
transformed = np.hstack((transformed, rec_placeholder_img))
|
|
||||||
if cropped_stacked is None:
|
|
||||||
cropped_stacked = transformed
|
|
||||||
else:
|
|
||||||
cropped_stacked = np.vstack((cropped_stacked, transformed))
|
|
||||||
|
|
||||||
if cropped_stacked is not None:
|
|
||||||
cv2.imshow('cropped_stacked', cropped_stacked)
|
|
||||||
|
|
||||||
if frame is not None:
|
|
||||||
cv2.imshow('frame', frame)
|
|
||||||
|
|
||||||
key = cv2.waitKey(1)
|
|
||||||
if key == ord('q'):
|
|
||||||
break
|
|
||||||
elif key == ord('t'):
|
|
||||||
print("Autofocus trigger (and disable continuous)")
|
|
||||||
ctrl = dai.CameraControl()
|
|
||||||
ctrl.setAutoFocusMode(dai.CameraControl.AutoFocusMode.AUTO)
|
|
||||||
ctrl.setAutoFocusTrigger()
|
|
||||||
controlQueue.send(ctrl)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
         # Connect to device and start pipeline
         with dai.Device(self._pipeline) as device:
             logger.info('MxId: %s', device.getDeviceInfo().getMxId())
@@ -272,7 +84,8 @@ class FramePublisher:
             device.startPipeline()
             # Queues
             queue_size = 4
-            q_rgb = device.getOutputQueue("rgb", maxSize=queue_size, blocking=False)
+            q_rgb = device.getOutputQueue(name="rgb", maxSize=queue_size, blocking=False)
+            q_nn = device.getOutputQueue(name="nn", maxSize=queue_size, blocking=False)
 
             while True:
                 try:
@@ -297,5 +110,42 @@ class FramePublisher:
                                               qos=0,
                                               retain=False)
+
+                    in_nn = q_nn.get()
+
+                    # get outputs
+                    detection_boxes = np.array(in_nn.getLayerFp16("ExpandDims")).reshape((100, 4))
+                    detection_scores = np.array(in_nn.getLayerFp16("ExpandDims_2")).reshape((100,))
+
+                    # keep boxes bigger than threshold
+                    mask = detection_scores >= self._objects_threshold
+                    boxes = detection_boxes[mask]
+                    scores = detection_scores[mask]
+
+                    if boxes.shape[0] > 0:
+                        objects_msg = events.events_pb2.ObjectsMessage()
+                        objs = []
+                        for i in range(boxes.shape[0]):
+                            bbox = boxes[i]
+                            logger.debug("new object detected: %s", str(bbox))
+                            o = events.events_pb2.Object()
+                            o.type = events.events_pb2.TypeObject.ANY
+                            o.top = bbox[0].astype(float)
+                            o.right = bbox[3].astype(float)
+                            o.bottom = bbox[2].astype(float)
+                            o.left = bbox[1].astype(float)
+                            o.confidence = scores[i].astype(float)
+                            objs.append(o)
+                        objects_msg.objects.extend(objs)
+
+                        objects_msg.frame_ref.name = frame_msg.id.name
+                        objects_msg.frame_ref.id = frame_msg.id.id
+                        objects_msg.frame_ref.created_at.FromDatetime(now)
+
+                        logger.debug("publish object event to %s", self._frame_topic)
+                        self._mqtt_client.publish(topic=self._objects_topic,
+                                                  payload=objects_msg.SerializeToString(),
+                                                  qos=0,
+                                                  retain=False)
+
                 except Exception as e:
                     logger.exception("unexpected error: %s", str(e))
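The publishing path added above boils down to thresholding the localizer output and wrapping the surviving boxes in events.events_pb2 messages. A minimal host-side sketch of that step; the helper name is hypothetical and the frame_ref wiring done in run() is omitted:

```python
import numpy as np
import events.events_pb2


def to_objects_message(detection_boxes: np.ndarray,
                       detection_scores: np.ndarray,
                       threshold: float) -> events.events_pb2.ObjectsMessage:
    """Build an ObjectsMessage from (100, 4) boxes and (100,) scores (hypothetical helper)."""
    mask = detection_scores >= threshold
    boxes, scores = detection_boxes[mask], detection_scores[mask]

    msg = events.events_pb2.ObjectsMessage()
    for bbox, score in zip(boxes, scores):
        o = events.events_pb2.Object()
        o.type = events.events_pb2.TypeObject.ANY
        # Output order is [top, left, bottom, right], matching the assignments in cam.py
        o.top, o.left, o.bottom, o.right = (float(v) for v in bbox)
        o.confidence = float(score)
        msg.objects.append(o)
    return msg
```

run() inlines this logic, fills objects_msg.frame_ref from the camera frame message, and publishes the serialized payload on the --mqtt-topic-robocar-objects topic.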
camera/east.py (232 deletions)

@@ -1,232 +0,0 @@
import cv2
|
|
||||||
import depthai
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
_conf_threshold = 0.5
|
|
||||||
|
|
||||||
|
|
||||||
def get_cv_rotated_rect(bbox, angle):
|
|
||||||
x0, y0, x1, y1 = bbox
|
|
||||||
width = abs(x0 - x1)
|
|
||||||
height = abs(y0 - y1)
|
|
||||||
x = x0 + width * 0.5
|
|
||||||
y = y0 + height * 0.5
|
|
||||||
return [x.tolist(), y.tolist()], [width.tolist(), height.tolist()], np.rad2deg(angle)
|
|
||||||
|
|
||||||
|
|
||||||
def rotated_Rectangle(bbox, angle):
|
|
||||||
X0, Y0, X1, Y1 = bbox
|
|
||||||
width = abs(X0 - X1)
|
|
||||||
height = abs(Y0 - Y1)
|
|
||||||
x = int(X0 + width * 0.5)
|
|
||||||
y = int(Y0 + height * 0.5)
|
|
||||||
|
|
||||||
pt1_1 = (int(x + width / 2), int(y + height / 2))
|
|
||||||
pt2_1 = (int(x + width / 2), int(y - height / 2))
|
|
||||||
pt3_1 = (int(x - width / 2), int(y - height / 2))
|
|
||||||
pt4_1 = (int(x - width / 2), int(y + height / 2))
|
|
||||||
|
|
||||||
t = np.array([[np.cos(angle), -np.sin(angle), x - x * np.cos(angle) + y * np.sin(angle)],
|
|
||||||
[np.sin(angle), np.cos(angle), y - x * np.sin(angle) - y * np.cos(angle)],
|
|
||||||
[0, 0, 1]])
|
|
||||||
|
|
||||||
tmp_pt1_1 = np.array([[pt1_1[0]], [pt1_1[1]], [1]])
|
|
||||||
tmp_pt1_2 = np.dot(t, tmp_pt1_1)
|
|
||||||
pt1_2 = (int(tmp_pt1_2[0][0]), int(tmp_pt1_2[1][0]))
|
|
||||||
|
|
||||||
tmp_pt2_1 = np.array([[pt2_1[0]], [pt2_1[1]], [1]])
|
|
||||||
tmp_pt2_2 = np.dot(t, tmp_pt2_1)
|
|
||||||
pt2_2 = (int(tmp_pt2_2[0][0]), int(tmp_pt2_2[1][0]))
|
|
||||||
|
|
||||||
tmp_pt3_1 = np.array([[pt3_1[0]], [pt3_1[1]], [1]])
|
|
||||||
tmp_pt3_2 = np.dot(t, tmp_pt3_1)
|
|
||||||
pt3_2 = (int(tmp_pt3_2[0][0]), int(tmp_pt3_2[1][0]))
|
|
||||||
|
|
||||||
tmp_pt4_1 = np.array([[pt4_1[0]], [pt4_1[1]], [1]])
|
|
||||||
tmp_pt4_2 = np.dot(t, tmp_pt4_1)
|
|
||||||
pt4_2 = (int(tmp_pt4_2[0][0]), int(tmp_pt4_2[1][0]))
|
|
||||||
|
|
||||||
points = np.array([pt1_2, pt2_2, pt3_2, pt4_2])
|
|
||||||
|
|
||||||
return points
|
|
||||||
|
|
||||||
|
|
||||||
def non_max_suppression(boxes, probs=None, angles=None, overlapThresh=0.3):
|
|
||||||
# if there are no boxes, return an empty list
|
|
||||||
if len(boxes) == 0:
|
|
||||||
return [], []
|
|
||||||
|
|
||||||
# if the bounding boxes are integers, convert them to floats -- this
|
|
||||||
# is important since we'll be doing a bunch of divisions
|
|
||||||
if boxes.dtype.kind == "i":
|
|
||||||
boxes = boxes.astype("float")
|
|
||||||
|
|
||||||
# initialize the list of picked indexes
|
|
||||||
pick = []
|
|
||||||
|
|
||||||
# grab the coordinates of the bounding boxes
|
|
||||||
x1 = boxes[:, 0]
|
|
||||||
y1 = boxes[:, 1]
|
|
||||||
x2 = boxes[:, 2]
|
|
||||||
y2 = boxes[:, 3]
|
|
||||||
|
|
||||||
# compute the area of the bounding boxes and grab the indexes to sort
|
|
||||||
# (in the case that no probabilities are provided, simply sort on the bottom-left y-coordinate)
|
|
||||||
area = (x2 - x1 + 1) * (y2 - y1 + 1)
|
|
||||||
idxs = y2
|
|
||||||
|
|
||||||
# if probabilities are provided, sort on them instead
|
|
||||||
if probs is not None:
|
|
||||||
idxs = probs
|
|
||||||
|
|
||||||
# sort the indexes
|
|
||||||
idxs = np.argsort(idxs)
|
|
||||||
|
|
||||||
# keep looping while some indexes still remain in the indexes list
|
|
||||||
while len(idxs) > 0:
|
|
||||||
# grab the last index in the indexes list and add the index value to the list of picked indexes
|
|
||||||
last = len(idxs) - 1
|
|
||||||
i = idxs[last]
|
|
||||||
pick.append(i)
|
|
||||||
|
|
||||||
# find the largest (x, y) coordinates for the start of the bounding box and the smallest (x, y) coordinates
|
|
||||||
# for the end of the bounding box
|
|
||||||
xx1 = np.maximum(x1[i], x1[idxs[:last]])
|
|
||||||
yy1 = np.maximum(y1[i], y1[idxs[:last]])
|
|
||||||
xx2 = np.minimum(x2[i], x2[idxs[:last]])
|
|
||||||
yy2 = np.minimum(y2[i], y2[idxs[:last]])
|
|
||||||
|
|
||||||
# compute the width and height of the bounding box
|
|
||||||
w = np.maximum(0, xx2 - xx1 + 1)
|
|
||||||
h = np.maximum(0, yy2 - yy1 + 1)
|
|
||||||
|
|
||||||
# compute the ratio of overlap
|
|
||||||
overlap = (w * h) / area[idxs[:last]]
|
|
||||||
|
|
||||||
# delete all indexes from the index list that have overlap greater than the provided overlap threshold
|
|
||||||
idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0])))
|
|
||||||
|
|
||||||
# return only the bounding boxes that were picked
|
|
||||||
return boxes[pick].astype("int"), angles[pick]
|
|
||||||
|
|
||||||
|
|
||||||
def decode_predictions(scores, geometry1, geometry2):
|
|
||||||
# grab the number of rows and columns from the scores volume, then
|
|
||||||
# initialize our set of bounding box rectangles and corresponding
|
|
||||||
# confidence scores
|
|
||||||
(numRows, numCols) = scores.shape[2:4]
|
|
||||||
rects = []
|
|
||||||
confidences = []
|
|
||||||
angles = []
|
|
||||||
|
|
||||||
# loop over the number of rows
|
|
||||||
for y in range(0, numRows):
|
|
||||||
# extract the scores (probabilities), followed by the
|
|
||||||
# geometrical data used to derive potential bounding box
|
|
||||||
# coordinates that surround text
|
|
||||||
scoresData = scores[0, 0, y]
|
|
||||||
xData0 = geometry1[0, 0, y]
|
|
||||||
xData1 = geometry1[0, 1, y]
|
|
||||||
xData2 = geometry1[0, 2, y]
|
|
||||||
xData3 = geometry1[0, 3, y]
|
|
||||||
anglesData = geometry2[0, 0, y]
|
|
||||||
|
|
||||||
# loop over the number of columns
|
|
||||||
for x in range(0, numCols):
|
|
||||||
# if our score does not have sufficient probability,
|
|
||||||
# ignore it
|
|
||||||
if scoresData[x] < _conf_threshold:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# compute the offset factor as our resulting feature
|
|
||||||
# maps will be 4x smaller than the input image
|
|
||||||
(offsetX, offsetY) = (x * 4.0, y * 4.0)
|
|
||||||
|
|
||||||
# extract the rotation angle for the prediction and
|
|
||||||
# then compute the sin and cosine
|
|
||||||
angle = anglesData[x]
|
|
||||||
cos = np.cos(angle)
|
|
||||||
sin = np.sin(angle)
|
|
||||||
|
|
||||||
# use the geometry volume to derive the width and height
|
|
||||||
# of the bounding box
|
|
||||||
h = xData0[x] + xData2[x]
|
|
||||||
w = xData1[x] + xData3[x]
|
|
||||||
|
|
||||||
# compute both the starting and ending (x, y)-coordinates
|
|
||||||
# for the text prediction bounding box
|
|
||||||
endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
|
|
||||||
endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
|
|
||||||
startX = int(endX - w)
|
|
||||||
startY = int(endY - h)
|
|
||||||
|
|
||||||
# add the bounding box coordinates and probability score
|
|
||||||
# to our respective lists
|
|
||||||
rects.append((startX, startY, endX, endY))
|
|
||||||
confidences.append(scoresData[x])
|
|
||||||
angles.append(angle)
|
|
||||||
|
|
||||||
# return a tuple of the bounding boxes and associated confidences
|
|
||||||
return (rects, confidences, angles)
|
|
||||||
|
|
||||||
|
|
||||||
def decode_east(nnet_packet, **kwargs):
|
|
||||||
scores = nnet_packet.get_tensor(0)
|
|
||||||
geometry1 = nnet_packet.get_tensor(1)
|
|
||||||
geometry2 = nnet_packet.get_tensor(2)
|
|
||||||
bboxes, confs, angles = decode_predictions(scores, geometry1, geometry2
|
|
||||||
)
|
|
||||||
boxes, angles = non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles))
|
|
||||||
boxesangles = (boxes, angles)
|
|
||||||
return boxesangles
|
|
||||||
|
|
||||||
|
|
||||||
def show_east(boxesangles, frame, **kwargs):
|
|
||||||
bboxes = boxesangles[0]
|
|
||||||
angles = boxesangles[1]
|
|
||||||
for ((X0, Y0, X1, Y1), angle) in zip(bboxes, angles):
|
|
||||||
width = abs(X0 - X1)
|
|
||||||
height = abs(Y0 - Y1)
|
|
||||||
cX = int(X0 + width * 0.5)
|
|
||||||
cY = int(Y0 + height * 0.5)
|
|
||||||
|
|
||||||
rotRect = ((cX, cY), ((X1 - X0), (Y1 - Y0)), angle * (-1))
|
|
||||||
points = rotated_Rectangle(frame, rotRect, color=(255, 0, 0), thickness=1)
|
|
||||||
cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1, lineType=cv2.LINE_8)
|
|
||||||
|
|
||||||
return frame
|
|
||||||
|
|
||||||
|
|
||||||
def order_points(pts):
|
|
||||||
rect = np.zeros((4, 2), dtype="float32")
|
|
||||||
s = pts.sum(axis=1)
|
|
||||||
rect[0] = pts[np.argmin(s)]
|
|
||||||
rect[2] = pts[np.argmax(s)]
|
|
||||||
diff = np.diff(pts, axis=1)
|
|
||||||
rect[1] = pts[np.argmin(diff)]
|
|
||||||
rect[3] = pts[np.argmax(diff)]
|
|
||||||
return rect
|
|
||||||
|
|
||||||
|
|
||||||
def four_point_transform(image, pts):
|
|
||||||
rect = order_points(pts)
|
|
||||||
(tl, tr, br, bl) = rect
|
|
||||||
|
|
||||||
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
|
|
||||||
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
|
|
||||||
maxWidth = max(int(widthA), int(widthB))
|
|
||||||
|
|
||||||
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
|
|
||||||
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
|
|
||||||
maxHeight = max(int(heightA), int(heightB))
|
|
||||||
|
|
||||||
dst = np.array([
|
|
||||||
[0, 0],
|
|
||||||
[maxWidth - 1, 0],
|
|
||||||
[maxWidth - 1, maxHeight - 1],
|
|
||||||
[0, maxHeight - 1]], dtype="float32")
|
|
||||||
|
|
||||||
M = cv2.getPerspectiveTransform(rect, dst)
|
|
||||||
warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
|
|
||||||
|
|
||||||
return warped
|
|
@@ -1,61 +0,0 @@
|
|
||||||
class HostSeqSync:
|
|
||||||
def __init__(self):
|
|
||||||
self.imfFrames = []
|
|
||||||
|
|
||||||
def add_msg(self, msg):
|
|
||||||
self.imfFrames.append(msg)
|
|
||||||
|
|
||||||
def get_msg(self, target_seq):
|
|
||||||
for i, imgFrame in enumerate(self.imfFrames):
|
|
||||||
if target_seq == imgFrame.getSequenceNum():
|
|
||||||
self.imfFrames = self.imfFrames[i:]
|
|
||||||
break
|
|
||||||
return self.imfFrames[0]
|
|
||||||
|
|
||||||
|
|
||||||
class CTCCodec(object):
|
|
||||||
""" Convert between text-label and text-index """
|
|
||||||
|
|
||||||
def __init__(self, characters):
|
|
||||||
# characters (str): set of the possible characters.
|
|
||||||
dict_character = list(characters)
|
|
||||||
|
|
||||||
self.dict = {}
|
|
||||||
for i, char in enumerate(dict_character):
|
|
||||||
self.dict[char] = i + 1
|
|
||||||
|
|
||||||
self.characters = dict_character
|
|
||||||
# print(self.characters)
|
|
||||||
# input()
|
|
||||||
|
|
||||||
def decode(self, preds):
|
|
||||||
""" convert text-index into text-label. """
|
|
||||||
texts = []
|
|
||||||
index = 0
|
|
||||||
# Select max probabilty (greedy decoding) then decode index to character
|
|
||||||
preds = preds.astype(np.float16)
|
|
||||||
preds_index = np.argmax(preds, 2)
|
|
||||||
preds_index = preds_index.transpose(1, 0)
|
|
||||||
preds_index_reshape = preds_index.reshape(-1)
|
|
||||||
preds_sizes = np.array([preds_index.shape[1]] * preds_index.shape[0])
|
|
||||||
|
|
||||||
for l in preds_sizes:
|
|
||||||
t = preds_index_reshape[index:index + l]
|
|
||||||
|
|
||||||
# NOTE: t might be zero size
|
|
||||||
if t.shape[0] == 0:
|
|
||||||
continue
|
|
||||||
|
|
||||||
char_list = []
|
|
||||||
for i in range(l):
|
|
||||||
# removing repeated characters and blank.
|
|
||||||
if not (i > 0 and t[i - 1] == t[i]):
|
|
||||||
if self.characters[t[i]] != '#':
|
|
||||||
char_list.append(self.characters[t[i]])
|
|
||||||
text = ''.join(char_list)
|
|
||||||
texts.append(text)
|
|
||||||
|
|
||||||
index += l
|
|
||||||
|
|
||||||
return texts
|
|
east.py (229 deletions)

@@ -1,229 +0,0 @@
import cv2
|
|
||||||
import depthai
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
_conf_threshold = 0.5
|
|
||||||
|
|
||||||
def get_cv_rotated_rect(bbox, angle):
|
|
||||||
x0, y0, x1, y1 = bbox
|
|
||||||
width = abs(x0 - x1)
|
|
||||||
height = abs(y0 - y1)
|
|
||||||
x = x0 + width * 0.5
|
|
||||||
y = y0 + height * 0.5
|
|
||||||
return ([x.tolist(), y.tolist()], [width.tolist(), height.tolist()], np.rad2deg(angle))
|
|
||||||
|
|
||||||
def rotated_Rectangle(bbox, angle):
|
|
||||||
X0, Y0, X1, Y1 = bbox
|
|
||||||
width = abs(X0 - X1)
|
|
||||||
height = abs(Y0 - Y1)
|
|
||||||
x = int(X0 + width * 0.5)
|
|
||||||
y = int(Y0 + height * 0.5)
|
|
||||||
|
|
||||||
pt1_1 = (int(x + width / 2), int(y + height / 2))
|
|
||||||
pt2_1 = (int(x + width / 2), int(y - height / 2))
|
|
||||||
pt3_1 = (int(x - width / 2), int(y - height / 2))
|
|
||||||
pt4_1 = (int(x - width / 2), int(y + height / 2))
|
|
||||||
|
|
||||||
t = np.array([[np.cos(angle), -np.sin(angle), x - x * np.cos(angle) + y * np.sin(angle)],
|
|
||||||
[np.sin(angle), np.cos(angle), y - x * np.sin(angle) - y * np.cos(angle)],
|
|
||||||
[0, 0, 1]])
|
|
||||||
|
|
||||||
tmp_pt1_1 = np.array([[pt1_1[0]], [pt1_1[1]], [1]])
|
|
||||||
tmp_pt1_2 = np.dot(t, tmp_pt1_1)
|
|
||||||
pt1_2 = (int(tmp_pt1_2[0][0]), int(tmp_pt1_2[1][0]))
|
|
||||||
|
|
||||||
tmp_pt2_1 = np.array([[pt2_1[0]], [pt2_1[1]], [1]])
|
|
||||||
tmp_pt2_2 = np.dot(t, tmp_pt2_1)
|
|
||||||
pt2_2 = (int(tmp_pt2_2[0][0]), int(tmp_pt2_2[1][0]))
|
|
||||||
|
|
||||||
tmp_pt3_1 = np.array([[pt3_1[0]], [pt3_1[1]], [1]])
|
|
||||||
tmp_pt3_2 = np.dot(t, tmp_pt3_1)
|
|
||||||
pt3_2 = (int(tmp_pt3_2[0][0]), int(tmp_pt3_2[1][0]))
|
|
||||||
|
|
||||||
tmp_pt4_1 = np.array([[pt4_1[0]], [pt4_1[1]], [1]])
|
|
||||||
tmp_pt4_2 = np.dot(t, tmp_pt4_1)
|
|
||||||
pt4_2 = (int(tmp_pt4_2[0][0]), int(tmp_pt4_2[1][0]))
|
|
||||||
|
|
||||||
points = np.array([pt1_2, pt2_2, pt3_2, pt4_2])
|
|
||||||
|
|
||||||
return points
|
|
||||||
|
|
||||||
|
|
||||||
def non_max_suppression(boxes, probs=None, angles=None, overlapThresh=0.3):
|
|
||||||
# if there are no boxes, return an empty list
|
|
||||||
if len(boxes) == 0:
|
|
||||||
return [], []
|
|
||||||
|
|
||||||
# if the bounding boxes are integers, convert them to floats -- this
|
|
||||||
# is important since we'll be doing a bunch of divisions
|
|
||||||
if boxes.dtype.kind == "i":
|
|
||||||
boxes = boxes.astype("float")
|
|
||||||
|
|
||||||
# initialize the list of picked indexes
|
|
||||||
pick = []
|
|
||||||
|
|
||||||
# grab the coordinates of the bounding boxes
|
|
||||||
x1 = boxes[:, 0]
|
|
||||||
y1 = boxes[:, 1]
|
|
||||||
x2 = boxes[:, 2]
|
|
||||||
y2 = boxes[:, 3]
|
|
||||||
|
|
||||||
# compute the area of the bounding boxes and grab the indexes to sort
|
|
||||||
# (in the case that no probabilities are provided, simply sort on the bottom-left y-coordinate)
|
|
||||||
area = (x2 - x1 + 1) * (y2 - y1 + 1)
|
|
||||||
idxs = y2
|
|
||||||
|
|
||||||
# if probabilities are provided, sort on them instead
|
|
||||||
if probs is not None:
|
|
||||||
idxs = probs
|
|
||||||
|
|
||||||
# sort the indexes
|
|
||||||
idxs = np.argsort(idxs)
|
|
||||||
|
|
||||||
# keep looping while some indexes still remain in the indexes list
|
|
||||||
while len(idxs) > 0:
|
|
||||||
# grab the last index in the indexes list and add the index value to the list of picked indexes
|
|
||||||
last = len(idxs) - 1
|
|
||||||
i = idxs[last]
|
|
||||||
pick.append(i)
|
|
||||||
|
|
||||||
# find the largest (x, y) coordinates for the start of the bounding box and the smallest (x, y) coordinates for the end of the bounding box
|
|
||||||
xx1 = np.maximum(x1[i], x1[idxs[:last]])
|
|
||||||
yy1 = np.maximum(y1[i], y1[idxs[:last]])
|
|
||||||
xx2 = np.minimum(x2[i], x2[idxs[:last]])
|
|
||||||
yy2 = np.minimum(y2[i], y2[idxs[:last]])
|
|
||||||
|
|
||||||
# compute the width and height of the bounding box
|
|
||||||
w = np.maximum(0, xx2 - xx1 + 1)
|
|
||||||
h = np.maximum(0, yy2 - yy1 + 1)
|
|
||||||
|
|
||||||
# compute the ratio of overlap
|
|
||||||
overlap = (w * h) / area[idxs[:last]]
|
|
||||||
|
|
||||||
# delete all indexes from the index list that have overlap greater than the provided overlap threshold
|
|
||||||
idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0])))
|
|
||||||
|
|
||||||
# return only the bounding boxes that were picked
|
|
||||||
return boxes[pick].astype("int"), angles[pick]
|
|
||||||
|
|
||||||
|
|
||||||
def decode_predictions(scores, geometry1, geometry2):
|
|
||||||
# grab the number of rows and columns from the scores volume, then
|
|
||||||
# initialize our set of bounding box rectangles and corresponding
|
|
||||||
# confidence scores
|
|
||||||
(numRows, numCols) = scores.shape[2:4]
|
|
||||||
rects = []
|
|
||||||
confidences = []
|
|
||||||
angles = []
|
|
||||||
|
|
||||||
# loop over the number of rows
|
|
||||||
for y in range(0, numRows):
|
|
||||||
# extract the scores (probabilities), followed by the
|
|
||||||
# geometrical data used to derive potential bounding box
|
|
||||||
# coordinates that surround text
|
|
||||||
scoresData = scores[0, 0, y]
|
|
||||||
xData0 = geometry1[0, 0, y]
|
|
||||||
xData1 = geometry1[0, 1, y]
|
|
||||||
xData2 = geometry1[0, 2, y]
|
|
||||||
xData3 = geometry1[0, 3, y]
|
|
||||||
anglesData = geometry2[0, 0, y]
|
|
||||||
|
|
||||||
# loop over the number of columns
|
|
||||||
for x in range(0, numCols):
|
|
||||||
# if our score does not have sufficient probability,
|
|
||||||
# ignore it
|
|
||||||
if scoresData[x] < _conf_threshold:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# compute the offset factor as our resulting feature
|
|
||||||
# maps will be 4x smaller than the input image
|
|
||||||
(offsetX, offsetY) = (x * 4.0, y * 4.0)
|
|
||||||
|
|
||||||
# extract the rotation angle for the prediction and
|
|
||||||
# then compute the sin and cosine
|
|
||||||
angle = anglesData[x]
|
|
||||||
cos = np.cos(angle)
|
|
||||||
sin = np.sin(angle)
|
|
||||||
|
|
||||||
# use the geometry volume to derive the width and height
|
|
||||||
# of the bounding box
|
|
||||||
h = xData0[x] + xData2[x]
|
|
||||||
w = xData1[x] + xData3[x]
|
|
||||||
|
|
||||||
# compute both the starting and ending (x, y)-coordinates
|
|
||||||
# for the text prediction bounding box
|
|
||||||
endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
|
|
||||||
endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
|
|
||||||
startX = int(endX - w)
|
|
||||||
startY = int(endY - h)
|
|
||||||
|
|
||||||
# add the bounding box coordinates and probability score
|
|
||||||
# to our respective lists
|
|
||||||
rects.append((startX, startY, endX, endY))
|
|
||||||
confidences.append(scoresData[x])
|
|
||||||
angles.append(angle)
|
|
||||||
|
|
||||||
# return a tuple of the bounding boxes and associated confidences
|
|
||||||
return (rects, confidences, angles)
|
|
||||||
|
|
||||||
|
|
||||||
def decode_east(nnet_packet, **kwargs):
|
|
||||||
scores = nnet_packet.get_tensor(0)
|
|
||||||
geometry1 = nnet_packet.get_tensor(1)
|
|
||||||
geometry2 = nnet_packet.get_tensor(2)
|
|
||||||
bboxes, confs, angles = decode_predictions(scores, geometry1, geometry2
|
|
||||||
)
|
|
||||||
boxes, angles = non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles))
|
|
||||||
boxesangles = (boxes, angles)
|
|
||||||
return boxesangles
|
|
||||||
|
|
||||||
|
|
||||||
def show_east(boxesangles, frame, **kwargs):
|
|
||||||
bboxes = boxesangles[0]
|
|
||||||
angles = boxesangles[1]
|
|
||||||
for ((X0, Y0, X1, Y1), angle) in zip(bboxes, angles):
|
|
||||||
width = abs(X0 - X1)
|
|
||||||
height = abs(Y0 - Y1)
|
|
||||||
cX = int(X0 + width * 0.5)
|
|
||||||
cY = int(Y0 + height * 0.5)
|
|
||||||
|
|
||||||
rotRect = ((cX, cY), ((X1 - X0), (Y1 - Y0)), angle * (-1))
|
|
||||||
points = rotated_Rectangle(frame, rotRect, color=(255, 0, 0), thickness=1)
|
|
||||||
cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1, lineType=cv2.LINE_8)
|
|
||||||
|
|
||||||
return frame
|
|
||||||
|
|
||||||
|
|
||||||
def order_points(pts):
|
|
||||||
rect = np.zeros((4, 2), dtype="float32")
|
|
||||||
s = pts.sum(axis=1)
|
|
||||||
rect[0] = pts[np.argmin(s)]
|
|
||||||
rect[2] = pts[np.argmax(s)]
|
|
||||||
diff = np.diff(pts, axis=1)
|
|
||||||
rect[1] = pts[np.argmin(diff)]
|
|
||||||
rect[3] = pts[np.argmax(diff)]
|
|
||||||
return rect
|
|
||||||
|
|
||||||
|
|
||||||
def four_point_transform(image, pts):
|
|
||||||
rect = order_points(pts)
|
|
||||||
(tl, tr, br, bl) = rect
|
|
||||||
|
|
||||||
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
|
|
||||||
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
|
|
||||||
maxWidth = max(int(widthA), int(widthB))
|
|
||||||
|
|
||||||
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
|
|
||||||
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
|
|
||||||
maxHeight = max(int(heightA), int(heightB))
|
|
||||||
|
|
||||||
dst = np.array([
|
|
||||||
[0, 0],
|
|
||||||
[maxWidth - 1, 0],
|
|
||||||
[maxWidth - 1, maxHeight - 1],
|
|
||||||
[0, maxHeight - 1]], dtype="float32")
|
|
||||||
|
|
||||||
M = cv2.getPerspectiveTransform(rect, dst)
|
|
||||||
warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
|
|
||||||
|
|
||||||
return warped
|
|
@@ -14,7 +14,7 @@ _sym_db = _symbol_database.Default()
 from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
 
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13\x65vents/events.proto\x12\x0erobocar.events\x1a\x1fgoogle/protobuf/timestamp.proto\"T\n\x08\x46rameRef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\t\x12.\n\ncreated_at\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"C\n\x0c\x46rameMessage\x12$\n\x02id\x18\x01 \x01(\x0b\x32\x18.robocar.events.FrameRef\x12\r\n\x05\x66rame\x18\x02 \x01(\x0c\"d\n\x0fSteeringMessage\x12\x10\n\x08steering\x18\x01 \x01(\x02\x12\x12\n\nconfidence\x18\x02 \x01(\x02\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"d\n\x0fThrottleMessage\x12\x10\n\x08throttle\x18\x01 \x01(\x02\x12\x12\n\nconfidence\x18\x02 \x01(\x02\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"A\n\x10\x44riveModeMessage\x12-\n\ndrive_mode\x18\x01 \x01(\x0e\x32\x19.robocar.events.DriveMode\"f\n\x0eObjectsMessage\x12\'\n\x07objects\x18\x01 \x03(\x0b\x32\x16.robocar.events.Object\x12+\n\tframe_ref\x18\x02 \x01(\x0b\x32\x18.robocar.events.FrameRef\"\x80\x01\n\x06Object\x12(\n\x04type\x18\x01 \x01(\x0e\x32\x1a.robocar.events.TypeObject\x12\x0c\n\x04left\x18\x02 \x01(\x05\x12\x0b\n\x03top\x18\x03 \x01(\x05\x12\r\n\x05right\x18\x04 \x01(\x05\x12\x0e\n\x06\x62ottom\x18\x05 \x01(\x05\x12\x12\n\nconfidence\x18\x06 \x01(\x02\"&\n\x13SwitchRecordMessage\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\"\x8c\x01\n\x0bRoadMessage\x12&\n\x07\x63ontour\x18\x01 \x03(\x0b\x32\x15.robocar.events.Point\x12(\n\x07\x65llipse\x18\x02 \x01(\x0b\x32\x17.robocar.events.Ellipse\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"\x1d\n\x05Point\x12\t\n\x01x\x18\x01 \x01(\x05\x12\t\n\x01y\x18\x02 \x01(\x05\"r\n\x07\x45llipse\x12%\n\x06\x63\x65nter\x18\x01 \x01(\x0b\x32\x15.robocar.events.Point\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0e\n\x06height\x18\x03 \x01(\x05\x12\r\n\x05\x61ngle\x18\x04 \x01(\x02\x12\x12\n\nconfidence\x18\x05 \x01(\x02\"\x82\x01\n\rRecordMessage\x12+\n\x05\x66rame\x18\x01 \x01(\x0b\x32\x1c.robocar.events.FrameMessage\x12\x31\n\x08steering\x18\x02 \x01(\x0b\x32\x1f.robocar.events.SteeringMessage\x12\x11\n\trecordSet\x18\x03 \x01(\t*-\n\tDriveMode\x12\x0b\n\x07INVALID\x10\x00\x12\x08\n\x04USER\x10\x01\x12\t\n\x05PILOT\x10\x02*2\n\nTypeObject\x12\x07\n\x03\x41NY\x10\x00\x12\x07\n\x03\x43\x41R\x10\x01\x12\x08\n\x04\x42UMP\x10\x02\x12\x08\n\x04PLOT\x10\x03\x42\nZ\x08./eventsb\x06proto3')
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13\x65vents/events.proto\x12\x0erobocar.events\x1a\x1fgoogle/protobuf/timestamp.proto\"T\n\x08\x46rameRef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\t\x12.\n\ncreated_at\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"C\n\x0c\x46rameMessage\x12$\n\x02id\x18\x01 \x01(\x0b\x32\x18.robocar.events.FrameRef\x12\r\n\x05\x66rame\x18\x02 \x01(\x0c\"d\n\x0fSteeringMessage\x12\x10\n\x08steering\x18\x01 \x01(\x02\x12\x12\n\nconfidence\x18\x02 \x01(\x02\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"d\n\x0fThrottleMessage\x12\x10\n\x08throttle\x18\x01 \x01(\x02\x12\x12\n\nconfidence\x18\x02 \x01(\x02\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"A\n\x10\x44riveModeMessage\x12-\n\ndrive_mode\x18\x01 \x01(\x0e\x32\x19.robocar.events.DriveMode\"f\n\x0eObjectsMessage\x12\'\n\x07objects\x18\x01 \x03(\x0b\x32\x16.robocar.events.Object\x12+\n\tframe_ref\x18\x02 \x01(\x0b\x32\x18.robocar.events.FrameRef\"\x80\x01\n\x06Object\x12(\n\x04type\x18\x01 \x01(\x0e\x32\x1a.robocar.events.TypeObject\x12\x0c\n\x04left\x18\x02 \x01(\x02\x12\x0b\n\x03top\x18\x03 \x01(\x02\x12\r\n\x05right\x18\x04 \x01(\x02\x12\x0e\n\x06\x62ottom\x18\x05 \x01(\x02\x12\x12\n\nconfidence\x18\x06 \x01(\x02\"&\n\x13SwitchRecordMessage\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\"\x8c\x01\n\x0bRoadMessage\x12&\n\x07\x63ontour\x18\x01 \x03(\x0b\x32\x15.robocar.events.Point\x12(\n\x07\x65llipse\x18\x02 \x01(\x0b\x32\x17.robocar.events.Ellipse\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"\x1d\n\x05Point\x12\t\n\x01x\x18\x01 \x01(\x05\x12\t\n\x01y\x18\x02 \x01(\x05\"r\n\x07\x45llipse\x12%\n\x06\x63\x65nter\x18\x01 \x01(\x0b\x32\x15.robocar.events.Point\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0e\n\x06height\x18\x03 \x01(\x05\x12\r\n\x05\x61ngle\x18\x04 \x01(\x02\x12\x12\n\nconfidence\x18\x05 \x01(\x02\"\x82\x01\n\rRecordMessage\x12+\n\x05\x66rame\x18\x01 \x01(\x0b\x32\x1c.robocar.events.FrameMessage\x12\x31\n\x08steering\x18\x02 \x01(\x0b\x32\x1f.robocar.events.SteeringMessage\x12\x11\n\trecordSet\x18\x03 \x01(\t*-\n\tDriveMode\x12\x0b\n\x07INVALID\x10\x00\x12\x08\n\x04USER\x10\x01\x12\t\n\x05PILOT\x10\x02*2\n\nTypeObject\x12\x07\n\x03\x41NY\x10\x00\x12\x07\n\x03\x43\x41R\x10\x01\x12\x08\n\x04\x42UMP\x10\x02\x12\x08\n\x04PLOT\x10\x03\x42\nZ\x08./eventsb\x06proto3')
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'events.events_pb2', globals())
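The visible change in the regenerated events.events_pb2 module is in the serialized descriptor shown above: the Object fields left, top, right and bottom switch from int32 (type code \x05) to float (\x02), matching the float box coordinates that cam.py now assigns. A small round-trip sketch, assuming the generated events package is importable:

```python
import events.events_pb2 as evt

obj = evt.Object()
obj.type = evt.TypeObject.ANY
# Box coordinates are now floats rather than int32 pixel values.
obj.left, obj.top, obj.right, obj.bottom = 0.1, 0.2, 0.8, 0.9
obj.confidence = 0.75

msg = evt.ObjectsMessage()
msg.objects.append(obj)
payload = msg.SerializeToString()

decoded = evt.ObjectsMessage()
decoded.ParseFromString(payload)
print(decoded.objects[0].confidence)
```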
main.py (274 deletions)

@@ -1,274 +0,0 @@
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import cv2
|
|
||||||
import numpy as np
|
|
||||||
import depthai as dai
|
|
||||||
import east
|
|
||||||
import blobconverter
|
|
||||||
|
|
||||||
class HostSeqSync:
|
|
||||||
def __init__(self):
|
|
||||||
self.imfFrames = []
|
|
||||||
def add_msg(self, msg):
|
|
||||||
self.imfFrames.append(msg)
|
|
||||||
def get_msg(self, target_seq):
|
|
||||||
for i, imgFrame in enumerate(self.imfFrames):
|
|
||||||
if target_seq == imgFrame.getSequenceNum():
|
|
||||||
self.imfFrames = self.imfFrames[i:]
|
|
||||||
break
|
|
||||||
return self.imfFrames[0]
|
|
||||||
|
|
||||||
pipeline = dai.Pipeline()
|
|
||||||
version = "2021.2"
|
|
||||||
pipeline.setOpenVINOVersion(version=dai.OpenVINO.Version.VERSION_2021_2)
|
|
||||||
|
|
||||||
colorCam = pipeline.create(dai.node.ColorCamera)
|
|
||||||
colorCam.setPreviewSize(256, 256)
|
|
||||||
colorCam.setVideoSize(1024, 1024) # 4 times larger in both axis
|
|
||||||
colorCam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
|
|
||||||
colorCam.setInterleaved(False)
|
|
||||||
colorCam.setBoardSocket(dai.CameraBoardSocket.RGB)
|
|
||||||
colorCam.setFps(10)
|
|
||||||
|
|
||||||
controlIn = pipeline.create(dai.node.XLinkIn)
|
|
||||||
controlIn.setStreamName('control')
|
|
||||||
controlIn.out.link(colorCam.inputControl)
|
|
||||||
|
|
||||||
cam_xout = pipeline.create(dai.node.XLinkOut)
|
|
||||||
cam_xout.setStreamName('video')
|
|
||||||
colorCam.video.link(cam_xout.input)
|
|
||||||
|
|
||||||
# ---------------------------------------
|
|
||||||
# 1st stage NN - text-detection
|
|
||||||
# ---------------------------------------
|
|
||||||
|
|
||||||
nn = pipeline.create(dai.node.NeuralNetwork)
|
|
||||||
nn.setBlobPath(blobconverter.from_zoo(name="east_text_detection_256x256",zoo_type="depthai",shaves=6, version=version))
|
|
||||||
colorCam.preview.link(nn.input)
|
|
||||||
|
|
||||||
nn_xout = pipeline.create(dai.node.XLinkOut)
|
|
||||||
nn_xout.setStreamName('detections')
|
|
||||||
nn.out.link(nn_xout.input)
|
|
||||||
|
|
||||||
# ---------------------------------------
|
|
||||||
# 2nd stage NN - text-recognition-0012
|
|
||||||
# ---------------------------------------
|
|
||||||
|
|
||||||
manip = pipeline.create(dai.node.ImageManip)
|
|
||||||
manip.setWaitForConfigInput(True)
|
|
||||||
|
|
||||||
manip_img = pipeline.create(dai.node.XLinkIn)
|
|
||||||
manip_img.setStreamName('manip_img')
|
|
||||||
manip_img.out.link(manip.inputImage)
|
|
||||||
|
|
||||||
manip_cfg = pipeline.create(dai.node.XLinkIn)
|
|
||||||
manip_cfg.setStreamName('manip_cfg')
|
|
||||||
manip_cfg.out.link(manip.inputConfig)
|
|
||||||
|
|
||||||
manip_xout = pipeline.create(dai.node.XLinkOut)
|
|
||||||
manip_xout.setStreamName('manip_out')
|
|
||||||
|
|
||||||
nn2 = pipeline.create(dai.node.NeuralNetwork)
|
|
||||||
nn2.setBlobPath(blobconverter.from_zoo(name="text-recognition-0012", shaves=6, version=version))
|
|
||||||
nn2.setNumInferenceThreads(2)
|
|
||||||
manip.out.link(nn2.input)
|
|
||||||
manip.out.link(manip_xout.input)
|
|
||||||
|
|
||||||
nn2_xout = pipeline.create(dai.node.XLinkOut)
|
|
||||||
nn2_xout.setStreamName("recognitions")
|
|
||||||
nn2.out.link(nn2_xout.input)
|
|
||||||
|
|
||||||
def to_tensor_result(packet):
|
|
||||||
return {
|
|
||||||
name: np.array(packet.getLayerFp16(name))
|
|
||||||
for name in [tensor.name for tensor in packet.getRaw().tensors]
|
|
||||||
}
|
|
||||||
|
|
||||||
def to_planar(frame):
|
|
||||||
return frame.transpose(2, 0, 1).flatten()
|
|
||||||
|
|
||||||
with dai.Device(pipeline) as device:
|
|
||||||
q_vid = device.getOutputQueue("video", 4, blocking=False)
|
|
||||||
# This should be set to block, but would get to some extreme queuing/latency!
|
|
||||||
q_det = device.getOutputQueue("detections", 4, blocking=False)
|
|
||||||
|
|
||||||
q_rec = device.getOutputQueue("recognitions", 4, blocking=True)
|
|
||||||
|
|
||||||
q_manip_img = device.getInputQueue("manip_img")
|
|
||||||
q_manip_cfg = device.getInputQueue("manip_cfg")
|
|
||||||
q_manip_out = device.getOutputQueue("manip_out", 4, blocking=False)
|
|
||||||
|
|
||||||
controlQueue = device.getInputQueue('control')
|
|
||||||
|
|
||||||
frame = None
|
|
||||||
cropped_stacked = None
|
|
||||||
rotated_rectangles = []
|
|
||||||
rec_pushed = 0
|
|
||||||
rec_received = 0
|
|
||||||
host_sync = HostSeqSync()
|
|
||||||
|
|
||||||
class CTCCodec(object):
|
|
||||||
""" Convert between text-label and text-index """
|
|
||||||
def __init__(self, characters):
|
|
||||||
# characters (str): set of the possible characters.
|
|
||||||
dict_character = list(characters)
|
|
||||||
|
|
||||||
self.dict = {}
|
|
||||||
for i, char in enumerate(dict_character):
|
|
||||||
self.dict[char] = i + 1
|
|
||||||
|
|
||||||
self.characters = dict_character
|
|
||||||
#print(self.characters)
|
|
||||||
#input()
|
|
||||||
def decode(self, preds):
|
|
||||||
""" convert text-index into text-label. """
|
|
||||||
texts = []
|
|
||||||
index = 0
|
|
||||||
# Select max probabilty (greedy decoding) then decode index to character
|
|
||||||
preds = preds.astype(np.float16)
|
|
||||||
preds_index = np.argmax(preds, 2)
|
|
||||||
preds_index = preds_index.transpose(1, 0)
|
|
||||||
preds_index_reshape = preds_index.reshape(-1)
|
|
||||||
preds_sizes = np.array([preds_index.shape[1]] * preds_index.shape[0])
|
|
||||||
|
|
||||||
for l in preds_sizes:
|
|
||||||
t = preds_index_reshape[index:index + l]
|
|
||||||
|
|
||||||
# NOTE: t might be zero size
|
|
||||||
if t.shape[0] == 0:
|
|
||||||
continue
|
|
||||||
|
|
||||||
char_list = []
|
|
||||||
for i in range(l):
|
|
||||||
# removing repeated characters and blank.
|
|
||||||
if not (i > 0 and t[i - 1] == t[i]):
|
|
||||||
if self.characters[t[i]] != '#':
|
|
||||||
char_list.append(self.characters[t[i]])
|
|
||||||
text = ''.join(char_list)
|
|
||||||
texts.append(text)
|
|
||||||
|
|
||||||
index += l
|
|
||||||
|
|
||||||
return texts
|
|
||||||
|
|
||||||
characters = '0123456789abcdefghijklmnopqrstuvwxyz#'
|
|
||||||
codec = CTCCodec(characters)
|
|
||||||
|
|
||||||
ctrl = dai.CameraControl()
|
|
||||||
ctrl.setAutoFocusMode(dai.CameraControl.AutoFocusMode.CONTINUOUS_VIDEO)
|
|
||||||
ctrl.setAutoFocusTrigger()
|
|
||||||
controlQueue.send(ctrl)
|
|
||||||
|
|
||||||
while True:
|
|
||||||
vid_in = q_vid.tryGet()
|
|
||||||
if vid_in is not None:
|
|
||||||
host_sync.add_msg(vid_in)
|
|
||||||
|
|
||||||
# Multiple recognition results may be available, read until queue is empty
|
|
||||||
while True:
|
|
||||||
in_rec = q_rec.tryGet()
|
|
||||||
if in_rec is None:
|
|
||||||
break
|
|
||||||
rec_data = bboxes = np.array(in_rec.getFirstLayerFp16()).reshape(30,1,37)
|
|
||||||
decoded_text = codec.decode(rec_data)[0]
|
|
||||||
pos = rotated_rectangles[rec_received]
|
|
||||||
print("{:2}: {:20}".format(rec_received, decoded_text),
|
|
||||||
"center({:3},{:3}) size({:3},{:3}) angle{:5.1f} deg".format(
|
|
||||||
int(pos[0][0]), int(pos[0][1]), pos[1][0], pos[1][1], pos[2]))
|
|
||||||
# Draw the text on the right side of 'cropped_stacked' - placeholder
|
|
||||||
if cropped_stacked is not None:
|
|
||||||
cv2.putText(cropped_stacked, decoded_text,
|
|
||||||
(120 + 10 , 32 * rec_received + 24),
|
|
||||||
cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,255,0), 2)
|
|
||||||
cv2.imshow('cropped_stacked', cropped_stacked)
|
|
||||||
rec_received += 1
|
|
||||||
|
|
||||||
if cv2.waitKey(1) == ord('q'):
|
|
||||||
break
|
|
||||||
|
|
||||||
if rec_received >= rec_pushed:
|
|
||||||
in_det = q_det.tryGet()
|
|
||||||
if in_det is not None:
|
|
||||||
frame = host_sync.get_msg(in_det.getSequenceNum()).getCvFrame().copy()
|
|
||||||
|
|
||||||
scores, geom1, geom2 = to_tensor_result(in_det).values()
|
|
||||||
scores = np.reshape(scores, (1, 1, 64, 64))
|
|
||||||
geom1 = np.reshape(geom1, (1, 4, 64, 64))
|
|
||||||
geom2 = np.reshape(geom2, (1, 1, 64, 64))
|
|
||||||
|
|
||||||
bboxes, confs, angles = east.decode_predictions(scores, geom1, geom2)
|
|
||||||
boxes, angles = east.non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles))
|
|
||||||
rotated_rectangles = [
|
|
||||||
east.get_cv_rotated_rect(bbox, angle * -1)
|
|
||||||
for (bbox, angle) in zip(boxes, angles)
|
|
||||||
]
|
|
||||||
|
|
||||||
rec_received = 0
|
|
||||||
rec_pushed = len(rotated_rectangles)
|
|
||||||
if rec_pushed:
|
|
||||||
print("====== Pushing for recognition, count:", rec_pushed)
|
|
||||||
cropped_stacked = None
|
|
||||||
for idx, rotated_rect in enumerate(rotated_rectangles):
|
|
||||||
# Detections are done on 256x256 frames, we are sending back 1024x1024
|
|
||||||
# That's why we multiply center and size values by 4
|
|
||||||
rotated_rect[0][0] = rotated_rect[0][0] * 4
|
|
||||||
rotated_rect[0][1] = rotated_rect[0][1] * 4
|
|
||||||
rotated_rect[1][0] = rotated_rect[1][0] * 4
|
|
||||||
rotated_rect[1][1] = rotated_rect[1][1] * 4
|
|
||||||
|
|
||||||
# Draw detection crop area on input frame
|
|
||||||
points = np.int0(cv2.boxPoints(rotated_rect))
|
|
||||||
print(rotated_rect)
|
|
||||||
cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1, lineType=cv2.LINE_8)
|
|
||||||
|
|
||||||
# TODO make it work taking args like in OpenCV:
|
|
||||||
# rr = ((256, 256), (128, 64), 30)
|
|
||||||
rr = dai.RotatedRect()
|
|
||||||
rr.center.x = rotated_rect[0][0]
|
|
||||||
rr.center.y = rotated_rect[0][1]
|
|
||||||
rr.size.width = rotated_rect[1][0]
|
|
||||||
rr.size.height = rotated_rect[1][1]
|
|
||||||
rr.angle = rotated_rect[2]
|
|
||||||
cfg = dai.ImageManipConfig()
|
|
||||||
cfg.setCropRotatedRect(rr, False)
|
|
||||||
cfg.setResize(120, 32)
|
|
||||||
# Send frame and config to device
|
|
||||||
if idx == 0:
|
|
||||||
w,h,c = frame.shape
|
|
||||||
imgFrame = dai.ImgFrame()
|
|
||||||
imgFrame.setData(to_planar(frame))
|
|
||||||
imgFrame.setType(dai.ImgFrame.Type.BGR888p)
|
|
||||||
imgFrame.setWidth(w)
|
|
||||||
imgFrame.setHeight(h)
|
|
||||||
q_manip_img.send(imgFrame)
|
|
||||||
else:
|
|
||||||
cfg.setReusePreviousImage(True)
|
|
||||||
q_manip_cfg.send(cfg)
|
|
||||||
|
|
||||||
# Get manipulated image from the device
|
|
||||||
transformed = q_manip_out.get().getCvFrame()
|
|
||||||
|
|
||||||
rec_placeholder_img = np.zeros((32, 200, 3), np.uint8)
|
|
||||||
transformed = np.hstack((transformed, rec_placeholder_img))
|
|
||||||
if cropped_stacked is None:
|
|
||||||
cropped_stacked = transformed
|
|
||||||
else:
|
|
||||||
cropped_stacked = np.vstack((cropped_stacked, transformed))
|
|
||||||
|
|
||||||
if cropped_stacked is not None:
|
|
||||||
cv2.imshow('cropped_stacked', cropped_stacked)
|
|
||||||
|
|
||||||
if frame is not None:
|
|
||||||
cv2.imshow('frame', frame)
|
|
||||||
|
|
||||||
key = cv2.waitKey(1)
|
|
||||||
if key == ord('q'):
|
|
||||||
break
|
|
||||||
elif key == ord('t'):
|
|
||||||
print("Autofocus trigger (and disable continuous)")
|
|
||||||
ctrl = dai.CameraControl()
|
|
||||||
ctrl.setAutoFocusMode(dai.CameraControl.AutoFocusMode.AUTO)
|
|
||||||
ctrl.setAutoFocusTrigger()
|
|
||||||
controlQueue.send(ctrl)
|
|
@@ -1,9 +1,8 @@
 paho-mqtt~=1.6.1
 docopt~=0.6.2
-depthai==2.14.1.0
-opencv-python~=4.5.5.62
+depthai==2.17.2.0
+opencv-python==4.6.0.66
 google~=3.0.0
 google-api-core~=2.4.0
 setuptools==60.5.0
-protobuf3
-blobconverter>=1.2.9
+blobconverter==1.3.0
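A throwaway check, not part of the repository, to confirm an environment picked up the new pins:

```python
import cv2
import depthai as dai

# Versions resolved by pip; they should match the pins above.
print("depthai:", dai.__version__)
print("opencv-python:", cv2.__version__)
```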