Compare commits

4 Commits
feat/text_ ... 3766531936

| Author | SHA1 | Date |
|---|---|---|
|  | 3766531936 |  |
|  | fe63597ba4 |  |
|  | 52c3808d83 |  |
|  | 33c16699ae |  |

Dockerfile (12 changes)
							| @@ -1,3 +1,12 @@ | ||||
| FROM docker.io/library/python:3.9-slim AS model | ||||
|  | ||||
| RUN python3 -m pip install blobconverter | ||||
|  | ||||
| RUN mkdir -p /models | ||||
|  | ||||
| RUN blobconverter --zoo-name mobile_object_localizer_192x192 --zoo-type depthai --shaves 6 --version 2021.4 --output-dir /models || echo "" | ||||
| RUN ls /models | ||||
| ####### | ||||
| FROM docker.io/library/python:3.9-slim | ||||
|  | ||||
| # Configure piwheels repo to use pre-compiled numpy wheels for arm | ||||
| @@ -7,6 +16,9 @@ RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 | ||||
|  | ||||
| RUN pip3 install numpy | ||||
|  | ||||
| RUN mkdir /models | ||||
|  | ||||
| COPY --from=model /models/mobile_object_localizer_192x192_openvino_2021.4_6shave.blob /models/mobile_object_localizer_192x192_openvino_2021.4_6shave.blob | ||||
| ADD requirements.txt requirements.txt | ||||
|  | ||||
| RUN pip3 install -r requirements.txt | ||||
|   | ||||
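For reference, the same model blob can also be fetched from a Python session instead of the Dockerfile's blobconverter CLI step. This is a sketch using blobconverter's from_zoo API (the same call style already used elsewhere in this repository); the output directory is an assumption mirroring the Dockerfile:

    import blobconverter

    # Download/convert the zoo model to a .blob, mirroring the Dockerfile's
    # `blobconverter --zoo-name ...` step. output_dir is an assumption; by
    # default blobconverter caches the blob in a local cache directory.
    blob_path = blobconverter.from_zoo(
        name="mobile_object_localizer_192x192",
        zoo_type="depthai",
        shaves=6,
        version="2021.4",
        output_dir="/models",
    )
    print(blob_path)  # e.g. /models/mobile_object_localizer_192x192_openvino_2021.4_6shave.blob
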
| @@ -1,19 +1,25 @@ | ||||
| """ | ||||
| Publish data from oak-lite device | ||||
|  | ||||
| Usage: rc-oak-camera [-u USERNAME | --mqtt-username=USERNAME] [--mqtt-password=PASSWORD] [--mqtt-broker=HOSTNAME] \ | ||||
|     [--mqtt-topic-robocar-oak-camera="TOPIC_CAMERA"] [--mqtt-client-id=CLIENT_ID] \ | ||||
|     [-H IMG_HEIGHT | --image-height=IMG_HEIGHT] [-W IMG_WIDTH | --image-width=IMG_width] | ||||
| Usage: rc-oak-camera [-u USERNAME | --mqtt-username=USERNAME] [--mqtt-password=PASSWORD] \ | ||||
|     [--mqtt-broker-host=HOSTNAME] [--mqtt-broker-port=PORT] \ | ||||
|     [--mqtt-topic-robocar-oak-camera="TOPIC_CAMERA"] [--mqtt-topic-robocar-objects="TOPIC_OBJECTS"] \ | ||||
|     [--mqtt-client-id=CLIENT_ID] \ | ||||
|     [-H IMG_HEIGHT | --image-height=IMG_HEIGHT] [-W IMG_WIDTH | --image-width=IMG_width] \ | ||||
|     [-t OBJECTS_THRESHOLD | --objects-threshold=OBJECTS_THRESHOLD] | ||||
|  | ||||
| Options: | ||||
| -h --help                                               Show this screen. | ||||
| -u USERID --mqtt-username=USERNAME                      MQTT user | ||||
| -p PASSWORD --mqtt-password=PASSWORD                    MQTT password | ||||
| -b HOSTNAME --mqtt-broker=HOSTNAME                      MQTT broker host | ||||
| -b HOSTNAME --mqtt-broker-host=HOSTNAME                 MQTT broker host | ||||
| -P HOSTNAME --mqtt-broker-port=PORT                     MQTT broker port | ||||
| -C CLIENT_ID --mqtt-client-id=CLIENT_ID                 MQTT client id | ||||
| -c TOPIC_CAMERA --mqtt-topic-robocar-oak-camera=TOPIC_CAMERA        MQTT topic where to publish robocar-oak-camera frames | ||||
| -o TOPIC_OBJECTS --mqtt-topic-robocar-objects=TOPIC_OBJECTS         MQTT topic where to publish objects detection results | ||||
| -H IMG_HEIGHT --image-height=IMG_HEIGHT                 IMG_HEIGHT image height | ||||
| -W IMG_WIDTH --image-width=IMG_width                    IMG_WIDTH image width | ||||
| -t OBJECTS_THRESHOLD --objects-threshold=OBJECTS_THRESHOLD    OBJECTS_THRESHOLD threshold to filter objects detected | ||||
| """ | ||||
| import logging | ||||
| import os | ||||
| @@ -27,13 +33,13 @@ logging.basicConfig(level=logging.INFO) | ||||
| default_client_id = "robocar-depthai" | ||||
|  | ||||
|  | ||||
| def init_mqtt_client(broker_host: str, user: str, password: str, client_id: str) -> mqtt.Client: | ||||
| def init_mqtt_client(broker_host: str, broker_port, user: str, password: str, client_id: str) -> mqtt.Client: | ||||
|     logger.info("Start part.py-robocar-oak-camera") | ||||
|     client = mqtt.Client(client_id=client_id, clean_session=True, userdata=None, protocol=mqtt.MQTTv311) | ||||
|  | ||||
|     client.username_pw_set(user, password) | ||||
|     logger.info("Connect to mqtt broker "+ broker_host) | ||||
|     client.connect(host=broker_host, port=1883, keepalive=60) | ||||
|     client.connect(host=broker_host, port=broker_port, keepalive=60) | ||||
|     logger.info("Connected to mqtt broker") | ||||
|     return client | ||||
|  | ||||
| @@ -43,16 +49,21 @@ def execute_from_command_line(): | ||||
|  | ||||
|     args = docopt(__doc__) | ||||
|  | ||||
|     client = init_mqtt_client(broker_host=get_default_value(args["--mqtt-broker"], "MQTT_BROKER", "localhost"), | ||||
|     client = init_mqtt_client(broker_host=get_default_value(args["--mqtt-broker-host"], "MQTT_BROKER_HOST", "localhost"), | ||||
|                               broker_port=int(get_default_value(args["--mqtt-broker-port"], "MQTT_BROKER_PORT", "1883")), | ||||
|                               user=get_default_value(args["--mqtt-username"], "MQTT_USERNAME", ""), | ||||
|                               password=get_default_value(args["--mqtt-password"], "MQTT_PASSWORD", ""), | ||||
|                               client_id=get_default_value(args["--mqtt-client-id"], "MQTT_CLIENT_ID", | ||||
|                                                           default_client_id), | ||||
|                               ) | ||||
|     frame_topic = get_default_value(args["--mqtt-topic-robocar-oak-camera"], "MQTT_TOPIC_CAMERA", "/oak/camera_rgb") | ||||
|     objects_topic = get_default_value(args["--mqtt-topic-robocar-objects"], "MQTT_TOPIC_OBJECTS", "/objects") | ||||
|  | ||||
|     frame_processor = cam.FramePublisher(mqtt_client=client, | ||||
|                                          frame_topic=frame_topic, | ||||
|                                          objects_topic=objects_topic, | ||||
|                                          objects_threshold=float(get_default_value(args["--objects-threshold"], | ||||
|                                                                                    "OBJECTS_THRESHOLD", 0.2)), | ||||
|                                          img_width=int(get_default_value(args["--image-width"], "IMAGE_WIDTH", 160)), | ||||
|                                          img_height=int(get_default_value(args["--image-height"], "IMAGE_HEIGHT", 120))) | ||||
|     frame_processor.run() | ||||
|   | ||||
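The helper get_default_value used above is not part of this diff. A minimal sketch of its presumed contract (CLI flag first, then environment variable, then hard-coded default), labeled as an assumption rather than the repository's actual implementation:

    import os

    def get_default_value(arg_value, env_var, default):
        # Prefer the explicit CLI argument, fall back to the environment
        # variable, and finally to the hard-coded default
        # (assumed behaviour; the real helper is not shown in this diff).
        if arg_value is not None:
            return arg_value
        return os.environ.get(env_var, default)

With the new flags, an invocation might look like, for example, rc-oak-camera --mqtt-broker-host=localhost --mqtt-broker-port=1883 --mqtt-topic-robocar-objects=/objects -t 0.25.
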
| @@ -6,25 +6,22 @@ import events.events_pb2 | ||||
|  | ||||
| import depthai as dai | ||||
| import cv2 | ||||
| import numpy as np | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| def to_tensor_result(packet): | ||||
|     return { | ||||
|         name: np.array(packet.getLayerFp16(name)) | ||||
|         for name in [tensor.name for tensor in packet.getRaw().tensors] | ||||
|     } | ||||
|  | ||||
|  | ||||
| def to_planar(frame): | ||||
|     return frame.transpose(2, 0, 1).flatten() | ||||
| NN_PATH = "/models/mobile_object_localizer_192x192_openvino_2021.4_6shave.blob" | ||||
| NN_WIDTH = 192 | ||||
| NN_HEIGHT = 192 | ||||
|  | ||||
|  | ||||
| class FramePublisher: | ||||
|     def __init__(self, mqtt_client: mqtt.Client, frame_topic: str, img_width: int, img_height: int): | ||||
|     def __init__(self, mqtt_client: mqtt.Client, frame_topic: str, objects_topic: str, objects_threshold: float, | ||||
|                  img_width: int, img_height: int): | ||||
|         self._mqtt_client = mqtt_client | ||||
|         self._frame_topic = frame_topic | ||||
|         self._objects_topic = objects_topic | ||||
|         self._objects_threshold = objects_threshold | ||||
|         self._img_width = img_width | ||||
|         self._img_height = img_height | ||||
|         self._pipeline = self._configure_pipeline() | ||||
| @@ -33,77 +30,30 @@ class FramePublisher: | ||||
|         logger.info("configure pipeline") | ||||
|         pipeline = dai.Pipeline() | ||||
|  | ||||
|         version = "2021.2" | ||||
|         pipeline.setOpenVINOVersion(version=dai.OpenVINO.Version.VERSION_2021_2) | ||||
|         pipeline.setOpenVINOVersion(version=dai.OpenVINO.VERSION_2021_4) | ||||
|  | ||||
|         # colorCam = pipeline.create(dai.node.ColorCamera) | ||||
|         # colorCam.setPreviewSize(256, 256) | ||||
|         # colorCam.setVideoSize(1024, 1024)  # 4 times larger in both axis | ||||
|         # colorCam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) | ||||
|         # colorCam.setInterleaved(False) | ||||
|         # colorCam.setBoardSocket(dai.CameraBoardSocket.RGB) | ||||
|         # colorCam.setFps(10) | ||||
|         # | ||||
|         # controlIn = pipeline.create(dai.node.XLinkIn) | ||||
|         # controlIn.setStreamName('control') | ||||
|         # controlIn.out.link(colorCam.inputControl) | ||||
|         # | ||||
|         # cam_xout = pipeline.create(dai.node.XLinkOut) | ||||
|         # cam_xout.setStreamName('video') | ||||
|         # colorCam.video.link(cam_xout.input) | ||||
|         # Define a neural network that will make predictions based on the source frames | ||||
|         detection_nn = pipeline.create(dai.node.NeuralNetwork) | ||||
|         detection_nn.setBlobPath(NN_PATH) | ||||
|         detection_nn.setNumPoolFrames(4) | ||||
|         detection_nn.input.setBlocking(False) | ||||
|         detection_nn.setNumInferenceThreads(2) | ||||
|  | ||||
|         # --------------------------------------- | ||||
|         # 1st stage NN - text-detection | ||||
|         # --------------------------------------- | ||||
|  | ||||
|         nn = pipeline.create(dai.node.NeuralNetwork) | ||||
|         nn.setBlobPath( | ||||
|             blobconverter.from_zoo(name="east_text_detection_256x256", zoo_type="depthai", shaves=6, version=version)) | ||||
|         colorCam.preview.link(nn.input) | ||||
|  | ||||
|         nn_xout = pipeline.create(dai.node.XLinkOut) | ||||
|         nn_xout.setStreamName('detections') | ||||
|         nn.out.link(nn_xout.input) | ||||
|  | ||||
|         # --------------------------------------- | ||||
|         # 2nd stage NN - text-recognition-0012 | ||||
|         # --------------------------------------- | ||||
|         xout_nn = pipeline.create(dai.node.XLinkOut) | ||||
|         xout_nn.setStreamName("nn") | ||||
|         xout_nn.input.setBlocking(False) | ||||
|  | ||||
|         # Resize image | ||||
|         manip = pipeline.create(dai.node.ImageManip) | ||||
|         manip.setWaitForConfigInput(True) | ||||
|  | ||||
|         manip_img = pipeline.create(dai.node.XLinkIn) | ||||
|         manip_img.setStreamName('manip_img') | ||||
|         manip_img.out.link(manip.inputImage) | ||||
|  | ||||
|         manip_cfg = pipeline.create(dai.node.XLinkIn) | ||||
|         manip_cfg.setStreamName('manip_cfg') | ||||
|         manip_cfg.out.link(manip.inputConfig) | ||||
|  | ||||
|         manip_xout = pipeline.create(dai.node.XLinkOut) | ||||
|         manip_xout.setStreamName('manip_out') | ||||
|  | ||||
|         nn2 = pipeline.create(dai.node.NeuralNetwork) | ||||
|         nn2.setBlobPath(blobconverter.from_zoo(name="text-recognition-0012", shaves=6, version=version)) | ||||
|         nn2.setNumInferenceThreads(2) | ||||
|         manip.out.link(nn2.input) | ||||
|         manip.out.link(manip_xout.input) | ||||
|  | ||||
|         nn2_xout = pipeline.create(dai.node.XLinkOut) | ||||
|         nn2_xout.setStreamName("recognitions") | ||||
|         nn2.out.link(nn2_xout.input) | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|         manip.initialConfig.setResize(NN_WIDTH, NN_HEIGHT) | ||||
|         manip.initialConfig.setFrameType(dai.ImgFrame.Type.RGB888p) | ||||
|         manip.initialConfig.setKeepAspectRatio(False) | ||||
|  | ||||
|         cam_rgb = pipeline.create(dai.node.ColorCamera) | ||||
|         xout_rgb = pipeline.create(dai.node.XLinkOut) | ||||
|  | ||||
|         xout_rgb.setStreamName("rgb") | ||||
|  | ||||
|  | ||||
|         # Properties | ||||
|         cam_rgb.setBoardSocket(dai.CameraBoardSocket.RGB) | ||||
|         cam_rgb.setPreviewSize(width=self._img_width, height=self._img_height) | ||||
| @@ -111,156 +61,18 @@ class FramePublisher: | ||||
|         cam_rgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB) | ||||
|         cam_rgb.setFps(30) | ||||
|  | ||||
|         # Linking | ||||
|         # Link preview to manip and manip to nn | ||||
|         cam_rgb.preview.link(manip.inputImage) | ||||
|         manip.out.link(detection_nn.input) | ||||
|  | ||||
|         # Linking to output | ||||
|         cam_rgb.preview.link(xout_rgb.input) | ||||
|         detection_nn.out.link(xout_nn.input) | ||||
|  | ||||
|         logger.info("pipeline configured") | ||||
|         return pipeline | ||||
|  | ||||
|     def run(self): | ||||
|  | ||||
|         with dai.Device(self._pipeline) as device: | ||||
|             q_vid = device.getOutputQueue("video", 4, blocking=False) | ||||
|             # This should be set to block, but would get to some extreme queuing/latency! | ||||
|             q_det = device.getOutputQueue("detections", 4, blocking=False) | ||||
|  | ||||
|             q_rec = device.getOutputQueue("recognitions", 4, blocking=True) | ||||
|  | ||||
|             q_manip_img = device.getInputQueue("manip_img") | ||||
|             q_manip_cfg = device.getInputQueue("manip_cfg") | ||||
|             q_manip_out = device.getOutputQueue("manip_out", 4, blocking=False) | ||||
|  | ||||
|             controlQueue = device.getInputQueue('control') | ||||
|  | ||||
|             frame = None | ||||
|             cropped_stacked = None | ||||
|             rotated_rectangles = [] | ||||
|             rec_pushed = 0 | ||||
|             rec_received = 0 | ||||
|             host_sync = HostSeqSync() | ||||
|  | ||||
|             characters = '0123456789abcdefghijklmnopqrstuvwxyz#' | ||||
|             codec = CTCCodec(characters) | ||||
|  | ||||
|             ctrl = dai.CameraControl() | ||||
|             ctrl.setAutoFocusMode(dai.CameraControl.AutoFocusMode.CONTINUOUS_VIDEO) | ||||
|             ctrl.setAutoFocusTrigger() | ||||
|             controlQueue.send(ctrl) | ||||
|  | ||||
|             while True: | ||||
|                 vid_in = q_vid.tryGet() | ||||
|                 if vid_in is not None: | ||||
|                     host_sync.add_msg(vid_in) | ||||
|  | ||||
|                 # Multiple recognition results may be available, read until queue is empty | ||||
|                 while True: | ||||
|                     in_rec = q_rec.tryGet() | ||||
|                     if in_rec is None: | ||||
|                         break | ||||
|                     rec_data = bboxes = np.array(in_rec.getFirstLayerFp16()).reshape(30, 1, 37) | ||||
|                     decoded_text = codec.decode(rec_data)[0] | ||||
|                     pos = rotated_rectangles[rec_received] | ||||
|                     print("{:2}: {:20}".format(rec_received, decoded_text), | ||||
|                           "center({:3},{:3}) size({:3},{:3}) angle{:5.1f} deg".format( | ||||
|                               int(pos[0][0]), int(pos[0][1]), pos[1][0], pos[1][1], pos[2])) | ||||
|                     # Draw the text on the right side of 'cropped_stacked' - placeholder | ||||
|                     if cropped_stacked is not None: | ||||
|                         cv2.putText(cropped_stacked, decoded_text, | ||||
|                                     (120 + 10, 32 * rec_received + 24), | ||||
|                                     cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2) | ||||
|                         cv2.imshow('cropped_stacked', cropped_stacked) | ||||
|                     rec_received += 1 | ||||
|  | ||||
|                 if cv2.waitKey(1) == ord('q'): | ||||
|                     break | ||||
|  | ||||
|                 if rec_received >= rec_pushed: | ||||
|                     in_det = q_det.tryGet() | ||||
|                     if in_det is not None: | ||||
|                         frame = host_sync.get_msg(in_det.getSequenceNum()).getCvFrame().copy() | ||||
|  | ||||
|                         scores, geom1, geom2 = to_tensor_result(in_det).values() | ||||
|                         scores = np.reshape(scores, (1, 1, 64, 64)) | ||||
|                         geom1 = np.reshape(geom1, (1, 4, 64, 64)) | ||||
|                         geom2 = np.reshape(geom2, (1, 1, 64, 64)) | ||||
|  | ||||
|                         bboxes, confs, angles = east.decode_predictions(scores, geom1, geom2) | ||||
|                         boxes, angles = east.non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles)) | ||||
|                         rotated_rectangles = [ | ||||
|                             east.get_cv_rotated_rect(bbox, angle * -1) | ||||
|                             for (bbox, angle) in zip(boxes, angles) | ||||
|                         ] | ||||
|  | ||||
|                         rec_received = 0 | ||||
|                         rec_pushed = len(rotated_rectangles) | ||||
|                         if rec_pushed: | ||||
|                             print("====== Pushing for recognition, count:", rec_pushed) | ||||
|                         cropped_stacked = None | ||||
|                         for idx, rotated_rect in enumerate(rotated_rectangles): | ||||
|                             # Detections are done on 256x256 frames, we are sending back 1024x1024 | ||||
|                             # That's why we multiply center and size values by 4 | ||||
|                             rotated_rect[0][0] = rotated_rect[0][0] * 4 | ||||
|                             rotated_rect[0][1] = rotated_rect[0][1] * 4 | ||||
|                             rotated_rect[1][0] = rotated_rect[1][0] * 4 | ||||
|                             rotated_rect[1][1] = rotated_rect[1][1] * 4 | ||||
|  | ||||
|                             # Draw detection crop area on input frame | ||||
|                             points = np.int0(cv2.boxPoints(rotated_rect)) | ||||
|                             print(rotated_rect) | ||||
|                             cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1, | ||||
|                                           lineType=cv2.LINE_8) | ||||
|  | ||||
|                             # TODO make it work taking args like in OpenCV: | ||||
|                             # rr = ((256, 256), (128, 64), 30) | ||||
|                             rr = dai.RotatedRect() | ||||
|                             rr.center.x = rotated_rect[0][0] | ||||
|                             rr.center.y = rotated_rect[0][1] | ||||
|                             rr.size.width = rotated_rect[1][0] | ||||
|                             rr.size.height = rotated_rect[1][1] | ||||
|                             rr.angle = rotated_rect[2] | ||||
|                             cfg = dai.ImageManipConfig() | ||||
|                             cfg.setCropRotatedRect(rr, False) | ||||
|                             cfg.setResize(120, 32) | ||||
|                             # Send frame and config to device | ||||
|                             if idx == 0: | ||||
|                                 w, h, c = frame.shape | ||||
|                                 imgFrame = dai.ImgFrame() | ||||
|                                 imgFrame.setData(to_planar(frame)) | ||||
|                                 imgFrame.setType(dai.ImgFrame.Type.BGR888p) | ||||
|                                 imgFrame.setWidth(w) | ||||
|                                 imgFrame.setHeight(h) | ||||
|                                 q_manip_img.send(imgFrame) | ||||
|                             else: | ||||
|                                 cfg.setReusePreviousImage(True) | ||||
|                             q_manip_cfg.send(cfg) | ||||
|  | ||||
|                             # Get manipulated image from the device | ||||
|                             transformed = q_manip_out.get().getCvFrame() | ||||
|  | ||||
|                             rec_placeholder_img = np.zeros((32, 200, 3), np.uint8) | ||||
|                             transformed = np.hstack((transformed, rec_placeholder_img)) | ||||
|                             if cropped_stacked is None: | ||||
|                                 cropped_stacked = transformed | ||||
|                             else: | ||||
|                                 cropped_stacked = np.vstack((cropped_stacked, transformed)) | ||||
|  | ||||
|                 if cropped_stacked is not None: | ||||
|                     cv2.imshow('cropped_stacked', cropped_stacked) | ||||
|  | ||||
|                 if frame is not None: | ||||
|                     cv2.imshow('frame', frame) | ||||
|  | ||||
|                 key = cv2.waitKey(1) | ||||
|                 if key == ord('q'): | ||||
|                     break | ||||
|                 elif key == ord('t'): | ||||
|                     print("Autofocus trigger (and disable continuous)") | ||||
|                     ctrl = dai.CameraControl() | ||||
|                     ctrl.setAutoFocusMode(dai.CameraControl.AutoFocusMode.AUTO) | ||||
|                     ctrl.setAutoFocusTrigger() | ||||
|                     controlQueue.send(ctrl) | ||||
|  | ||||
|  | ||||
|  | ||||
|         # Connect to device and start pipeline | ||||
|         with dai.Device(self._pipeline) as device: | ||||
|             logger.info('MxId: %s', device.getDeviceInfo().getMxId()) | ||||
| @@ -272,7 +84,8 @@ class FramePublisher: | ||||
|             device.startPipeline() | ||||
|             # Queues | ||||
|             queue_size = 4 | ||||
|             q_rgb = device.getOutputQueue("rgb", maxSize=queue_size, blocking=False) | ||||
|             q_rgb = device.getOutputQueue(name="rgb", maxSize=queue_size, blocking=False) | ||||
|             q_nn = device.getOutputQueue(name="nn", maxSize=queue_size, blocking=False) | ||||
|  | ||||
|             while True: | ||||
|                 try: | ||||
| @@ -297,5 +110,42 @@ class FramePublisher: | ||||
|                                               qos=0, | ||||
|                                               retain=False) | ||||
|  | ||||
|                     in_nn = q_nn.get() | ||||
|  | ||||
|                     # get outputs | ||||
|                     detection_boxes = np.array(in_nn.getLayerFp16("ExpandDims")).reshape((100, 4)) | ||||
|                     detection_scores = np.array(in_nn.getLayerFp16("ExpandDims_2")).reshape((100,)) | ||||
|  | ||||
|                     # keep boxes bigger than threshold | ||||
|                     mask = detection_scores >= self._objects_threshold | ||||
|                     boxes = detection_boxes[mask] | ||||
|                     scores = detection_scores[mask] | ||||
|  | ||||
|                     if boxes.shape[0] > 0: | ||||
|                         objects_msg = events.events_pb2.ObjectsMessage() | ||||
|                         objs = [] | ||||
|                         for i in range(boxes.shape[0]): | ||||
|                             bbox = boxes[i] | ||||
|                             logger.debug("new object detected: %s", str(bbox)) | ||||
|                             o = events.events_pb2.Object() | ||||
|                             o.type = events.events_pb2.TypeObject.ANY | ||||
|                             o.top = bbox[0].astype(float) | ||||
|                             o.right = bbox[3].astype(float) | ||||
|                             o.bottom = bbox[2].astype(float) | ||||
|                             o.left = bbox[1].astype(float) | ||||
|                             o.confidence = scores[i].astype(float) | ||||
|                             objs.append(o) | ||||
|                         objects_msg.objects.extend(objs) | ||||
|  | ||||
|                         objects_msg.frame_ref.name = frame_msg.id.name | ||||
|                         objects_msg.frame_ref.id = frame_msg.id.id | ||||
|                         objects_msg.frame_ref.created_at.FromDatetime(now) | ||||
|  | ||||
|                         logger.debug("publish object event to %s", self._frame_topic) | ||||
|                         self._mqtt_client.publish(topic=self._objects_topic, | ||||
|                                                   payload=objects_msg.SerializeToString(), | ||||
|                                                   qos=0, | ||||
|                                                   retain=False) | ||||
|  | ||||
|                 except Exception as e: | ||||
|                     logger.exception("unexpected error: %s", str(e)) | ||||
|   | ||||
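On the consuming side, a subscriber to the new objects topic could decode the protobuf payload roughly as follows. This is a hedged sketch assuming paho-mqtt (used by the publisher above) and the generated events.events_pb2 module; the client id, broker host/port and topic are assumptions matching the CLI defaults:

    import paho.mqtt.client as mqtt
    import events.events_pb2

    def on_message(client, userdata, msg):
        # Parse the ObjectsMessage published by FramePublisher.
        objects_msg = events.events_pb2.ObjectsMessage()
        objects_msg.ParseFromString(msg.payload)
        for obj in objects_msg.objects:
            # left/top/right/bottom are floating-point box coordinates after this change.
            print(obj.type, obj.left, obj.top, obj.right, obj.bottom, obj.confidence)

    # Client id and connection details are assumptions for illustration.
    client = mqtt.Client(client_id="objects-listener", clean_session=True, protocol=mqtt.MQTTv311)
    client.on_message = on_message
    client.connect("localhost", 1883, keepalive=60)
    client.subscribe("/objects")  # default objects topic from the CLI above
    client.loop_forever()
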
							
								
								
									
camera/east.py (232 changes, file removed)
							| @@ -1,232 +0,0 @@ | ||||
| import cv2 | ||||
| import depthai | ||||
| import numpy as np | ||||
|  | ||||
| _conf_threshold = 0.5 | ||||
|  | ||||
|  | ||||
| def get_cv_rotated_rect(bbox, angle): | ||||
|     x0, y0, x1, y1 = bbox | ||||
|     width = abs(x0 - x1) | ||||
|     height = abs(y0 - y1) | ||||
|     x = x0 + width * 0.5 | ||||
|     y = y0 + height * 0.5 | ||||
|     return [x.tolist(), y.tolist()], [width.tolist(), height.tolist()], np.rad2deg(angle) | ||||
|  | ||||
|  | ||||
| def rotated_Rectangle(bbox, angle): | ||||
|     X0, Y0, X1, Y1 = bbox | ||||
|     width = abs(X0 - X1) | ||||
|     height = abs(Y0 - Y1) | ||||
|     x = int(X0 + width * 0.5) | ||||
|     y = int(Y0 + height * 0.5) | ||||
|  | ||||
|     pt1_1 = (int(x + width / 2), int(y + height / 2)) | ||||
|     pt2_1 = (int(x + width / 2), int(y - height / 2)) | ||||
|     pt3_1 = (int(x - width / 2), int(y - height / 2)) | ||||
|     pt4_1 = (int(x - width / 2), int(y + height / 2)) | ||||
|  | ||||
|     t = np.array([[np.cos(angle), -np.sin(angle), x - x * np.cos(angle) + y * np.sin(angle)], | ||||
|                   [np.sin(angle), np.cos(angle), y - x * np.sin(angle) - y * np.cos(angle)], | ||||
|                   [0, 0, 1]]) | ||||
|  | ||||
|     tmp_pt1_1 = np.array([[pt1_1[0]], [pt1_1[1]], [1]]) | ||||
|     tmp_pt1_2 = np.dot(t, tmp_pt1_1) | ||||
|     pt1_2 = (int(tmp_pt1_2[0][0]), int(tmp_pt1_2[1][0])) | ||||
|  | ||||
|     tmp_pt2_1 = np.array([[pt2_1[0]], [pt2_1[1]], [1]]) | ||||
|     tmp_pt2_2 = np.dot(t, tmp_pt2_1) | ||||
|     pt2_2 = (int(tmp_pt2_2[0][0]), int(tmp_pt2_2[1][0])) | ||||
|  | ||||
|     tmp_pt3_1 = np.array([[pt3_1[0]], [pt3_1[1]], [1]]) | ||||
|     tmp_pt3_2 = np.dot(t, tmp_pt3_1) | ||||
|     pt3_2 = (int(tmp_pt3_2[0][0]), int(tmp_pt3_2[1][0])) | ||||
|  | ||||
|     tmp_pt4_1 = np.array([[pt4_1[0]], [pt4_1[1]], [1]]) | ||||
|     tmp_pt4_2 = np.dot(t, tmp_pt4_1) | ||||
|     pt4_2 = (int(tmp_pt4_2[0][0]), int(tmp_pt4_2[1][0])) | ||||
|  | ||||
|     points = np.array([pt1_2, pt2_2, pt3_2, pt4_2]) | ||||
|  | ||||
|     return points | ||||
|  | ||||
|  | ||||
| def non_max_suppression(boxes, probs=None, angles=None, overlapThresh=0.3): | ||||
|     # if there are no boxes, return an empty list | ||||
|     if len(boxes) == 0: | ||||
|         return [], [] | ||||
|  | ||||
|     # if the bounding boxes are integers, convert them to floats -- this | ||||
|     # is important since we'll be doing a bunch of divisions | ||||
|     if boxes.dtype.kind == "i": | ||||
|         boxes = boxes.astype("float") | ||||
|  | ||||
|     # initialize the list of picked indexes | ||||
|     pick = [] | ||||
|  | ||||
|     # grab the coordinates of the bounding boxes | ||||
|     x1 = boxes[:, 0] | ||||
|     y1 = boxes[:, 1] | ||||
|     x2 = boxes[:, 2] | ||||
|     y2 = boxes[:, 3] | ||||
|  | ||||
|     # compute the area of the bounding boxes and grab the indexes to sort | ||||
|     # (in the case that no probabilities are provided, simply sort on the bottom-left y-coordinate) | ||||
|     area = (x2 - x1 + 1) * (y2 - y1 + 1) | ||||
|     idxs = y2 | ||||
|  | ||||
|     # if probabilities are provided, sort on them instead | ||||
|     if probs is not None: | ||||
|         idxs = probs | ||||
|  | ||||
|     # sort the indexes | ||||
|     idxs = np.argsort(idxs) | ||||
|  | ||||
|     # keep looping while some indexes still remain in the indexes list | ||||
|     while len(idxs) > 0: | ||||
|         # grab the last index in the indexes list and add the index value to the list of picked indexes | ||||
|         last = len(idxs) - 1 | ||||
|         i = idxs[last] | ||||
|         pick.append(i) | ||||
|  | ||||
|         # find the largest (x, y) coordinates for the start of the bounding box and the smallest (x, y) coordinates | ||||
|         # for the end of the bounding box | ||||
|         xx1 = np.maximum(x1[i], x1[idxs[:last]]) | ||||
|         yy1 = np.maximum(y1[i], y1[idxs[:last]]) | ||||
|         xx2 = np.minimum(x2[i], x2[idxs[:last]]) | ||||
|         yy2 = np.minimum(y2[i], y2[idxs[:last]]) | ||||
|  | ||||
|         # compute the width and height of the bounding box | ||||
|         w = np.maximum(0, xx2 - xx1 + 1) | ||||
|         h = np.maximum(0, yy2 - yy1 + 1) | ||||
|  | ||||
|         # compute the ratio of overlap | ||||
|         overlap = (w * h) / area[idxs[:last]] | ||||
|  | ||||
|         # delete all indexes from the index list that have overlap greater than the provided overlap threshold | ||||
|         idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0]))) | ||||
|  | ||||
|     # return only the bounding boxes that were picked | ||||
|     return boxes[pick].astype("int"), angles[pick] | ||||
|  | ||||
|  | ||||
| def decode_predictions(scores, geometry1, geometry2): | ||||
|     # grab the number of rows and columns from the scores volume, then | ||||
|     # initialize our set of bounding box rectangles and corresponding | ||||
|     # confidence scores | ||||
|     (numRows, numCols) = scores.shape[2:4] | ||||
|     rects = [] | ||||
|     confidences = [] | ||||
|     angles = [] | ||||
|  | ||||
|     # loop over the number of rows | ||||
|     for y in range(0, numRows): | ||||
|         # extract the scores (probabilities), followed by the | ||||
|         # geometrical data used to derive potential bounding box | ||||
|         # coordinates that surround text | ||||
|         scoresData = scores[0, 0, y] | ||||
|         xData0 = geometry1[0, 0, y] | ||||
|         xData1 = geometry1[0, 1, y] | ||||
|         xData2 = geometry1[0, 2, y] | ||||
|         xData3 = geometry1[0, 3, y] | ||||
|         anglesData = geometry2[0, 0, y] | ||||
|  | ||||
|         # loop over the number of columns | ||||
|         for x in range(0, numCols): | ||||
|             # if our score does not have sufficient probability, | ||||
|             # ignore it | ||||
|             if scoresData[x] < _conf_threshold: | ||||
|                 continue | ||||
|  | ||||
|             # compute the offset factor as our resulting feature | ||||
|             # maps will be 4x smaller than the input image | ||||
|             (offsetX, offsetY) = (x * 4.0, y * 4.0) | ||||
|  | ||||
|             # extract the rotation angle for the prediction and | ||||
|             # then compute the sin and cosine | ||||
|             angle = anglesData[x] | ||||
|             cos = np.cos(angle) | ||||
|             sin = np.sin(angle) | ||||
|  | ||||
|             # use the geometry volume to derive the width and height | ||||
|             # of the bounding box | ||||
|             h = xData0[x] + xData2[x] | ||||
|             w = xData1[x] + xData3[x] | ||||
|  | ||||
|             # compute both the starting and ending (x, y)-coordinates | ||||
|             # for the text prediction bounding box | ||||
|             endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x])) | ||||
|             endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x])) | ||||
|             startX = int(endX - w) | ||||
|             startY = int(endY - h) | ||||
|  | ||||
|             # add the bounding box coordinates and probability score | ||||
|             # to our respective lists | ||||
|             rects.append((startX, startY, endX, endY)) | ||||
|             confidences.append(scoresData[x]) | ||||
|             angles.append(angle) | ||||
|  | ||||
|     # return a tuple of the bounding boxes and associated confidences | ||||
|     return (rects, confidences, angles) | ||||
|  | ||||
|  | ||||
| def decode_east(nnet_packet, **kwargs): | ||||
|     scores = nnet_packet.get_tensor(0) | ||||
|     geometry1 = nnet_packet.get_tensor(1) | ||||
|     geometry2 = nnet_packet.get_tensor(2) | ||||
|     bboxes, confs, angles = decode_predictions(scores, geometry1, geometry2 | ||||
|                                                ) | ||||
|     boxes, angles = non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles)) | ||||
|     boxesangles = (boxes, angles) | ||||
|     return boxesangles | ||||
|  | ||||
|  | ||||
| def show_east(boxesangles, frame, **kwargs): | ||||
|     bboxes = boxesangles[0] | ||||
|     angles = boxesangles[1] | ||||
|     for ((X0, Y0, X1, Y1), angle) in zip(bboxes, angles): | ||||
|         width = abs(X0 - X1) | ||||
|         height = abs(Y0 - Y1) | ||||
|         cX = int(X0 + width * 0.5) | ||||
|         cY = int(Y0 + height * 0.5) | ||||
|  | ||||
|         rotRect = ((cX, cY), ((X1 - X0), (Y1 - Y0)), angle * (-1)) | ||||
|         points = rotated_Rectangle(frame, rotRect, color=(255, 0, 0), thickness=1) | ||||
|         cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1, lineType=cv2.LINE_8) | ||||
|  | ||||
|     return frame | ||||
|  | ||||
|  | ||||
| def order_points(pts): | ||||
|     rect = np.zeros((4, 2), dtype="float32") | ||||
|     s = pts.sum(axis=1) | ||||
|     rect[0] = pts[np.argmin(s)] | ||||
|     rect[2] = pts[np.argmax(s)] | ||||
|     diff = np.diff(pts, axis=1) | ||||
|     rect[1] = pts[np.argmin(diff)] | ||||
|     rect[3] = pts[np.argmax(diff)] | ||||
|     return rect | ||||
|  | ||||
|  | ||||
| def four_point_transform(image, pts): | ||||
|     rect = order_points(pts) | ||||
|     (tl, tr, br, bl) = rect | ||||
|  | ||||
|     widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2)) | ||||
|     widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2)) | ||||
|     maxWidth = max(int(widthA), int(widthB)) | ||||
|  | ||||
|     heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2)) | ||||
|     heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2)) | ||||
|     maxHeight = max(int(heightA), int(heightB)) | ||||
|  | ||||
|     dst = np.array([ | ||||
|         [0, 0], | ||||
|         [maxWidth - 1, 0], | ||||
|         [maxWidth - 1, maxHeight - 1], | ||||
|         [0, maxHeight - 1]], dtype="float32") | ||||
|  | ||||
|     M = cv2.getPerspectiveTransform(rect, dst) | ||||
|     warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight)) | ||||
|  | ||||
|     return warped | ||||
| @@ -1,61 +0,0 @@ | ||||
|  | ||||
| class HostSeqSync: | ||||
|     def __init__(self): | ||||
|         self.imfFrames = [] | ||||
|  | ||||
|     def add_msg(self, msg): | ||||
|         self.imfFrames.append(msg) | ||||
|  | ||||
|     def get_msg(self, target_seq): | ||||
|         for i, imgFrame in enumerate(self.imfFrames): | ||||
|             if target_seq == imgFrame.getSequenceNum(): | ||||
|                 self.imfFrames = self.imfFrames[i:] | ||||
|                 break | ||||
|         return self.imfFrames[0] | ||||
|  | ||||
|  | ||||
| class CTCCodec(object): | ||||
|     """ Convert between text-label and text-index """ | ||||
|  | ||||
|     def __init__(self, characters): | ||||
|         # characters (str): set of the possible characters. | ||||
|         dict_character = list(characters) | ||||
|  | ||||
|         self.dict = {} | ||||
|         for i, char in enumerate(dict_character): | ||||
|             self.dict[char] = i + 1 | ||||
|  | ||||
|         self.characters = dict_character | ||||
|         # print(self.characters) | ||||
|         # input() | ||||
|  | ||||
|     def decode(self, preds): | ||||
|         """ convert text-index into text-label. """ | ||||
|         texts = [] | ||||
|         index = 0 | ||||
|         # Select max probabilty (greedy decoding) then decode index to character | ||||
|         preds = preds.astype(np.float16) | ||||
|         preds_index = np.argmax(preds, 2) | ||||
|         preds_index = preds_index.transpose(1, 0) | ||||
|         preds_index_reshape = preds_index.reshape(-1) | ||||
|         preds_sizes = np.array([preds_index.shape[1]] * preds_index.shape[0]) | ||||
|  | ||||
|         for l in preds_sizes: | ||||
|             t = preds_index_reshape[index:index + l] | ||||
|  | ||||
|             # NOTE: t might be zero size | ||||
|             if t.shape[0] == 0: | ||||
|                 continue | ||||
|  | ||||
|             char_list = [] | ||||
|             for i in range(l): | ||||
|                 # removing repeated characters and blank. | ||||
|                 if not (i > 0 and t[i - 1] == t[i]): | ||||
|                     if self.characters[t[i]] != '#': | ||||
|                         char_list.append(self.characters[t[i]]) | ||||
|             text = ''.join(char_list) | ||||
|             texts.append(text) | ||||
|  | ||||
|             index += l | ||||
|  | ||||
|         return texts | ||||
							
								
								
									
east.py (229 changes, file removed)
							| @@ -1,229 +0,0 @@ | ||||
| import cv2 | ||||
| import depthai | ||||
| import numpy as np | ||||
|  | ||||
| _conf_threshold = 0.5 | ||||
|  | ||||
| def get_cv_rotated_rect(bbox, angle): | ||||
|     x0, y0, x1, y1 = bbox | ||||
|     width = abs(x0 - x1) | ||||
|     height = abs(y0 - y1) | ||||
|     x = x0 + width * 0.5 | ||||
|     y = y0 + height * 0.5 | ||||
|     return ([x.tolist(), y.tolist()], [width.tolist(), height.tolist()], np.rad2deg(angle)) | ||||
|  | ||||
| def rotated_Rectangle(bbox, angle): | ||||
|     X0, Y0, X1, Y1 = bbox | ||||
|     width = abs(X0 - X1) | ||||
|     height = abs(Y0 - Y1) | ||||
|     x = int(X0 + width * 0.5) | ||||
|     y = int(Y0 + height * 0.5) | ||||
|  | ||||
|     pt1_1 = (int(x + width / 2), int(y + height / 2)) | ||||
|     pt2_1 = (int(x + width / 2), int(y - height / 2)) | ||||
|     pt3_1 = (int(x - width / 2), int(y - height / 2)) | ||||
|     pt4_1 = (int(x - width / 2), int(y + height / 2)) | ||||
|  | ||||
|     t = np.array([[np.cos(angle), -np.sin(angle), x - x * np.cos(angle) + y * np.sin(angle)], | ||||
|                   [np.sin(angle), np.cos(angle), y - x * np.sin(angle) - y * np.cos(angle)], | ||||
|                   [0, 0, 1]]) | ||||
|  | ||||
|     tmp_pt1_1 = np.array([[pt1_1[0]], [pt1_1[1]], [1]]) | ||||
|     tmp_pt1_2 = np.dot(t, tmp_pt1_1) | ||||
|     pt1_2 = (int(tmp_pt1_2[0][0]), int(tmp_pt1_2[1][0])) | ||||
|  | ||||
|     tmp_pt2_1 = np.array([[pt2_1[0]], [pt2_1[1]], [1]]) | ||||
|     tmp_pt2_2 = np.dot(t, tmp_pt2_1) | ||||
|     pt2_2 = (int(tmp_pt2_2[0][0]), int(tmp_pt2_2[1][0])) | ||||
|  | ||||
|     tmp_pt3_1 = np.array([[pt3_1[0]], [pt3_1[1]], [1]]) | ||||
|     tmp_pt3_2 = np.dot(t, tmp_pt3_1) | ||||
|     pt3_2 = (int(tmp_pt3_2[0][0]), int(tmp_pt3_2[1][0])) | ||||
|  | ||||
|     tmp_pt4_1 = np.array([[pt4_1[0]], [pt4_1[1]], [1]]) | ||||
|     tmp_pt4_2 = np.dot(t, tmp_pt4_1) | ||||
|     pt4_2 = (int(tmp_pt4_2[0][0]), int(tmp_pt4_2[1][0])) | ||||
|  | ||||
|     points = np.array([pt1_2, pt2_2, pt3_2, pt4_2]) | ||||
|  | ||||
|     return points | ||||
|  | ||||
|  | ||||
| def non_max_suppression(boxes, probs=None, angles=None, overlapThresh=0.3): | ||||
|     # if there are no boxes, return an empty list | ||||
|     if len(boxes) == 0: | ||||
|         return [], [] | ||||
|  | ||||
|     # if the bounding boxes are integers, convert them to floats -- this | ||||
|     # is important since we'll be doing a bunch of divisions | ||||
|     if boxes.dtype.kind == "i": | ||||
|         boxes = boxes.astype("float") | ||||
|  | ||||
|     # initialize the list of picked indexes | ||||
|     pick = [] | ||||
|  | ||||
|     # grab the coordinates of the bounding boxes | ||||
|     x1 = boxes[:, 0] | ||||
|     y1 = boxes[:, 1] | ||||
|     x2 = boxes[:, 2] | ||||
|     y2 = boxes[:, 3] | ||||
|  | ||||
|     # compute the area of the bounding boxes and grab the indexes to sort | ||||
|     # (in the case that no probabilities are provided, simply sort on the bottom-left y-coordinate) | ||||
|     area = (x2 - x1 + 1) * (y2 - y1 + 1) | ||||
|     idxs = y2 | ||||
|  | ||||
|     # if probabilities are provided, sort on them instead | ||||
|     if probs is not None: | ||||
|         idxs = probs | ||||
|  | ||||
|     # sort the indexes | ||||
|     idxs = np.argsort(idxs) | ||||
|  | ||||
|     # keep looping while some indexes still remain in the indexes list | ||||
|     while len(idxs) > 0: | ||||
|         # grab the last index in the indexes list and add the index value to the list of picked indexes | ||||
|         last = len(idxs) - 1 | ||||
|         i = idxs[last] | ||||
|         pick.append(i) | ||||
|  | ||||
|         # find the largest (x, y) coordinates for the start of the bounding box and the smallest (x, y) coordinates for the end of the bounding box | ||||
|         xx1 = np.maximum(x1[i], x1[idxs[:last]]) | ||||
|         yy1 = np.maximum(y1[i], y1[idxs[:last]]) | ||||
|         xx2 = np.minimum(x2[i], x2[idxs[:last]]) | ||||
|         yy2 = np.minimum(y2[i], y2[idxs[:last]]) | ||||
|  | ||||
|         # compute the width and height of the bounding box | ||||
|         w = np.maximum(0, xx2 - xx1 + 1) | ||||
|         h = np.maximum(0, yy2 - yy1 + 1) | ||||
|  | ||||
|         # compute the ratio of overlap | ||||
|         overlap = (w * h) / area[idxs[:last]] | ||||
|  | ||||
|         # delete all indexes from the index list that have overlap greater than the provided overlap threshold | ||||
|         idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0]))) | ||||
|  | ||||
|     # return only the bounding boxes that were picked | ||||
|     return boxes[pick].astype("int"), angles[pick] | ||||
|  | ||||
|  | ||||
| def decode_predictions(scores, geometry1, geometry2): | ||||
|     # grab the number of rows and columns from the scores volume, then | ||||
|     # initialize our set of bounding box rectangles and corresponding | ||||
|     # confidence scores | ||||
|     (numRows, numCols) = scores.shape[2:4] | ||||
|     rects = [] | ||||
|     confidences = [] | ||||
|     angles = [] | ||||
|  | ||||
|     # loop over the number of rows | ||||
|     for y in range(0, numRows): | ||||
|         # extract the scores (probabilities), followed by the | ||||
|         # geometrical data used to derive potential bounding box | ||||
|         # coordinates that surround text | ||||
|         scoresData = scores[0, 0, y] | ||||
|         xData0 = geometry1[0, 0, y] | ||||
|         xData1 = geometry1[0, 1, y] | ||||
|         xData2 = geometry1[0, 2, y] | ||||
|         xData3 = geometry1[0, 3, y] | ||||
|         anglesData = geometry2[0, 0, y] | ||||
|  | ||||
|         # loop over the number of columns | ||||
|         for x in range(0, numCols): | ||||
|             # if our score does not have sufficient probability, | ||||
|             # ignore it | ||||
|             if scoresData[x] < _conf_threshold: | ||||
|                 continue | ||||
|  | ||||
|             # compute the offset factor as our resulting feature | ||||
|             # maps will be 4x smaller than the input image | ||||
|             (offsetX, offsetY) = (x * 4.0, y * 4.0) | ||||
|  | ||||
|             # extract the rotation angle for the prediction and | ||||
|             # then compute the sin and cosine | ||||
|             angle = anglesData[x] | ||||
|             cos = np.cos(angle) | ||||
|             sin = np.sin(angle) | ||||
|  | ||||
|             # use the geometry volume to derive the width and height | ||||
|             # of the bounding box | ||||
|             h = xData0[x] + xData2[x] | ||||
|             w = xData1[x] + xData3[x] | ||||
|  | ||||
|             # compute both the starting and ending (x, y)-coordinates | ||||
|             # for the text prediction bounding box | ||||
|             endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x])) | ||||
|             endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x])) | ||||
|             startX = int(endX - w) | ||||
|             startY = int(endY - h) | ||||
|  | ||||
|             # add the bounding box coordinates and probability score | ||||
|             # to our respective lists | ||||
|             rects.append((startX, startY, endX, endY)) | ||||
|             confidences.append(scoresData[x]) | ||||
|             angles.append(angle) | ||||
|  | ||||
|     # return a tuple of the bounding boxes and associated confidences | ||||
|     return (rects, confidences, angles) | ||||
|  | ||||
|  | ||||
| def decode_east(nnet_packet, **kwargs): | ||||
|     scores = nnet_packet.get_tensor(0) | ||||
|     geometry1 = nnet_packet.get_tensor(1) | ||||
|     geometry2 = nnet_packet.get_tensor(2) | ||||
|     bboxes, confs, angles = decode_predictions(scores, geometry1, geometry2 | ||||
|                                                ) | ||||
|     boxes, angles = non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles)) | ||||
|     boxesangles = (boxes, angles) | ||||
|     return boxesangles | ||||
|  | ||||
|  | ||||
| def show_east(boxesangles, frame, **kwargs): | ||||
|     bboxes = boxesangles[0] | ||||
|     angles = boxesangles[1] | ||||
|     for ((X0, Y0, X1, Y1), angle) in zip(bboxes, angles): | ||||
|         width = abs(X0 - X1) | ||||
|         height = abs(Y0 - Y1) | ||||
|         cX = int(X0 + width * 0.5) | ||||
|         cY = int(Y0 + height * 0.5) | ||||
|  | ||||
|         rotRect = ((cX, cY), ((X1 - X0), (Y1 - Y0)), angle * (-1)) | ||||
|         points = rotated_Rectangle(frame, rotRect, color=(255, 0, 0), thickness=1) | ||||
|         cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1, lineType=cv2.LINE_8) | ||||
|  | ||||
|     return frame | ||||
|  | ||||
|  | ||||
| def order_points(pts): | ||||
|     rect = np.zeros((4, 2), dtype="float32") | ||||
|     s = pts.sum(axis=1) | ||||
|     rect[0] = pts[np.argmin(s)] | ||||
|     rect[2] = pts[np.argmax(s)] | ||||
|     diff = np.diff(pts, axis=1) | ||||
|     rect[1] = pts[np.argmin(diff)] | ||||
|     rect[3] = pts[np.argmax(diff)] | ||||
|     return rect | ||||
|  | ||||
|  | ||||
| def four_point_transform(image, pts): | ||||
|     rect = order_points(pts) | ||||
|     (tl, tr, br, bl) = rect | ||||
|  | ||||
|     widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2)) | ||||
|     widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2)) | ||||
|     maxWidth = max(int(widthA), int(widthB)) | ||||
|  | ||||
|     heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2)) | ||||
|     heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2)) | ||||
|     maxHeight = max(int(heightA), int(heightB)) | ||||
|  | ||||
|     dst = np.array([ | ||||
|         [0, 0], | ||||
|         [maxWidth - 1, 0], | ||||
|         [maxWidth - 1, maxHeight - 1], | ||||
|         [0, maxHeight - 1]], dtype="float32") | ||||
|  | ||||
|     M = cv2.getPerspectiveTransform(rect, dst) | ||||
|     warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight)) | ||||
|  | ||||
|     return warped | ||||
| @@ -14,7 +14,7 @@ _sym_db = _symbol_database.Default() | ||||
| from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 | ||||
|  | ||||
|  | ||||
| DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13\x65vents/events.proto\x12\x0erobocar.events\x1a\x1fgoogle/protobuf/timestamp.proto\"T\n\x08\x46rameRef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\t\x12.\n\ncreated_at\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"C\n\x0c\x46rameMessage\x12$\n\x02id\x18\x01 \x01(\x0b\x32\x18.robocar.events.FrameRef\x12\r\n\x05\x66rame\x18\x02 \x01(\x0c\"d\n\x0fSteeringMessage\x12\x10\n\x08steering\x18\x01 \x01(\x02\x12\x12\n\nconfidence\x18\x02 \x01(\x02\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"d\n\x0fThrottleMessage\x12\x10\n\x08throttle\x18\x01 \x01(\x02\x12\x12\n\nconfidence\x18\x02 \x01(\x02\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"A\n\x10\x44riveModeMessage\x12-\n\ndrive_mode\x18\x01 \x01(\x0e\x32\x19.robocar.events.DriveMode\"f\n\x0eObjectsMessage\x12\'\n\x07objects\x18\x01 \x03(\x0b\x32\x16.robocar.events.Object\x12+\n\tframe_ref\x18\x02 \x01(\x0b\x32\x18.robocar.events.FrameRef\"\x80\x01\n\x06Object\x12(\n\x04type\x18\x01 \x01(\x0e\x32\x1a.robocar.events.TypeObject\x12\x0c\n\x04left\x18\x02 \x01(\x05\x12\x0b\n\x03top\x18\x03 \x01(\x05\x12\r\n\x05right\x18\x04 \x01(\x05\x12\x0e\n\x06\x62ottom\x18\x05 \x01(\x05\x12\x12\n\nconfidence\x18\x06 \x01(\x02\"&\n\x13SwitchRecordMessage\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\"\x8c\x01\n\x0bRoadMessage\x12&\n\x07\x63ontour\x18\x01 \x03(\x0b\x32\x15.robocar.events.Point\x12(\n\x07\x65llipse\x18\x02 \x01(\x0b\x32\x17.robocar.events.Ellipse\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"\x1d\n\x05Point\x12\t\n\x01x\x18\x01 \x01(\x05\x12\t\n\x01y\x18\x02 \x01(\x05\"r\n\x07\x45llipse\x12%\n\x06\x63\x65nter\x18\x01 \x01(\x0b\x32\x15.robocar.events.Point\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0e\n\x06height\x18\x03 \x01(\x05\x12\r\n\x05\x61ngle\x18\x04 \x01(\x02\x12\x12\n\nconfidence\x18\x05 \x01(\x02\"\x82\x01\n\rRecordMessage\x12+\n\x05\x66rame\x18\x01 \x01(\x0b\x32\x1c.robocar.events.FrameMessage\x12\x31\n\x08steering\x18\x02 \x01(\x0b\x32\x1f.robocar.events.SteeringMessage\x12\x11\n\trecordSet\x18\x03 \x01(\t*-\n\tDriveMode\x12\x0b\n\x07INVALID\x10\x00\x12\x08\n\x04USER\x10\x01\x12\t\n\x05PILOT\x10\x02*2\n\nTypeObject\x12\x07\n\x03\x41NY\x10\x00\x12\x07\n\x03\x43\x41R\x10\x01\x12\x08\n\x04\x42UMP\x10\x02\x12\x08\n\x04PLOT\x10\x03\x42\nZ\x08./eventsb\x06proto3') | ||||
| DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13\x65vents/events.proto\x12\x0erobocar.events\x1a\x1fgoogle/protobuf/timestamp.proto\"T\n\x08\x46rameRef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\t\x12.\n\ncreated_at\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"C\n\x0c\x46rameMessage\x12$\n\x02id\x18\x01 \x01(\x0b\x32\x18.robocar.events.FrameRef\x12\r\n\x05\x66rame\x18\x02 \x01(\x0c\"d\n\x0fSteeringMessage\x12\x10\n\x08steering\x18\x01 \x01(\x02\x12\x12\n\nconfidence\x18\x02 \x01(\x02\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"d\n\x0fThrottleMessage\x12\x10\n\x08throttle\x18\x01 \x01(\x02\x12\x12\n\nconfidence\x18\x02 \x01(\x02\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"A\n\x10\x44riveModeMessage\x12-\n\ndrive_mode\x18\x01 \x01(\x0e\x32\x19.robocar.events.DriveMode\"f\n\x0eObjectsMessage\x12\'\n\x07objects\x18\x01 \x03(\x0b\x32\x16.robocar.events.Object\x12+\n\tframe_ref\x18\x02 \x01(\x0b\x32\x18.robocar.events.FrameRef\"\x80\x01\n\x06Object\x12(\n\x04type\x18\x01 \x01(\x0e\x32\x1a.robocar.events.TypeObject\x12\x0c\n\x04left\x18\x02 \x01(\x02\x12\x0b\n\x03top\x18\x03 \x01(\x02\x12\r\n\x05right\x18\x04 \x01(\x02\x12\x0e\n\x06\x62ottom\x18\x05 \x01(\x02\x12\x12\n\nconfidence\x18\x06 \x01(\x02\"&\n\x13SwitchRecordMessage\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\"\x8c\x01\n\x0bRoadMessage\x12&\n\x07\x63ontour\x18\x01 \x03(\x0b\x32\x15.robocar.events.Point\x12(\n\x07\x65llipse\x18\x02 \x01(\x0b\x32\x17.robocar.events.Ellipse\x12+\n\tframe_ref\x18\x03 \x01(\x0b\x32\x18.robocar.events.FrameRef\"\x1d\n\x05Point\x12\t\n\x01x\x18\x01 \x01(\x05\x12\t\n\x01y\x18\x02 \x01(\x05\"r\n\x07\x45llipse\x12%\n\x06\x63\x65nter\x18\x01 \x01(\x0b\x32\x15.robocar.events.Point\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0e\n\x06height\x18\x03 \x01(\x05\x12\r\n\x05\x61ngle\x18\x04 \x01(\x02\x12\x12\n\nconfidence\x18\x05 \x01(\x02\"\x82\x01\n\rRecordMessage\x12+\n\x05\x66rame\x18\x01 \x01(\x0b\x32\x1c.robocar.events.FrameMessage\x12\x31\n\x08steering\x18\x02 \x01(\x0b\x32\x1f.robocar.events.SteeringMessage\x12\x11\n\trecordSet\x18\x03 \x01(\t*-\n\tDriveMode\x12\x0b\n\x07INVALID\x10\x00\x12\x08\n\x04USER\x10\x01\x12\t\n\x05PILOT\x10\x02*2\n\nTypeObject\x12\x07\n\x03\x41NY\x10\x00\x12\x07\n\x03\x43\x41R\x10\x01\x12\x08\n\x04\x42UMP\x10\x02\x12\x08\n\x04PLOT\x10\x03\x42\nZ\x08./eventsb\x06proto3') | ||||
|  | ||||
| _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) | ||||
| _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'events.events_pb2', globals()) | ||||
|   | ||||
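The only substantive change in the regenerated events_pb2 module is the serialized descriptor: the Object fields left, top, right and bottom appear to switch from int32 to float, matching the float box coordinates now published by the camera code. A quick round-trip sketch, with made-up values for illustration:

    import events.events_pb2 as pb

    obj = pb.Object()
    obj.type = pb.TypeObject.ANY
    obj.left = 0.125    # fractional values like these would be rejected by the old int32 fields
    obj.top = 0.25
    obj.right = 0.875
    obj.bottom = 0.75
    obj.confidence = 0.9

    data = obj.SerializeToString()
    print(pb.Object.FromString(data))
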
							
								
								
									
main.py (274 changes, file removed)
							| @@ -1,274 +0,0 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| from pathlib import Path | ||||
|  | ||||
| import cv2 | ||||
| import numpy as np | ||||
| import depthai as dai | ||||
| import east | ||||
| import blobconverter | ||||
|  | ||||
| class HostSeqSync: | ||||
|     def __init__(self): | ||||
|         self.imfFrames = [] | ||||
|     def add_msg(self, msg): | ||||
|         self.imfFrames.append(msg) | ||||
|     def get_msg(self, target_seq): | ||||
|         for i, imgFrame in enumerate(self.imfFrames): | ||||
|             if target_seq == imgFrame.getSequenceNum(): | ||||
|                 self.imfFrames = self.imfFrames[i:] | ||||
|                 break | ||||
|         return self.imfFrames[0] | ||||
|  | ||||
| pipeline = dai.Pipeline() | ||||
| version = "2021.2" | ||||
| pipeline.setOpenVINOVersion(version=dai.OpenVINO.Version.VERSION_2021_2) | ||||
|  | ||||
| colorCam = pipeline.create(dai.node.ColorCamera) | ||||
| colorCam.setPreviewSize(256, 256) | ||||
| colorCam.setVideoSize(1024, 1024) # 4 times larger in both axis | ||||
| colorCam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) | ||||
| colorCam.setInterleaved(False) | ||||
| colorCam.setBoardSocket(dai.CameraBoardSocket.RGB) | ||||
| colorCam.setFps(10) | ||||
|  | ||||
| controlIn = pipeline.create(dai.node.XLinkIn) | ||||
| controlIn.setStreamName('control') | ||||
| controlIn.out.link(colorCam.inputControl) | ||||
|  | ||||
| cam_xout = pipeline.create(dai.node.XLinkOut) | ||||
| cam_xout.setStreamName('video') | ||||
| colorCam.video.link(cam_xout.input) | ||||
|  | ||||
| # --------------------------------------- | ||||
| # 1st stage NN - text-detection | ||||
| # --------------------------------------- | ||||
|  | ||||
| nn = pipeline.create(dai.node.NeuralNetwork) | ||||
| nn.setBlobPath(blobconverter.from_zoo(name="east_text_detection_256x256",zoo_type="depthai",shaves=6, version=version)) | ||||
| colorCam.preview.link(nn.input) | ||||
|  | ||||
| nn_xout = pipeline.create(dai.node.XLinkOut) | ||||
| nn_xout.setStreamName('detections') | ||||
| nn.out.link(nn_xout.input) | ||||
|  | ||||
| # --------------------------------------- | ||||
| # 2nd stage NN - text-recognition-0012 | ||||
| # --------------------------------------- | ||||
|  | ||||
| manip = pipeline.create(dai.node.ImageManip) | ||||
| manip.setWaitForConfigInput(True) | ||||
|  | ||||
| manip_img = pipeline.create(dai.node.XLinkIn) | ||||
| manip_img.setStreamName('manip_img') | ||||
| manip_img.out.link(manip.inputImage) | ||||
|  | ||||
| manip_cfg = pipeline.create(dai.node.XLinkIn) | ||||
| manip_cfg.setStreamName('manip_cfg') | ||||
| manip_cfg.out.link(manip.inputConfig) | ||||
|  | ||||
| manip_xout = pipeline.create(dai.node.XLinkOut) | ||||
| manip_xout.setStreamName('manip_out') | ||||
|  | ||||
| nn2 = pipeline.create(dai.node.NeuralNetwork) | ||||
| nn2.setBlobPath(blobconverter.from_zoo(name="text-recognition-0012", shaves=6, version=version)) | ||||
| nn2.setNumInferenceThreads(2) | ||||
| manip.out.link(nn2.input) | ||||
| manip.out.link(manip_xout.input) | ||||
|  | ||||
| nn2_xout = pipeline.create(dai.node.XLinkOut) | ||||
| nn2_xout.setStreamName("recognitions") | ||||
| nn2.out.link(nn2_xout.input) | ||||
|  | ||||
| def to_tensor_result(packet): | ||||
|     return { | ||||
|         name: np.array(packet.getLayerFp16(name)) | ||||
|         for name in [tensor.name for tensor in packet.getRaw().tensors] | ||||
|     } | ||||
|  | ||||
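| # Flatten an interleaved HWC frame into the planar CHW byte layout expected by dai.ImgFrame | ||||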
| def to_planar(frame): | ||||
|     return frame.transpose(2, 0, 1).flatten() | ||||
|  | ||||
| with dai.Device(pipeline) as device: | ||||
|     q_vid = device.getOutputQueue("video", 4, blocking=False) | ||||
|     # Ideally this queue would block, but doing so leads to extreme queuing/latency here | ||||
|     q_det = device.getOutputQueue("detections", 4, blocking=False) | ||||
|  | ||||
|     q_rec = device.getOutputQueue("recognitions", 4, blocking=True) | ||||
|  | ||||
|     q_manip_img = device.getInputQueue("manip_img") | ||||
|     q_manip_cfg = device.getInputQueue("manip_cfg") | ||||
|     q_manip_out = device.getOutputQueue("manip_out", 4, blocking=False) | ||||
|  | ||||
|     controlQueue = device.getInputQueue('control') | ||||
|  | ||||
|     frame = None | ||||
|     cropped_stacked = None | ||||
|     rotated_rectangles = [] | ||||
|     rec_pushed = 0 | ||||
|     rec_received = 0 | ||||
|     host_sync = HostSeqSync() | ||||
|  | ||||
|     class CTCCodec(object): | ||||
|         """ Convert between text-label and text-index """ | ||||
|         def __init__(self, characters): | ||||
|             # characters (str): set of the possible characters. | ||||
|             dict_character = list(characters) | ||||
|  | ||||
|             self.dict = {} | ||||
|             for i, char in enumerate(dict_character): | ||||
|                 self.dict[char] = i + 1 | ||||
|  | ||||
|             self.characters = dict_character | ||||
|             #print(self.characters) | ||||
|             #input() | ||||
|         def decode(self, preds): | ||||
|             """ convert text-index into text-label. """ | ||||
|             texts = [] | ||||
|             index = 0 | ||||
|             # Select max probability (greedy decoding), then decode each index to a character | ||||
|             preds = preds.astype(np.float16) | ||||
|             preds_index = np.argmax(preds, 2) | ||||
|             preds_index = preds_index.transpose(1, 0) | ||||
|             preds_index_reshape = preds_index.reshape(-1) | ||||
|             preds_sizes = np.array([preds_index.shape[1]] * preds_index.shape[0]) | ||||
|  | ||||
|             for l in preds_sizes: | ||||
|                 t = preds_index_reshape[index:index + l] | ||||
|  | ||||
|                 # NOTE: t might be zero size | ||||
|                 if t.shape[0] == 0: | ||||
|                     continue | ||||
|  | ||||
|                 char_list = [] | ||||
|                 for i in range(l): | ||||
|                     # removing repeated characters and blank. | ||||
|                     if not (i > 0 and t[i - 1] == t[i]): | ||||
|                         if self.characters[t[i]] != '#': | ||||
|                             char_list.append(self.characters[t[i]]) | ||||
|                 text = ''.join(char_list) | ||||
|                 texts.append(text) | ||||
|  | ||||
|                 index += l | ||||
|  | ||||
|             return texts | ||||
|  | ||||
|     characters = '0123456789abcdefghijklmnopqrstuvwxyz#' | ||||
|     codec = CTCCodec(characters) | ||||
|  | ||||
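|     # Enable continuous autofocus at startup; pressing 't' later switches to a one-shot trigger | ||||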
|     ctrl = dai.CameraControl() | ||||
|     ctrl.setAutoFocusMode(dai.CameraControl.AutoFocusMode.CONTINUOUS_VIDEO) | ||||
|     ctrl.setAutoFocusTrigger() | ||||
|     controlQueue.send(ctrl) | ||||
|  | ||||
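|     # Main loop: sync video frames with detections, crop each detected rotated rect on-device | ||||
|     # via ImageManip, run text recognition on the crops, and CTC-decode the results on the host | ||||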
|     while True: | ||||
|         vid_in = q_vid.tryGet() | ||||
|         if vid_in is not None: | ||||
|             host_sync.add_msg(vid_in) | ||||
|  | ||||
|         # Multiple recognition results may be available, read until queue is empty | ||||
|         while True: | ||||
|             in_rec = q_rec.tryGet() | ||||
|             if in_rec is None: | ||||
|                 break | ||||
|             rec_data = np.array(in_rec.getFirstLayerFp16()).reshape(30, 1, 37)  # 30 time steps, 37 character classes | ||||
|             decoded_text = codec.decode(rec_data)[0] | ||||
|             pos = rotated_rectangles[rec_received] | ||||
|             print("{:2}: {:20}".format(rec_received, decoded_text), | ||||
|                 "center({:3},{:3}) size({:3},{:3}) angle{:5.1f} deg".format( | ||||
|                     int(pos[0][0]), int(pos[0][1]), pos[1][0], pos[1][1], pos[2])) | ||||
|             # Draw the text on the right side of 'cropped_stacked' - placeholder | ||||
|             if cropped_stacked is not None: | ||||
|                 cv2.putText(cropped_stacked, decoded_text, | ||||
|                                 (120 + 10 , 32 * rec_received + 24), | ||||
|                                 cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,255,0), 2) | ||||
|                 cv2.imshow('cropped_stacked', cropped_stacked) | ||||
|             rec_received += 1 | ||||
|  | ||||
|         if cv2.waitKey(1) == ord('q'): | ||||
|             break | ||||
|  | ||||
|         if rec_received >= rec_pushed: | ||||
|             in_det = q_det.tryGet() | ||||
|             if in_det is not None: | ||||
|                 frame = host_sync.get_msg(in_det.getSequenceNum()).getCvFrame().copy() | ||||
|  | ||||
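|                 # EAST outputs a 64x64 score map plus geometry tensors (4-channel edge distances, 1-channel angle) | ||||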
|                 scores, geom1, geom2 = to_tensor_result(in_det).values() | ||||
|                 scores = np.reshape(scores, (1, 1, 64, 64)) | ||||
|                 geom1 = np.reshape(geom1, (1, 4, 64, 64)) | ||||
|                 geom2 = np.reshape(geom2, (1, 1, 64, 64)) | ||||
|  | ||||
|                 bboxes, confs, angles = east.decode_predictions(scores, geom1, geom2) | ||||
|                 boxes, angles = east.non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles)) | ||||
|                 rotated_rectangles = [ | ||||
|                     east.get_cv_rotated_rect(bbox, angle * -1) | ||||
|                     for (bbox, angle) in zip(boxes, angles) | ||||
|                 ] | ||||
|  | ||||
|                 rec_received = 0 | ||||
|                 rec_pushed = len(rotated_rectangles) | ||||
|                 if rec_pushed: | ||||
|                     print("====== Pushing for recognition, count:", rec_pushed) | ||||
|                 cropped_stacked = None | ||||
|                 for idx, rotated_rect in enumerate(rotated_rectangles): | ||||
|                     # Detections are done on 256x256 frames, we are sending back 1024x1024 | ||||
|                     # That's why we multiply center and size values by 4 | ||||
|                     rotated_rect[0][0] = rotated_rect[0][0] * 4 | ||||
|                     rotated_rect[0][1] = rotated_rect[0][1] * 4 | ||||
|                     rotated_rect[1][0] = rotated_rect[1][0] * 4 | ||||
|                     rotated_rect[1][1] = rotated_rect[1][1] * 4 | ||||
|  | ||||
|                     # Draw detection crop area on input frame | ||||
|                     points = np.int0(cv2.boxPoints(rotated_rect)) | ||||
|                     print(rotated_rect) | ||||
|                     cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1, lineType=cv2.LINE_8) | ||||
|  | ||||
|                     # TODO make it work taking args like in OpenCV: | ||||
|                     # rr = ((256, 256), (128, 64), 30) | ||||
|                     rr = dai.RotatedRect() | ||||
|                     rr.center.x    = rotated_rect[0][0] | ||||
|                     rr.center.y    = rotated_rect[0][1] | ||||
|                     rr.size.width  = rotated_rect[1][0] | ||||
|                     rr.size.height = rotated_rect[1][1] | ||||
|                     rr.angle       = rotated_rect[2] | ||||
|                     cfg = dai.ImageManipConfig() | ||||
|                     cfg.setCropRotatedRect(rr, False) | ||||
|                     cfg.setResize(120, 32) | ||||
|                     # Send frame and config to device | ||||
|                     if idx == 0: | ||||
|                         h, w, c = frame.shape  # numpy shape order is (height, width, channels) | ||||
|                         imgFrame = dai.ImgFrame() | ||||
|                         imgFrame.setData(to_planar(frame)) | ||||
|                         imgFrame.setType(dai.ImgFrame.Type.BGR888p) | ||||
|                         imgFrame.setWidth(w) | ||||
|                         imgFrame.setHeight(h) | ||||
|                         q_manip_img.send(imgFrame) | ||||
|                     else: | ||||
|                         cfg.setReusePreviousImage(True) | ||||
|                     q_manip_cfg.send(cfg) | ||||
|  | ||||
|                     # Get manipulated image from the device | ||||
|                     transformed = q_manip_out.get().getCvFrame() | ||||
|  | ||||
|                     rec_placeholder_img = np.zeros((32, 200, 3), np.uint8) | ||||
|                     transformed = np.hstack((transformed, rec_placeholder_img)) | ||||
|                     if cropped_stacked is None: | ||||
|                         cropped_stacked = transformed | ||||
|                     else: | ||||
|                         cropped_stacked = np.vstack((cropped_stacked, transformed)) | ||||
|  | ||||
|         if cropped_stacked is not None: | ||||
|             cv2.imshow('cropped_stacked', cropped_stacked) | ||||
|  | ||||
|         if frame is not None: | ||||
|             cv2.imshow('frame', frame) | ||||
|  | ||||
|         key = cv2.waitKey(1) | ||||
|         if key == ord('q'): | ||||
|             break | ||||
|         elif key == ord('t'): | ||||
|             print("Autofocus trigger (and disable continuous)") | ||||
|             ctrl = dai.CameraControl() | ||||
|             ctrl.setAutoFocusMode(dai.CameraControl.AutoFocusMode.AUTO) | ||||
|             ctrl.setAutoFocusTrigger() | ||||
|             controlQueue.send(ctrl) | ||||
| @@ -1,9 +1,8 @@ | ||||
| paho-mqtt~=1.6.1 | ||||
| docopt~=0.6.2 | ||||
| depthai==2.14.1.0 | ||||
| opencv-python~=4.5.5.62 | ||||
| depthai==2.17.2.0 | ||||
| opencv-python==4.6.0.66 | ||||
| google~=3.0.0 | ||||
| google-api-core~=2.4.0 | ||||
| setuptools==60.5.0 | ||||
| protobuf3 | ||||
| blobconverter>=1.2.9 | ||||
| blobconverter==1.3.0 | ||||