import cv2 import depthai import numpy as np _conf_threshold = 0.5 def get_cv_rotated_rect(bbox, angle): x0, y0, x1, y1 = bbox width = abs(x0 - x1) height = abs(y0 - y1) x = x0 + width * 0.5 y = y0 + height * 0.5 return ([x.tolist(), y.tolist()], [width.tolist(), height.tolist()], np.rad2deg(angle)) def rotated_Rectangle(bbox, angle): X0, Y0, X1, Y1 = bbox width = abs(X0 - X1) height = abs(Y0 - Y1) x = int(X0 + width * 0.5) y = int(Y0 + height * 0.5) pt1_1 = (int(x + width / 2), int(y + height / 2)) pt2_1 = (int(x + width / 2), int(y - height / 2)) pt3_1 = (int(x - width / 2), int(y - height / 2)) pt4_1 = (int(x - width / 2), int(y + height / 2)) t = np.array([[np.cos(angle), -np.sin(angle), x - x * np.cos(angle) + y * np.sin(angle)], [np.sin(angle), np.cos(angle), y - x * np.sin(angle) - y * np.cos(angle)], [0, 0, 1]]) tmp_pt1_1 = np.array([[pt1_1[0]], [pt1_1[1]], [1]]) tmp_pt1_2 = np.dot(t, tmp_pt1_1) pt1_2 = (int(tmp_pt1_2[0][0]), int(tmp_pt1_2[1][0])) tmp_pt2_1 = np.array([[pt2_1[0]], [pt2_1[1]], [1]]) tmp_pt2_2 = np.dot(t, tmp_pt2_1) pt2_2 = (int(tmp_pt2_2[0][0]), int(tmp_pt2_2[1][0])) tmp_pt3_1 = np.array([[pt3_1[0]], [pt3_1[1]], [1]]) tmp_pt3_2 = np.dot(t, tmp_pt3_1) pt3_2 = (int(tmp_pt3_2[0][0]), int(tmp_pt3_2[1][0])) tmp_pt4_1 = np.array([[pt4_1[0]], [pt4_1[1]], [1]]) tmp_pt4_2 = np.dot(t, tmp_pt4_1) pt4_2 = (int(tmp_pt4_2[0][0]), int(tmp_pt4_2[1][0])) points = np.array([pt1_2, pt2_2, pt3_2, pt4_2]) return points def non_max_suppression(boxes, probs=None, angles=None, overlapThresh=0.3): # if there are no boxes, return an empty list if len(boxes) == 0: return [], [] # if the bounding boxes are integers, convert them to floats -- this # is important since we'll be doing a bunch of divisions if boxes.dtype.kind == "i": boxes = boxes.astype("float") # initialize the list of picked indexes pick = [] # grab the coordinates of the bounding boxes x1 = boxes[:, 0] y1 = boxes[:, 1] x2 = boxes[:, 2] y2 = boxes[:, 3] # compute the area of the bounding boxes and grab the indexes to sort # (in the case that no probabilities are provided, simply sort on the bottom-left y-coordinate) area = (x2 - x1 + 1) * (y2 - y1 + 1) idxs = y2 # if probabilities are provided, sort on them instead if probs is not None: idxs = probs # sort the indexes idxs = np.argsort(idxs) # keep looping while some indexes still remain in the indexes list while len(idxs) > 0: # grab the last index in the indexes list and add the index value to the list of picked indexes last = len(idxs) - 1 i = idxs[last] pick.append(i) # find the largest (x, y) coordinates for the start of the bounding box and the smallest (x, y) coordinates for the end of the bounding box xx1 = np.maximum(x1[i], x1[idxs[:last]]) yy1 = np.maximum(y1[i], y1[idxs[:last]]) xx2 = np.minimum(x2[i], x2[idxs[:last]]) yy2 = np.minimum(y2[i], y2[idxs[:last]]) # compute the width and height of the bounding box w = np.maximum(0, xx2 - xx1 + 1) h = np.maximum(0, yy2 - yy1 + 1) # compute the ratio of overlap overlap = (w * h) / area[idxs[:last]] # delete all indexes from the index list that have overlap greater than the provided overlap threshold idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0]))) # return only the bounding boxes that were picked return boxes[pick].astype("int"), angles[pick] def decode_predictions(scores, geometry1, geometry2): # grab the number of rows and columns from the scores volume, then # initialize our set of bounding box rectangles and corresponding # confidence scores (numRows, numCols) = scores.shape[2:4] rects = [] confidences = [] angles = [] # loop over the number of rows for y in range(0, numRows): # extract the scores (probabilities), followed by the # geometrical data used to derive potential bounding box # coordinates that surround text scoresData = scores[0, 0, y] xData0 = geometry1[0, 0, y] xData1 = geometry1[0, 1, y] xData2 = geometry1[0, 2, y] xData3 = geometry1[0, 3, y] anglesData = geometry2[0, 0, y] # loop over the number of columns for x in range(0, numCols): # if our score does not have sufficient probability, # ignore it if scoresData[x] < _conf_threshold: continue # compute the offset factor as our resulting feature # maps will be 4x smaller than the input image (offsetX, offsetY) = (x * 4.0, y * 4.0) # extract the rotation angle for the prediction and # then compute the sin and cosine angle = anglesData[x] cos = np.cos(angle) sin = np.sin(angle) # use the geometry volume to derive the width and height # of the bounding box h = xData0[x] + xData2[x] w = xData1[x] + xData3[x] # compute both the starting and ending (x, y)-coordinates # for the text prediction bounding box endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x])) endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x])) startX = int(endX - w) startY = int(endY - h) # add the bounding box coordinates and probability score # to our respective lists rects.append((startX, startY, endX, endY)) confidences.append(scoresData[x]) angles.append(angle) # return a tuple of the bounding boxes and associated confidences return (rects, confidences, angles) def decode_east(nnet_packet, **kwargs): scores = nnet_packet.get_tensor(0) geometry1 = nnet_packet.get_tensor(1) geometry2 = nnet_packet.get_tensor(2) bboxes, confs, angles = decode_predictions(scores, geometry1, geometry2 ) boxes, angles = non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles)) boxesangles = (boxes, angles) return boxesangles def show_east(boxesangles, frame, **kwargs): bboxes = boxesangles[0] angles = boxesangles[1] for ((X0, Y0, X1, Y1), angle) in zip(bboxes, angles): width = abs(X0 - X1) height = abs(Y0 - Y1) cX = int(X0 + width * 0.5) cY = int(Y0 + height * 0.5) rotRect = ((cX, cY), ((X1 - X0), (Y1 - Y0)), angle * (-1)) points = rotated_Rectangle(frame, rotRect, color=(255, 0, 0), thickness=1) cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1, lineType=cv2.LINE_8) return frame def order_points(pts): rect = np.zeros((4, 2), dtype="float32") s = pts.sum(axis=1) rect[0] = pts[np.argmin(s)] rect[2] = pts[np.argmax(s)] diff = np.diff(pts, axis=1) rect[1] = pts[np.argmin(diff)] rect[3] = pts[np.argmax(diff)] return rect def four_point_transform(image, pts): rect = order_points(pts) (tl, tr, br, bl) = rect widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2)) widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2)) maxWidth = max(int(widthA), int(widthB)) heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2)) heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2)) maxHeight = max(int(heightA), int(heightB)) dst = np.array([ [0, 0], [maxWidth - 1, 0], [maxWidth - 1, maxHeight - 1], [0, maxHeight - 1]], dtype="float32") M = cv2.getPerspectiveTransform(rect, dst) warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight)) return warped