1'''
XGO graphical Python library (EDU library): LCD drawing, buttons, audio/video, camera and AI helpers.
+ 3'''
+ 4import cv2
+ 5import numpy as np
+ 6import math
+ 7import os, sys, time, json, base64
+ 8import spidev as SPI
+ 9import xgoscreen.LCD_2inch as LCD_2inch
+ 10import RPi.GPIO as GPIO
+ 11from PIL import Image, ImageDraw, ImageFont
+ 12import json
+ 13import threading
+ 14
+ 15# from xgolib import XGO
+ 16# from keras.preprocessing import image
+ 17# import _thread # using _thread will report an error, pitfall!
+ 18
# Library version and date of the last modification.
__version__ = '1.5'
__last_modified__ = '2024/12/18'

# Silence RPi.GPIO warnings and address pins by their Broadcom (BCM) numbers;
# the button pins configured in XGOEDU.__init__ rely on this numbering mode.
GPIO.setwarnings(False)
GPIO.setmode(GPIO.BCM)

# Module-level camera flag. In the visible code it is only declared `global`
# inside XGOEDU.xgoCamera; the class itself tracks state in self.camera_still.
camera_still = False
+ 26
+ 27'''
+ 28Face detection
+ 29'''
def getFaceBox(net, frame, conf_threshold=0.7):
    """
    Run a DNN face detector on a frame and outline every confident detection.

    Parameters:
        net (cv2.dnn.Net): The loaded face-detection network.
        frame (numpy.ndarray): The input image. It is copied, never modified.
        conf_threshold (float, optional): A detection is kept only when its
            confidence is strictly greater than this value. Defaults to 0.7.

    Returns:
        tuple: (annotated, boxes)
            - annotated (numpy.ndarray): Copy of the frame with a green
              rectangle drawn around each kept detection.
            - boxes (list): One [x1, y1, x2, y2] pixel box per kept detection.
    """
    annotated = frame.copy()
    height, width = annotated.shape[0], annotated.shape[1]
    # Outline thickness scales with the image height.
    thickness = int(round(height / 150))

    net.setInput(cv2.dnn.blobFromImage(annotated, 1.0, (300, 300), [104, 117, 123], True, False))
    output = net.forward()

    boxes = []
    for idx in range(output.shape[2]):
        detection = output[0, 0, idx]
        if not detection[2] > conf_threshold:
            continue
        # Entries 3..6 are box corners relative to the image size; scale to pixels.
        box = [
            int(detection[3] * width),
            int(detection[4] * height),
            int(detection[5] * width),
            int(detection[6] * height),
        ]
        boxes.append(box)
        cv2.rectangle(annotated, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), thickness, 8)
    return annotated, boxes
+ 61
+ 62'''
+ 63Gesture recognition function
+ 64'''
def hand_pos(angle):
    """
    Map five finger angles to a named hand gesture.

    Each finger is classified as "below 50" or "50 and above"; the resulting
    five-letter pattern is looked up in a fixed table.

    Parameters:
        angle (list): Five finger angles in the order thumb, index, middle,
            ring, pinky.

    Returns:
        str or None: One of 'Good', 'Ok', 'Rock', 'Stone', '1', '2', '3', '4',
        '5', or None when the pattern matches no known gesture.
    """
    # 'L' = angle below 50, 'H' = angle of 50 or more,
    # '?' = neither comparison holds (e.g. NaN), which never matches a gesture.
    pattern = ''.join(
        'L' if angle[i] < 50 else ('H' if angle[i] >= 50 else '?')
        for i in range(5)
    )
    gestures = {
        'LHHHH': 'Good',
        'LHLLL': 'Ok',
        'LLHHL': 'Rock',
        'HHHHH': 'Stone',
        'HLHHH': '1',
        'HLLHH': '2',
        'HLLLH': '3',
        'HLLLL': '4',
        'LLLLL': '5',
    }
    return gestures.get(pattern)
+ 105
def color(value):
    """
    Convert a color between '#RRGGBB' hex notation and a 3-tuple.

    Parameters:
        value (str or tuple): The color to convert.
            - If a string, a hexadecimal color code such as '#FF0000'
              (upper or lower case).
            - If a tuple, integer channel values in the range 0..255.

    Returns:
        str or tuple: The converted color (None for any other input type).
            - Tuple input: an upper-case hex string built from the channels in
              the order given, e.g. (255, 0, 0) -> '#FF0000'.
            - String input: a tuple in reversed channel order
              (blue, green, red), e.g. '#FF0000' -> (0, 0, 255).
    """
    digits = "0123456789ABCDEF"
    if isinstance(value, tuple):
        # The type check must come before any str-only call such as upper();
        # otherwise every tuple input fails with AttributeError.
        return '#' + ''.join(digits[channel // 16] + digits[channel % 16] for channel in value)
    if isinstance(value, str):
        code = value.upper()
        red, green, blue = (
            digits.index(code[i]) * 16 + digits.index(code[i + 1])
            for i in (1, 3, 5)
        )
        return (blue, green, red)
    return None
+ 134
+ 135class XGOEDU():
+ 136 """
+ 137 A class for controlling and interacting with the XGO robot's educational features, including the LCD display, camera, and various AI functions.
+ 138 """
+ 139 def __init__(self):
+ 140 """
+ 141 Initializes the XGOEDU object, setting up the LCD display, GPIO pins, and various attributes.
+ 142 """
+ 143 self.display = LCD_2inch.LCD_2inch()
+ 144 self.display.Init()
+ 145 self.display.clear()
+ 146 self.splash = Image.new("RGB", (320, 240), "black")
+ 147 self.display.ShowImage(self.splash)
+ 148 self.draw = ImageDraw.Draw(self.splash)
+ 149 self.font = ImageFont.truetype("/home/pi/model/msyh.ttc", 15)
+ 150 self.key1 = 17
+ 151 self.key2 = 22
+ 152 self.key3 = 23
+ 153 self.key4 = 24
+ 154 self.cap = None
+ 155 self.hand = None
+ 156 self.yolo = None
+ 157 self.face = None
+ 158 self.face_classifier = None
+ 159 self.classifier = None
+ 160 self.agesexmark = None
+ 161 self.camera_still = False
+ 162 GPIO.setup(self.key1, GPIO.IN, GPIO.PUD_UP)
+ 163 GPIO.setup(self.key2, GPIO.IN, GPIO.PUD_UP)
+ 164 GPIO.setup(self.key3, GPIO.IN, GPIO.PUD_UP)
+ 165 GPIO.setup(self.key4, GPIO.IN, GPIO.PUD_UP)
+ 166
+ 167 def open_camera(self):
+ 168 """
+ 169 Opens the camera if it's not already open.
+ 170 """
+ 171 if self.cap == None:
+ 172 self.cap = cv2.VideoCapture(0)
+ 173 self.cap.set(3, 320)
+ 174 self.cap.set(4, 240)
+ 175
+ 176 def fetch_token(self):
+ 177 """
+ 178 Fetches an access token from the Baidu AI platform.
+ 179
+ 180 Returns:
+ 181 str: The access token.
+ 182
+ 183 Raises:
+ 184 DemoError: If the API key or secret key is incorrect, or if the scope is not correct.
+ 185 """
+ 186 from urllib.request import urlopen
+ 187 from urllib.request import Request
+ 188 from urllib.error import URLError
+ 189 from urllib.parse import urlencode
+ 190 API_KEY = 'Q4ZgU8bfnhA8HQFnNucBO2ut'
+ 191 SECRET_KEY = 'MqFrVgdwoM8ZuGIp0NIFF7qfYti4mjP6'
+ 192 TOKEN_URL = 'http://aip.baidubce.com/oauth/2.0/token'
+ 193 params = {'grant_type': 'client_credentials',
+ 194 'client_id': API_KEY,
+ 195 'client_secret': SECRET_KEY}
+ 196 post_data = urlencode(params)
+ 197 post_data = post_data.encode('utf-8')
+ 198 req = Request(TOKEN_URL, post_data)
+ 199 try:
+ 200 f = urlopen(req)
+ 201 result_str = f.read()
+ 202 except URLError as err:
+ 203 print('token http response http code : ' + str(err.code))
+ 204 result_str = err.read()
+ 205 result_str = result_str.decode()
+ 206
+ 207 # print(result_str)
+ 208 result = json.loads(result_str)
+ 209 # print(result)
+ 210 SCOPE = False
+ 211 if ('access_token' in result.keys() and 'scope' in result.keys()):
+ 212 # print(SCOPE)
+ 213 if SCOPE and (not SCOPE in result['scope'].split(' ')): # SCOPE = False ignore check
+ 214 raise DemoError('scope is not correct')
+ 215 # print('SUCCESS WITH TOKEN: %s EXPIRES IN SECONDS: %s' % (result['access_token'], result['expires_in']))
+ 216 return result['access_token']
+ 217 else:
+ 218 raise DemoError('MAYBE API_KEY or SECRET_KEY not correct: access_token or scope not found in token response')
+ 219
+ 220 # draw a straight line
+ 221 '''
+ 222 x1, y1 are the initial point coordinates, x2, y2 are the end point coordinates
+ 223 '''
+ 224 def lcd_line(self, x1, y1, x2, y2, color="WHITE", width=2):
+ 225 """
+ 226 Draws a straight line on the LCD display.
+ 227
+ 228 Parameters:
+ 229 x1 (int): The x-coordinate of the starting point.
+ 230 y1 (int): The y-coordinate of the starting point.
+ 231 x2 (int): The x-coordinate of the ending point.
+ 232 y2 (int): The y-coordinate of the ending point.
+ 233 color (str, optional): The color of the line. Defaults to "WHITE".
+ 234 width (int, optional): The width of the line. Defaults to 2.
+ 235 """
+ 236 self.draw.line([(x1, y1), (x2, y2)], fill=color, width=width)
+ 237 self.display.ShowImage(self.splash)
+ 238
+ 239 # draw circle
+ 240 '''
+ 241 x1, y1, x2, y2 are two points defining the given border, angle0 is the initial angle, angle1 is the end angle
+ 242 '''
+ 243 def lcd_circle(self, x1, y1, x2, y2, angle0, angle1, color="WHITE", width=2):
+ 244 """
+ 245 Draws a circle or an arc on the LCD display.
+ 246
+ 247 Parameters:
+ 248 x1 (int): The x-coordinate of the top-left corner of the bounding box.
+ 249 y1 (int): The y-coordinate of the top-left corner of the bounding box.
+ 250 x2 (int): The x-coordinate of the bottom-right corner of the bounding box.
+ 251 y2 (int): The y-coordinate of the bottom-right corner of the bounding box.
+ 252 angle0 (int): The starting angle (in degrees).
+ 253 angle1 (int): The ending angle (in degrees).
+ 254 color (str, optional): The color of the circle/arc. Defaults to "WHITE".
+ 255 width (int, optional): The width of the circle/arc. Defaults to 2.
+ 256 """
+ 257 self.draw.arc((x1, y1, x2, y2), angle0, angle1, fill=color, width=width)
+ 258 self.display.ShowImage(self.splash)
+ 259
+ 260 # draw a circle: draw a circle based on the circle point and radius
+ 261 '''
+ 262 center_x, center_y coordinates of the center point of the circle
+ 263 radius circle radius length mm
+ 264
+ 265 '''
+ 266 def lcd_round(self, center_x, center_y, radius, color, width=2):
+ 267 """
+ 268 Draws a circle on the LCD display using the center point and radius.
+ 269
+ 270 Parameters:
+ 271 center_x (int): The x-coordinate of the center of the circle.
+ 272 center_y (int): The y-coordinate of the center of the circle.
+ 273 radius (int): The radius of the circle.
+ 274 color (str): The color of the circle.
+ 275 width (int, optional): The width of the circle's outline. Defaults to 2.
+ 276 """
+ 277 # Calculate the bounding box for the circle
+ 278 x1 = center_x - radius
+ 279 y1 = center_y - radius
+ 280 x2 = center_x + radius
+ 281 y2 = center_y + radius
+ 282
+ 283 # Call lcd_circle() function to draw the circle
+ 284 self.lcd_circle(x1, y1, x2, y2, 0, 360, color=color, width=width)
+ 285
+ 286 # draw rectangle
+ 287 '''
+ 288 x1, y1 are the initial point coordinates, x2, y2 are the diagonal end point coordinates
+ 289 '''
+ 290 def lcd_rectangle(self, x1, y1, x2, y2, fill=None, outline="WHITE", width=2):
+ 291 """
+ 292 Draws a rectangle on the LCD display.
+ 293
+ 294 Parameters:
+ 295 x1 (int): The x-coordinate of the top-left corner.
+ 296 y1 (int): The y-coordinate of the top-left corner.
+ 297 x2 (int): The x-coordinate of the bottom-right corner.
+ 298 y2 (int): The y-coordinate of the bottom-right corner.
+ 299 fill (str, optional): The fill color of the rectangle. Defaults to None.
+ 300 outline (str, optional): The outline color of the rectangle. Defaults to "WHITE".
+ 301 width (int, optional): The width of the rectangle's outline. Defaults to 2.
+ 302 """
+ 303 self.draw.rectangle((x1, y1, x2, y2), fill=fill, outline=outline, width=width)
+ 304 self.display.ShowImage(self.splash)
+ 305
+ 306 # clear the screen
+ 307 def lcd_clear(self):
+ 308 """
+ 309 Clears the LCD display.
+ 310 """
+ 311 self.splash = Image.new("RGB", (320, 240), "black")
+ 312 self.draw = ImageDraw.Draw(self.splash)
+ 313 self.display.ShowImage(self.splash)
+ 314
+ 315 # show picture
+ 316 '''
+ 317 The size of the picture is 320*240, jpg format
+ 318 '''
+ 319 def lcd_picture(self, filename, x=0, y=0):
+ 320 """
+ 321 Displays an image on the LCD display.
+ 322
+ 323 Parameters:
+ 324 filename (str): The name of the image file (must be in the /home/pi/xgoPictures/ directory).
+ 325 x (int, optional): The x-coordinate of the top-left corner where the image will be displayed. Defaults to 0.
+ 326 y (int, optional): The y-coordinate of the top-left corner where the image will be displayed. Defaults to 0.
+ 327 """
+ 328 path = "/home/pi/xgoPictures/"
+ 329 image = Image.open(path + filename)
+ 330 self.splash.paste(image, (x, y))
+ 331 self.display.ShowImage(self.splash)
+ 332
+ 333 # display text
+ 334 '''
+ 335 x1, y1 are the initial point coordinates, content is the content
+ 336 '''
+ 337 def lcd_text(self, x, y, content, color="WHITE", fontsize=15):
+ 338 """
+ 339 Displays text on the LCD display.
+ 340
+ 341 Parameters:
+ 342 x (int): The x-coordinate of the top-left corner of the text.
+ 343 y (int): The y-coordinate of the top-left corner of the text.
+ 344 content (str): The text to display.
+ 345 color (str, optional): The color of the text. Defaults to "WHITE".
+ 346 fontsize (int, optional): The font size of the text. Defaults to 15.
+ 347 """
+ 348 if fontsize != 15:
+ 349 self.font = ImageFont.truetype("/home/pi/model/msyh.ttc", fontsize)
+ 350 self.draw.text((x, y), content, fill=color, font=self.font)
+ 351 self.display.ShowImage(self.splash)
+ 352
+ 353 # streaming display all text
+ 354 '''
    start_x, start_y are the coordinates of the starting point; content is the text to display.
    The text wraps at every newline character and at the right edge of the screen. When a page is full, the screen is cleared and output continues from the starting position (2, 2 by default).
+ 357 '''
    def display_text_on_screen(self, content, color, start_x=2, start_y=2, font_size=20, screen_width=320,
                               screen_height=240):
        """
        Displays text on the screen character by character with wrapping and paging.

        Every character is drawn through lcd_text, which refreshes the LCD on
        each call. Each character occupies a square cell of font_size + 1
        pixels. A newline character ends the current row.

        Parameters:
            content (str): The text content to display.
            color (str): The color of the text.
            start_x (int, optional): The x-coordinate of the starting position. Defaults to 2.
            start_y (int, optional): The y-coordinate of the starting position. Defaults to 2.
            font_size (int, optional): The font size. Defaults to 20.
            screen_width (int, optional): The width of the screen. Defaults to 320.
            screen_height (int, optional): The height of the screen. Defaults to 240.
        """
        # NOTE(review): the block nesting below was reconstructed from a paste
        # whose indentation was lost; confirm it against the original file.

        # Calculate the number of characters that can be displayed per line and the number of lines
        char_width = font_size + 1  # // 2
        chars_per_line = screen_width // char_width
        lines = screen_height // char_width

        # Split the content into a list of individual characters
        chars = list(content)

        # Handle newline characters (only their count is used below)
        line_break_indices = [i for i, char in enumerate(chars) if char == '\n']

        # Calculate the total number of lines and pages.
        # This is an estimate; the loop below also keeps running until every character was shown.
        total_lines = len(chars) // chars_per_line + 1
        total_pages = (total_lines - 1 + len(line_break_indices)) // lines + 1

        # Clear the screen
        self.display.clear()

        # Display the text line by line
        current_page = 1
        current_line = 1
        current_char = 0

        while current_page <= total_pages or current_char < len(chars):
            # NOTE(review): this clears the display but not self.splash, the
            # image lcd_text draws on and shows; text of earlier pages
            # presumably stays in that image - confirm whether lcd_clear() was intended.
            self.display.clear()
            # Calculate the number of lines to display on the current page
            if current_page < total_pages or current_char < len(chars):
                lines_to_display = lines
            else:
                lines_to_display = (total_lines - 1) % lines + 1

            # Rows are numbered from 1, so the first row is drawn at start_y + char_width.
            current_line = 1
            # Display the content of the current page
            for line in range(lines_to_display):
                current_x = start_x
                current_y = start_y + current_line * char_width  # font_size
                current_line += 1
                # Stop the page once the row counter reaches the number of rows that fit.
                # NOTE(review): if `lines` is 2 or less this breaks before any
                # character is drawn, so the outer loop would not terminate for
                # non-empty content - confirm for very large font sizes.
                if current_line >= lines:
                    break

                # Show the text of the current line
                for _ in range(chars_per_line):
                    # Check if all characters have been displayed
                    if current_char >= len(chars):
                        break

                    char = chars[current_char]
                    if char == '\n':
                        current_x = start_x
                        current_y = start_y + current_line * char_width  # font_size
                        current_line += 1

                        # The newline character itself is passed to lcd_text, then the row ends.
                        self.lcd_text(current_x, current_y, char, color, font_size)
                        current_char += 1
                        break  # continue

                    self.lcd_text(current_x, current_y, char, color, font_size)
                    current_x += char_width
                    current_char += 1

                    # Check if all characters have been displayed
                    if current_char >= len(chars):
                        break

            # Update current page and current line
            # (current_line is reset to 1 at the start of the next page)
            current_page += 1
            current_line += lines_to_display

            # Wait for display time or manually trigger page turning
            # Here you can add appropriate delay code or a mechanism to trigger page turning as needed

            # If the content exceeds one screen, clear the screen
            # if total_lines > lines:
            if current_page < total_pages:
                self.display.clear()
+ 447
+ 448 # key_value
+ 449 '''
+ 450 a upper left button
+ 451 b upper right button
+ 452 c lower left button
+ 453 d lower right button
+ 454 Return value 0 not pressed, 1 pressed
+ 455 '''
+ 456 def xgoButton(self, button):
+ 457 """
+ 458 Checks the state of a button.
+ 459
+ 460 Parameters:
+ 461 button (str): The button to check ('a', 'b', 'c', or 'd').
+ 462
+ 463 Returns:
+ 464 bool: True if the button is pressed, False otherwise.
+ 465 """
+ 466 if button == "a":
+ 467 last_state_a = GPIO.input(self.key1)
+ 468 time.sleep(0.02)
+ 469 return (not last_state_a)
+ 470 elif button == "b":
+ 471 last_state_b = GPIO.input(self.key2)
+ 472 time.sleep(0.02)
+ 473 return (not last_state_b)
+ 474 elif button == "c":
+ 475 last_state_c = GPIO.input(self.key3)
+ 476 time.sleep(0.02)
+ 477 return (not last_state_c)
+ 478 elif button == "d":
+ 479 last_state_d = GPIO.input(self.key4)
+ 480 time.sleep(0.02)
+ 481 return (not last_state_d)
+ 482
+ 483 # speaker
+ 484 '''
+ 485 filename file name string
+ 486 '''
+ 487 def xgoSpeaker(self, filename):
+ 488 """
+ 489 Plays an audio file using mplayer.
+ 490
+ 491 Parameters:
+ 492 filename (str): The name of the audio file (must be in the /home/pi/xgoMusic/ directory).
+ 493 """
+ 494 path = "/home/pi/xgoMusic/"
+ 495 os.system("mplayer" + " " + path + filename)
+ 496
+ 497 def xgoVideoAudio(self, filename):
+ 498 """
+ 499 Plays the audio track of a video file using mplayer.
+ 500
+ 501 Parameters:
+ 502 filename (str): The name of the video file (must be in the /home/pi/xgoVideos/ directory).
+ 503 """
+ 504 path = "/home/pi/xgoVideos/"
+ 505 time.sleep(0.2) # Synchronize sound and picture speed, but the timeline may not be synchronized. Adjust here.
+ 506 cmd = "sudo mplayer " + path + filename + " -novideo"
+ 507 os.system(cmd)
+ 508
+ 509 def xgoVideo(self, filename):
+ 510 """
+ 511 Plays a video file on the LCD display while simultaneously playing its audio track in a separate thread.
+ 512
+ 513 Parameters:
+ 514 filename (str): The name of the video file (must be in the /home/pi/xgoVideos/ directory).
+ 515 """
+ 516 path = "/home/pi/xgoVideos/"
+ 517 x = threading.Thread(target=self.xgoVideoAudio, args=(filename,))
+ 518 x.start()
+ 519 global counter
+ 520 video = cv2.VideoCapture(path + filename)
+ 521 print(path + filename)
+ 522 fps = video.get(cv2.CAP_PROP_FPS)
+ 523 print(fps)
+ 524 init_time = time.time()
+ 525 counter = 0
+ 526 while True:
+ 527 grabbed, dst = video.read()
+ 528 try:
+ 529 b, g, r = cv2.split(dst)
+ 530 dst = cv2.merge((r, g, b))
+ 531 except:
+ 532 pass
+ 533 try:
+ 534 imgok = Image.fromarray(dst)
+ 535 except:
+ 536 break
+ 537 self.display.ShowImage(imgok)
+ 538 # Force frame rate. It is recommended that the frame rate should not exceed 20 frames, otherwise the display will not keep up, but 20 frames often have problems, so it is recommended to directly use 15 frames.
+ 539 counter += 1
+ 540 ctime = time.time() - init_time
+ 541 if ctime != 0:
+ 542 qtime = counter / fps - ctime
+ 543 # print(qtime)
+ 544 if qtime > 0:
+ 545 time.sleep(qtime)
+ 546 if not grabbed:
+ 547 break
+ 548
+ 549 # audio_record
+ 550 '''
+ 551 filename file name string
+ 552 seconds recording time S string
+ 553 '''
+ 554 def xgoAudioRecord(self, filename="record", seconds=5):
+ 555 """
+ 556 Records audio for a specified duration and saves it as a WAV file.
+ 557
+ 558 Parameters:
+ 559 filename (str, optional): The name of the output audio file. Defaults to "record".
+ 560 seconds (int, optional): The recording duration in seconds. Defaults to 5.
+ 561 """
+ 562 path = "/home/pi/xgoMusic/"
+ 563 command1 = "sudo arecord -d"
+ 564 command2 = "-f S32_LE -r 8000 -c 1 -t wav"
+ 565 cmd = command1 + " " + str(seconds) + " " + command2 + " " + path + filename + ".wav"
+ 566 print(cmd)
+ 567 os.system(cmd)
+ 568
+ 569 def xgoCamera(self, switch):
+ 570 """
+ 571 Turns the camera on or off and displays the camera feed on the LCD.
+ 572
+ 573 Parameters:
+ 574 switch (bool): True to turn the camera on, False to turn it off.
+ 575 """
+ 576 global camera_still
+ 577 if switch:
+ 578 self.open_camera()
+ 579 self.camera_still = True
+ 580 t = threading.Thread(target=self.camera_mode)
+ 581 t.start()
+ 582 else:
+ 583 self.camera_still = False
+ 584 time.sleep(0.5)
+ 585 splash = Image.new("RGB", (320, 240), "black")
+ 586 self.display.ShowImage(splash)
+ 587
+ 588 def camera_mode(self):
+ 589 """
+ 590 Continuously captures and displays the camera feed on the LCD until camera_still is set to False.
+ 591 """
+ 592 self.camera_still = True
+ 593 while 1:
+ 594 success, image = self.cap.read()
+ 595 b, g, r = cv2.split(image)
+ 596 image = cv2.merge((r, g, b))
+ 597 image = cv2.flip(image, 1)
+ 598 imgok = Image.fromarray(image)
+ 599 self.display.ShowImage(imgok)
+ 600 if not self.camera_still:
+ 601 break
+ 602
+ 603 def xgoVideoRecord(self, filename="record", seconds=5):
+ 604 """
+ 605 Records a video for a specified duration, displays it on the LCD, and saves it as an MP4 file.
+ 606
+ 607 Parameters:
+ 608 filename (str, optional): The name of the output video file. Defaults to "record".
+ 609 seconds (int, optional): The recording duration in seconds. Defaults to 5.
+ 610 """
+ 611 path = "/home/pi/xgoVideos/"
+ 612 self.camera_still = False
+ 613 time.sleep(0.6)
+ 614 self.open_camera()
+ 615 FPS = 15
+ 616 fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ 617 width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ 618 height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ 619 videoWrite = cv2.VideoWriter(path + filename + '.mp4', fourcc, FPS, (width, height))
+ 620 starttime = time.time()
+ 621 while 1:
+ 622 print('recording...')
+ 623 ret, image = self.cap.read()
+ 624 if not ret:
+ 625 break
+ 626 videoWrite.write(image)
+ 627 b, g, r = cv2.split(image)
+ 628 image = cv2.merge((r, g, b))
+ 629 image = cv2.flip(image, 1)
+ 630 imgok = Image.fromarray(image)
+ 631 self.display.ShowImage(imgok)
+ 632 if time.time() - starttime > seconds:
+ 633 break
+ 634 print('recording done')
+ 635 self.xgoCamera(True)
+ 636 videoWrite.release()
+ 637
+ 638 def xgoTakePhoto(self, filename="photo"):
+ 639 """
+ 640 Takes a photo, displays it on the LCD, and saves it as a JPG file.
+ 641
+ 642 Parameters:
+ 643 filename (str, optional): The name of the output image file. Defaults to "photo".
+ 644 """
+ 645 path = "/home/pi/xgoPictures/"
+ 646 self.camera_still = False
+ 647 time.sleep(0.6)
+ 648 self.open_camera()
+ 649 success, image = self.cap.read()
+ 650 cv2.imwrite(path + filename + '.jpg', image)
+ 651 if not success:
+ 652 print("Ignoring empty camera frame")
+ 653 b, g, r = cv2.split(image)
+ 654 image = cv2.merge((r, g, b))
+ 655 image = cv2.flip(image, 1)
+ 656 imgok = Image.fromarray(image)
+ 657 self.display.ShowImage(imgok)
+ 658 print('photo writed!')
+ 659 time.sleep(0.7)
+ 660 self.xgoCamera(True)
+ 661
+ 662 '''
+ 663 Turn on the camera, take a photo with the A button, record a video with the B button, and exit with the C button
+ 664 '''
+ 665 def camera(self, filename="camera"):
+ 666 """
+ 667 Activates the camera and allows the user to take photos, record videos, or exit using the A, B, and C buttons, respectively.
+ 668
+ 669 Parameters:
+ 670 filename (str, optional): The base filename for photos and videos. Defaults to "camera".
+ 671 """
+ 672 font = ImageFont.truetype("/home/pi/model/msyh.ttc", 20)
+ 673 self.open_camera()
+ 674 while True:
+ 675 success, image = self.cap.read()
+ 676 # cv2.imwrite('/home/pi/xgoEdu/camera/file.jpg',image)
+ 677 if not success:
+ 678 print("Ignoring empty camera frame")
+ 679 continue
+ 680 # cv2.imshow('frame',image)
+ 681 b, g, r = cv2.split(image)
+ 682 image = cv2.merge((r, g, b))
+ 683 image = cv2.flip(image, 1)
+ 684 imgok = Image.fromarray(image)
+ 685 self.display.ShowImage(imgok)
+ 686 if cv2.waitKey(5) & 0xFF == 27:
+ 687 XGOEDU.lcd_clear(self)
+ 688 time.sleep(0.5)
+ 689 break
+ 690 if XGOEDU.xgoButton(self, "a"):
+ 691 draw = ImageDraw.Draw(imgok)
+ 692 cv2.imwrite(filename + '.jpg', image)
+ 693 print('photo writed!')
+ 694 draw.text((5, 5), filename + '.jpg saved!', fill=(255, 0, 0), font=font)
+ 695 self.display.ShowImage(imgok)
+ 696 time.sleep(1)
+ 697 if XGOEDU.xgoButton(self, "b"):
+ 698 FPS = 15
+ 699 fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ 700 width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ 701 height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ 702 videoWrite = cv2.VideoWriter(filename + '.mp4', fourcc, FPS, (width, height))
+ 703 while 1:
+ 704 ret, image = self.cap.read()
+ 705 if not ret:
+ 706 break
+ 707 videoWrite.write(image)
+ 708 b, g, r = cv2.split(image)
+ 709 image = cv2.merge((r, g, b))
+ 710 image = cv2.flip(image, 1)
+ 711 imgok = Image.fromarray(image)
+ 712 draw = ImageDraw.Draw(imgok)
+ 713 draw.text((5, 5), 'recording', fill=(255, 0, 0), font=font)
+ 714 self.display.ShowImage(imgok)
+ 715 if cv2.waitKey(33) & 0xFF == ord('q'):
+ 716 break
+ 717 if XGOEDU.xgoButton(self, "b"):
+ 718 break
+ 719 time.sleep(1)
+ 720 videoWrite.release()
+ 721 if XGOEDU.xgoButton(self, "c"):
+ 722 XGOEDU.lcd_clear(self)
+ 723 time.sleep(0.5)
+ 724 break
+ 725
+ 726 '''
+ 727 Skeletal recognition
+ 728 '''
+ 729 def posenetRecognition(self, target="camera"):
+ 730 """
+ 731 Performs skeletal (pose) recognition on an image or video frame and displays the results on the LCD.
+ 732
+ 733 Parameters:
+ 734 target (str, optional): The source of the image data. Can be "camera" for live camera feed or a file path for an image. Defaults to "camera".
+ 735
+ 736 Returns:
+ 737 list or None: A list of angles between key body joints if successful, or None if no pose is detected.
+ 738 """
+ 739 import mediapipe as mp
+ 740 mp_pose = mp.solutions.pose
+ 741 ges = ''
+ 742 mp_drawing = mp.solutions.drawing_utils
+ 743 mp_drawing_styles = mp.solutions.drawing_styles
+ 744 mp_holistic = mp.solutions.holistic
+ 745 joint_list = [[24, 26, 28], [23, 25, 27], [14, 12, 24], [13, 11, 23]] # leg&arm
+ 746 if target == "camera":
+ 747 self.open_camera()
+ 748 success, image = self.cap.read()
+ 749 else:
+ 750 image = np.array(Image.open(target))
+ 751
+ 752 with mp_pose.Pose(
+ 753 min_detection_confidence=0.5,
+ 754 min_tracking_confidence=0.5) as pose:
+ 755
+ 756 image.flags.writeable = False
+ 757 image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ 758 results = pose.process(image)
+ 759
+ 760 # Draw the pose annotation on the image.
+ 761 image.flags.writeable = True
+ 762 image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+ 763 mp_drawing.draw_landmarks(
+ 764 image,
+ 765 results.pose_landmarks,
+ 766 mp_pose.POSE_CONNECTIONS,
+ 767 landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
+ 768 # Flip the image horizontally for a selfie-view display.
+ 769
+ 770 if results.pose_landmarks:
+ 771 RHL = results.pose_landmarks
+ 772 angellist = []
+ 773 for joint in joint_list:
+ 774 a = np.array([RHL.landmark[joint[0]].x, RHL.landmark[joint[0]].y])
+ 775 b = np.array([RHL.landmark[joint[1]].x, RHL.landmark[joint[1]].y])
+ 776 c = np.array([RHL.landmark[joint[2]].x, RHL.landmark[joint[2]].y])
+ 777 radians_fingers = np.arctan2(c[1] - b[1], c[0] - b[0]) - np.arctan2(a[1] - b[1], a[0] - b[0])
+ 778 angle = np.abs(radians_fingers * 180.0 / np.pi)
+ 779 if angle > 180.0:
+ 780 angle = 360 - angle
+ 781 # cv2.putText(image, str(round(angle, 2)), tuple(np.multiply(b, [640, 480]).astype(int)),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA)
+ 782 angellist.append(angle)
+ 783 else:
+ 784 angellist = []
+ 785 print(angellist)
+ 786 b, g, r = cv2.split(image)
+ 787 image = cv2.merge((r, g, b))
+ 788 image = cv2.flip(image, 1)
+ 789 try:
+ 790 ges = str(int(angellist[0])) + '|' + str(int(angellist[1])) + '|' + str(int(angellist[2])) + '|' + str(
+ 791 int(angellist[3]))
+ 792 except:
+ 793 ges = ' '
+ 794 cv2.putText(image, ges, (10, 220), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA)
+ 795 imgok = Image.fromarray(image)
+ 796 self.display.ShowImage(imgok)
+ 797
+ 798 # datas = self.hand.run(image)
+ 799 # b,g,r = cv2.split(image)
+ 800 # image = cv2.merge((r,g,b))
+ 801 # #image = cv2.flip(image,1)
+ 802 # for data in datas:
+ 803 # rect = data['rect']
+ 804 # right_left = data['right_left']
+ 805 # center = data['center']
+ 806 # dlandmark = data['dlandmark']
+ 807 # hand_angle = data['hand_angle']
+ 808 # XGOEDU.rectangle(self,image,rect,"#33cc00",2)
+ 809 # #XGOEDU.text(self,image,right_left,center,2,"#cc0000",5)
+ 810 # if right_left == 'L':
+ 811 # XGOEDU.text(self,image,hand_pos(hand_angle),(180,80),1.5,"#33cc00",2)
+ 812 # elif right_left == 'R':
+ 813 # XGOEDU.text(self,image,hand_pos(hand_angle),(50,80),1.5,"#ff0000",2)
+ 814 # ges = hand_pos(hand_angle)
+ 815 # for i in dlandmark:
+ 816 # XGOEDU.circle(self,image,i,3,"#ff9900",-1)
+ 817 # imgok = Image.fromarray(image)
+ 818 # self.display.ShowImage(imgok)
+ 819 if angellist == []:
+ 820 return None
+ 821 else:
+ 822 return angellist
+ 823
+ 824 '''
+ 825 Gesture recognition
+ 826 '''
+ 827 def gestureRecognition(self, target="camera"):
+ 828 """
+ 829 Performs hand gesture recognition on an image or video frame and displays the results on the LCD.
+ 830
+ 831 Parameters:
+ 832 target (str, optional): The source of the image data. Can be "camera" for live camera feed or a file path for an image. Defaults to "camera".
+ 833
+ 834 Returns:
+ 835 tuple or None: A tuple containing the recognized gesture (str) and the center coordinates of the hand if successful, or None if no gesture is recognized.
+ 836 """
+ 837 ges = ''
+ 838 if self.hand == None:
+ 839 self.hand = hands(0, 2, 0.6, 0.5)
+ 840 if target == "camera":
+ 841 self.open_camera()
+ 842 success, image = self.cap.read()
+ 843 else:
+ 844 image = np.array(Image.open(target))
+ 845 image = cv2.flip(image, 1)
+ 846 datas = self.hand.run(image)
+ 847 b, g, r = cv2.split(image)
+ 848 image = cv2.merge((r, g, b))
+ 849 for data in datas:
+ 850 rect = data['rect']
+ 851 right_left = data['right_left']
+ 852 center = data['center']
+ 853 dlandmark = data['dlandmark']
+ 854 hand_angle = data['hand_angle']
+ 855 XGOEDU.rectangle(self, image, rect, "#33cc00", 2)
+ 856 # XGOEDU.text(self,image,right_left,center,2,"#cc0000",5)
+ 857 if right_left == 'L':
+ 858 XGOEDU.text(self, image, hand_pos(hand_angle), (180, 80), 1.5, "#33cc00", 2)
+ 859 elif right_left == 'R':
+ 860 XGOEDU.text(self, image, hand_pos(hand_angle), (50, 80), 1.5, "#ff0000", 2)
+ 861 ges = hand_pos(hand_angle)
+ 862 for i in dlandmark:
+ 863 XGOEDU.circle(self, image, i, 3, "#ff9900", -1)
+ 864 imgok = Image.fromarray(image)
+ 865 self.display.ShowImage(imgok)
+ 866 if ges == '':
+ 867 return None
+ 868 else:
+ 869 return (ges, center)
+ 870
+ 871 '''
+ 872 yolo
+ 873 '''
def yoloFast(self, target="camera"):
    """
    Runs YOLO object detection on one frame and shows the annotated frame on the LCD.

    Parameters:
        target (str, optional): "camera" to grab a frame from the camera; any other value is
            treated as an image path and opened with PIL. Defaults to "camera".

    Returns:
        tuple or None: (class_name, (x, y)) for the last detection returned by the model, where
        (x, y) are the first two entries of that detection's 'xywh' box. None when nothing is
        detected or when no camera frame could be read.
    """
    if self.yolo is None:
        # The detector is built lazily on first use and cached on the instance.
        class_names = [
            'person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat',
            'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
            'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
            'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
            'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
            'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse',
            'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
            'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
            'toothbrush',
        ]
        self.yolo = yoloXgo('/home/pi/model/Model.onnx', class_names, [352, 352], 0.66)

    if target == "camera":
        # Only touch the camera when it is actually the source.
        self.open_camera()
        success, image = self.cap.read()
        if not success or image is None:
            # No frame available: report "nothing detected" instead of crashing in the detector.
            return None
    else:
        image = np.array(Image.open(target))

    datas = self.yolo.run(image)

    # Swap channels 0 and 2 for PIL display (assumes the source frame is BGR), then mirror.
    b, g, r = cv2.split(image)
    image = cv2.flip(cv2.merge((r, g, b)), 1)

    ret = None
    if datas:
        # NOTE(review): detection ran on the un-mirrored frame but boxes are drawn on the
        # mirrored one with unchanged coordinates - confirm whether that is intended.
        for data in datas:
            XGOEDU.rectangle(self, image, data['xywh'], "#33cc00", 2)
            xy = (data['xywh'][0], data['xywh'][1])
            XGOEDU.text(self, image, data['classes'], xy, 1, "#ff0000", 2)
            ret = (data['classes'], xy)

    self.display.ShowImage(Image.fromarray(image))
    return ret
+ 922
+ 923 '''
+ 924 Face detection (coordinate points)
+ 925 '''
def face_detect(self, target="camera"):
    """
    Detects faces (with facial key points) on one frame and shows the annotated frame on the LCD.

    Parameters:
        target (str, optional): "camera" to grab a frame from the camera; any other value is
            treated as an image path and opened with PIL. Defaults to "camera".

    Returns:
        list or None: The 'rect' entry of the last detected face (drawn as [x, y, w, h]), or
        None when no face is detected or no camera frame could be read.
    """
    if self.face is None:
        # The detector is built lazily on first use and cached on the instance.
        self.face = face_detection(0.7)

    if target == "camera":
        self.open_camera()
        success, image = self.cap.read()
        if not success or image is None:
            # No frame available: report "no face" instead of crashing in cv2.split.
            return None
    else:
        image = np.array(Image.open(target))

    # Swap channels 0 and 2 for PIL display (assumes the source frame is BGR), then mirror.
    b, g, r = cv2.split(image)
    image = cv2.flip(cv2.merge((r, g, b)), 1)

    # (caption, key in the detection dict, text row, text colour) for each overlay line.
    overlay_rows = (
        ('lefteye', 'left_eye', 30, (255, 0, 0)),
        ('righteye', 'right_eye', 50, (0, 255, 0)),
        ('nose', 'nose', 70, (0, 0, 255)),
        ('leftear', 'left_ear', 90, (255, 255, 0)),
        ('rightear', 'right_ear', 110, (200, 0, 200)),
    )

    ret = None
    for data in self.face.run(image):
        for caption, key, row, rgb in overlay_rows:
            cv2.putText(image, caption, (10, row), cv2.FONT_HERSHEY_SIMPLEX, 0.7, rgb, 2)
            cv2.putText(image, str(data[key]), (100, row), cv2.FONT_HERSHEY_SIMPLEX, 0.7, rgb, 2)
        XGOEDU.rectangle(self, image, data['rect'], "#33cc00", 2)
        ret = data['rect']

    self.display.ShowImage(Image.fromarray(image))
    return ret
+ 973
+ 974 '''
+ 975 Emotion recognition
+ 976 '''
def emotion(self, target="camera"):
    """
    Classifies the emotion of each detected face on one frame and shows the result on the LCD.

    Faces are found with a Haar cascade on the grayscale frame; each face crop is resized to
    48x48 and passed to the Keras emotion model.

    Parameters:
        target (str, optional): "camera" to grab a frame from the camera; any other value is
            treated as an image path and opened with PIL. Defaults to "camera".

    Returns:
        tuple or None: (label, (x, y)) for the last classified face, where (x, y) is the
        top-left corner of the face box in the un-mirrored frame. None when no face is
        classified or no camera frame could be read.
    """
    if self.classifier is None:
        # Models are loaded lazily on first use and cached on the instance.
        from keras.models import load_model
        self.face_classifier = cv2.CascadeClassifier('/home/pi/model/haarcascade_frontalface_default.xml')
        self.classifier = load_model('/home/pi/model/EmotionDetectionModel.h5')
    class_labels = ['Angry', 'Happy', 'Neutral', 'Sad', 'Surprise']

    if target == "camera":
        self.open_camera()
        success, image = self.cap.read()
        if not success or image is None:
            # No frame available: report "nothing detected" instead of crashing in cvtColor.
            return None
    else:
        image = np.array(Image.open(target))

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = self.face_classifier.detectMultiScale(gray, 1.3, 5)
    if len(faces) > 0:
        from tensorflow.keras.utils import img_to_array

    ret = None
    captions = []  # (label, x, y, w) for every classified face, drawn after mirroring
    for (x, y, w, h) in faces:
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 2)
        roi_gray = cv2.resize(gray[y:y + h, x:x + w], (48, 48), interpolation=cv2.INTER_AREA)
        if np.sum(roi_gray) == 0:
            # An all-black crop carries no information; skip it.
            continue
        roi = img_to_array(roi_gray.astype('float') / 255.0)
        roi = np.expand_dims(roi, axis=0)
        preds = self.classifier.predict(roi)[0]
        label = class_labels[preds.argmax()]
        ret = (label, (x, y))
        captions.append((label, x, y, w))

    # Swap channels 0 and 2 for PIL display (assumes the source frame is BGR), then mirror.
    b, g, r = cv2.split(image)
    image = cv2.flip(cv2.merge((r, g, b)), 1)

    # The text is drawn after mirroring so it stays readable; the box's left edge in the
    # mirrored frame is (width - x - w).
    frame_width = image.shape[1]
    for label, x, y, w in captions:
        label_position = (int(frame_width - x - w), int(y))
        cv2.putText(image, label, label_position, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)

    self.display.ShowImage(Image.fromarray(image))
    return ret
+1030
+1031 '''
+1032 Age and gender detection
+1033 '''
def agesex(self, target="camera"):
    """
    Estimates gender and age bracket for each detected face and shows the result on the LCD.

    Parameters:
        target (str, optional): "camera" to grab a frame from the camera; any other value is
            treated as an image path and opened with PIL. Defaults to "camera".

    Returns:
        tuple or None: (gender, age, (x, y)) for the last processed face, where (x, y) is the
        top-left corner of its bounding box in the mirrored frame. None when no face is
        processed or no camera frame could be read.
    """
    MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
    ageList = ['(0-2)', '(4-6)', '(8-12)', '(15-20)', '(25-32)', '(38-43)', '(48-53)', '(60-100)']
    genderList = ['Male', 'Female']
    padding = 20

    if target == "camera":
        self.open_camera()
        success, image = self.cap.read()
        if not success or image is None:
            # No frame available: report "nothing detected" instead of crashing in cv2.flip.
            return None
    else:
        image = np.array(Image.open(target))

    if self.agesexmark is None:
        # The three networks are loaded lazily on first use and cached on the instance.
        faceProto = "/home/pi/model/opencv_face_detector.pbtxt"
        faceModel = "/home/pi/model/opencv_face_detector_uint8.pb"
        ageProto = "/home/pi/model/age_deploy.prototxt"
        ageModel = "/home/pi/model/age_net.caffemodel"
        genderProto = "/home/pi/model/gender_deploy.prototxt"
        genderModel = "/home/pi/model/gender_net.caffemodel"
        self.ageNet = cv2.dnn.readNet(ageModel, ageProto)
        self.genderNet = cv2.dnn.readNet(genderModel, genderProto)
        self.faceNet = cv2.dnn.readNet(faceModel, faceProto)
        self.agesexmark = True

    image = cv2.flip(image, 1)
    frameFace, bboxes = getFaceBox(self.faceNet, image)

    ret = None
    for bbox in bboxes:
        # Crop the face with some padding, clamped to the frame.
        face = image[max(0, bbox[1] - padding):min(bbox[3] + padding, image.shape[0] - 1),
                     max(0, bbox[0] - padding):min(bbox[2] + padding, image.shape[1] - 1)]
        if face.size == 0:
            # A box lying outside the frame yields an empty crop that blobFromImage rejects.
            continue
        blob = cv2.dnn.blobFromImage(face, 1.0, (227, 227), MODEL_MEAN_VALUES, swapRB=False)
        self.genderNet.setInput(blob)
        gender = genderList[self.genderNet.forward()[0].argmax()]
        self.ageNet.setInput(blob)
        age = ageList[self.ageNet.forward()[0].argmax()]
        label = "{},{}".format(gender, age)
        cv2.putText(frameFace, label, (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                    (0, 255, 255), 2, cv2.LINE_AA)
        ret = (gender, age, (bbox[0], bbox[1]))

    # Swap channels 0 and 2 for PIL display (assumes the source frame is BGR).
    b, g, r = cv2.split(frameFace)
    frameFace = cv2.merge((r, g, b))
    self.display.ShowImage(Image.fromarray(frameFace))
    return ret
+1092
def rectangle(self, frame, z, colors, size):
    """
    Outlines a box on a frame with cv2.rectangle.

    Parameters:
        frame (numpy.ndarray): Image to draw on.
        z (sequence): Box as (x, y, w, h): top-left corner followed by width and height.
        colors (str): Colour string handed to the module-level color() helper (e.g. "#FF0000").
        size (int): Line thickness passed to cv2.rectangle.

    Returns:
        numpy.ndarray: The value returned by cv2.rectangle for the frame.
    """
    top_left = (int(z[0]), int(z[1]))
    # Opposite corner is derived from the origin plus width/height.
    bottom_right = (int(z[0] + z[2]), int(z[1] + z[3]))
    return cv2.rectangle(frame, top_left, bottom_right, color(colors), size)
+1108
def circle(self, frame, xy, rad, colors, tk):
    """
    Draws a circle on a frame with cv2.circle.

    Parameters:
        frame (numpy.ndarray): Image to draw on.
        xy (tuple): Centre of the circle as (x, y).
        rad (int): Radius of the circle.
        colors (str): Colour string handed to the module-level color() helper.
        tk (int): Outline thickness passed to cv2.circle; -1 fills the circle.

    Returns:
        numpy.ndarray: The value returned by cv2.circle for the frame.
    """
    shade = color(colors)
    drawn = cv2.circle(frame, xy, rad, shade, tk)
    return drawn
+1125
def text(self, frame, text, xy, font_size, colors, size):
    """
    Writes a string onto a frame with cv2.putText using the Hershey Simplex font.

    Parameters:
        frame (numpy.ndarray): Image to draw on.
        text (str): String to render.
        xy (tuple): Anchor point (x, y) passed to cv2.putText as the text origin.
        font_size (float): Font scale factor.
        colors (str): Colour string handed to the module-level color() helper.
        size (int): Stroke thickness of the glyphs.

    Returns:
        numpy.ndarray: The value returned by cv2.putText for the frame.
    """
    shade = color(colors)
    drawn = cv2.putText(frame, text, xy, cv2.FONT_HERSHEY_SIMPLEX, font_size, shade, size)
    return drawn
+1143
def SpeechRecognition(self, seconds=3):
    """
    Records audio and sends it to the Baidu ASR HTTP API for recognition.

    The recording is made with xgoAudioRecord into "recog.wav" and read back from
    /home/pi/xgoMusic/, base64-encoded and posted as JSON.

    Parameters:
        seconds (int, optional): Length of the recording in seconds. Defaults to 3.

    Returns:
        str: The first recognition result, or 'error!' when the request fails or the
        response does not contain a result.

    Raises:
        DemoError: If the recorded audio file is empty.
    """
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import HTTPError
    from urllib.error import URLError

    AUDIO_FILE = 'recog.wav'
    FORMAT = AUDIO_FILE[-3:]
    CUID = '123456PYTHON'
    RATE = 16000
    DEV_PID = 1537
    ASR_URL = 'http://vop.baidu.com/server_api'

    self.xgoAudioRecord(filename="recog", seconds=seconds)
    token = self.fetch_token()

    path = "/home/pi/xgoMusic/"
    with open(path + AUDIO_FILE, 'rb') as speech_file:
        speech_data = speech_file.read()

    length = len(speech_data)
    if length == 0:
        raise DemoError('file %s length read 0 bytes' % AUDIO_FILE)

    params = {'dev_pid': DEV_PID,
              'format': FORMAT,
              'rate': RATE,
              'token': token,
              'cuid': CUID,
              'channel': 1,
              'speech': str(base64.b64encode(speech_data), 'utf-8'),
              'len': length
              }
    post_data = json.dumps(params, sort_keys=False)
    req = Request(ASR_URL, post_data.encode('utf-8'))
    req.add_header('Content-Type', 'application/json')

    begin = time.perf_counter()
    try:
        with urlopen(req) as response:
            result_str = response.read()
        print("Request time cost %f" % (time.perf_counter() - begin))
    except HTTPError as err:
        # HTTPError carries a status code and a readable body; plain URLError does not.
        print('asr http response http code : ' + str(err.code))
        result_str = err.read()
    except URLError as err:
        # Connection-level failure (DNS, refused, unreachable): there is no body to parse.
        print('asr request failed : ' + str(err.reason))
        return 'error!'

    try:
        text = json.loads(str(result_str, 'utf-8'))['result'][0]
    except (ValueError, KeyError, IndexError, TypeError):
        # Undecodable/invalid JSON (both ValueError subclasses) or a payload without results.
        text = 'error!'
    return text
+1207
def SpeechSynthesis(self, texts):
    """
    Converts text to speech with the Baidu TTS HTTP API and plays the result.

    The audio is saved as /home/pi/xgoMusic/result.wav and played with xgoSpeaker. When the
    request fails or the response is not audio, the response body is saved as error.txt
    instead and nothing is played.

    Parameters:
        texts (str): The text to be synthesized.
    """
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import HTTPError
    from urllib.error import URLError
    from urllib.parse import urlencode
    from urllib.parse import quote_plus

    TEXT = texts
    PER = 0
    SPD = 5
    PIT = 5
    VOL = 5
    AUE = 6
    FORMATS = {3: "mp3", 4: "pcm", 5: "pcm", 6: "wav"}
    FORMAT = FORMATS[AUE]
    CUID = "123456PYTHON"
    TTS_URL = 'http://tsn.baidu.com/text2audio'

    token = self.fetch_token()
    # The text is quoted here and again by urlencode() below; both encodings are kept
    # exactly as the request was built before.
    tex = quote_plus(TEXT)
    print(tex)
    params = {'tok': token, 'tex': tex, 'per': PER, 'spd': SPD, 'pit': PIT, 'vol': VOL, 'aue': AUE, 'cuid': CUID,
              'lan': 'zh', 'ctp': 1}

    data = urlencode(params)
    print('test on Web Browser' + TTS_URL + '?' + data)

    req = Request(TTS_URL, data.encode('utf-8'))
    try:
        with urlopen(req) as response:
            result_str = response.read()
            headers = {name.lower(): value for name, value in response.headers.items()}
        # Anything that is not served as audio/* is an API error payload.
        has_error = 'audio/' not in headers.get('content-type', '')
    except HTTPError as err:
        # HTTPError carries a status code and a readable body; plain URLError does not.
        print('tts http response http code : ' + str(err.code))
        result_str = err.read()
        has_error = True
    except URLError as err:
        # Connection-level failure: keep the reason so it still lands in error.txt.
        print('tts request failed : ' + str(err.reason))
        result_str = str(err.reason).encode('utf-8')
        has_error = True

    path = "/home/pi/xgoMusic/"
    save_file = "error.txt" if has_error else 'result.' + FORMAT
    with open(path + save_file, 'wb') as of:
        of.write(result_str)

    if has_error:
        print("tts api error:" + str(result_str, 'utf-8', errors='replace'))

    print("result saved as :" + save_file)

    if not has_error:
        # Only play audio that was actually produced by this request.
        self.xgoSpeaker(save_file)
+1269
def cv2AddChineseText(self, img, text, position, textColor=(0, 255, 0), textSize=30):
    """
    Renders text with a TrueType font through PIL and returns the result as an OpenCV array.

    The font file /home/pi/model/msyh.ttc is used so that characters unsupported by
    cv2.putText (such as Chinese) can be drawn.

    Parameters:
        img (numpy.ndarray or PIL image): Image to draw on. An ndarray is converted with
            COLOR_BGR2RGB into a PIL image first; other values are drawn on directly.
        text (str): Text to draw.
        position (tuple): (x, y) position passed to PIL's draw.text.
        textColor (tuple, optional): Fill colour passed to PIL. Defaults to (0, 255, 0).
        textSize (int, optional): Font size. Defaults to 30.

    Returns:
        numpy.ndarray: The drawn image converted back with COLOR_RGB2BGR.
    """
    if isinstance(img, np.ndarray):
        rgb_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(rgb_pixels)
    canvas = ImageDraw.Draw(img)
    font = ImageFont.truetype("/home/pi/model/msyh.ttc", textSize, encoding="utf-8")
    canvas.text(position, text, textColor, font=font)
    drawn_pixels = np.asarray(img)
    return cv2.cvtColor(drawn_pixels, cv2.COLOR_RGB2BGR)
+1291
def QRRecognition(self, target="camera"):
    """
    Decodes QR codes / barcodes on one frame with pyzbar and shows the annotated frame on the LCD.

    Parameters:
        target (str, optional): "camera" to grab a frame from the camera; any other value is
            treated as a file name inside /home/pi/xgoPictures/. Defaults to "camera".

    Returns:
        list: Decoded payload strings, one per detected code; empty when nothing is decoded
        or no camera frame could be read.
    """
    import pyzbar.pyzbar as pyzbar

    if target == "camera":
        self.open_camera()
        success, img = self.cap.read()
        if not success or img is None:
            # No frame available: report "nothing decoded" instead of crashing in pyzbar.
            return []
    else:
        path = "/home/pi/xgoPictures/"
        img = np.array(Image.open(path + target))

    result = []
    for barcode in pyzbar.decode(img):
        barcodeData = barcode.data.decode("utf-8")
        result.append(barcodeData)
        text = "{} ({})".format(barcodeData, barcode.type)
        # Every caption is drawn at the same spot, so later codes overwrite earlier ones.
        img = self.cv2AddChineseText(img, text, (10, 30), (0, 255, 0), 30)

    # Swap channels 0 and 2 for PIL display (assumes the source frame is BGR).
    b, g, r = cv2.split(img)
    img = cv2.merge((r, g, b))
    self.display.ShowImage(Image.fromarray(img))
    return result
+1327
def ColorRecognition(self, target="camera", mode='R'):
    """
    Finds the largest blob of a preset colour on one frame and shows the result on the LCD.

    The frame is converted to HSV, thresholded with the range for `mode`, cleaned up with
    erode/dilate/blur, and the largest external contour is enclosed in a circle.

    Parameters:
        target (str, optional): "camera" to grab a frame from the camera; any other value is
            treated as a file name inside /home/pi/xgoPictures/. Defaults to "camera".
        mode (str, optional): Colour to look for: 'R' (red), 'G' (green), 'B' (blue) or
            'Y' (yellow). Defaults to 'R'.

    Returns:
        tuple: ((x, y), radius) of the minimum enclosing circle of the largest blob, or
        ((0, 0), 0) when no blob is found or no camera frame could be read.

    Raises:
        ValueError: If `mode` is not one of the supported colour codes.
    """
    # HSV (lower, upper) thresholds for each supported colour code.
    hsv_ranges = {
        'R': ([0, 43, 46], [10, 255, 255]),
        'G': ([35, 43, 46], [77, 255, 255]),
        'B': ([100, 43, 46], [124, 255, 255]),
        'Y': ([26, 43, 46], [34, 255, 255]),
    }
    if mode not in hsv_ranges:
        # Previously this surfaced later as an UnboundLocalError on color_lower.
        raise ValueError("mode must be one of 'R', 'G', 'B', 'Y', got %r" % (mode,))
    color_lower = np.array(hsv_ranges[mode][0])
    color_upper = np.array(hsv_ranges[mode][1])

    color_x = 0
    color_y = 0
    color_radius = 0

    if target == "camera":
        self.open_camera()
        success, frame = self.cap.read()
        if not success or frame is None:
            # No frame available: report "nothing found" instead of crashing in cvtColor.
            return ((color_x, color_y), color_radius)
    else:
        path = "/home/pi/xgoPictures/"
        frame = np.array(Image.open(path + target))

    # NOTE(review): a Gaussian-blurred copy of the frame used to be computed here but was
    # never used; thresholding has always run on the raw frame, which is kept unchanged.
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, color_lower, color_upper)
    mask = cv2.erode(mask, None, iterations=2)
    mask = cv2.dilate(mask, None, iterations=2)
    mask = cv2.GaussianBlur(mask, (3, 3), 0)
    # Index -2 picks the contour list from findContours' return tuple.
    cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if len(cnts) > 0:
        cnt = max(cnts, key=cv2.contourArea)
        (color_x, color_y), color_radius = cv2.minEnclosingCircle(cnt)
        cv2.circle(frame, (int(color_x), int(color_y)), int(color_radius), (255, 0, 255), 2)
        cv2.putText(frame, "X:%d, Y%d" % (int(color_x), int(color_y)), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                    (255, 255, 0), 3)

    # Swap channels 0 and 2 for PIL display (assumes the source frame is BGR).
    b, g, r = cv2.split(frame)
    img = cv2.merge((r, g, b))
    self.display.ShowImage(Image.fromarray(img))

    return ((color_x, color_y), color_radius)
+1382
def cap_color_mask(self, position=None, scale=25, h_error=20, s_limit=[90, 255], v_limit=[90, 230]):
    """
    Interactively samples a colour from the camera and returns an HSV range around it.

    The live feed is shown on the LCD with a square marking the sampled region. Pressing
    button B samples the mean hue of that region and returns the range; pressing button C
    aborts.

    Parameters:
        position (list, optional): Top-left corner (x, y) of the sampled square. Defaults to [160, 100].
        scale (int, optional): Side length of the sampled square in pixels. Defaults to 25.
        h_error (int, optional): Hue tolerance added on both sides of the sampled hue. Defaults to 20.
        s_limit (list, optional): [lower, upper] saturation limits. Defaults to [90, 255].
        v_limit (list, optional): [lower, upper] value (brightness) limits. Defaults to [90, 230].

    Returns:
        list or None: [color_lower, color_upper], each an [h, s, v] list, when button B is
        pressed; None when button C is pressed first.
    """
    # s_limit / v_limit keep their list defaults for interface compatibility; they are only read.
    if position is None:
        position = [160, 100]
    x0, y0 = position[0], position[1]

    self.open_camera()
    while True:
        if self.xgoButton("c"):
            return None
        success, frame = self.cap.read()
        if not success or frame is None:
            # Skip frames that could not be grabbed; button C still ends the loop.
            continue

        if self.xgoButton("b"):
            # The HSV conversion is only needed at the moment the sample is taken.
            b, g, r = cv2.split(frame)
            swapped = cv2.merge((r, g, b))
            hsv = cv2.cvtColor(swapped, cv2.COLOR_BGR2HSV)
            hue = hsv[:, :, 0]
            mean_hue = np.mean(hue[y0:y0 + scale, x0:x0 + scale])
            color_lower = [max(mean_hue - h_error, 0), s_limit[0], v_limit[0]]
            color_upper = [min(mean_hue + h_error, 255), s_limit[1], v_limit[1]]
            return [color_lower, color_upper]

        # Preview: mark the sampled square and prompt for the button press.
        cv2.rectangle(frame, (x0, y0), (x0 + scale, y0 + scale), (255, 255, 255), 2)
        cv2.putText(frame, 'press button B', (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        b, g, r = cv2.split(frame)
        img = cv2.merge((r, g, b))
        self.display.ShowImage(Image.fromarray(img))
+1425
def filter_img(self, frame, color):
    """
    Keeps only the pixels of a frame whose HSV value falls inside a colour range.

    Parameters:
        frame (numpy.ndarray): Input image frame.
        color (list or other): Either a list [lower, upper] of HSV bounds, or a value that is
            handed to the module-level get_color_mask() helper to look the bounds up.

    Returns:
        numpy.ndarray: The frame with every pixel outside the range masked out (bitwise AND
        of the frame with itself under the range mask).
    """
    # Channels 0 and 2 are swapped before the HSV conversion.
    b, g, r = cv2.split(frame)
    hsv = cv2.cvtColor(cv2.merge((r, g, b)), cv2.COLOR_BGR2HSV)

    if not isinstance(color, list):
        # NOTE(review): unpacked as (upper, lower), the reverse of the list form - confirm
        # against get_color_mask's return order.
        color_upper, color_lower = get_color_mask(color)
    else:
        color_lower, color_upper = np.array(color[0]), np.array(color[1])

    in_range = cv2.inRange(hsv, color_lower, color_upper)
    return cv2.bitwise_and(frame, frame, mask=in_range)
+1448
def BallRecognition(self, color_mask, target="camera", p1=36, p2=15, minR=6, maxR=35):
    """
    Looks for a single ball of a given colour on one frame using the Hough circle transform.

    The frame is first reduced to the pixels matching `color_mask` (see filter_img), then
    median-blurred, converted to grayscale and searched for circles. The frame is shown on
    the LCD, with the circle drawn when exactly one is found.

    Parameters:
        color_mask (list or other): Colour selection forwarded unchanged to filter_img.
        target (str, optional): "camera" to grab a frame from the camera; any other value is
            treated as a file name inside /home/pi/xgoPictures/. Defaults to "camera".
        p1 (int, optional): `param1` of cv2.HoughCircles. Defaults to 36.
        p2 (int, optional): `param2` of cv2.HoughCircles. Defaults to 15.
        minR (int, optional): Minimum circle radius. Defaults to 6.
        maxR (int, optional): Maximum circle radius. Defaults to 35.

    Returns:
        tuple: (x, y, radius) of the ball, or (0, 0, 0) when not exactly one circle is found
        or no camera frame could be read.
    """
    x = y = ra = 0
    if target == "camera":
        self.open_camera()
        success, image = self.cap.read()
        if not success or image is None:
            # No frame available: report "nothing found" instead of crashing in filter_img.
            return x, y, ra
    else:
        path = "/home/pi/xgoPictures/"
        image = np.array(Image.open(path + target))

    frame_mask = self.filter_img(image, color_mask)

    img = cv2.medianBlur(frame_mask, 5)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    circles = cv2.HoughCircles(img, cv2.HOUGH_GRADIENT, 1, 20, param1=p1, param2=p2, minRadius=minR, maxRadius=maxR)

    # Swap channels 0 and 2 for PIL display (assumes the source frame is BGR).
    b, g, r = cv2.split(image)
    image = cv2.merge((r, g, b))

    # Only an unambiguous detection (exactly one circle) is accepted.
    if circles is not None and len(circles[0]) == 1:
        param = circles[0][0]
        x, y, ra = int(param[0]), int(param[1]), int(param[2])
        cv2.circle(image, (x, y), ra, (255, 255, 255), 2)
        cv2.circle(image, (x, y), 2, (255, 255, 255), 2)

    self.display.ShowImage(Image.fromarray(image))
    return x, y, ra
+1488
class DemoError(Exception):
    """
    Error raised by the Baidu speech helpers, e.g. when the recorded audio file is empty.
    """
+1494
class hands():
    """
    A class for hand detection and landmark estimation using MediaPipe.
    """
    # Landmark indices used for the palm centre: the two wrist-side points (0, 1) and the
    # bases of the index (5), middle (9), ring (13) and pinky (17) fingers.
    _PALM_INDICES = (0, 1, 5, 9, 13, 17)
    # Per finger (thumb, index, middle, ring, pinky): the joint compared against the wrist,
    # followed by the last two landmarks of the finger.
    _FINGER_JOINTS = ((2, 3, 4), (6, 7, 8), (10, 11, 12), (14, 15, 16), (18, 19, 20))

    def __init__(self, model_complexity, max_num_hands, min_detection_confidence, min_tracking_confidence):
        """
        Initializes the hands object.

        Parameters:
            model_complexity (int): Complexity of the hand landmark model: 0 or 1.
            max_num_hands (int): Maximum number of hands to detect.
            min_detection_confidence (float): Minimum confidence value ([0.0, 1.0]) for hand detection to be considered successful.
            min_tracking_confidence (float): Minimum confidence value ([0.0, 1.0]) for the hand landmarks to be considered tracked successfully.
        """
        import mediapipe as mp
        self.model_complexity = model_complexity
        self.max_num_hands = max_num_hands
        self.min_detection_confidence = min_detection_confidence
        self.min_tracking_confidence = min_tracking_confidence
        self.mp_hands = mp.solutions.hands
        # model_complexity is forwarded so the requested model is actually the one used.
        self.hands = self.mp_hands.Hands(
            model_complexity=self.model_complexity,
            max_num_hands=self.max_num_hands,
            min_detection_confidence=self.min_detection_confidence,
            min_tracking_confidence=self.min_tracking_confidence,
        )

    @staticmethod
    def _to_pixel(landmark, image_width, image_height):
        """Scales a normalized landmark to pixel coordinates, capped at the last row/column."""
        pixel_x = min(int(landmark.x * image_width), image_width - 1)
        pixel_y = min(int(landmark.y * image_height), image_height - 1)
        return pixel_x, pixel_y

    def run(self, cv_img):
        """
        Processes an image and returns hand landmarks and other related information.

        Parameters:
            cv_img (numpy.ndarray): The input image; it is converted with COLOR_BGR2RGB before
                being handed to MediaPipe.

        Returns:
            list: One dictionary per detected hand with the keys 'center' (palm centre),
            'rect' ([x, y, w, h]), 'dlandmark' (landmark pixel coordinates), 'hand_angle'
            (five finger angles) and 'right_left' (first letter of the handedness label).
        """
        rgb_image = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
        results = self.hands.process(rgb_image)
        hf = []
        if results.multi_hand_landmarks is None:
            return hf
        for hand_landmarks, handedness in zip(results.multi_hand_landmarks,
                                              results.multi_handedness):
            # The helpers only read the image's shape, so the input frame is passed as-is.
            cx, cy = self.calc_palm_moment(cv_img, hand_landmarks)
            rect = self.calc_bounding_rect(cv_img, hand_landmarks)
            points, side = self.dlandmarks(cv_img, hand_landmarks, handedness)
            hf.append({'center': (cx, cy), 'rect': rect, 'dlandmark': points,
                       'hand_angle': self.hand_angle(points), 'right_left': side})
        return hf

    def calc_palm_moment(self, image, landmarks):
        """
        Calculates the moment (center) of the palm.

        Parameters:
            image (numpy.ndarray): The input image (only its shape is used).
            landmarks (mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList): Hand landmarks.

        Returns:
            tuple: The (x, y) coordinates of the palm's center, or (0, 0) when the moment
            m00 of the palm points is zero.
        """
        image_width, image_height = image.shape[1], image.shape[0]
        palm_points = [
            self._to_pixel(landmark, image_width, image_height)
            for index, landmark in enumerate(landmarks.landmark)
            if index in self._PALM_INDICES
        ]
        # int32 is given explicitly: it is the integer point type cv2.moments works on.
        palm_array = np.array(palm_points, dtype=np.int32).reshape(-1, 2)
        M = cv2.moments(palm_array)
        cx, cy = 0, 0
        if M['m00'] != 0:
            cx = int(M['m10'] / M['m00'])
            cy = int(M['m01'] / M['m00'])
        return cx, cy

    def calc_bounding_rect(self, image, landmarks):
        """
        Calculates the bounding rectangle of the hand.

        Parameters:
            image (numpy.ndarray): The input image (only its shape is used).
            landmarks (mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList): Hand landmarks.

        Returns:
            list: A list [x, y, w, h] representing the bounding rectangle.
        """
        image_width, image_height = image.shape[1], image.shape[0]
        points = [
            self._to_pixel(landmark, image_width, image_height)
            for landmark in landmarks.landmark
        ]
        landmark_array = np.array(points, dtype=np.int32).reshape(-1, 2)
        x, y, w, h = cv2.boundingRect(landmark_array)
        return [x, y, w, h]

    def dlandmarks(self, image, landmarks, handedness):
        """
        Extracts and returns the coordinates of hand landmarks.

        Parameters:
            image (numpy.ndarray): The input image (only its shape is used).
            landmarks (mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList): Hand landmarks.
            handedness (mediapipe.framework.formats.classification_pb2.ClassificationList): Handedness information.

        Returns:
            tuple: A list of (x, y) landmark pixel coordinates and the first letter of the
            handedness label (e.g. 'R' or 'L'). Landmarks with negative visibility or
            presence are left out of the list.
        """
        image_width, image_height = image.shape[1], image.shape[0]
        landmark_point = [
            self._to_pixel(landmark, image_width, image_height)
            for landmark in landmarks.landmark
            if not (landmark.visibility < 0 or landmark.presence < 0)
        ]
        return landmark_point, handedness.classification[0].label[0]

    def vector_2d_angle(self, v1, v2):
        """
        Calculates the angle between two 2D vectors.

        Parameters:
            v1 (tuple): The first vector (x, y).
            v2 (tuple): The second vector (x, y).

        Returns:
            float: The angle between the two vectors in degrees, in [0, 180]. 180 is returned
            when either vector has zero length.
        """
        v1_x, v1_y = v1[0], v1[1]
        v2_x, v2_y = v2[0], v2[1]
        norm_product = ((v1_x ** 2 + v1_y ** 2) ** 0.5) * ((v2_x ** 2 + v2_y ** 2) ** 0.5)
        if norm_product == 0:
            # The angle is undefined for a zero-length vector; 180 is the established fallback.
            return 180
        cosine = (v1_x * v2_x + v1_y * v2_y) / norm_product
        # Rounding can push the cosine of (anti)parallel vectors just outside [-1, 1], which
        # math.acos rejects; clamp instead of misreporting such vectors as 180 degrees.
        cosine = max(-1.0, min(1.0, cosine))
        return math.degrees(math.acos(cosine))

    def hand_angle(self, hand_):
        """
        Calculates the angles of the fingers.

        For each finger the angle is measured between the vector (wrist - finger joint) and
        the vector (second-to-last landmark - fingertip).

        Parameters:
            hand_ (list): A list of hand landmark coordinates, indexed 0..20.

        Returns:
            list: A list of finger angles (thumb, index, middle, ring, pinky).
        """
        wrist_x, wrist_y = int(hand_[0][0]), int(hand_[0][1])
        angle_list = []
        for joint, before_tip, tip in self._FINGER_JOINTS:
            joint_to_wrist = (wrist_x - int(hand_[joint][0]), wrist_y - int(hand_[joint][1]))
            tip_to_previous = (int(hand_[before_tip][0]) - int(hand_[tip][0]),
                               int(hand_[before_tip][1]) - int(hand_[tip][1]))
            angle_list.append(self.vector_2d_angle(joint_to_wrist, tip_to_previous))
        return angle_list
+1693
class yoloXgo():
    """
    Object detector that runs a YOLO (You Only Look Once) ONNX model through ONNX Runtime.
    """

    def __init__(self, model, classes, inputwh, thresh):
        """
        Creates the inference session and stores the detector settings.

        Parameters:
        model (str): The path to the ONNX model file.
        classes (list): A list of class names, indexed by the model's class index.
        inputwh (list): A list [width, height] representing the input size of the model.
        thresh (float): The confidence threshold for object detection.
        """
        # Imported lazily so the module can be loaded without onnxruntime installed.
        import onnxruntime
        self.session = onnxruntime.InferenceSession(model)
        self.input_width, self.input_height = inputwh[0], inputwh[1]
        self.thresh = thresh
        self.classes = classes

    def sigmoid(self, x):
        """
        Computes the logistic sigmoid, 1 / (1 + e^-x).

        Parameters:
        x (float or numpy.ndarray): The input value.

        Returns:
        float or numpy.ndarray: The sigmoid of the input.
        """
        denominator = 1 + np.exp(-x)
        return 1. / denominator

    def tanh(self, x):
        """
        Computes the hyperbolic tangent as 2 / (1 + e^-2x) - 1.

        Parameters:
        x (float or numpy.ndarray): The input value.

        Returns:
        float or numpy.ndarray: The hyperbolic tangent of the input.
        """
        denominator = 1 + np.exp(-2 * x)
        return 2. / denominator - 1

    def preprocess(self, src_img, size):
        """
        Converts an image into the float32 tensor fed to the model.

        Parameters:
        src_img (numpy.ndarray): The input image.
        size (list): A list [width, height] representing the target size.

        Returns:
        numpy.ndarray: The preprocessed image data with shape (1, 3, height, width), scaled by 1/255.
        """
        target_w, target_h = size[0], size[1]
        resized = cv2.resize(src_img, (target_w, target_h), interpolation=cv2.INTER_AREA)
        # HWC -> CHW, then add the batch dimension and scale the pixel values.
        channels_first = resized.transpose(2, 0, 1)
        scaled = channels_first.reshape((1, 3, target_h, target_w)) / 255
        return scaled.astype('float32')

    def nms(self, dets, thresh=0.45):
        """
        Performs Non-Maximum Suppression (NMS) to filter out overlapping bounding boxes.

        Boxes are visited from highest to lowest score; each visited box is kept and
        every remaining box whose IoU with it exceeds the threshold is discarded.
        The class index is not taken into account.

        Parameters:
        dets (numpy.ndarray): An array of bounding boxes with shape (N, 6), where each box is [x1, y1, x2, y2, score, class_index].
        thresh (float, optional): The IoU (Intersection over Union) threshold for suppression. Defaults to 0.45.

        Returns:
        list: The kept boxes in descending score order, each as [x1, y1, x2, y2, score, class_index].
        """
        left, top, right, bottom, scores = (dets[:, column] for column in range(5))
        # The "+ 1" treats the coordinates as inclusive pixel indices.
        areas = (right - left + 1) * (bottom - top + 1)
        order = scores.argsort()[::-1]

        kept = []
        while order.size > 0:
            best, rest = order[0], order[1:]
            kept.append(best)

            # Intersection rectangle between the best box and each remaining box
            inter_left = np.maximum(left[best], left[rest])
            inter_top = np.maximum(top[best], top[rest])
            inter_right = np.minimum(right[best], right[rest])
            inter_bottom = np.minimum(bottom[best], bottom[rest])

            inter_w = np.maximum(0.0, inter_right - inter_left + 1)
            inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
            intersection = inter_w * inter_h

            iou = intersection / (areas[best] + areas[rest] - intersection)

            # Only boxes that overlap the best box weakly enough survive to the next round.
            order = rest[iou <= thresh]

        return [dets[index].tolist() for index in kept]

    def run(self, img, ):
        """
        Runs object detection on an image using the YOLO model.

        Parameters:
        img (numpy.ndarray): The input image with three dimensions (height, width, channels).

        Returns:
        list or bool: A list of dictionaries with keys 'classes' (class name), 'score'
        (confidence formatted with two decimals, as a string) and 'xywh'
        ([x, y, width, height] in input-image pixels). Returns False if no objects are detected.
        """
        # Original size of the input image, used to scale the boxes back
        img_h, img_w, _ = img.shape

        blob = self.preprocess(img, [self.input_width, self.input_height])

        # Model inference; the first output of the first batch item is the feature map
        feed_name = self.session.get_inputs()[0].name
        outputs = self.session.run([], {feed_name: blob})
        # CHW -> HWC so that each grid cell is a contiguous vector of predictions
        grid = outputs[0][0].transpose(1, 2, 0)
        grid_h, grid_w = grid.shape[0], grid.shape[1]

        candidates = []
        for row, cells in enumerate(grid):
            for col, cell in enumerate(cells):
                # Layout per cell: [objectness, x offset, y offset, width, height, class scores...]
                class_scores = cell[5:]
                objectness, best_class_score = cell[0], class_scores.max()
                score = (objectness ** 0.6) * (best_class_score ** 0.4)

                if not score > self.thresh:
                    continue

                cls_index = np.argmax(class_scores)
                # Offset of the box centre relative to the grid cell
                offset_x, offset_y = self.tanh(cell[1]), self.tanh(cell[2])
                # Box size normalised to the image
                norm_w, norm_h = self.sigmoid(cell[3]), self.sigmoid(cell[4])
                # Box centre normalised to the image
                centre_x = (col + offset_x) / grid_w
                centre_y = (row + offset_y) / grid_h

                # centre/size -> corner coordinates in input-image pixels
                box_left = int((centre_x - 0.5 * norm_w) * img_w)
                box_top = int((centre_y - 0.5 * norm_h) * img_h)
                box_right = int((centre_x + 0.5 * norm_w) * img_w)
                box_bottom = int((centre_y + 0.5 * norm_h) * img_h)

                candidates.append([box_left, box_top, box_right, box_bottom, score, cls_index])

        if not candidates:
            return False

        detections = []
        for box in self.nms(np.array(candidates)):
            box_left, box_top, box_right, box_bottom = (int(value) for value in box[:4])
            detections.append({
                'classes': self.classes[int(box[5])],
                'score': '%.2f' % box[4],
                'xywh': [box_left, box_top, box_right - box_left, box_bottom - box_top],
            })
        return detections
+1878
class face_detection():
    """
    A class for face detection using MediaPipe.
    """

    def __init__(self, min_detection_confidence):
        """
        Initializes the face_detection object.

        Parameters:
        min_detection_confidence (float): Minimum confidence value ([0.0, 1.0]) for face detection to be considered successful.
        """
        # Imported lazily so the module can be loaded without mediapipe installed.
        import mediapipe as mp
        # Stored for callers that read it; it is not forwarded to the detector here.
        self.model_selection = 0
        self.min_detection_confidence = min_detection_confidence
        self.mp_face_detection = mp.solutions.face_detection
        self.face_detection = self.mp_face_detection.FaceDetection(
            min_detection_confidence=self.min_detection_confidence,
        )

    def run(self, cv_img):
        """
        Performs face detection on an image.

        Parameters:
        cv_img (numpy.ndarray): The input image in OpenCV's BGR channel order.

        Returns:
        list: A list of dictionaries, one per detected face (see draw_detection for the keys). Empty when no face is found.
        """
        # The detector is given the RGB conversion. Previously the conversion
        # was computed but the unconverted BGR frame was passed to process().
        image = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
        results = self.face_detection.process(image)
        if results.detections is None:
            return []
        return [self.draw_detection(image, detection) for detection in results.detections]

    def draw_detection(self, image, detection):
        """
        Extracts face detection information and returns it as a dictionary.

        Despite its name, this method draws nothing. It converts the relative
        coordinates of the detection to pixel coordinates of the given image and
        leaves the detection object unmodified.

        Parameters:
        image (numpy.ndarray): The image the detection refers to; only its width and height are used.
        detection (mediapipe.framework.formats.detection_pb2.Detection): Face detection result.

        Returns:
        dict: A dictionary with keys 'id' (label ID), 'score' (confidence rounded to 3 decimals),
        'rect' ([x, y, width, height] in pixels) and the pixel coordinates (x, y) of
        'right_eye', 'left_eye', 'nose', 'mouth', 'right_ear' and 'left_ear'.
        """
        image_width, image_height = image.shape[1], image.shape[0]
        location = detection.location_data
        bbox = location.relative_bounding_box

        data = {
            'id': detection.label_id[0],
            'score': round(detection.score[0], 3),
            'rect': [
                int(bbox.xmin * image_width),
                int(bbox.ymin * image_height),
                int(bbox.width * image_width),
                int(bbox.height * image_height),
            ],
        }

        # Result key for each entry of relative_keypoints, in index order.
        keypoint_names = ('right_eye', 'left_eye', 'nose', 'mouth', 'right_ear', 'left_ear')
        keypoints = location.relative_keypoints
        for index, name in enumerate(keypoint_names):
            keypoint = keypoints[index]
            data[name] = (int(keypoint.x * image_width), int(keypoint.y * image_height))
        return data
+
+
+
+