• ROS2 + 科大讯飞 初步实现机器人语音控制


    环境配置:

            电脑端: ubuntu22.04实体机作为上位机

            ROS版本:ros2-humble

            实体机器人: STM32 + 思岚A1激光雷达

            科大讯飞语音SDK 讯飞开放平台-以语音交互为核心的人工智能开放平台

    实现步骤:

            1. 下载和处理科大讯飞语音模块

            (1)进入官网的控制台

           

     (2)在左侧导航栏中选择 语音识别-> 语音听写

            (3)下载语音模块

     

     2.科大讯飞SDK的处理

    新建一个工作空间,里面新建两个文件夹 src   voice_ros2

    将SDK压缩包解压后的文件,放入voice_ros2中,进入sample目录的iat_online_record_sample目录下,运行下面的命令

    source 64bit_make.sh
    

    在bin目录下执行对应的可执行文件了 

    ./iat_online_record_sample
    

     

     如果遇到下列问题:error while loading shared libraries: libmsc.so: cannot open shared object file: No such file or directory


    就把在终端中进入下列目录中

     执行命令:

    1. sudo cp libmsc.so /usr/local/lib
    2. sudo ldconfig
    3.上位机实现

     

    src 文件夹中放的是 两个功能包,base 中是stm32的ROS2驱动包,teleop_twist_keyboard是github上下载的键盘控制节点功能包,地址如下:

    GitHub - ros2/teleop_twist_keyboard at ardent

    这个目录下的文件是SDK解压后的文件,其中 红框中的voice.py是也单独编写的文件

    1. import subprocess
    2. import multiprocessing
    3. import time
    4. def run_iat_online_record_sample(queue):
    5. process = subprocess.Popen(["./bin/iat_online_record_sample"],
    6. stdout=subprocess.PIPE,
    7. stdin=subprocess.PIPE,
    8. stderr=subprocess.PIPE,
    9. )
    10. # Communicate with the process
    11. stdout, _ = process.communicate(input=b"0\n1\n")
    12. # Put the result into the queue
    13. queue.put(stdout.decode('utf-8'))
    14. def main():
    15. while True:
    16. # Create a queue for communication between processes
    17. queue = multiprocessing.Queue()
    18. # Start the process
    19. process = multiprocessing.Process(target=run_iat_online_record_sample, args=(queue,))
    20. process.start()
    21. # Wait for the process to finish and get the result from the queue
    22. process.join()
    23. result = queue.get()
    24. # Print the result
    25. print("Result:", result)
    26. # Save the result to a text file, clearing the file first
    27. with open("result.txt", "w") as f:
    28. f.write(result)
    29. # Ask user whether to continue recognition
    30. continue_recognition = input("是否继续识别? (0: 结束, 1: 继续): ")
    31. if continue_recognition == "0":
    32. break
    33. if __name__ == "__main__":
    34. main()

    这个文件运行后会在当前目录生成一个result.txt文件,如下图,这个文件的内容每次识别之后都会更新,键盘节点就是通过获取这个文件的数据来通过语音控制机器人移动的

    4.修改teleop_twist_keyboard.py文件

    在键盘控制的代码前添加读取文件数据的代码


    这里将刚刚识别到的语音过滤后存储在voice_command[0]中,以供后续使用,下面会通过判断voice_command[0]中的值来进行不同的操作

    1. import sys
    2. import threading
    3. import time
    4. import os
    5. from std_msgs.msg import String
    6. import geometry_msgs.msg
    7. import rclpy
    8. if sys.platform == 'win32':
    9. import msvcrt
    10. else:
    11. import termios
    12. import tty
    13. msg = """
    14. This node takes keypresses from the keyboard and publishes them
    15. as Twist/TwistStamped messages. It works best with a US keyboard layout.
    16. ---------------------------
    17. Moving around:
    18. u i o
    19. j k l
    20. m , .
    21. For Holonomic mode (strafing), hold down the shift key:
    22. ---------------------------
    23. U I O
    24. J K L
    25. M < >
    26. t : up (+z)
    27. b : down (-z)
    28. anything else : stop
    29. q/z : increase/decrease max speeds by 10%
    30. w/x : increase/decrease only linear speed by 10%
    31. e/c : increase/decrease only angular speed by 10%
    32. CTRL-C to quit
    33. """
    34. moveBindings = {
    35. 'i': (1, 0, 0, 0),
    36. 'o': (1, 0, 0, -1),
    37. 'j': (0, 0, 0, 1),
    38. 'l': (0, 0, 0, -1),
    39. 'u': (1, 0, 0, 1),
    40. ',': (-1, 0, 0, 0),
    41. '.': (-1, 0, 0, 1),
    42. 'm': (-1, 0, 0, -1),
    43. 'O': (1, -1, 0, 0),
    44. 'I': (1, 0, 0, 0),
    45. 'J': (0, 1, 0, 0),
    46. 'L': (0, -1, 0, 0),
    47. 'U': (1, 1, 0, 0),
    48. '<': (-1, 0, 0, 0),
    49. '>': (-1, -1, 0, 0),
    50. 'M': (-1, 1, 0, 0),
    51. 't': (0, 0, 1, 0),
    52. 'b': (0, 0, -1, 0),
    53. }
    54. speedBindings = {
    55. 'q': (1.1, 1.1),
    56. 'z': (.9, .9),
    57. 'w': (1.1, 1),
    58. 'x': (.9, 1),
    59. 'e': (1, 1.1),
    60. 'c': (1, .9),
    61. }
    62. def getKey(settings):
    63. if sys.platform == 'win32':
    64. # getwch() returns a string on Windows
    65. key = msvcrt.getwch()
    66. else:
    67. tty.setraw(sys.stdin.fileno())
    68. # sys.stdin.read() returns a string on Linux
    69. key = sys.stdin.read(1)
    70. termios.tcsetattr(sys.stdin, termios.TCSADRAIN, settings)
    71. return key
    72. def saveTerminalSettings():
    73. if sys.platform == 'win32':
    74. return None
    75. return termios.tcgetattr(sys.stdin)
    76. def restoreTerminalSettings(old_settings):
    77. if sys.platform == 'win32':
    78. return
    79. termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
    80. def vels(speed, turn):
    81. return 'currently:\tspeed %s\tturn %s ' % (speed, turn)
    82. def main():
    83. settings = saveTerminalSettings()
    84. rclpy.init()
    85. node = rclpy.create_node('teleop_twist_keyboard')
    86. # parameters
    87. stamped = node.declare_parameter('stamped', False).value
    88. frame_id = node.declare_parameter('frame_id', '').value
    89. if not stamped and frame_id:
    90. raise Exception("'frame_id' can only be set when 'stamped' is True")
    91. if stamped:
    92. TwistMsg = geometry_msgs.msg.TwistStamped
    93. else:
    94. TwistMsg = geometry_msgs.msg.Twist
    95. pub = node.create_publisher(TwistMsg, 'cmd_vel', 10)
    96. voice_command = [None] # Initializing as a list
    97. spinner = threading.Thread(target=rclpy.spin, args=(node,))
    98. spinner.start()
    99. speed = 0.5
    100. turn = 1.0
    101. x = 0.0
    102. y = 0.0
    103. z = 0.0
    104. th = 0.0
    105. status = 0.0
    106. twist_msg = TwistMsg()
    107. if stamped:
    108. twist = twist_msg.twist
    109. twist_msg.header.stamp = node.get_clock().now().to_msg()
    110. twist_msg.header.frame_id = frame_id
    111. else:
    112. twist = twist_msg
    113. try:
    114. print(msg)
    115. print(vels(speed, turn))
    116. while True:
    117. print("当前工作路径:", os.getcwd())
    118. with open('./voice_ros2/result.txt', 'r') as f:
    119. # with open('/home/lsg/xufen3_ws/voice_ros2/result.txt', 'r') as f:
    120. for line in f:
    121. if line.startswith('Result: ['):
    122. start = line.find('[')
    123. end = line.find(']')
    124. if start != -1 and end != -1:
    125. voice_command[0] = line[start + 1:end].strip()
    126. print("voice_command", voice_command[0])
    127. # Clearing the content of result.txt
    128. open('./voice_ros2/result.txt', 'w').close()
    129. # open('/home/lsg/xufen3_ws/voice_ros2/result.txt', 'w').close()
    130. break
    131. key = getKey(settings)
    132. # print("键盘控制按键输出", key)
    133. if key in moveBindings.keys():
    134. x = moveBindings[key][0]
    135. y = moveBindings[key][1]
    136. z = moveBindings[key][2]
    137. th = moveBindings[key][3]
    138. elif key in speedBindings.keys():
    139. speed = speed * speedBindings[key][0]
    140. turn = turn * speedBindings[key][1]
    141. print(vels(speed, turn))
    142. if (status == 14):
    143. print(msg)
    144. status = (status + 1) % 15
    145. elif voice_command[0] is not None:
    146. if voice_command[0] == "小车后退":
    147. print("语音控制小车前进", voice_command[0])
    148. x = moveBindings['i'][0]
    149. y = moveBindings['i'][1]
    150. z = moveBindings['i'][2]
    151. th = moveBindings['i'][3]
    152. elif voice_command[0] == "小车前进":
    153. print("语音控制小车后退", voice_command[0])
    154. x = moveBindings[','][0]
    155. y = moveBindings[','][1]
    156. z = moveBindings[','][2]
    157. th = moveBindings[','][3]
    158. elif voice_command[0] == "小车左转":
    159. print("语音控制小车左转", voice_command[0])
    160. x = moveBindings['j'][0]
    161. y = moveBindings['j'][1]
    162. z = moveBindings['j'][2]
    163. th = moveBindings['j'][3]
    164. elif voice_command[0] == "小车右转":
    165. print("语音控制小车右转", voice_command[0])
    166. x = moveBindings['l'][0]
    167. y = moveBindings['l'][1]
    168. z = moveBindings['l'][2]
    169. th = moveBindings['l'][3]
    170. elif voice_command[0] == "小车停":
    171. print("语音控制小车停", voice_command[0])
    172. x = moveBindings['k'][0]
    173. y = moveBindings['k'][1]
    174. z = moveBindings['k'][2]
    175. th = moveBindings['k'][3]
    176. voice_command[0] = None
    177. else:
    178. x = 0.0
    179. y = 0.0
    180. z = 0.0
    181. th = 0.0
    182. if (key == '\x03'):
    183. break
    184. if stamped:
    185. twist_msg.header.stamp = node.get_clock().now().to_msg()
    186. twist.linear.x = x * speed
    187. twist.linear.y = y * speed
    188. twist.linear.z = z * speed
    189. twist.angular.x = 0.0
    190. twist.angular.y = 0.0
    191. twist.angular.z = th * turn
    192. pub.publish(twist_msg)
    193. # Print timestamp every second
    194. time.sleep(1)
    195. print("时间戳:", time.time())
    196. except Exception as e:
    197. print(e)
    198. finally:
    199. if stamped:
    200. twist_msg.header.stamp = node.get_clock().now().to_msg()
    201. twist.linear.x = 0.0
    202. twist.linear.y = 0.0
    203. twist.linear.z = 0.0
    204. twist.angular.x = 0.0
    205. twist.angular.y = 0.0
    206. twist.angular.z = 0.0
    207. pub.publish(twist_msg)
    208. rclpy.shutdown()
    209. spinner.join()
    210. restoreTerminalSettings(settings)
    211. if __name__ == '__main__':
    212. main()
    5. 编译运行

    1. // xufen3_ws工作空间下
    2. // 终端1:
    3. colcon build
    4. . install/setup.bash
    5. ros2 launch ros2_stm32_bridge driver.launch.py
    6. // 终端2:
    7. . install/setup.bash
    8. ros2 run teleop_twist_keyboard teleop_twist_keyboard
    9. // 终端3 ~/xufen3_ws/voice_ros2$ 目录下 :
    10. python3 voice.py

    然后就可以通过语音控制小车
    在右侧终端按1进行语音识别,此时将识别到小车前进的命令并打印,在左侧终端按回车健获取result中的命令,将输出voice_command 小车前进,此时再按键ctrl+c,将输出语音控制小车前进 小车前进并且小车开始移动。
    目前的代码需要按键才能加载进来语音的命令并控制小车移动,但好在实现了功能,后续还会继续优化。

     

    终端3中,输入数字1    然后 语音输入指令 “小车前进” 或“  小车后退”  或 “小车左转” 或“”小车右转” 

    等到终端3中,打印了语音指令后,鼠标移动到终端2,按下回车键即可小车移动。

    需要按键控制,感觉发出语音指令后,要等好几秒才能移动小车,还需要按键,不过还是初步实现了语音控制,后期优化,实现更实用的语音控制

  • 相关阅读:
    2019 ICPC香港站 G. Game Design
    Java核心编程(19)
    5+甲基化+预后模型搭配实验
    Python学习基础笔记六十七——格式化字符串
    Win11校园网无法连接怎么办?Win11连接不到校园网的解决方法
    基于知识蒸馏的两阶段去雨去雪去雾模型学习记录(三)之知识测试阶段与评估模块
    QML之定位器(Column,Row,Flow,Grid)
    电流探头的干扰源电流谱测试
    python中json的使用
    [红蓝攻防]MDOG(全新UI重制版)为Xss跨站而生,数据共享,表单劫持,URL重定向
  • 原文地址:https://blog.csdn.net/LSG_Down/article/details/140374897