起因
- 由于前段时间一个小项目需要实现关键词识别,所以打算参考maixpy的官方文档实现isolated_word的本地存储和加载
方法
- 主要是使用官方提供的方法。
- 保存是将 `get()` 返回的数据帧长和数据帧保存到本地。为了方便,我这里直接将帧长数据保存到 txt 文件,将数据帧保存到二进制文件。
- 读取的话,只要调用 `set()` 加载【词汇模板】就可以了。
但是需要注意的是,`get()` 返回的数据帧的长度统一都是 5256(尽管有用的数据只是前面一小部分,后面自动补零),因此 `set()` 也强制要求输入的数据帧的长度为 5256。所以我们从文件中读取的数据帧的长度也必须是 5256,如果长度不对则会报错。
踩坑
- 推荐大家调试的时候使用串口调试而不是IDE,部分数据在IDE下看不到
代码
保存并读取
import time
from Maix import GPIO, I2S
from fpioa_manager import fm
import os, Maix, lcd, image
# ---- Script configuration: I2S input, LCD, and the isolated-word recogniser ----
sample_rate = 16000  # handed to rx.set_sample_rate() below
record_time = 2  # not referenced anywhere else in this script
# Off-screen 320x240 canvas; prompts are drawn on it and then pushed to the LCD.
img = image.Image(size=(320, 240))
# Map the I2S0 input signals onto board pins (pin numbers are board specific).
fm.register(20,fm.fpioa.I2S0_IN_D0, force=True)
fm.register(30,fm.fpioa.I2S0_WS, force=True) # 19 on Go Board and Bit(new version)
fm.register(32,fm.fpioa.I2S0_SCLK, force=True) # original note: "18 bit" (presumably pin 18 on the Bit board -- confirm)
# Configure I2S device 0, channel 0, as a receiver running at sample_rate.
rx = I2S(I2S.DEVICE_0)
rx.channel_config(rx.CHANNEL_0, rx.RECEIVER, align_mode=I2S.STANDARD_MODE)
rx.set_sample_rate(sample_rate)
print('I2S = ', rx)
# The LCD must be initialised before the lcd.display() calls further down.
lcd.init()
from speech_recognizer import isolated_word
# Recogniser bound to the same I2S device; size=10 is the number of template
# slots requested (sr.size() is printed as "Total vocabulary templates").
sr = isolated_word(dmac=2, i2s=I2S.DEVICE_0, size=10)
print('Total vocabulary templates = ', sr.size())
print('isolated_word module = ', sr)
# NOTE(review): the meaning of the three threshold arguments is not visible
# in this script -- check the MaixPy isolated_word documentation.
sr.set_threshold(0, 0, 10000)
# Parallel lists filled during recording: frm_len[i] is the frame length and
# frm_data[i] the frame buffer of the i-th recorded template.
frm_len = []
frm_data = []
# Configuration complete
#-------------------------------------------------------------------------
def isolated_save(frm_len, frm_data,
                  len_path='/sd/Data_frame_length.txt',
                  data_path='/sd/Data_frame'):
    """Persist recorded vocabulary templates to two files.

    frm_len   -- sequence of frame lengths, one per template (element 0 of
                 the tuple returned by sr.get()).
    frm_data  -- sequence of bytes-like frame buffers, parallel to frm_len
                 (element 1 of the tuple returned by sr.get()).
    len_path  -- text file that receives one frame length per line.
    data_path -- binary file that receives the frame buffers back to back,
                 in the same order as the lengths.

    Raises ValueError, before either file is opened, when the two sequences
    do not have the same number of entries; writing them anyway would leave
    a length file and a data file that no longer describe each other.
    """
    if len(frm_len) != len(frm_data):
        raise ValueError('frm_len and frm_data must have the same number of entries')
    print('path = ', os.getcwd())
    with open(len_path, 'w') as f:
        for length in frm_len:
            f.write(str(length))
            f.write('\n')
            print('\n')
            print('frm_len = ', length)
    with open(data_path, 'wb') as f:
        for frame in frm_data:
            f.write(frame)
    print('\n save success')
def isolated_read(len_path='/sd/Data_frame_length.txt',
                  data_path='/sd/Data_frame'):
    """Load stored templates from disk into the recogniser `sr`.

    len_path  -- text file with one frame length per line.
    data_path -- binary file holding one fixed-size frame record per line of
                 len_path, stored back to back.

    The defaults are the absolute paths isolated_save() writes to, so the
    files are found regardless of the current working directory.

    The i-th stored template is loaded into recogniser slot 2*i + 1.  The
    module-level name `data` is left bound to the last (length, frame) tuple
    that was loaded; it is not assigned when the length file has no entries.

    Raises ValueError when a frame record is shorter than the fixed record
    size, instead of handing a truncated buffer to sr.set().
    """
    global data
    frame_size = 5256  # every stored frame record occupies exactly this many bytes
    print('\n')
    print('isolated_read')
    with open(len_path, 'r') as f:
        # Skip blank lines (e.g. a trailing newline) rather than failing in int().
        lengths = [int(line) for line in f.readlines() if line.strip()]
    # Open the frame file once instead of once per template.
    with open(data_path, 'rb') as f:
        for i, length in enumerate(lengths):
            f.seek(i * frame_size)
            frame = f.read(frame_size)
            if len(frame) != frame_size:
                raise ValueError('frame record %d has %d bytes, expected %d'
                                 % (i, len(frame), frame_size))
            data = (length, frame)
            sr.set(2 * i + 1, data)
    print('read success')
# function set complete
#-------------------------------------------------------------------------
## record and get & set
def _record_word(slot, label, tag):
    """Record one vocabulary template into recogniser slot `slot`.

    slot  -- template slot passed to sr.record() / sr.get().
    label -- word name shown in the "Please speak ..." prompt.
    tag   -- ordinal used in the 'data <tag> =' debug print.

    Polls every 100 ms until sr.record(slot) reports sr.Done, showing the
    prompt whenever the recogniser state is sr.Speak.  The recorded
    (length, frame) tuple is appended to the module-level frm_len / frm_data
    lists, a "get !" banner is shown for 500 ms, and the tuple is returned.
    """
    while True:
        time.sleep_ms(100)
        print(sr.state())
        if sr.Done == sr.record(slot):
            recorded = sr.get(slot)
            print('data ' + tag + ' =', recorded)
            break
        if sr.Speak == sr.state():
            print('speak ' + label)
            img.draw_rectangle((0, 0, 320, 240), fill=True, color=(255, 255, 255))
            img.draw_string(10, 80, "Please speak " + label, color=(255, 0, 0), scale=4, mono_space=0)
            lcd.display(img)
    frm_len.append(recorded[0])
    frm_data.append(recorded[1])
    img.draw_rectangle((0, 0, 320, 240), fill=True, color=(255, 255, 255))
    img.draw_string(10, 80, "get !", color=(255, 0, 0), scale=4, mono_space=0)
    lcd.display(img)
    time.sleep_ms(500)
    return recorded


# Words A, B and C are recorded into the even slots 0, 2 and 4.
# `data` stays a module-level name, as it was when the loops were written inline.
for _slot, _label, _tag in ((0, 'A', 'one'), (2, 'B', 'two'), (4, 'C', 'three')):
    data = _record_word(_slot, _label, _tag)

isolated_save(frm_len, frm_data)
# record and save complete
#-------------------------------------------------------------------------
# read and set
isolated_read()

# Template slots come in pairs per word: slot 2*k was recorded live above and
# slot 2*k + 1 was filled by isolated_read(), so either one means word k.
# NOTE(review): this treats res[0] as the matched slot number -- confirm
# against the MaixPy isolated_word documentation.
_WORDS = ('A', 'B', 'C')

# begin recognize
while True:
    time.sleep_ms(200)
    img.draw_rectangle((0, 0, 320, 240), fill=True, color=(255, 255, 255))
    img.draw_string(20, 80, "Please speak A or B or C", color=(255, 0, 0), scale=2, mono_space=0)
    lcd.display(img)
    print(sr.state())
    if sr.Done != sr.recognize():
        continue
    res = sr.result()
    if res is None:
        continue
    print(str(res[0]))
    word_index = res[0] // 2
    # Results outside the three known pairs are ignored, as before.
    if 0 <= word_index < len(_WORDS):
        img.draw_rectangle((0, 0, 320, 240), fill=True, color=(255, 255, 255))
        img.draw_string(150, 100, _WORDS[word_index], color=(255, 0, 0), scale=10, mono_space=0)
        lcd.display(img)
        time.sleep_ms(200)
        # Prints '1', '2' or '3' for A, B or C.
        print(str(word_index + 1))
保存
import time
from Maix import GPIO, I2S
from fpioa_manager import fm
import os, Maix, lcd, image
# ---- Script configuration: LCD, I2S input, and the isolated-word recogniser ----
sample_rate = 16000  # handed to rx.set_sample_rate() below
record_time = 2  # not referenced anywhere else in this script
# Off-screen 320x240 canvas; prompts are drawn on it and then pushed to the LCD.
img = image.Image(size=(320, 240))
# The LCD must be initialised before the lcd.display() calls further down.
lcd.init()
# Map the I2S0 input signals onto board pins (pin numbers are board specific).
fm.register(20,fm.fpioa.I2S0_IN_D0, force=True)
fm.register(30,fm.fpioa.I2S0_WS, force=True) # 19 on Go Board and Bit(new version)
fm.register(32,fm.fpioa.I2S0_SCLK, force=True) # original note: "18 bit" (presumably pin 18 on the Bit board -- confirm)
# Configure I2S device 0, channel 0, as a receiver running at sample_rate.
rx = I2S(I2S.DEVICE_0)
rx.channel_config(rx.CHANNEL_0, rx.RECEIVER, align_mode=I2S.STANDARD_MODE)
rx.set_sample_rate(sample_rate)
print('I2S = ', rx)
from speech_recognizer import isolated_word
# Recogniser bound to the same I2S device; size=10 is the number of template
# slots requested (sr.size() is printed as "Total vocabulary templates").
sr = isolated_word(dmac=2, i2s=I2S.DEVICE_0, size=10)
print('Total vocabulary templates = ', sr.size())
print('isolated_word module = ', sr)
# NOTE(review): the meaning of the three threshold arguments is not visible
# in this script -- check the MaixPy isolated_word documentation.
sr.set_threshold(0, 0, 10000)
# Parallel lists filled during recording: frm_len[i] is the frame length and
# frm_data[i] the frame buffer of the i-th recorded template.
frm_len = []
frm_data = []
# Configuration complete
#-------------------------------------------------------------------------
def isolated_save(frm_len, frm_data,
                  len_path='/sd/Data_frame_length.txt',
                  data_path='/sd/Data_frame'):
    """Persist recorded vocabulary templates to two files.

    frm_len   -- sequence of frame lengths, one per template (element 0 of
                 the tuple returned by sr.get()).
    frm_data  -- sequence of bytes-like frame buffers, parallel to frm_len
                 (element 1 of the tuple returned by sr.get()).
    len_path  -- text file that receives one frame length per line.
    data_path -- binary file that receives the frame buffers back to back,
                 in the same order as the lengths.

    Raises ValueError, before either file is opened, when the two sequences
    do not have the same number of entries; writing them anyway would leave
    a length file and a data file that no longer describe each other.
    """
    if len(frm_len) != len(frm_data):
        raise ValueError('frm_len and frm_data must have the same number of entries')
    print('path = ', os.getcwd())
    with open(len_path, 'w') as f:
        for length in frm_len:
            f.write(str(length))
            f.write('\n')
            print('\n')
            print('frm_len = ', length)
    with open(data_path, 'wb') as f:
        for frame in frm_data:
            f.write(frame)
    print('\n save success')
# function set complete
#-------------------------------------------------------------------------
## record and get & set
def _record_word(slot, label, tag):
    """Record one vocabulary template into recogniser slot `slot`.

    slot  -- template slot passed to sr.record() / sr.get().
    label -- word name shown in the "Please speak ..." prompt.
    tag   -- ordinal used in the 'data <tag> =' debug print.

    Polls every 100 ms until sr.record(slot) reports sr.Done, showing the
    prompt whenever the recogniser state is sr.Speak.  The recorded
    (length, frame) tuple is appended to the module-level frm_len / frm_data
    lists, a "get !" banner is shown for 500 ms, and the tuple is returned.
    """
    while True:
        time.sleep_ms(100)
        print(sr.state())
        if sr.Done == sr.record(slot):
            recorded = sr.get(slot)
            print('data ' + tag + ' =', recorded)
            break
        if sr.Speak == sr.state():
            print('speak ' + label)
            img.draw_rectangle((0, 0, 320, 240), fill=True, color=(255, 255, 255))
            img.draw_string(10, 80, "Please speak " + label, color=(255, 0, 0), scale=4, mono_space=0)
            lcd.display(img)
    frm_len.append(recorded[0])
    frm_data.append(recorded[1])
    img.draw_rectangle((0, 0, 320, 240), fill=True, color=(255, 255, 255))
    img.draw_string(10, 80, "get !", color=(255, 0, 0), scale=4, mono_space=0)
    lcd.display(img)
    time.sleep_ms(500)
    return recorded


# Words A, B and C are recorded into the even slots 0, 2 and 4.
# `data` stays a module-level name, as it was when the loops were written inline.
for _slot, _label, _tag in ((0, 'A', 'one'), (2, 'B', 'two'), (4, 'C', 'three')):
    data = _record_word(_slot, _label, _tag)

isolated_save(frm_len, frm_data)
# Confirm on the LCD once the save call has returned.
img.draw_rectangle((0, 0, 320, 240), fill=True, color=(255, 255, 255))
img.draw_string(10, 80, "save success !", color=(255, 0, 0), scale=4, mono_space=0)
lcd.display(img)
# record and save complete
#-------------------------------------------------------------------------
读取
import time
from Maix import GPIO, I2S
from fpioa_manager import fm
import os, Maix, lcd, image
# ---- Script configuration: I2S input, LCD, and the isolated-word recogniser ----
sample_rate = 16000  # handed to rx.set_sample_rate() below
record_time = 2  # not referenced anywhere else in this script
# Off-screen 320x240 canvas; prompts are drawn on it and then pushed to the LCD.
img = image.Image(size=(320, 240))
# Map the I2S0 input signals onto board pins (pin numbers are board specific).
fm.register(20, fm.fpioa.I2S0_IN_D0, force=True)
fm.register(30, fm.fpioa.I2S0_WS, force=True)  # 19 on Go Board and Bit(new version)
fm.register(32, fm.fpioa.I2S0_SCLK, force=True)  # original note: "18 bit" (presumably pin 18 on the Bit board -- confirm)
# Configure I2S device 0, channel 0, as a receiver running at sample_rate.
rx = I2S(I2S.DEVICE_0)
rx.channel_config(rx.CHANNEL_0, rx.RECEIVER, align_mode=I2S.STANDARD_MODE)
rx.set_sample_rate(sample_rate)
print('I2S = ', rx)
# This script calls lcd.display() in its recognition loop, so the LCD has to
# be initialised here; the call was missing from this script only.
lcd.init()
from speech_recognizer import isolated_word
# Recogniser bound to the same I2S device; size=10 is the number of template
# slots requested (sr.size() is printed as "Total vocabulary templates").
sr = isolated_word(dmac=2, i2s=I2S.DEVICE_0, size=10)
print('Total vocabulary templates = ', sr.size())
print('isolated_word module = ', sr)
# NOTE(review): the meaning of the three threshold arguments is not visible
# in this script -- check the MaixPy isolated_word documentation.
sr.set_threshold(0, 0, 10000)
# Kept for parity with the recording scripts; this script does not record,
# so nothing below appends to these module-level lists.
frm_len = []
frm_data = []
# Configuration complete
#-------------------------------------------------------------------------
def isolated_read(len_path='/sd/Data_frame_length.txt',
                  data_path='/sd/Data_frame'):
    """Load stored templates from disk into the recogniser `sr`.

    len_path  -- text file with one frame length per line.
    data_path -- binary file holding one fixed-size frame record per line of
                 len_path, stored back to back.

    The defaults are absolute paths on the SD card, so the files are found
    regardless of the current working directory.

    The i-th stored template is loaded into recogniser slot 2*i + 1.  The
    module-level name `data` is left bound to the last (length, frame) tuple
    that was loaded; it is not assigned when the length file has no entries.

    Raises ValueError when a frame record is shorter than the fixed record
    size, instead of handing a truncated buffer to sr.set().
    """
    global data
    frame_size = 5256  # every stored frame record occupies exactly this many bytes
    print('\n')
    print('isolated_read')
    with open(len_path, 'r') as f:
        # Skip blank lines (e.g. a trailing newline) rather than failing in int().
        lengths = [int(line) for line in f.readlines() if line.strip()]
    # Open the frame file once instead of once per template.
    with open(data_path, 'rb') as f:
        for i, length in enumerate(lengths):
            f.seek(i * frame_size)
            frame = f.read(frame_size)
            if len(frame) != frame_size:
                raise ValueError('frame record %d has %d bytes, expected %d'
                                 % (i, len(frame), frame_size))
            data = (length, frame)
            sr.set(2 * i + 1, data)
    print('read success')
# function set complete
#-------------------------------------------------------------------------
# read and set
isolated_read()

# isolated_read() loads word k into slot 2*k + 1, so the matched slot is
# folded back to its word with // 2 (the even slot 2*k maps to the same word).
# NOTE(review): this treats res[0] as the matched slot number -- confirm
# against the MaixPy isolated_word documentation.
_WORDS = ('A', 'B', 'C')

# begin recognize
while True:
    time.sleep_ms(200)
    print('doing')
    img.draw_rectangle((0, 0, 320, 240), fill=True, color=(255, 255, 255))
    img.draw_string(20, 80, "Please speak A or B or C", color=(255, 0, 0), scale=2, mono_space=0)
    lcd.display(img)
    print(sr.state())
    # Debug output; `data` is the last template tuple isolated_read() left in
    # the module-level name.
    print(sr.dtw(data))
    if sr.Done != sr.recognize():
        continue
    res = sr.result()
    if res is None:
        continue
    print(str(res[0]))
    word_index = res[0] // 2
    # Results outside the three known pairs are ignored, as before.
    if 0 <= word_index < len(_WORDS):
        img.draw_rectangle((0, 0, 320, 240), fill=True, color=(255, 255, 255))
        img.draw_string(150, 100, _WORDS[word_index], color=(255, 0, 0), scale=10, mono_space=0)
        lcd.display(img)
        time.sleep_ms(200)