Template
reco_vosk.h
Go to the documentation of this file.
1/*
2-----------------------------------------------------------------------------
3This source file is part of OpenSpace3D
4For the latest info, see http://www.openspace3d.com
5
6Copyright (c) 2012 I-maginer
7
8This program is free software; you can redistribute it and/or modify it under
9the terms of the GNU Lesser General Public License as published by the Free Software
10Foundation; either version 2 of the License, or (at your option) any later
11version.
12
13This program is distributed in the hope that it will be useful, but WITHOUT
14ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
16
17You should have received a copy of the GNU Lesser General Public License along with
18this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19Place - Suite 330, Boston, MA 02111-1307, USA, or go to
20http://www.gnu.org/copyleft/lesser.txt
21
22-----------------------------------------------------------------------------
23*/
24
25
34#ifndef __RECO_H__
35#define __RECO_H__
36
#include <vosk_api.h>

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
49
/*!
 * Fixed-capacity byte FIFO used to hand audio data from a producer to a consumer.
 *
 * Bytes are appended at the end with fill() and removed from the front by the
 * consuming calls. The audio helpers interpret the stored bytes as 16-bit signed
 * PCM samples in native byte order.
 *
 * Locking: fill(), consume_safe(), getBufferCopy(), getInt16() and ProcessBuffer()
 * take the internal mutex. consume(), RemoveNoise(), calculateAudioLevel(),
 * getData() and getCount() do NOT lock, because they are used as building blocks
 * by the locking methods (the mutex is not recursive).
 */
class Buffer {
public:
  /*!
   * Create a buffer able to hold bufferSize bytes. The storage is zero-initialised.
   */
  Buffer(size_t bufferSize) : size(bufferSize), data(bufferSize), count(0) {}

  // The buffer owns its storage and a mutex: it is neither copyable nor assignable.
  Buffer(const Buffer&) = delete;
  Buffer& operator=(const Buffer&) = delete;

  /*!
   * Append newDataSize bytes at the end of the buffer.
   *
   * If the free space is too small for the whole chunk, the chunk is dropped
   * entirely and the buffered data is left untouched.
   */
  void fill(const char* newData, size_t newDataSize)
  {
    if (newData == nullptr || newDataSize == 0)
      return;

    const std::lock_guard<std::mutex> lock(mMutex);
    if (newDataSize > size - count)
    {
      // Buffer overflow: the incoming chunk does not fit, so it is discarded.
      return;
    }

    std::memcpy(data.data() + count, newData, newDataSize);
    count += newDataSize;
  }

  /*!
   * Drop consumeSize bytes from the front of the buffer (everything if
   * consumeSize >= getCount()). Does not lock; see consume_safe().
   */
  void consume(size_t consumeSize)
  {
    if (consumeSize >= count)
    {
      count = 0;
      return;
    }

    // Shift the remaining bytes to the start so the data always begins at offset 0.
    std::memmove(data.data(), data.data() + consumeSize, count - consumeSize);
    count -= consumeSize;
  }

  /*!
   * Same as consume(), but performed while holding the internal mutex.
   */
  void consume_safe(size_t consumeSize)
  {
    const std::lock_guard<std::mutex> lock(mMutex);
    consume(consumeSize);
  }

  /*!
   * Move up to length bytes from the front of the buffer into buffer.
   * \return the number of bytes copied (0 when buffer is null).
   */
  size_t getBufferCopy(char* buffer, size_t length)
  {
    if (buffer == nullptr)
      return 0;

    const std::lock_guard<std::mutex> lock(mMutex);
    const size_t cpl = std::min(length, count);
    if (cpl > 0)
      std::memcpy(buffer, data.data(), cpl);
    consume(cpl);
    return cpl;
  }

  /*!
   * Move up to length 16-bit samples from the front of the buffer into int16Array.
   *
   * Only complete samples that are actually buffered are copied; any remaining
   * entries of int16Array are set to 0. The bytes of the copied samples
   * (2 per sample) are removed from the buffer.
   */
  void getInt16(int16_t* int16Array, size_t length)
  {
    if (int16Array == nullptr || length == 0)
      return;

    const std::lock_guard<std::mutex> lock(mMutex);
    const size_t available = count / sizeof(int16_t);
    const size_t copied = std::min(length, available);
    if (copied > 0)
      std::memcpy(int16Array, data.data(), copied * sizeof(int16_t));

    // Pad with silence so the caller never reads uninitialised samples.
    std::fill(int16Array + copied, int16Array + length, static_cast<int16_t>(0));

    // Consume bytes, not samples.
    consume(copied * sizeof(int16_t));
  }

  /*! \return pointer to the first buffered byte. Not synchronised. */
  const char* getData() const
  {
    return data.data();
  }

  /*! \return number of bytes currently buffered. Not synchronised. */
  size_t getCount() const
  {
    return count;
  }

  /*!
   * Root mean square level of the first length bytes, read as 16-bit signed PCM
   * and normalised by INT16_MAX.
   *
   * length is clamped to the buffered byte count and a trailing odd byte is
   * ignored. Does not lock.
   * \return the RMS level, or 0 when there is no complete sample.
   */
  float calculateAudioLevel(size_t length)
  {
    const size_t numSamples = std::min(length, count) / sizeof(int16_t);
    if (numSamples == 0)
      return 0.0f; // avoids 0/0 -> NaN

    double sumOfSquares = 0.0;
    for (size_t i = 0; i < numSamples; ++i)
    {
      const double sample = static_cast<double>(loadSample(i)) / static_cast<double>(INT16_MAX);
      sumOfSquares += sample * sample;
    }

    return static_cast<float>(std::sqrt(sumOfSquares / static_cast<double>(numSamples)));
  }

  /*!
   * Attenuate quiet samples in place.
   *
   * Every complete 16-bit sample within the first length bytes whose absolute
   * value is below noiseLevel * INT16_MAX is multiplied by reductionFactor
   * (result truncated toward zero and saturated to the int16 range).
   * length is clamped to the buffered byte count. Does not lock.
   */
  void RemoveNoise(size_t length, float noiseLevel, float reductionFactor)
  {
    const float threshold = noiseLevel * std::numeric_limits<int16_t>::max();
    const float lowest = static_cast<float>(std::numeric_limits<int16_t>::min());
    const float highest = static_cast<float>(std::numeric_limits<int16_t>::max());

    // Only whole samples are processed, so an odd length never touches a stray byte.
    const size_t numSamples = std::min(length, count) / sizeof(int16_t);
    for (size_t i = 0; i < numSamples; ++i)
    {
      const int16_t sample = loadSample(i);
      if (std::abs(static_cast<int>(sample)) < threshold)
      {
        float scaled = static_cast<float>(sample) * reductionFactor;
        scaled = std::max(lowest, std::min(highest, scaled));
        storeSample(i, static_cast<int16_t>(scaled));
      }
    }
  }

  /*!
   * Take up to length bytes from the front of the buffer, apply RemoveNoise()
   * with fixed parameters (noise level 0.4, reduction factor 0.8) and hand the
   * chunk over only if its RMS level reaches threshold.
   *
   * The chunk is removed from the buffer in both cases.
   * \return the number of bytes copied into buffer, or 0 when the chunk was
   *         skipped (level below threshold) or buffer is null.
   */
  size_t ProcessBuffer(char* buffer, size_t length, float threshold)
  {
    if (buffer == nullptr)
      return 0;

    const std::lock_guard<std::mutex> lock(mMutex);

    const size_t cpl = std::min(length, count);
    RemoveNoise(cpl, 0.4f, 0.8f);
    const float audioLevel = calculateAudioLevel(cpl);

    if (audioLevel < threshold)
    {
      consume(cpl); // Skip the chunk
      return 0;
    }

    if (cpl > 0)
      std::memcpy(buffer, data.data(), cpl);
    consume(cpl);
    return cpl;
  }

private:
  // Read sample number index; memcpy avoids aliasing the char storage as int16_t.
  int16_t loadSample(size_t index) const
  {
    int16_t value = 0;
    std::memcpy(&value, data.data() + index * sizeof(int16_t), sizeof(value));
    return value;
  }

  // Write sample number index back into the byte storage.
  void storeSample(size_t index, int16_t value)
  {
    std::memcpy(data.data() + index * sizeof(int16_t), &value, sizeof(value));
  }

  std::mutex mMutex;        // guards data/count in the locking methods
  size_t size;              // capacity in bytes
  std::vector<char> data;   // storage; valid bytes are [0, count)
  size_t count;             // number of bytes currently buffered
};
198
199
200class Recognition
201{
202public:
203protected:
204private:
205 std::thread mThread;
206 std::mutex mMutexConfig;
207 bool mValid;
208 VoskModel* mModel;
209 VoskRecognizer* mRecognizer;
210 Buffer* mBuffer;
211 bool mInSpeech;
212 int mTimeOutSamples;
213 bool mKeySearch;
214 std::vector<std::string> mKeyWords;
215public:
219
222 Recognition(std::string pathtobin, std::string lang);
223
227
230 void cbThread();
231
232 void fillAudioBuffer(const char* data, size_t lenght);
233
237
241
244 void setVolume(int volume);
245
248 void AddWord(std::string s_Rule, std::string s_Word);
249protected:
250private:
251};
252
253#endif
void consume(size_t consumeSize)
Definition reco_vosk.h:74
void consume_safe(size_t consumeSize)
Definition reco_vosk.h:87
size_t getBufferCopy(char *buffer, size_t length)
Definition reco_vosk.h:101
Buffer(size_t bufferSize)
Definition reco_vosk.h:52
~Buffer()
Definition reco_vosk.h:54
void getInt16(int16_t *int16Array, size_t length)
Definition reco_vosk.h:110
float calculateAudioLevel(size_t length)
Definition reco_vosk.h:130
size_t getCount() const
Definition reco_vosk.h:125
const char * getData() const
Definition reco_vosk.h:120
size_t ProcessBuffer(char *buffer, size_t length, float threshold)
Definition reco_vosk.h:173
void RemoveNoise(size_t length, float noiseLevel, float reductionFactor)
Definition reco_vosk.h:150
void fill(const char *newData, size_t newDataSize)
Definition reco_vosk.h:59
Management of the recognition class.
Definition reco.h:50
void setVolume(int volume)
void AddWord(std::string s_Rule, std::string s_Word)
void fillAudioBuffer(const char *data, size_t lenght)
bool initializeObjects()
int getVolume()
void cbThread()
Recognition(std::string pathtobin, std::string lang)