Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix lipsync.h compile error #77

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions examples/audio/AudioOutputM5Speaker.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/*
This source was taken from M5Unified's example(WebRadio_with_ESP8266Audio.ino).
https://github.com/m5stack/M5Unified
*/


#include <AudioOutput.h>
#include <M5Unified.h>

/// AudioOutput implementation that hands decoded PCM to an M5Unified speaker.
///
/// Incoming stereo samples are collected in one of three rotating buffers.
/// When the active buffer is full (or flush() is called) it is submitted to
/// the speaker with playRaw() and the next buffer becomes active, so a buffer
/// that was just submitted is not overwritten by the following samples.
class AudioOutputM5Speaker : public AudioOutput
{
public:
  /// @param m5sound               speaker object that receives the audio (not owned).
  /// @param virtual_sound_channel speaker channel passed to playRaw()/stop().
  AudioOutputM5Speaker(m5::Speaker_Class* m5sound, uint8_t virtual_sound_channel = 0)
  : _m5sound(m5sound)
  , _virtual_ch(virtual_sound_channel)
  {
  }
  virtual ~AudioOutputM5Speaker(void) {};

  /// Nothing to set up here; always reports success.
  virtual bool begin(void) override { return true; }

  /// Stores one stereo frame (sample[0], sample[1]) in the active buffer.
  ///
  /// @return true if the frame was stored. When the active buffer is already
  ///         full it is flushed instead and false is returned: the frame was
  ///         NOT stored and has to be offered again by the caller.
  virtual bool ConsumeSample(int16_t sample[2]) override
  {
    if (_tri_buffer_index >= tri_buf_size)
    {
      flush();
      return false;
    }

    int16_t* const dst = &_tri_buffer[_tri_index][_tri_buffer_index];
    dst[0] = sample[0];
    dst[1] = sample[1];
    _tri_buffer_index += 2;
    return true;
  }

  /// Submits whatever has been collected in the active buffer to the speaker
  /// and advances to the next of the three buffers. Does nothing when the
  /// active buffer is empty.
  virtual void flush(void) override
  {
    if (_tri_buffer_index == 0) { return; }

    // Arguments after the sample rate: stereo = true, repeat = 1
    // (see the M5Unified Speaker_Class::playRaw signature).
    _m5sound->playRaw(_tri_buffer[_tri_index], _tri_buffer_index, hertz, true, 1, _virtual_ch);
    _tri_index = _tri_index < 2 ? _tri_index + 1 : 0;
    _tri_buffer_index = 0;
    ++_update_count;
  }

  /// Flushes pending samples, stops the speaker channel and silences all
  /// three buffers so that getBuffer() readers see zeros afterwards.
  virtual bool stop(void) override
  {
    flush();
    _m5sound->stop(_virtual_ch);
    // sizeof() gives the size in BYTES of all three buffers; passing the
    // element count (tri_buf_size) would clear only half of each int16_t buffer.
    memset(_tri_buffer, 0, sizeof(_tri_buffer));
    ++_update_count;
    return true;
  }

  /// Returns the buffer preceding the active one in rotation order, i.e. the
  /// one most recently handed to the speaker by flush(). It holds
  /// tri_buf_size int16_t values.
  const int16_t* getBuffer(void) const { return _tri_buffer[(_tri_index + 2) % 3]; }

  /// Counter incremented on every successful flush() and on every stop().
  const uint32_t getUpdateCount(void) const { return _update_count; }

protected:
  m5::Speaker_Class* _m5sound;
  uint8_t _virtual_ch;
  static constexpr size_t tri_buf_size = 640;
  // Zero-initialised so getBuffer() never exposes indeterminate data before
  // the first flush().
  int16_t _tri_buffer[3][tri_buf_size] = {};
  size_t _tri_buffer_index = 0;  // next write position inside the active buffer
  size_t _tri_index = 0;         // which of the three buffers is active
  size_t _update_count = 0;
};

#define FFT_SIZE 256

#ifndef M_PI
#define M_PI 3.141592653
#endif

/// Fixed-size (FFT_SIZE point) radix-2 FFT with a built-in Hann-style window
/// and stereo-to-mono mix, used to obtain a magnitude spectrum from PCM data.
class fft_t
{
  static_assert(FFT_SIZE >= 4 && (FFT_SIZE & (FFT_SIZE - 1)) == 0,
                "FFT_SIZE must be a power of two (>= 4)");

  // All tables are zero-initialised. The constructor does not write
  // _wr[FFT_SIZE / 4] (cos(pi/2) = 0) nor _wi[0] (sin(0) = 0), yet exec()
  // reads both; without the initialisers the class only works for objects
  // with static storage duration.
  float _wr[FFT_SIZE + 1] = {};     // cosine table
  float _wi[FFT_SIZE + 1] = {};     // sine table
  float _fr[FFT_SIZE + 1] = {};     // real part of the working data / result
  float _fi[FFT_SIZE + 1] = {};     // imaginary part of the working data / result
  uint16_t _br[FFT_SIZE + 1] = {};  // bit-reversal permutation
  size_t _ie = 0;                   // number of butterfly stages = log2(FFT_SIZE)

public:
  /// Builds the sine/cosine tables and the bit-reversal table.
  fft_t(void)
  {
    // Integer log2; FFT_SIZE is a power of two (checked above).
    for ( size_t n = FFT_SIZE ; n > 1 ; n >>= 1 )
    {
      ++_ie;
    }

    static constexpr float omega = 2.0f * M_PI / FFT_SIZE;
    static constexpr int s4 = FFT_SIZE / 4;
    static constexpr int s2 = FFT_SIZE / 2;
    // One cosine evaluation fills four table entries by symmetry.
    for ( int i = 1 ; i < s4 ; ++i )
    {
      const float f = cosf(omega * i);
      _wi[s4 + i] = f;
      _wi[s4 - i] = f;
      _wr[     i] = f;
      _wr[s2 - i] = -f;
    }
    _wi[s4] = _wr[0] = 1;

    size_t je = 1;
    _br[0] = 0;
    _br[1] = FFT_SIZE / 2;
    for ( size_t i = 0 ; i < _ie - 1 ; ++i )
    {
      _br[ je << 1 ] = _br[ je ] >> 1;
      je = je << 1;
      for ( size_t j = 1 ; j < je ; ++j )
      {
        _br[je + j] = _br[je] + _br[j];
      }
    }
  }

  /// Transforms one block of audio.
  ///
  /// @param in interleaved stereo samples; indices 0 .. FFT_SIZE * 2 - 1 are read.
  ///
  /// The result is kept internally and is read back with get().
  void exec(const int16_t* in)
  {
    memset(_fi, 0, sizeof(_fi));
    for ( size_t j = 0 ; j < FFT_SIZE / 2 ; ++j )
    {
      // The same weight is applied to sample j and to its mirror r.
      float basej = 0.25 * (1.0-_wr[j]);
      size_t r = FFT_SIZE - j - 1;

      /// perform han window and stereo to mono convert.
      _fr[_br[j]] = basej * (in[j * 2] + in[j * 2 + 1]);
      _fr[_br[r]] = basej * (in[r * 2] + in[r * 2 + 1]);
    }

    // In-place butterflies on the bit-reversed data: _ie stages, span doubles each stage.
    size_t s = 1;
    for ( size_t stage = 0 ; stage < _ie ; ++stage )
    {
      const size_t ke = s;             // half of the butterfly span
      s <<= 1;                         // butterfly span of this stage
      const size_t je = FFT_SIZE / s;  // number of groups in this stage
      for ( size_t j = 0 ; j < je ; ++j )
      {
        for ( size_t k = 0 ; k < ke ; ++k )
        {
          const size_t l = s * j + k;
          const size_t m = ke * (2 * j + 1) + k;
          const size_t p = je * k;     // twiddle index, always < FFT_SIZE / 2
          const float Wxmr = _fr[m] * _wr[p] + _fi[m] * _wi[p];
          const float Wxmi = _fi[m] * _wr[p] - _fr[m] * _wi[p];
          _fr[m] = _fr[l] - Wxmr;
          _fi[m] = _fi[l] - Wxmi;
          _fr[l] += Wxmr;
          _fi[l] += Wxmi;
        }
      }
    }
  }

  /// Magnitude of frequency bin `index` from the last exec(), truncated to an
  /// integer. Returns 0 for index >= FFT_SIZE / 2.
  uint32_t get(size_t index) const
  {
    return (index < FFT_SIZE / 2) ? (uint32_t)sqrtf(_fr[ index ] * _fr[ index ] + _fi[ index ] * _fi[ index ]) : 0u;
  }
};
112 changes: 82 additions & 30 deletions examples/audio/audio.ino
Original file line number Diff line number Diff line change
@@ -1,72 +1,124 @@
#pragma mark - Depend ESP8266Audio and ESP8266_Spiram libraries
#include <Arduino.h>
/*
cd ~/Arduino/libraries
git clone https://github.com/earlephilhower/ESP8266Audio
git clone https://github.com/Gianbacchio/ESP8266_Spiram
Use the "Tools->ESP32 Sketch Data Upload" menu to write the MP3 to SPIFFS
Then upload the sketch normally.
https://github.com/me-no-dev/arduino-esp32fs-plugin
*/

#include <HTTPClient.h>
#include <ESP8266Spiram.h>
#include <M5Unified.h>
#include <WiFi.h>
#include "SPIFFS.h"
#include "AudioFileSourceSPIFFS.h"
#include "AudioFileSourceID3.h"
#include "AudioGeneratorMP3.h"
#include "AudioOutputI2S.h"
#include "AudioOutputM5Speaker.hpp"
#include <Avatar.h>

using namespace m5avatar;

// NOTE(review): mp3, file and out are each declared twice in this section,
// once as a pointer and once as an object. This looks like both sides of a
// diff shown together; only one set can remain for the sketch to compile.
AudioGeneratorMP3 *mp3;
AudioFileSourceSPIFFS *file;
AudioOutputI2S *out;
// Speaker channel handed to the AudioOutputM5Speaker instance below.
static constexpr uint8_t m5spk_virtual_channel = 0;

AudioGeneratorMP3 mp3;
AudioFileSourceSPIFFS file;
AudioFileSourceID3 *id3;
AudioOutputM5Speaker out(&M5.Speaker, m5spk_virtual_channel);

// FFT state and scratch buffer used by lipsync(); raw_data holds
// WAVE_SIZE * 2 int16_t values.
static fft_t fft;
static constexpr size_t WAVE_SIZE = 320;
static int16_t raw_data[WAVE_SIZE * 2];

// Divisor that turns the summed FFT level into a mouth ratio. lipsync() raises
// lipsync_level_max while running; loop() resets it to LIPSYNC_LEVEL_MAX.
#define LIPSYNC_LEVEL_MAX 10.0f
static float lipsync_level_max = LIPSYNC_LEVEL_MAX;
float mouth_ratio = 0.0f;

Avatar avatar;

// Ring buffer of recent output levels, averaged by avgLevel().
// NOTE(review): the index is declared as `levelsIdx` here but loop() refers to
// `levelIdx`; the names do not match.
int levels[10];
const int levelsSize = sizeof(levels) / sizeof(int);
int levelsIdx = 0;
// Avatar task body: repeatedly derives a mouth-open ratio from the spectrum of
// the buffer returned by out.getBuffer() and applies it to the avatar.
// `args` is cast to a DriveContext*, from which the Avatar is obtained.
// The function never returns (endless loop).
void lipsync(void *args) {
DriveContext * ctx = reinterpret_cast<DriveContext *>(args);
Avatar *avatar = ctx->getAvatar();
for(;;) {
uint64_t level = 0;
auto buf = out.getBuffer();
if (buf) {
// Work on a private copy (WAVE_SIZE * 2 samples) and run the FFT on it.
memcpy(raw_data, buf, WAVE_SIZE * 2 * sizeof(int16_t));
fft.exec(raw_data);
// Range of FFT bins that contribute to lipsync: 0 (low) to 63 (high).
for (size_t bx = 0; bx <= 63; ++bx) {
int32_t f = fft.get(bx);
level += abs(f);
}
}

// NOTE(review): the avgLevel() lines below start a function definition inside
// lipsync()'s loop and have no closing brace of their own. They look like
// removed lines of a diff shown together with the added ones.
int avgLevel() {
int sum = 0;
for (int i = 0; i < levelsSize; i++) {
sum += levels[i];
}
return sum / levelsSize;
// Scale the summed level by the current limit.
mouth_ratio = (float)(level >> 16)/lipsync_level_max;
// The ratio is capped at 1.2.
if (mouth_ratio > 1.2f) {
if (mouth_ratio > 1.5f) {
lipsync_level_max += 10.0f; // If the lipsync limit is updated significantly, the limit will be increased.
}
mouth_ratio = 1.2f;
}
avatar->setMouthOpenRatio(mouth_ratio);
// Yield to other tasks before the next update.
vTaskDelay(1/portTICK_PERIOD_MS);
}
}

// Arduino entry point: initialises the M5 device, configures and starts the
// speaker, starts SPIFFS and the avatar, and registers lipsync() as an avatar
// task.
void setup()
{
// NOTE(review): M5.begin() is called here and again with `cfg` two lines
// below; this looks like the old and the new line of a diff shown together.
M5.begin();
auto cfg = M5.config();
M5.begin(cfg);
{
// Read the current speaker configuration, adjust it, and write it back
// before M5.Speaker.begin().
auto spk_cfg = M5.Speaker.config();
spk_cfg.sample_rate = 96000;
spk_cfg.task_pinned_core = APP_CPU_NUM;
// If the sound is choppy, change the following spk_cfg parameters.
//spk_cfg.task_priority = 1;
//spk_cfg.dma_buf_count = 8;
//spk_cfg.dma_buf_len = 512;

M5.Speaker.config(spk_cfg);
}
M5.Speaker.begin();
M5.Speaker.setVolume(128);

WiFi.mode(WIFI_OFF);
SPIFFS.begin();
delay(500);
avatar.init();
// lipsync() receives the avatar's context through its void* argument.
avatar.addTask(lipsync, "lipsync");
}

void playMusic(const char* filename) {
file.open(filename);
id3 = new AudioFileSourceID3(&file);
id3->open(filename);
mp3.begin(id3, &out);
}

// Arduino main loop: when button A is pressed, plays "/nyaan.mp3" and blocks
// until playback has finished.
//
// NOTE(review): this body contains two playback implementations at once, one
// using mp3/file/out as pointers (`mp3->...`) and one using them as objects
// (`mp3. ...`). It looks like the removed and added lines of a diff shown
// together; the braces do not balance as written.
void loop()
{
M5.update();
if (M5.BtnA.wasPressed()) {
Serial.printf("Sample MP3 playback begins...\n");
file = new AudioFileSourceSPIFFS("/nyaan.mp3");
id3 = new AudioFileSourceID3(file);
out = new AudioOutputI2S(0, 1); // Output to builtInDAC
out->SetOutputModeMono(true);
out->SetGain(0.16);
mp3 = new AudioGeneratorMP3();
mp3->begin(id3, out);
while (mp3->isRunning()) {
// NOTE(review): `levelIdx` is used here, while the index declared at file
// scope is named `levelsIdx`.
levels[levelIdx] = abs(out->getLevel());
levelIdx = (levelIdx + 1) % levelsSize;
float f = avgLevel() / 12000.0;
avatar.setMouthOpenRatio(f);
if (!mp3->loop()) mp3->stop();
playMusic("/nyaan.mp3");
// Time of the last progress message; static so it persists across calls.
static int lastms = 0;

while (mp3.isRunning()) {
// Print a progress line when more than 1000 ms have passed since the last one.
if (millis()-lastms > 1000) {
lastms = millis();
Serial.printf("Running for %d ms...\n", lastms);
Serial.flush();
}
if (!mp3.loop()) {
Serial.printf("MP3 done\n");
mp3.stop();
delay(100);
// NOTE(review): mp3 and file are file-scope objects, yet their destructors
// are invoked explicitly here and the objects are used again by the next
// playMusic() call without being re-constructed. id3 was allocated with
// `new`; its destructor is run here but the memory is not released with
// `delete`. Confirm this is intended.
mp3.~AudioGeneratorMP3();
id3->~AudioFileSourceID3();
file.~AudioFileSourceSPIFFS();
// Restore the initial lipsync scaling limit for the next playback.
lipsync_level_max = LIPSYNC_LEVEL_MAX;
}
}
}
}
Loading