Skip to content

Commit

Permalink
Merge pull request #72 from esp-cpp/feature/video-speedup
Browse files Browse the repository at this point in the history
Feature/video speedup
  • Loading branch information
finger563 committed Jun 29, 2024
2 parents febdf32 + abbc832 commit 86732be
Show file tree
Hide file tree
Showing 14 changed files with 155 additions and 95 deletions.
8 changes: 4 additions & 4 deletions components/box-emu-hal/include/statistics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

#include "format.hpp"

void update_frame_time(float frame_time);
void update_frame_time(uint64_t frame_time);
void reset_frame_time();

float get_fps();
float get_frame_time();
float get_frame_time_max();
float get_frame_time_min();
uint64_t get_frame_time();
uint64_t get_frame_time_max();
uint64_t get_frame_time_min();
float get_frame_time_avg();

void print_statistics();
32 changes: 16 additions & 16 deletions components/box-emu-hal/src/statistics.cpp
Original file line number Diff line number Diff line change
@@ -1,48 +1,48 @@
#include "statistics.hpp"

static uint32_t num_frames = 0;
static float frame_time = 0.0f;
static float frame_time_total = 0.0f;
static float frame_time_max = 0.0f;
static float frame_time_min = 0.0f;
static uint64_t frame_time = 0.0f;
static uint64_t frame_time_total = 0.0f;
static uint64_t frame_time_max = 0.0f;
static uint64_t frame_time_min = 0.0f;
static float frame_time_avg = 0.0f;

void update_frame_time(float frame_time)
void update_frame_time(uint64_t frame_time)
{
num_frames++;
::frame_time = frame_time;
frame_time_total = frame_time_total + frame_time;
frame_time_max = std::max(frame_time_max, frame_time);
frame_time_min = std::min(frame_time_min, frame_time);
frame_time_avg = frame_time_total / num_frames;
frame_time_avg = float(frame_time_total) / num_frames;
}

void reset_frame_time()
{
num_frames = 0;
frame_time = 0.0f;
frame_time_total = 0.0f;
frame_time_max = 0.0f;
frame_time_min = 100000.0f; // some large number
frame_time = 0;
frame_time_total = 0;
frame_time_max = 0;
frame_time_min = 1000000; // some large number
frame_time_avg = 0.0f;
}

float get_fps() {
if (frame_time_total == 0.0f) {
if (frame_time_total == 0) {
return 0.0f;
}
return num_frames / frame_time_total;
return num_frames / (frame_time_total / 1e6f);
}

float get_frame_time() {
uint64_t get_frame_time() {
return frame_time;
}

float get_frame_time_max() {
uint64_t get_frame_time_max() {
return frame_time_max;
}

float get_frame_time_min() {
uint64_t get_frame_time_min() {
return frame_time_min;
}

Expand All @@ -55,5 +55,5 @@ void print_statistics() {
fmt::print("-----------\n");
fmt::print("Frames: {}\n", num_frames);
fmt::print("FPS: {:.1f}\n", get_fps());
fmt::print("Frame Time: [{:.1f}, {:.1f}, {:.1f}]\n", get_frame_time_min(), get_frame_time_avg(), get_frame_time_max());
fmt::print("Frame Time: [min {} us, avg: {:.1f} us, max: {} us]\n", get_frame_time_min(), get_frame_time_avg(), get_frame_time_max());
}
50 changes: 32 additions & 18 deletions components/box-emu-hal/src/video_task.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,27 +104,36 @@ static bool video_task(std::mutex &m, std::condition_variable& cv) {
if (has_palette()) {
const uint8_t* _frame = (const uint8_t*)_frame_ptr;
for (int i=0; i<num_lines; i++) {
// TODO: write two pixels (32 bits) at a time because it's faster
for (int j=0; j<display_width; j++) {
int index = (y+i)*native_pitch + j;
_buf[i*display_width + j] = _palette[_frame[index] % palette_size];
// write two pixels (32 bits) at a time because it's faster
for (int j=0; j<display_width/2; j++) {
int src_index = (y+i)*native_pitch + j * 2;
int dst_index = i*display_width + j * 2;
_buf[dst_index] = _palette[_frame[src_index] % palette_size];
_buf[dst_index + 1] = _palette[_frame[src_index + 1] % palette_size];
}
}
} else {
const uint16_t* _frame = (const uint16_t*)_frame_ptr;
for (int i=0; i<num_lines; i++) {
// TODO: write two pixels (32 bits) at a time because it's faster
for (int j=0; j<display_width; j++)
_buf[i*display_width + j] = _frame[(y+i)*native_pitch + j];
// write two pixels (32 bits) at a time because it's faster
for (int j=0; j<display_width/2; j++) {
int src_index = (y+i)*native_pitch + j * 2;
int dst_index = i*display_width + j * 2;
// memcpy(&_buf[i*display_width + j * 2], &_frame[(y+i)*native_pitch + j * 2], 4);
_buf[dst_index] = _frame[src_index];
_buf[dst_index + 1] = _frame[src_index + 1];
}
}
}
lcd_write_frame(x_offset, y + y_offset, display_width, num_lines, (uint8_t*)&_buf[0]);
}
} else {
// we are scaling the screen (and possibly using a custom palette)
// if we don't have a custom palette, we just need to scale/fill the frame
float y_scale = (float)display_height/native_height;
[[maybe_unused]] float y_scale = (float)display_height/native_height;
float x_scale = (float)display_width/native_width;
float inv_x_scale = (float)native_width/display_width;
float inv_y_scale = (float)native_height/display_height;
int max_y = lcd_height;
int max_x = std::clamp<int>(x_scale * native_width, 0, lcd_width);
for (int y=0; y<max_y; y+=num_lines_to_write) {
Expand All @@ -138,23 +147,28 @@ static bool video_task(std::mutex &m, std::condition_variable& cv) {
if (_y >= max_y) {
break;
}
int source_y = (float)_y/y_scale;
int source_y = (float)_y * inv_y_scale;
// should I put this around the outer loop, or is this loop a good
// balance for performance of the check?
if (has_palette()) {
const uint8_t* _frame = (const uint8_t*)_frame_ptr;
// TODO: write two pixels (32 bits) at a time because it's faster
for (int x=0; x<max_x; x++) {
int source_x = (float)x/x_scale;
int index = source_y*native_pitch + source_x;
_buf[i*max_x + x] = _palette[_frame[index] % palette_size];
// write two pixels (32 bits) at a time because it's faster
for (int x=0; x<max_x/2; x++) {
int source_x = (float)x * 2 * inv_x_scale;
int src_index = source_y*native_pitch + source_x;
int dst_index = i*max_x + x * 2;
_buf[dst_index] = _palette[_frame[src_index] % palette_size];
_buf[dst_index + 1] = _palette[_frame[src_index + 1] % palette_size];
}
} else {
const uint16_t* _frame = (const uint16_t*)_frame_ptr;
// TODO: write two pixels (32 bits) at a time because it's faster
for (int x=0; x<max_x; x++) {
int source_x = (float)x/x_scale;
_buf[i*max_x + x] = _frame[source_y*native_pitch + source_x];
// write two pixels (32 bits) at a time because it's faster
for (int x=0; x<max_x/2; x++) {
int source_x = (float)x * 2 * inv_x_scale;
int src_index = source_y*native_pitch + source_x;
int dst_index = i*max_x + x * 2;
_buf[dst_index] = _frame[src_index];
_buf[dst_index + 1] = _frame[src_index + 1];
}
}
}
Expand Down
14 changes: 7 additions & 7 deletions components/gbc/src/gameboy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ void init_gameboy(const std::string& rom_filename, uint8_t *romdata, size_t rom_
}

void run_gameboy_rom() {
auto start = std::chrono::steady_clock::now();
auto start = esp_timer_get_time();
// GET INPUT
InputState state;
hal::get_input_state(&state);
Expand All @@ -168,6 +168,7 @@ void run_gameboy_rom() {
pad_set(PAD_A, state.a);
pad_set(PAD_B, state.b);
run_to_vblank();
auto end = esp_timer_get_time();

// update unlock based on x button
static bool last_x = false;
Expand All @@ -176,13 +177,12 @@ void run_gameboy_rom() {
}
last_x = state.x;

auto end = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end-start);
auto elapsed_float = std::chrono::duration<float>(elapsed).count();
auto max_frame_time = std::chrono::milliseconds(15);
update_frame_time(elapsed_float);
auto elapsed = end - start;
update_frame_time(elapsed);
static constexpr uint64_t max_frame_time = 1000000 / 60;
if (!unlock && elapsed < max_frame_time) {
std::this_thread::sleep_for(max_frame_time - elapsed);
auto sleep_time = (max_frame_time - elapsed) / 1e3;
std::this_thread::sleep_for(sleep_time * 1ms);
}
}

Expand Down
2 changes: 1 addition & 1 deletion components/genesis/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ idf_component_register(
REQUIRES box-emu-hal
)
# target_compile_options(${COMPONENT_LIB} PRIVATE -Wno-char-subscripts -Wno-attributes -Wno-implicit-fallthrough -Wno-unused-function -Wno-unused-variable -Wno-discarded-qualifiers)
target_compile_options(${COMPONENT_LIB} PRIVATE -Wno-unused-const-variable -O3)
target_compile_options(${COMPONENT_LIB} PRIVATE -Wno-unused-const-variable -Wno-unused-value -O3)
# target_compile_definitions(${COMPONENT_LIB} PRIVATE GWENESIS_AUDIO_ACCURATE=0)
4 changes: 3 additions & 1 deletion components/genesis/gwenesis/src/cpus/M68K/m68kcpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ extern int vdp_68k_irq_ack(int int_level);
#endif
#endif

#include <esp_attr.h>

#include "m68kconf.h"
#include "m68kcpu.h"
#include "m68kops.h"
Expand Down Expand Up @@ -261,7 +263,7 @@ void m68k_set_irq_delay(unsigned int int_level)
m68ki_check_interrupts(); /* Level triggered (IRQ) */
}

void m68k_run(unsigned int cycles)
void IRAM_ATTR m68k_run(unsigned int cycles)
{
// printf("m68K_run current_cycles=%d add=%d STOP=%x\n",m68k.cycles,cycles,CPU_STOPPED);

Expand Down
4 changes: 3 additions & 1 deletion components/genesis/gwenesis/src/sound/gwenesis_sn76489.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
*/


#include <esp_attr.h>

#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
Expand Down Expand Up @@ -207,7 +209,7 @@ static inline void gwenesis_SN76489_Update(INT16 *buffer, int length)
}
/* SN76489 execution */
extern int scan_line;
void gwenesis_SN76489_run(int target) {
void IRAM_ATTR gwenesis_SN76489_run(int target) {

if ( sn76489_clock >= target) return;

Expand Down
5 changes: 3 additions & 2 deletions components/genesis/gwenesis/src/sound/ym2612.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@
#include <string.h>
#include <math.h>

#include <esp_attr.h>

#include "ym2612.h"
#include "gwenesis_bus.h"
#include "gwenesis_savestate.h"
Expand Down Expand Up @@ -1059,7 +1061,6 @@ INLINE void set_sl_rr(FM_SLOT *SLOT,int v)
SLOT->eg_sel_rr = eg_rate_select[SLOT->rr + SLOT->ksr];
}

/* advance LFO to next sample */
INLINE void advance_lfo()
{
if (ym2612.OPN.lfo_timer_overflow) /* LFO enabled ? */
Expand Down Expand Up @@ -2147,7 +2148,7 @@ static inline void YM2612Update(int16_t *buffer, int length)
INTERNAL_TIMER_B(length);
}

void ym2612_run( int target) {
void IRAM_ATTR ym2612_run( int target) {

if ( ym2612_clock >= target) {
return;
Expand Down
4 changes: 3 additions & 1 deletion components/genesis/gwenesis/src/sound/z80inst.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ __license__ = "GPLv3"
#include "gwenesis_sn76489.h"
#include "gwenesis_savestate.h"

#include <esp_attr.h>

#pragma GCC optimize("Ofast")

static int bus_ack = 0;
Expand Down Expand Up @@ -83,7 +85,7 @@ void z80_pulse_reset() {
}
static int current_timeslice = 0;

void z80_run(int target) {
void IRAM_ATTR z80_run(int target) {

// we are in advance, nothing to do
current_timeslice = 0;
Expand Down
Loading

0 comments on commit 86732be

Please sign in to comment.