Skip to content

Commit

Permalink
Improve BitmapCanvas performance
Browse files Browse the repository at this point in the history
  • Loading branch information
dpjudas committed Jan 19, 2024
1 parent 09cfa35 commit 987eb5e
Showing 1 changed file with 181 additions and 7 deletions.
188 changes: 181 additions & 7 deletions src/core/canvas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
#include <stdexcept>
#include <cstring>

#if defined(__SSE2__) || defined(_M_X64)
#include <immintrin.h>
#define USE_SSE2
#endif

class CanvasTexture
{
public:
Expand Down Expand Up @@ -212,6 +217,7 @@ class BitmapCanvas : public Canvas

void drawLineUnclipped(const Point& p0, const Point& p1, const Colorf& color);

void fillTile(float x, float y, float width, float height, Colorf color);
void drawTile(CanvasTexture* texture, float x, float y, float width, float height, float u, float v, float uvwidth, float uvheight, Colorf color);
void drawGlyph(CanvasTexture* texture, float x, float y, float width, float height, float u, float v, float uvwidth, float uvheight, Colorf color);

Expand Down Expand Up @@ -300,7 +306,7 @@ void BitmapCanvas::popClip()

// Fills the rectangle `box` with `color`.
// The box is offset by `origin` and multiplied by `uiscale` before being
// handed to the pixel-level tile routines.
//
// NOTE(review): this span comes from a rendered diff and shows two draw calls.
// Presumably the drawTile(whiteTexture...) call is the pre-change line and only
// the fillTile(...) call is meant to remain — confirm against the real file.
// If both calls were kept, a translucent color would presumably be blended
// over the destination twice.
void BitmapCanvas::fillRect(const Rect& box, const Colorf& color)
{
drawTile(whiteTexture.get(), (float)((origin.x + box.x) * uiscale), (float)((origin.y + box.y) * uiscale), (float)(box.width * uiscale), (float)(box.height * uiscale), 0.0, 0.0, 1.0, 1.0, color);
fillTile((float)((origin.x + box.x) * uiscale), (float)((origin.y + box.y) * uiscale), (float)(box.width * uiscale), (float)(box.height * uiscale), color);
}

void BitmapCanvas::drawImage(const std::shared_ptr<Image>& image, const Point& pos)
Expand Down Expand Up @@ -489,11 +495,11 @@ void BitmapCanvas::drawLineUnclipped(const Point& p0, const Point& p1, const Col
{
if (p0.x == p1.x)
{
drawTile(whiteTexture.get(), (float)((p0.x - 0.5) * uiscale), (float)(p0.y * uiscale), (float)uiscale, (float)((p1.y - p0.y) * uiscale), 0.0f, 0.0f, 1.0f, 1.0f, color);
fillTile((float)((p0.x - 0.5) * uiscale), (float)(p0.y * uiscale), (float)uiscale, (float)((p1.y - p0.y) * uiscale), color);
}
else if (p0.y == p1.y)
{
drawTile(whiteTexture.get(), (float)(p0.x * uiscale), (float)((p0.y - 0.5) * uiscale), (float)((p1.x - p0.x) * uiscale), (float)uiscale, 0.0f, 0.0f, 1.0f, 1.0f, color);
fillTile((float)(p0.x * uiscale), (float)((p0.y - 0.5) * uiscale), (float)((p1.x - p0.x) * uiscale), (float)uiscale, color);
}
else
{
Expand Down Expand Up @@ -521,6 +527,111 @@ int BitmapCanvas::getClipMaxY() const
return clipStack.empty() ? height : (int)std::min((clipStack.back().y + clipStack.back().height) * uiscale, (double)height);
}

// Fills an axis-aligned rectangle of the canvas pixel buffer with one color.
//
// left, top, width, height: rectangle in destination pixels. The edges are
//   truncated to whole pixels and intersected with the current clip rectangle
//   (getClipMinX/Y, getClipMaxX/Y). Empty or fully clipped rectangles are a no-op.
// color: straight (non-premultiplied) RGBA; each component is clamped to [0,1]
//   and quantized to 8 bits.
//
// Pixels are packed as 0xAARRGGBB. Three cases are handled:
//   - alpha quantizes to 0:   nothing is written.
//   - alpha quantizes to 255: the packed color is stored directly.
//   - otherwise:              dest.rgb = color.rgb * a + dest.rgb * (1 - a)
//                             dest.a   = color.a       + dest.a   * (1 - a)
// The blend uses 8.8 fixed point. Every intermediate sum stays below 65536, so
// the 16-bit SSE2 lanes and the 32-bit scalar tail produce identical results.
void BitmapCanvas::fillTile(float left, float top, float width, float height, Colorf color)
{
	if (width <= 0.0f || height <= 0.0f)
		return;

	const int dwidth = this->width;
	uint32_t* dest = this->pixels.data();

	// Truncate to whole pixels, then intersect with the active clip rectangle.
	const int x0 = std::max((int)left, getClipMinX());
	const int y0 = std::max((int)top, getClipMinY());
	const int x1 = std::min((int)(left + width), getClipMaxX());
	const int y1 = std::min((int)(top + height), getClipMaxY());
	if (x1 <= x0 || y1 <= y0)
		return;

	const uint32_t cred = (int32_t)clamp(color.r * 255.0f, 0.0f, 255.0f);
	const uint32_t cgreen = (int32_t)clamp(color.g * 255.0f, 0.0f, 255.0f);
	const uint32_t cblue = (int32_t)clamp(color.b * 255.0f, 0.0f, 255.0f);
	const uint32_t calpha = (int32_t)clamp(color.a * 255.0f, 0.0f, 255.0f);

	// Fully transparent: blending would leave every destination pixel unchanged.
	if (calpha == 0)
		return;

	// Rescale alpha from [0,255] to [0,256] so that ">> 8" is an exact divide.
	const uint32_t alpha256 = calpha + (calpha >> 7);
	const uint32_t invalpha = 256 - alpha256;

	if (invalpha == 0) // Solid fill
	{
		const uint32_t c = (calpha << 24) | (cred << 16) | (cgreen << 8) | cblue;
#ifdef USE_SSE2
		const __m128i c4 = _mm_set1_epi32((int)c);
#endif

		for (int y = y0; y < y1; y++)
		{
			uint32_t* dline = dest + y * dwidth;

			int x = x0;
#ifdef USE_SSE2
			// Store four pixels per iteration; the scalar loop handles the remainder.
			const int ssex1 = x0 + (((x1 - x0) >> 2) << 2);
			while (x < ssex1)
			{
				_mm_storeu_si128((__m128i*)(dline + x), c4);
				x += 4;
			}
#endif

			while (x < x1)
			{
				dline[x] = c;
				x++;
			}
		}
	}
	else // Alpha blended fill
	{
		// Source contribution per channel in 8.8 fixed point. RGB is weighted by
		// alpha here so the per-pixel work is a single multiply-add and the
		// result can never exceed 255 (which would bleed into the next channel).
		const uint32_t addred = cred * alpha256;
		const uint32_t addgreen = cgreen * alpha256;
		const uint32_t addblue = cblue * alpha256;
		const uint32_t addalpha = calpha << 8;

#ifdef USE_SSE2
		// _mm_set_epi16 takes its arguments from the highest lane to the lowest.
		// After unpacking a 0xAARRGGBB pixel the lanes are, low to high, B G R A,
		// so the constant must be listed as A, R, G, B.
		const __m128i cadd = _mm_set_epi16(
			(short)addalpha, (short)addred, (short)addgreen, (short)addblue,
			(short)addalpha, (short)addred, (short)addgreen, (short)addblue);
		const __m128i cinvalpha = _mm_set1_epi16((short)invalpha);
		const __m128i chalf = _mm_set1_epi16(127);
		const __m128i zero = _mm_setzero_si128();
#endif

		for (int y = y0; y < y1; y++)
		{
			uint32_t* dline = dest + y * dwidth;

			int x = x0;
#ifdef USE_SSE2
			// Blend two pixels per iteration; the scalar loop handles an odd tail.
			const int ssex1 = x0 + (((x1 - x0) >> 1) << 1);
			while (x < ssex1)
			{
				__m128i dpixel = _mm_loadl_epi64((const __m128i*)(dline + x));
				dpixel = _mm_unpacklo_epi8(dpixel, zero);

				// dest = (color * a + dest * (1 - a) + 127) >> 8
				__m128i result = _mm_add_epi16(cadd, _mm_mullo_epi16(dpixel, cinvalpha));
				result = _mm_srli_epi16(_mm_add_epi16(result, chalf), 8);
				_mm_storel_epi64((__m128i*)(dline + x), _mm_packus_epi16(result, zero));
				x += 2;
			}
#endif

			while (x < x1)
			{
				const uint32_t dpixel = dline[x];
				const uint32_t dalpha = dpixel >> 24;
				const uint32_t dred = (dpixel >> 16) & 0xff;
				const uint32_t dgreen = (dpixel >> 8) & 0xff;
				const uint32_t dblue = dpixel & 0xff;

				// dest = (color * a + dest * (1 - a) + 127) >> 8
				const uint32_t a = (addalpha + dalpha * invalpha + 127) >> 8;
				const uint32_t r = (addred + dred * invalpha + 127) >> 8;
				const uint32_t g = (addgreen + dgreen * invalpha + 127) >> 8;
				const uint32_t b = (addblue + dblue * invalpha + 127) >> 8;
				dline[x] = (a << 24) | (r << 16) | (g << 8) | b;
				x++;
			}
		}
	}
}

void BitmapCanvas::drawTile(CanvasTexture* texture, float left, float top, float width, float height, float u, float v, float uvwidth, float uvheight, Colorf color)
{
if (width <= 0.0f || height <= 0.0f)
Expand Down Expand Up @@ -550,6 +661,9 @@ void BitmapCanvas::drawTile(CanvasTexture* texture, float left, float top, float
uint32_t cgreen = (int32_t)clamp(color.g * 256.0f, 0.0f, 256.0f);
uint32_t cblue = (int32_t)clamp(color.b * 256.0f, 0.0f, 256.0f);
uint32_t calpha = (int32_t)clamp(color.a * 256.0f, 0.0f, 256.0f);
#ifdef USE_SSE2
__m128i crgba = _mm_set_epi16(calpha, cblue, cgreen, cred, calpha, cblue, cgreen, cred);
#endif

float uscale = uvwidth / width;
float vscale = uvheight / height;
Expand All @@ -558,9 +672,39 @@ void BitmapCanvas::drawTile(CanvasTexture* texture, float left, float top, float
{
float vpix = v + vscale * (y + 0.5f - top);
const uint32_t* sline = src + ((int)vpix) * swidth;

uint32_t* dline = dest + y * dwidth;
for (int x = x0; x < x1; x++)

int x = x0;
#ifdef USE_SSE2
int ssex1 = x0 + (((x1 - x0) >> 1) << 1);
while (x < ssex1)
{
float upix0 = u + uscale * (x + 0.5f - left);
float upix1 = u + uscale * (x + 1 + 0.5f - left);
uint32_t spixel0 = sline[(int)upix0];
uint32_t spixel1 = sline[(int)upix1];
__m128i spixel = _mm_set_epi32(0, 0, spixel1, spixel0);
spixel = _mm_unpacklo_epi8(spixel, _mm_setzero_si128());

__m128i dpixel = _mm_loadl_epi64((const __m128i*)(dline + x));
dpixel = _mm_unpacklo_epi8(dpixel, _mm_setzero_si128());

// Pixel shade
spixel = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(spixel, crgba), _mm_set1_epi16(127)), 8);

// Rescale from [0,255] to [0,256]
__m128i sa = _mm_shufflehi_epi16(_mm_shufflelo_epi16(spixel, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3));
sa = _mm_add_epi16(sa, _mm_srli_epi16(sa, 7));
__m128i sinva = _mm_sub_epi16(_mm_set1_epi16(256), sa);

// dest.rgba = color.rgba * src.rgba * src.a + dest.rgba * (1-src.a)
__m128i result = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(spixel, sa), _mm_mullo_epi16(dpixel, sinva)), _mm_set1_epi16(127)), 8);
_mm_storel_epi64((__m128i*)(dline + x), _mm_packus_epi16(result, _mm_setzero_si128()));
x += 2;
}
#endif

while (x < x1)
{
float upix = u + uscale * (x + 0.5f - left);
uint32_t spixel = sline[(int)upix];
Expand Down Expand Up @@ -591,6 +735,7 @@ void BitmapCanvas::drawTile(CanvasTexture* texture, float left, float top, float
uint32_t g = (sgreen * sa + dgreen * sinva + 127) >> 8;
uint32_t b = (sblue * sa + dblue * sinva + 127) >> 8;
dline[x] = (a << 24) | (r << 16) | (g << 8) | b;
x++;
}
}
}
Expand Down Expand Up @@ -623,6 +768,9 @@ void BitmapCanvas::drawGlyph(CanvasTexture* texture, float left, float top, floa
uint32_t cred = (int32_t)clamp(color.r * 255.0f, 0.0f, 255.0f);
uint32_t cgreen = (int32_t)clamp(color.g * 255.0f, 0.0f, 255.0f);
uint32_t cblue = (int32_t)clamp(color.b * 255.0f, 0.0f, 255.0f);
#ifdef USE_SSE2
__m128i crgba = _mm_set_epi16(0, cblue, cgreen, cred, 0, cblue, cgreen, cred);
#endif

float uscale = uvwidth / width;
float vscale = uvheight / height;
Expand All @@ -631,9 +779,34 @@ void BitmapCanvas::drawGlyph(CanvasTexture* texture, float left, float top, floa
{
float vpix = v + vscale * (y + 0.5f - top);
const uint32_t* sline = src + ((int)vpix) * swidth;

uint32_t* dline = dest + y * dwidth;
for (int x = x0; x < x1; x++)

int x = x0;
#ifdef USE_SSE2
int ssex1 = x0 + (((x1 - x0) >> 1) << 1);
while (x < ssex1)
{
float upix0 = u + uscale * (x + 0.5f - left);
float upix1 = u + uscale * (x + 1 + 0.5f - left);
uint32_t spixel0 = sline[(int)upix0];
uint32_t spixel1 = sline[(int)upix1];
__m128i spixel = _mm_set_epi32(0, 0, spixel1, spixel0);
spixel = _mm_unpacklo_epi8(spixel, _mm_setzero_si128());

__m128i dpixel = _mm_loadl_epi64((const __m128i*)(dline + x));
dpixel = _mm_unpacklo_epi8(dpixel, _mm_setzero_si128());

// Rescale from [0,255] to [0,256]
spixel = _mm_add_epi16(spixel, _mm_srli_epi16(spixel, 7));

// dest.rgb = color.rgb * src.rgb + dest.rgb * (1-src.rgb)
__m128i result = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(crgba, spixel), _mm_mullo_epi16(dpixel, _mm_sub_epi16(_mm_set1_epi16(256), spixel))), _mm_set1_epi16(127)), 8);
_mm_storel_epi64((__m128i*)(dline + x), _mm_or_si128(_mm_packus_epi16(result, _mm_setzero_si128()), _mm_set1_epi32(0xff000000)));
x += 2;
}
#endif

while (x < x1)
{
float upix = u + uscale * (x + 0.5f - left);
uint32_t spixel = sline[(int)upix];
Expand All @@ -656,6 +829,7 @@ void BitmapCanvas::drawGlyph(CanvasTexture* texture, float left, float top, floa
uint32_t g = (cgreen * sgreen + dgreen * (256 - sgreen) + 127) >> 8;
uint32_t b = (cblue * sblue + dblue * (256 - sblue) + 127) >> 8;
dline[x] = 0xff000000 | (r << 16) | (g << 8) | b;
x++;
}
}
}
Expand Down

0 comments on commit 987eb5e

Please sign in to comment.