diff --git a/src/core/canvas.cpp b/src/core/canvas.cpp
index 8ed79ad..1a630b3 100644
--- a/src/core/canvas.cpp
+++ b/src/core/canvas.cpp
@@ -13,6 +13,11 @@
 #include
 #include
 
+#if defined(__SSE2__) || defined(_M_X64)
+#include <emmintrin.h>
+#define USE_SSE2
+#endif
+
 class CanvasTexture
 {
 public:
@@ -212,6 +217,7 @@ class BitmapCanvas : public Canvas
 
 	void drawLineUnclipped(const Point& p0, const Point& p1, const Colorf& color);
 
+	void fillTile(float x, float y, float width, float height, Colorf color);
 	void drawTile(CanvasTexture* texture, float x, float y, float width, float height, float u, float v, float uvwidth, float uvheight, Colorf color);
 	void drawGlyph(CanvasTexture* texture, float x, float y, float width, float height, float u, float v, float uvwidth, float uvheight, Colorf color);
 
@@ -300,7 +306,7 @@ void BitmapCanvas::popClip()
 
 void BitmapCanvas::fillRect(const Rect& box, const Colorf& color)
 {
-	drawTile(whiteTexture.get(), (float)((origin.x + box.x) * uiscale), (float)((origin.y + box.y) * uiscale), (float)(box.width * uiscale), (float)(box.height * uiscale), 0.0, 0.0, 1.0, 1.0, color);
+	fillTile((float)((origin.x + box.x) * uiscale), (float)((origin.y + box.y) * uiscale), (float)(box.width * uiscale), (float)(box.height * uiscale), color);
 }
 
 void BitmapCanvas::drawImage(const std::shared_ptr& image, const Point& pos)
@@ -489,11 +495,11 @@ void BitmapCanvas::drawLineUnclipped(const Point& p0, const Point& p1, const Col
 {
 	if (p0.x == p1.x)
 	{
-		drawTile(whiteTexture.get(), (float)((p0.x - 0.5) * uiscale), (float)(p0.y * uiscale), (float)uiscale, (float)((p1.y - p0.y) * uiscale), 0.0f, 0.0f, 1.0f, 1.0f, color);
+		fillTile((float)((p0.x - 0.5) * uiscale), (float)(p0.y * uiscale), (float)uiscale, (float)((p1.y - p0.y) * uiscale), color);
 	}
 	else if (p0.y == p1.y)
 	{
-		drawTile(whiteTexture.get(), (float)(p0.x * uiscale), (float)((p0.y - 0.5) * uiscale), (float)((p1.x - p0.x) * uiscale), (float)uiscale, 0.0f, 0.0f, 1.0f, 1.0f, color);
+		fillTile((float)(p0.x * uiscale), (float)((p0.y - 0.5) * uiscale), (float)((p1.x - p0.x) * uiscale), (float)uiscale, color);
 	}
 	else
 	{
@@ -521,6 +527,118 @@ int BitmapCanvas::getClipMaxY() const
 	return clipStack.empty() ? height : (int)std::min((clipStack.back().y + clipStack.back().height) * uiscale, (double)height);
 }
 
+// Fills an axis-aligned rectangle with one color without sampling a texture.
+//
+// left/top/width/height are in destination pixels. The edges are truncated to whole
+// pixels and clamped to the active clip bounds. A color whose alpha quantizes to 255
+// is stored directly; anything else is blended as dest = color + dest * (1 - color.a).
+// NOTE(review): color.rgb is added unscaled by color.a, so this looks like it expects
+// premultiplied color - confirm against the callers (fillRect, drawLineUnclipped).
+void BitmapCanvas::fillTile(float left, float top, float width, float height, Colorf color)
+{
+	if (width <= 0.0f || height <= 0.0f)
+		return;
+
+	int dwidth = this->width;
+	uint32_t* dest = this->pixels.data();
+
+	int x0 = (int)left;
+	int x1 = (int)(left + width);
+	int y0 = (int)top;
+	int y1 = (int)(top + height);
+
+	x0 = std::max(x0, getClipMinX());
+	y0 = std::max(y0, getClipMinY());
+	x1 = std::min(x1, getClipMaxX());
+	y1 = std::min(y1, getClipMaxY());
+	if (x1 <= x0 || y1 <= y0)
+		return;
+
+	uint32_t cred = (int32_t)clamp(color.r * 255.0f, 0.0f, 255.0f);
+	uint32_t cgreen = (int32_t)clamp(color.g * 255.0f, 0.0f, 255.0f);
+	uint32_t cblue = (int32_t)clamp(color.b * 255.0f, 0.0f, 255.0f);
+	uint32_t calpha = (int32_t)clamp(color.a * 255.0f, 0.0f, 255.0f);
+	uint32_t invalpha = 256 - (calpha + (calpha >> 7)); // alpha rescaled from [0,255] to [0,256], then inverted
+
+	if (invalpha == 0) // Solid fill
+	{
+		uint32_t c = (calpha << 24) | (cred << 16) | (cgreen << 8) | cblue;
+#ifdef USE_SSE2
+		__m128i crgba = _mm_set1_epi32(c);
+#endif
+
+		for (int y = y0; y < y1; y++)
+		{
+			uint32_t* dline = dest + y * dwidth;
+
+			int x = x0;
+#ifdef USE_SSE2
+			int ssex1 = x0 + (((x1 - x0) >> 2) << 2); // end of the span covered by whole groups of 4 pixels
+			while (x < ssex1)
+			{
+				_mm_storeu_si128((__m128i*)(dline + x), crgba);
+				x += 4;
+			}
+#endif
+
+			while (x < x1)
+			{
+				dline[x] = c;
+				x++;
+			}
+		}
+	}
+	else // Alpha blended fill
+	{
+		cred <<= 8;
+		cgreen <<= 8;
+		cblue <<= 8;
+		calpha <<= 8;
+#ifdef USE_SSE2
+		// Lanes must follow the unpacked pixel byte order (b, g, r, a), matching the scalar path below
+		__m128i crgba = _mm_set_epi16(calpha, cred, cgreen, cblue, calpha, cred, cgreen, cblue);
+		__m128i cinvalpha = _mm_set1_epi16(invalpha);
+#endif
+
+		for (int y = y0; y < y1; y++)
+		{
+			uint32_t* dline = dest + y * dwidth;
+
+			int x = x0;
+#ifdef USE_SSE2
+			int ssex1 = x0 + (((x1 - x0) >> 1) << 1); // end of the span covered by whole pixel pairs
+			while (x < ssex1)
+			{
+				__m128i dpixel = _mm_loadl_epi64((const __m128i*)(dline + x));
+				dpixel = _mm_unpacklo_epi8(dpixel, _mm_setzero_si128());
+
+				// dest.rgba = color.rgba + dest.rgba * (1-color.a)
+				__m128i result = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(crgba, _mm_mullo_epi16(dpixel, cinvalpha)), _mm_set1_epi16(127)), 8);
+				_mm_storel_epi64((__m128i*)(dline + x), _mm_packus_epi16(result, _mm_setzero_si128()));
+				x += 2;
+			}
+#endif
+
+			while (x < x1)
+			{
+				uint32_t dpixel = dline[x];
+				uint32_t dalpha = dpixel >> 24;
+				uint32_t dred = (dpixel >> 16) & 0xff;
+				uint32_t dgreen = (dpixel >> 8) & 0xff;
+				uint32_t dblue = dpixel & 0xff;
+
+				// dest.rgba = color.rgba + dest.rgba * (1-color.a)
+				uint32_t a = (calpha + dalpha * invalpha + 127) >> 8;
+				uint32_t r = (cred + dred * invalpha + 127) >> 8;
+				uint32_t g = (cgreen + dgreen * invalpha + 127) >> 8;
+				uint32_t b = (cblue + dblue * invalpha + 127) >> 8;
+				dline[x] = (a << 24) | (r << 16) | (g << 8) | b;
+				x++;
+			}
+		}
+	}
+}
+
 void BitmapCanvas::drawTile(CanvasTexture* texture, float left, float top, float width, float height, float u, float v, float uvwidth, float uvheight, Colorf color)
 {
 	if (width <= 0.0f || height <= 0.0f)
@@ -550,6 +668,10 @@ void BitmapCanvas::drawTile(CanvasTexture* texture, float left, float top, float
 	uint32_t cgreen = (int32_t)clamp(color.g * 256.0f, 0.0f, 256.0f);
 	uint32_t cblue = (int32_t)clamp(color.b * 256.0f, 0.0f, 256.0f);
 	uint32_t calpha = (int32_t)clamp(color.a * 256.0f, 0.0f, 256.0f);
+#ifdef USE_SSE2
+	// Lanes must follow the unpacked pixel byte order (b, g, r, a) used when the result is stored
+	__m128i crgba = _mm_set_epi16(calpha, cred, cgreen, cblue, calpha, cred, cgreen, cblue);
+#endif
 
 	float uscale = uvwidth / width;
 	float vscale = uvheight / height;
@@ -558,9 +680,39 @@ void BitmapCanvas::drawTile(CanvasTexture* texture, float left, float top, float
 	{
 		float vpix = v + vscale * (y + 0.5f - top);
 		const uint32_t* sline = src + ((int)vpix) * swidth;
-
 		uint32_t* dline = dest + y * dwidth;
-		for (int x = x0; x < x1; x++)
+
+		int x = x0;
+#ifdef USE_SSE2
+		int ssex1 = x0 + (((x1 - x0) >> 1) << 1);
+		while (x < ssex1)
+		{
+			float upix0 = u + uscale * (x + 0.5f - left);
+			float upix1 = u + uscale * (x + 1 + 0.5f - left);
+			uint32_t spixel0 = sline[(int)upix0];
+			uint32_t spixel1 = sline[(int)upix1];
+			__m128i spixel = _mm_set_epi32(0, 0, spixel1, spixel0);
+			spixel = _mm_unpacklo_epi8(spixel, _mm_setzero_si128());
+
+			__m128i dpixel = _mm_loadl_epi64((const __m128i*)(dline + x));
+			dpixel = _mm_unpacklo_epi8(dpixel, _mm_setzero_si128());
+
+			// Pixel shade
+			spixel = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(spixel, crgba), _mm_set1_epi16(127)), 8);
+
+			// Rescale from [0,255] to [0,256]
+			__m128i sa = _mm_shufflehi_epi16(_mm_shufflelo_epi16(spixel, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3));
+			sa = _mm_add_epi16(sa, _mm_srli_epi16(sa, 7));
+			__m128i sinva = _mm_sub_epi16(_mm_set1_epi16(256), sa);
+
+			// dest.rgba = color.rgba * src.rgba * src.a + dest.rgba * (1-src.a)
+			__m128i result = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(spixel, sa), _mm_mullo_epi16(dpixel, sinva)), _mm_set1_epi16(127)), 8);
+			_mm_storel_epi64((__m128i*)(dline + x), _mm_packus_epi16(result, _mm_setzero_si128()));
+			x += 2;
+		}
+#endif
+
+		while (x < x1)
 		{
 			float upix = u + uscale * (x + 0.5f - left);
 			uint32_t spixel = sline[(int)upix];
@@ -591,6 +743,7 @@ void BitmapCanvas::drawTile(CanvasTexture* texture, float left, float top, float
 			uint32_t g = (sgreen * sa + dgreen * sinva + 127) >> 8;
 			uint32_t b = (sblue * sa + dblue * sinva + 127) >> 8;
 			dline[x] = (a << 24) | (r << 16) | (g << 8) | b;
+			x++;
 		}
 	}
 }
@@ -623,6 +776,10 @@ void BitmapCanvas::drawGlyph(CanvasTexture* texture, float left, float top, floa
 	uint32_t cred = (int32_t)clamp(color.r * 255.0f, 0.0f, 255.0f);
 	uint32_t cgreen = (int32_t)clamp(color.g * 255.0f, 0.0f, 255.0f);
 	uint32_t cblue = (int32_t)clamp(color.b * 255.0f, 0.0f, 255.0f);
+#ifdef USE_SSE2
+	// Lanes must follow the unpacked pixel byte order (b, g, r, a); the alpha lane stays 0 and is forced to 0xff on store
+	__m128i crgba = _mm_set_epi16(0, cred, cgreen, cblue, 0, cred, cgreen, cblue);
+#endif
 
 	float uscale = uvwidth / width;
 	float vscale = uvheight / height;
@@ -631,9 +788,34 @@ void BitmapCanvas::drawGlyph(CanvasTexture* texture, float left, float top, floa
 	{
 		float vpix = v + vscale * (y + 0.5f - top);
 		const uint32_t* sline = src + ((int)vpix) * swidth;
-
 		uint32_t* dline = dest + y * dwidth;
-		for (int x = x0; x < x1; x++)
+
+		int x = x0;
+#ifdef USE_SSE2
+		int ssex1 = x0 + (((x1 - x0) >> 1) << 1);
+		while (x < ssex1)
+		{
+			float upix0 = u + uscale * (x + 0.5f - left);
+			float upix1 = u + uscale * (x + 1 + 0.5f - left);
+			uint32_t spixel0 = sline[(int)upix0];
+			uint32_t spixel1 = sline[(int)upix1];
+			__m128i spixel = _mm_set_epi32(0, 0, spixel1, spixel0);
+			spixel = _mm_unpacklo_epi8(spixel, _mm_setzero_si128());
+
+			__m128i dpixel = _mm_loadl_epi64((const __m128i*)(dline + x));
+			dpixel = _mm_unpacklo_epi8(dpixel, _mm_setzero_si128());
+
+			// Rescale from [0,255] to [0,256]
+			spixel = _mm_add_epi16(spixel, _mm_srli_epi16(spixel, 7));
+
+			// dest.rgb = color.rgb * src.rgb + dest.rgb * (1-src.rgb)
+			__m128i result = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(crgba, spixel), _mm_mullo_epi16(dpixel, _mm_sub_epi16(_mm_set1_epi16(256), spixel))), _mm_set1_epi16(127)), 8);
+			_mm_storel_epi64((__m128i*)(dline + x), _mm_or_si128(_mm_packus_epi16(result, _mm_setzero_si128()), _mm_set1_epi32(0xff000000)));
+			x += 2;
+		}
+#endif
+
+		while (x < x1)
 		{
 			float upix = u + uscale * (x + 0.5f - left);
 			uint32_t spixel = sline[(int)upix];
@@ -656,6 +838,7 @@ void BitmapCanvas::drawGlyph(CanvasTexture* texture, float left, float top, floa
 			uint32_t g = (cgreen * sgreen + dgreen * (256 - sgreen) + 127) >> 8;
 			uint32_t b = (cblue * sblue + dblue * (256 - sblue) + 127) >> 8;
 			dline[x] = 0xff000000 | (r << 16) | (g << 8) | b;
+			x++;
 		}
 	}
 }