mirror of
https://github.com/libsdl-org/SDL.git
synced 2026-06-21 05:26:03 +00:00
[N-Gage] Remove optimisations except for native texture handling prior to some rework of the rendering back-end
[N-Gage] Set proper brush style to draw filled rects properly. [N-Gage] Add persistent buffers to avoid per-frame memory allocations (which are expensive) [N-Gage] Add support for SDL_TEXTURE_ACCESS_TARGET, fixes #13165 [N-Gage] Update README, add hint that the compiler does not support aggregate initializations for structs (knowing this, avoids a lot of headache during debugging) [N-Gage] Add basic fast-path optimisations for render operations. [N-Gage] Fix line drawing.
This commit is contained in:
parent
b181eb4ed0
commit
6e65c3fac4
7 changed files with 509 additions and 867 deletions
|
|
@ -23,65 +23,43 @@
|
|||
#include "SDL_render_ops.hpp"
|
||||
#include <3dtypes.h>
|
||||
|
||||
void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color, const TUint8 *colorLUT)
|
||||
void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color)
|
||||
{
|
||||
TUint16 *src_pixels = static_cast<TUint16 *>(source);
|
||||
TUint16 *dst_pixels = static_cast<TUint16 *>(dest);
|
||||
|
||||
// Pre-calculate pitch in pixels to avoid repeated division.
|
||||
const TInt pitchPixels = pitch >> 1;
|
||||
|
||||
// Pre-calculate LUT offsets to reduce addressing calculations.
|
||||
const TUint8 *lut_r = colorLUT;
|
||||
const TUint8 *lut_g = colorLUT + 256;
|
||||
const TUint8 *lut_b = colorLUT + 512;
|
||||
|
||||
// Process 4 pixels at a time.
|
||||
for (int y = 0; y < height; ++y) {
|
||||
const TInt rowOffset = y * pitchPixels;
|
||||
int x = 0;
|
||||
|
||||
// Process 4 pixels at once with optimized bit manipulation.
|
||||
for (; x < width - 3; x += 4) {
|
||||
// Load 4 pixels at once.
|
||||
TUint16 p0 = src_pixels[rowOffset + x];
|
||||
TUint16 p1 = src_pixels[rowOffset + x + 1];
|
||||
TUint16 p2 = src_pixels[rowOffset + x + 2];
|
||||
TUint16 p3 = src_pixels[rowOffset + x + 3];
|
||||
|
||||
// Pixel 0: Extract and modulate RGB4444 components.
|
||||
// RGB4444 format: RRRR GGGG BBBB xxxx
|
||||
TUint8 r0 = lut_r[(p0 >> 8) & 0xF0]; // Extract R (bits 12-15), shift to byte position
|
||||
TUint8 g0 = lut_g[(p0 >> 3) & 0xF8]; // Extract G (bits 6-9), scale to 8-bit
|
||||
TUint8 b0 = lut_b[(p0 << 3) & 0xF8]; // Extract B (bits 0-3), scale to 8-bit
|
||||
dst_pixels[rowOffset + x] = ((r0 & 0xF0) << 8) | ((g0 & 0xF0) << 3) | ((b0 & 0xF0) >> 1);
|
||||
|
||||
// Pixel 1
|
||||
TUint8 r1 = lut_r[(p1 >> 8) & 0xF0];
|
||||
TUint8 g1 = lut_g[(p1 >> 3) & 0xF8];
|
||||
TUint8 b1 = lut_b[(p1 << 3) & 0xF8];
|
||||
dst_pixels[rowOffset + x + 1] = ((r1 & 0xF0) << 8) | ((g1 & 0xF0) << 3) | ((b1 & 0xF0) >> 1);
|
||||
|
||||
// Pixel 2
|
||||
TUint8 r2 = lut_r[(p2 >> 8) & 0xF0];
|
||||
TUint8 g2 = lut_g[(p2 >> 3) & 0xF8];
|
||||
TUint8 b2 = lut_b[(p2 << 3) & 0xF8];
|
||||
dst_pixels[rowOffset + x + 2] = ((r2 & 0xF0) << 8) | ((g2 & 0xF0) << 3) | ((b2 & 0xF0) >> 1);
|
||||
|
||||
// Pixel 3
|
||||
TUint8 r3 = lut_r[(p3 >> 8) & 0xF0];
|
||||
TUint8 g3 = lut_g[(p3 >> 3) & 0xF8];
|
||||
TUint8 b3 = lut_b[(p3 << 3) & 0xF8];
|
||||
dst_pixels[rowOffset + x + 3] = ((r3 & 0xF0) << 8) | ((g3 & 0xF0) << 3) | ((b3 & 0xF0) >> 1);
|
||||
// Fast path: no color modulation (white color).
|
||||
if (color.r == 1.0f && color.g == 1.0f && color.b == 1.0f) {
|
||||
if (dest != source) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
TUint16 *src_row = src_pixels + (y * pitch / 2);
|
||||
TUint16 *dst_row = dst_pixels + (y * pitch / 2);
|
||||
for (int x = 0; x < width; ++x) {
|
||||
dst_row[x] = src_row[x];
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle remaining pixels.
|
||||
for (; x < width; ++x) {
|
||||
TUint16 pixel = src_pixels[rowOffset + x];
|
||||
TUint8 r = lut_r[(pixel >> 8) & 0xF0];
|
||||
TUint8 g = lut_g[(pixel >> 3) & 0xF8];
|
||||
TUint8 b = lut_b[(pixel << 3) & 0xF8];
|
||||
dst_pixels[rowOffset + x] = ((r & 0xF0) << 8) | ((g & 0xF0) << 3) | ((b & 0xF0) >> 1);
|
||||
TFixed rf = Real2Fix(color.r);
|
||||
TFixed gf = Real2Fix(color.g);
|
||||
TFixed bf = Real2Fix(color.b);
|
||||
|
||||
int pitch_offset = pitch / 2;
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
int row_offset = y * pitch_offset;
|
||||
for (int x = 0; x < width; ++x) {
|
||||
int idx = row_offset + x;
|
||||
TUint16 pixel = src_pixels[idx];
|
||||
TUint8 r = (pixel & 0xF800) >> 8;
|
||||
TUint8 g = (pixel & 0x07E0) >> 3;
|
||||
TUint8 b = (pixel & 0x001F) << 3;
|
||||
r = FixMul(r, rf);
|
||||
g = FixMul(g, gf);
|
||||
b = FixMul(b, bf);
|
||||
dst_pixels[idx] = (r << 8) | (g << 3) | (b >> 3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -91,62 +69,56 @@ void ApplyFlip(void *dest, void *source, int pitch, int width, int height, SDL_F
|
|||
TUint16 *src_pixels = static_cast<TUint16 *>(source);
|
||||
TUint16 *dst_pixels = static_cast<TUint16 *>(dest);
|
||||
|
||||
// Pre-calculate pitch in pixels to avoid repeated division.
|
||||
const TInt pitchPixels = pitch >> 1;
|
||||
|
||||
// Pre-calculate flip flags to avoid repeated bitwise operations.
|
||||
const bool flipHorizontal = (flip & SDL_FLIP_HORIZONTAL) != 0;
|
||||
const bool flipVertical = (flip & SDL_FLIP_VERTICAL) != 0;
|
||||
|
||||
// Fast path: No flip; just copy entire buffer.
|
||||
if (!flipHorizontal && !flipVertical) {
|
||||
Mem::Copy(dest, source, pitch * height);
|
||||
return;
|
||||
}
|
||||
|
||||
// Fast path: Vertical-only flip; copy rows in reverse order.
|
||||
if (flipVertical && !flipHorizontal) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
const int src_y = height - 1 - y;
|
||||
Mem::Copy(&dst_pixels[y * pitchPixels], &src_pixels[src_y * pitchPixels], pitch);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Slow path: Horizontal or both flips; need pixel-level operations.
|
||||
// Pre-calculate width/height bounds for horizontal/vertical flipping.
|
||||
const int width_m1 = width - 1;
|
||||
const int height_m1 = height - 1;
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
// Calculate destination row offset once per row.
|
||||
const TInt dstRowOffset = y * pitchPixels;
|
||||
|
||||
// Calculate source Y coordinate once per row.
|
||||
const int src_y = flipVertical ? (height_m1 - y) : y;
|
||||
const TInt srcRowOffset = src_y * pitchPixels;
|
||||
|
||||
int x = 0;
|
||||
|
||||
// Process 4 pixels at once.
|
||||
for (; x < width - 3; x += 4) {
|
||||
if (flipHorizontal) {
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + (width_m1 - x)];
|
||||
dst_pixels[dstRowOffset + x + 1] = src_pixels[srcRowOffset + (width_m1 - x - 1)];
|
||||
dst_pixels[dstRowOffset + x + 2] = src_pixels[srcRowOffset + (width_m1 - x - 2)];
|
||||
dst_pixels[dstRowOffset + x + 3] = src_pixels[srcRowOffset + (width_m1 - x - 3)];
|
||||
} else {
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + x];
|
||||
dst_pixels[dstRowOffset + x + 1] = src_pixels[srcRowOffset + x + 1];
|
||||
dst_pixels[dstRowOffset + x + 2] = src_pixels[srcRowOffset + x + 2];
|
||||
dst_pixels[dstRowOffset + x + 3] = src_pixels[srcRowOffset + x + 3];
|
||||
// Fast path: no flip.
|
||||
if (flip == SDL_FLIP_NONE) {
|
||||
if (dest != source) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
TUint16 *src_row = src_pixels + (y * pitch / 2);
|
||||
TUint16 *dst_row = dst_pixels + (y * pitch / 2);
|
||||
for (int x = 0; x < width; ++x) {
|
||||
dst_row[x] = src_row[x];
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle remaining pixels.
|
||||
for (; x < width; ++x) {
|
||||
const int src_x = flipHorizontal ? (width_m1 - x) : x;
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + src_x];
|
||||
int pitch_offset = pitch / 2;
|
||||
|
||||
// Fast path: horizontal flip only.
|
||||
if (flip == SDL_FLIP_HORIZONTAL) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
int dst_row_offset = y * pitch_offset;
|
||||
int src_row_offset = y * pitch_offset;
|
||||
int width_minus_1 = width - 1;
|
||||
for (int x = 0; x < width; ++x) {
|
||||
dst_pixels[dst_row_offset + x] = src_pixels[src_row_offset + (width_minus_1 - x)];
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Fast path: vertical flip only.
|
||||
if (flip == SDL_FLIP_VERTICAL) {
|
||||
int height_minus_1 = height - 1;
|
||||
for (int y = 0; y < height; ++y) {
|
||||
int dst_row_offset = y * pitch_offset;
|
||||
int src_row_offset = (height_minus_1 - y) * pitch_offset;
|
||||
for (int x = 0; x < width; ++x) {
|
||||
dst_pixels[dst_row_offset + x] = src_pixels[src_row_offset + x];
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Both horizontal and vertical flip
|
||||
int width_minus_1 = width - 1;
|
||||
int height_minus_1 = height - 1;
|
||||
for (int y = 0; y < height; ++y) {
|
||||
int dst_row_offset = y * pitch_offset;
|
||||
int src_row_offset = (height_minus_1 - y) * pitch_offset;
|
||||
for (int x = 0; x < width; ++x) {
|
||||
dst_pixels[dst_row_offset + x] = src_pixels[src_row_offset + (width_minus_1 - x)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -156,152 +128,151 @@ void ApplyRotation(void *dest, void *source, int pitch, int width, int height, T
|
|||
TUint16 *src_pixels = static_cast<TUint16 *>(source);
|
||||
TUint16 *dst_pixels = static_cast<TUint16 *>(dest);
|
||||
|
||||
TFixed cos_angle = 0;
|
||||
TFixed sin_angle = 0;
|
||||
|
||||
if (angle != 0) {
|
||||
FixSinCos(angle, sin_angle, cos_angle);
|
||||
}
|
||||
|
||||
// Pre-calculate pitch in pixels to avoid repeated division.
|
||||
const TInt pitchPixels = pitch >> 1;
|
||||
|
||||
// Pre-check if rotation keeps all pixels within bounds to skip per-pixel checks.
|
||||
// Calculate the four corners of the image after rotation around center.
|
||||
bool allInBounds = true;
|
||||
if (angle != 0) {
|
||||
// Check corners: (0,0), (width-1,0), (0,height-1), (width-1,height-1)
|
||||
TFixed corners_x[4] = { -center_x, Int2Fix(width - 1) - center_x, -center_x, Int2Fix(width - 1) - center_x };
|
||||
TFixed corners_y[4] = { -center_y, -center_y, Int2Fix(height - 1) - center_y, Int2Fix(height - 1) - center_y };
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
TFixed rot_x = FixMul(corners_x[i], cos_angle) - FixMul(corners_y[i], sin_angle) + center_x;
|
||||
TFixed rot_y = FixMul(corners_x[i], sin_angle) + FixMul(corners_y[i], cos_angle) + center_y;
|
||||
int final_x = Fix2Int(rot_x);
|
||||
int final_y = Fix2Int(rot_y);
|
||||
|
||||
if (final_x < 0 || final_x >= width || final_y < 0 || final_y >= height) {
|
||||
allInBounds = false;
|
||||
break;
|
||||
// Fast path: no rotation.
|
||||
if (angle == 0) {
|
||||
if (dest != source) {
|
||||
int pitch_offset = pitch / 2;
|
||||
for (int y = 0; y < height; ++y) {
|
||||
TUint16 *src_row = src_pixels + (y * pitch_offset);
|
||||
TUint16 *dst_row = dst_pixels + (y * pitch_offset);
|
||||
for (int x = 0; x < width; ++x) {
|
||||
dst_row[x] = src_row[x];
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Incremental DDA: Calculate per-pixel increments.
|
||||
// As we move right (x+1), the rotated position changes by (cos, -sin).
|
||||
const TFixed dx_cos = cos_angle;
|
||||
const TFixed dx_sin = -sin_angle;
|
||||
// Fast paths for 90-degree rotations
|
||||
TFixed angle_90 = Int2Fix(90);
|
||||
TFixed angle_180 = Int2Fix(180);
|
||||
TFixed angle_270 = Int2Fix(270);
|
||||
TFixed angle_360 = Int2Fix(360);
|
||||
|
||||
// Normalize angle to 0-360 range
|
||||
TFixed normalized_angle = angle;
|
||||
while (normalized_angle < 0) {
|
||||
normalized_angle += angle_360;
|
||||
}
|
||||
while (normalized_angle >= angle_360) {
|
||||
normalized_angle -= angle_360;
|
||||
}
|
||||
|
||||
int pitch_offset = pitch / 2;
|
||||
|
||||
// Fast path: 90-degree rotation (clockwise).
|
||||
if (normalized_angle == angle_90) {
|
||||
TFixed center_x_int = Fix2Int(center_x);
|
||||
TFixed center_y_int = Fix2Int(center_y);
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
// Translate to origin.
|
||||
int tx = x - center_x_int;
|
||||
int ty = y - center_y_int;
|
||||
// Rotate 90 degrees clockwise: (x, y) -> (y, -x).
|
||||
int rx = ty;
|
||||
int ry = -tx;
|
||||
// Translate back.
|
||||
int src_x = rx + center_x_int;
|
||||
int src_y = ry + center_y_int;
|
||||
if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) {
|
||||
dst_pixels[y * pitch_offset + x] = src_pixels[src_y * pitch_offset + src_x];
|
||||
} else {
|
||||
dst_pixels[y * pitch_offset + x] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Fast path: 180-degree rotation.
|
||||
if (normalized_angle == angle_180) {
|
||||
TFixed center_x_int = Fix2Int(center_x);
|
||||
TFixed center_y_int = Fix2Int(center_y);
|
||||
for (int y = 0; y < height; ++y) {
|
||||
int dst_row_offset = y * pitch_offset;
|
||||
for (int x = 0; x < width; ++x) {
|
||||
// Translate to origin
|
||||
int tx = x - center_x_int;
|
||||
int ty = y - center_y_int;
|
||||
// Rotate 180 degrees: (x, y) -> (-x, -y)
|
||||
int rx = -tx;
|
||||
int ry = -ty;
|
||||
// Translate back
|
||||
int src_x = rx + center_x_int;
|
||||
int src_y = ry + center_y_int;
|
||||
if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) {
|
||||
dst_pixels[dst_row_offset + x] = src_pixels[src_y * pitch_offset + src_x];
|
||||
} else {
|
||||
dst_pixels[dst_row_offset + x] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Fast path: 270-degree rotation (clockwise).
|
||||
if (normalized_angle == angle_270) {
|
||||
TFixed center_x_int = Fix2Int(center_x);
|
||||
TFixed center_y_int = Fix2Int(center_y);
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
// Translate to origin.
|
||||
int tx = x - center_x_int;
|
||||
int ty = y - center_y_int;
|
||||
// Rotate 270 degrees clockwise (or 90 counter-clockwise): (x, y) -> (-y, x).
|
||||
int rx = -ty;
|
||||
int ry = tx;
|
||||
// Translate back.
|
||||
int src_x = rx + center_x_int;
|
||||
int src_y = ry + center_y_int;
|
||||
if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) {
|
||||
dst_pixels[y * pitch_offset + x] = src_pixels[src_y * pitch_offset + src_x];
|
||||
} else {
|
||||
dst_pixels[y * pitch_offset + x] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
TFixed cos_angle = 0;
|
||||
TFixed sin_angle = 0;
|
||||
FixSinCos(angle, sin_angle, cos_angle);
|
||||
|
||||
// Pre-calculate the translation of center to origin.
|
||||
TFixed neg_center_x = -center_x;
|
||||
TFixed neg_center_y = -center_y;
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
// Calculate destination row offset once per row.
|
||||
const TInt dstRowOffset = y * pitchPixels;
|
||||
int dst_row_offset = y * pitch_offset;
|
||||
TFixed y_fixed = Int2Fix(y) + neg_center_y;
|
||||
|
||||
// Calculate starting position for this row.
|
||||
// For y, rotation transforms: x' = x*cos - y*sin, y' = x*sin + y*cos
|
||||
// At x=0: x' = -y*sin, y' = y*cos (relative to center)
|
||||
const TFixed translated_y = Int2Fix(y) - center_y;
|
||||
const TFixed row_start_x = center_x - FixMul(translated_y, sin_angle);
|
||||
const TFixed row_start_y = center_y + FixMul(translated_y, cos_angle);
|
||||
// Pre-calculate these values for the entire row.
|
||||
TFixed cos_mul_ty = FixMul(y_fixed, cos_angle);
|
||||
TFixed sin_mul_ty = FixMul(y_fixed, sin_angle);
|
||||
|
||||
// Start at x=0 position.
|
||||
TFixed src_x = row_start_x;
|
||||
TFixed src_y = row_start_y;
|
||||
// Starting position for the row (x=0).
|
||||
// rotated_x = cos(angle) * (0 - center_x) + sin(angle) * (y - center_y) + center_x
|
||||
// rotated_y = cos(angle) * (y - center_y) - sin(angle) * (0 - center_x) + center_y
|
||||
TFixed rotated_x = sin_mul_ty + center_x + FixMul(neg_center_x, cos_angle);
|
||||
TFixed rotated_y = cos_mul_ty + center_y - FixMul(neg_center_x, sin_angle);
|
||||
|
||||
int x = 0;
|
||||
for (int x = 0; x < width; ++x) {
|
||||
// Convert to integer coordinates.
|
||||
int final_x = Fix2Int(rotated_x);
|
||||
int final_y = Fix2Int(rotated_y);
|
||||
|
||||
if (allInBounds) {
|
||||
// Fast path: No bounds checking needed.
|
||||
for (; x < width - 3; x += 4) {
|
||||
// Pixel 0
|
||||
int final_x0 = Fix2Int(src_x);
|
||||
int final_y0 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 1
|
||||
int final_x1 = Fix2Int(src_x);
|
||||
int final_y1 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 2
|
||||
int final_x2 = Fix2Int(src_x);
|
||||
int final_y2 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 3
|
||||
int final_x3 = Fix2Int(src_x);
|
||||
int final_y3 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Write all 4 pixels without bounds checking.
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[final_y0 * pitchPixels + final_x0];
|
||||
dst_pixels[dstRowOffset + x + 1] = src_pixels[final_y1 * pitchPixels + final_x1];
|
||||
dst_pixels[dstRowOffset + x + 2] = src_pixels[final_y2 * pitchPixels + final_x2];
|
||||
dst_pixels[dstRowOffset + x + 3] = src_pixels[final_y3 * pitchPixels + final_x3];
|
||||
// Check bounds and copy pixel.
|
||||
if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) {
|
||||
dst_pixels[dst_row_offset + x] = src_pixels[final_y * pitch_offset + final_x];
|
||||
} else {
|
||||
dst_pixels[dst_row_offset + x] = 0;
|
||||
}
|
||||
|
||||
// Handle remaining pixels.
|
||||
for (; x < width; ++x) {
|
||||
int final_x = Fix2Int(src_x);
|
||||
int final_y = Fix2Int(src_y);
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x];
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
}
|
||||
} else {
|
||||
// Slow path: Bounds checking required.
|
||||
for (; x < width - 3; x += 4) {
|
||||
// Pixel 0
|
||||
int final_x0 = Fix2Int(src_x);
|
||||
int final_y0 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 1
|
||||
int final_x1 = Fix2Int(src_x);
|
||||
int final_y1 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 2
|
||||
int final_x2 = Fix2Int(src_x);
|
||||
int final_y2 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 3
|
||||
int final_x3 = Fix2Int(src_x);
|
||||
int final_y3 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Write all 4 pixels with bounds checking.
|
||||
dst_pixels[dstRowOffset + x] = (final_x0 >= 0 && final_x0 < width && final_y0 >= 0 && final_y0 < height) ? src_pixels[final_y0 * pitchPixels + final_x0] : 0;
|
||||
dst_pixels[dstRowOffset + x + 1] = (final_x1 >= 0 && final_x1 < width && final_y1 >= 0 && final_y1 < height) ? src_pixels[final_y1 * pitchPixels + final_x1] : 0;
|
||||
dst_pixels[dstRowOffset + x + 2] = (final_x2 >= 0 && final_x2 < width && final_y2 >= 0 && final_y2 < height) ? src_pixels[final_y2 * pitchPixels + final_x2] : 0;
|
||||
dst_pixels[dstRowOffset + x + 3] = (final_x3 >= 0 && final_x3 < width && final_y3 >= 0 && final_y3 < height) ? src_pixels[final_y3 * pitchPixels + final_x3] : 0;
|
||||
}
|
||||
|
||||
// Handle remaining pixels.
|
||||
for (; x < width; ++x) {
|
||||
// Convert to integer coordinates.
|
||||
int final_x = Fix2Int(src_x);
|
||||
int final_y = Fix2Int(src_y);
|
||||
|
||||
// Check bounds.
|
||||
if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) {
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x];
|
||||
} else {
|
||||
dst_pixels[dstRowOffset + x] = 0;
|
||||
}
|
||||
|
||||
// Incremental step: move to next pixel (just additions, no multiplications!).
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
}
|
||||
// Increment to next pixel (add rotation matrix column).
|
||||
rotated_x += cos_angle;
|
||||
rotated_y -= sin_angle;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -311,72 +282,46 @@ void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFix
|
|||
TUint16 *src_pixels = static_cast<TUint16 *>(source);
|
||||
TUint16 *dst_pixels = static_cast<TUint16 *>(dest);
|
||||
|
||||
// Fast path: Identity scale; just copy entire buffer.
|
||||
const TFixed identity = Int2Fix(1);
|
||||
if (scale_x == identity && scale_y == identity) {
|
||||
Mem::Copy(dest, source, pitch * height);
|
||||
TFixed one_fixed = Int2Fix(1);
|
||||
|
||||
// Fast path: no scaling (1.0x scale).
|
||||
if (scale_x == one_fixed && scale_y == one_fixed) {
|
||||
if (dest != source) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
TUint16 *src_row = src_pixels + (y * pitch / 2);
|
||||
TUint16 *dst_row = dst_pixels + (y * pitch / 2);
|
||||
for (int x = 0; x < width; ++x) {
|
||||
dst_row[x] = src_row[x];
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Pre-calculate pitch in pixels to avoid repeated division.
|
||||
const TInt pitchPixels = pitch >> 1;
|
||||
|
||||
// Pre-calculate inverse scale factors to use FixMul instead of FixDiv.
|
||||
// This is MUCH faster on N-Gage hardware (no division per pixel!).
|
||||
TFixed inv_scale_x = FixDiv(Int2Fix(1), scale_x);
|
||||
TFixed inv_scale_y = FixDiv(Int2Fix(1), scale_y);
|
||||
|
||||
// Pre-calculate center offset to reduce operations per pixel.
|
||||
TFixed center_x_fixed = center_x;
|
||||
TFixed center_y_fixed = center_y;
|
||||
int pitch_offset = pitch / 2;
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
// Calculate destination row offset once per row.
|
||||
TInt dstRowOffset = y * pitchPixels;
|
||||
int dst_row_offset = y * pitch_offset;
|
||||
TFixed y_fixed = Int2Fix(y);
|
||||
TFixed translated_y = y_fixed - center_y;
|
||||
TFixed scaled_y = FixDiv(translated_y, scale_y);
|
||||
|
||||
// Use inverse scale factor (multiply instead of divide).
|
||||
TFixed translated_y = Int2Fix(y) - center_y_fixed;
|
||||
TFixed scaled_y = FixMul(translated_y, inv_scale_y);
|
||||
int final_y = Fix2Int(scaled_y + center_y_fixed);
|
||||
for (int x = 0; x < width; ++x) {
|
||||
// Translate point to origin.
|
||||
TFixed translated_x = Int2Fix(x) - center_x;
|
||||
|
||||
// Check if this row is within bounds.
|
||||
bool rowInBounds = (final_y >= 0 && final_y < height);
|
||||
TInt srcRowOffset = final_y * pitchPixels;
|
||||
// Scale point.
|
||||
TFixed scaled_x = FixDiv(translated_x, scale_x);
|
||||
|
||||
// Incremental DDA for X: pre-calculate starting position and increment.
|
||||
TFixed src_x_start = FixMul(-center_x_fixed, inv_scale_x) + center_x_fixed;
|
||||
TFixed src_x = src_x_start;
|
||||
// Translate point back.
|
||||
int final_x = Fix2Int(scaled_x + center_x);
|
||||
int final_y = Fix2Int(scaled_y + center_y);
|
||||
|
||||
int x = 0;
|
||||
|
||||
// Process 4 pixels at once.
|
||||
for (; x < width - 3; x += 4) {
|
||||
// Process 4 pixels using incremental approach.
|
||||
int final_x0 = Fix2Int(src_x);
|
||||
src_x += inv_scale_x;
|
||||
int final_x1 = Fix2Int(src_x);
|
||||
src_x += inv_scale_x;
|
||||
int final_x2 = Fix2Int(src_x);
|
||||
src_x += inv_scale_x;
|
||||
int final_x3 = Fix2Int(src_x);
|
||||
src_x += inv_scale_x;
|
||||
|
||||
// Write all 4 pixels with bounds checking.
|
||||
dst_pixels[dstRowOffset + x] = (rowInBounds && final_x0 >= 0 && final_x0 < width) ? src_pixels[srcRowOffset + final_x0] : 0;
|
||||
dst_pixels[dstRowOffset + x + 1] = (rowInBounds && final_x1 >= 0 && final_x1 < width) ? src_pixels[srcRowOffset + final_x1] : 0;
|
||||
dst_pixels[dstRowOffset + x + 2] = (rowInBounds && final_x2 >= 0 && final_x2 < width) ? src_pixels[srcRowOffset + final_x2] : 0;
|
||||
dst_pixels[dstRowOffset + x + 3] = (rowInBounds && final_x3 >= 0 && final_x3 < width) ? src_pixels[srcRowOffset + final_x3] : 0;
|
||||
}
|
||||
|
||||
// Handle remaining pixels.
|
||||
for (; x < width; ++x) {
|
||||
int final_x = Fix2Int(src_x);
|
||||
src_x += inv_scale_x;
|
||||
|
||||
if (rowInBounds && final_x >= 0 && final_x < width) {
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + final_x];
|
||||
// Check bounds.
|
||||
if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) {
|
||||
dst_pixels[dst_row_offset + x] = src_pixels[final_y * pitch_offset + final_x];
|
||||
} else {
|
||||
dst_pixels[dstRowOffset + x] = 0;
|
||||
dst_pixels[dst_row_offset + x] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue