diff options
Diffstat (limited to 'src/3rdparty/freetype/src/smooth')
-rw-r--r-- | src/3rdparty/freetype/src/smooth/ftgrays.c | 829 | ||||
-rw-r--r-- | src/3rdparty/freetype/src/smooth/ftgrays.h | 2 | ||||
-rw-r--r-- | src/3rdparty/freetype/src/smooth/ftsmerrs.h | 2 | ||||
-rw-r--r-- | src/3rdparty/freetype/src/smooth/ftsmooth.c | 2 | ||||
-rw-r--r-- | src/3rdparty/freetype/src/smooth/ftsmooth.h | 2 | ||||
-rw-r--r-- | src/3rdparty/freetype/src/smooth/module.mk | 2 | ||||
-rw-r--r-- | src/3rdparty/freetype/src/smooth/rules.mk | 2 | ||||
-rw-r--r-- | src/3rdparty/freetype/src/smooth/smooth.c | 2 |
8 files changed, 563 insertions, 280 deletions
diff --git a/src/3rdparty/freetype/src/smooth/ftgrays.c b/src/3rdparty/freetype/src/smooth/ftgrays.c index 681900fd40..622035aa79 100644 --- a/src/3rdparty/freetype/src/smooth/ftgrays.c +++ b/src/3rdparty/freetype/src/smooth/ftgrays.c @@ -4,7 +4,7 @@ * * A new `perfect' anti-aliasing renderer (body). * - * Copyright (C) 2000-2020 by + * Copyright (C) 2000-2022 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -149,14 +149,10 @@ #define FT_INT_MAX INT_MAX #define FT_ULONG_MAX ULONG_MAX -#define ADD_LONG( a, b ) \ - (long)( (unsigned long)(a) + (unsigned long)(b) ) -#define SUB_LONG( a, b ) \ - (long)( (unsigned long)(a) - (unsigned long)(b) ) -#define MUL_LONG( a, b ) \ - (long)( (unsigned long)(a) * (unsigned long)(b) ) -#define NEG_LONG( a ) \ - (long)( -(unsigned long)(a) ) +#define ADD_INT( a, b ) \ + (int)( (unsigned int)(a) + (unsigned int)(b) ) + +#define FT_STATIC_BYTE_CAST( type, var ) (type)(unsigned char)(var) #define ft_memset memset @@ -168,10 +164,11 @@ typedef ptrdiff_t FT_PtrDist; -#define ErrRaster_Invalid_Mode -2 -#define ErrRaster_Invalid_Outline -1 -#define ErrRaster_Invalid_Argument -3 -#define ErrRaster_Memory_Overflow -4 +#define Smooth_Err_Ok 0 +#define Smooth_Err_Invalid_Outline -1 +#define Smooth_Err_Cannot_Render_Glyph -2 +#define Smooth_Err_Invalid_Argument -3 +#define Smooth_Err_Raster_Overflow -4 #define FT_BEGIN_HEADER #define FT_END_HEADER @@ -229,23 +226,26 @@ typedef ptrdiff_t FT_PtrDist; #define FT_ERROR( varformat ) FT_Message varformat #endif -#define FT_THROW( e ) \ - ( FT_Throw( FT_ERR_CAT( ErrRaster_, e ), \ - __LINE__, \ - __FILE__ ) | \ - FT_ERR_CAT( ErrRaster_, e ) ) +#define FT_THROW( e ) \ + ( FT_Throw( FT_ERR_CAT( Smooth_Err_, e ), \ + __LINE__, \ + __FILE__ ) | \ + FT_ERR_CAT( Smooth_Err_, e ) ) #else /* !FT_DEBUG_LEVEL_TRACE */ #define FT_TRACE5( x ) do { } while ( 0 ) /* nothing */ #define FT_TRACE7( x ) do { } while ( 0 ) /* nothing */ #define FT_ERROR( x ) do { } while ( 0 ) /* nothing */ -#define FT_THROW( e ) FT_ERR_CAT( ErrRaster_, e ) - +#define FT_THROW( e ) FT_ERR_CAT( Smooth_Err_, e ) #endif /* !FT_DEBUG_LEVEL_TRACE */ +#define FT_Trace_Enable() do { } while ( 0 ) /* nothing */ +#define FT_Trace_Disable() do { } while ( 0 ) /* nothing */ + + #define FT_DEFINE_OUTLINE_FUNCS( class_, \ move_to_, line_to_, \ conic_to_, cubic_to_, \ @@ -278,6 +278,8 @@ typedef ptrdiff_t FT_PtrDist; #else /* !STANDALONE_ */ +#include <ft2build.h> +#include FT_CONFIG_CONFIG_H #include "ftgrays.h" #include <freetype/internal/ftobjs.h> #include <freetype/internal/ftdebug.h> @@ -286,10 +288,6 @@ typedef ptrdiff_t FT_PtrDist; #include "ftsmerrs.h" -#define Smooth_Err_Invalid_Mode Smooth_Err_Cannot_Render_Glyph -#define Smooth_Err_Memory_Overflow Smooth_Err_Out_Of_Memory -#define ErrRaster_Memory_Overflow Smooth_Err_Out_Of_Memory - #endif /* !STANDALONE_ */ @@ -335,7 +333,9 @@ typedef ptrdiff_t FT_PtrDist; #define PIXEL_BITS 8 #define ONE_PIXEL ( 1 << PIXEL_BITS ) +#undef TRUNC #define TRUNC( x ) (TCoord)( (x) >> PIXEL_BITS ) +#undef FRACT #define FRACT( x ) (TCoord)( (x) & ( ONE_PIXEL - 1 ) ) #if PIXEL_BITS >= 6 @@ -362,7 +362,7 @@ typedef ptrdiff_t FT_PtrDist; } \ FT_END_STMNT -#ifdef __arm__ +#if defined( __GNUC__ ) && __GNUC__ < 7 && defined( __arm__ ) /* Work around a bug specific to GCC which make the compiler fail to */ /* optimize a division and modulo operation on the same parameters */ /* into a single call to `__aeabi_idivmod'. See */ @@ -382,14 +382,58 @@ typedef ptrdiff_t FT_PtrDist; #endif /* __arm__ */ - /* These macros speed up repetitive divisions by replacing them */ - /* with multiplications and right shifts. */ -#define FT_UDIVPREP( c, b ) \ - long b ## _r = c ? (long)( FT_ULONG_MAX >> PIXEL_BITS ) / ( b ) \ - : 0 -#define FT_UDIV( a, b ) \ - (TCoord)( ( (unsigned long)( a ) * (unsigned long)( b ## _r ) ) >> \ - ( sizeof( long ) * FT_CHAR_BIT - PIXEL_BITS ) ) + /* Calculating coverages for a slanted line requires a division each */ + /* time the line crosses from cell to cell. These macros speed up */ + /* the repetitive divisions by replacing them with multiplications */ + /* and right shifts so that at most two divisions are performed for */ + /* each slanted line. Nevertheless, these divisions are noticeable */ + /* in the overall performance because flattened curves produce a */ + /* very large number of slanted lines. */ + /* */ + /* The division results here are always within ONE_PIXEL. Therefore */ + /* the shift magnitude should be at least PIXEL_BITS wider than the */ + /* divisors to provide sufficient accuracy of the multiply-shift. */ + /* It should not exceed (64 - PIXEL_BITS) to prevent overflowing and */ + /* leave enough room for 64-bit unsigned multiplication however. */ +#define FT_UDIVPREP( c, b ) \ + FT_Int64 b ## _r = c ? (FT_Int64)0xFFFFFFFF / ( b ) : 0 +#define FT_UDIV( a, b ) \ + (TCoord)( ( (FT_UInt64)( a ) * (FT_UInt64)( b ## _r ) ) >> 32 ) + + + /* Scale area and apply fill rule to calculate the coverage byte. */ + /* The top fill bit is used for the non-zero rule. The eighth */ + /* fill bit is used for the even-odd rule. The higher coverage */ + /* bytes are either clamped for the non-zero-rule or discarded */ + /* later for the even-odd rule. */ +#define FT_FILL_RULE( coverage, area, fill ) \ + FT_BEGIN_STMNT \ + coverage = (int)( area >> ( PIXEL_BITS * 2 + 1 - 8 ) ); \ + if ( coverage & fill ) \ + coverage = ~coverage; \ + if ( coverage > 255 && fill & INT_MIN ) \ + coverage = 255; \ + FT_END_STMNT + + + /* It is faster to write small spans byte-by-byte than calling */ + /* `memset'. This is mainly due to the cost of the function call. */ +#define FT_GRAY_SET( d, s, count ) \ + FT_BEGIN_STMNT \ + unsigned char* q = d; \ + switch ( count ) \ + { \ + case 7: *q++ = (unsigned char)s; /* fall through */ \ + case 6: *q++ = (unsigned char)s; /* fall through */ \ + case 5: *q++ = (unsigned char)s; /* fall through */ \ + case 4: *q++ = (unsigned char)s; /* fall through */ \ + case 3: *q++ = (unsigned char)s; /* fall through */ \ + case 2: *q++ = (unsigned char)s; /* fall through */ \ + case 1: *q = (unsigned char)s; /* fall through */ \ + case 0: break; \ + default: FT_MEM_SET( d, s, count ); \ + } \ + FT_END_STMNT /************************************************************************** @@ -432,7 +476,7 @@ typedef ptrdiff_t FT_PtrDist; #endif /* FT_Span buffer size for direct rendering only */ -#define FT_MAX_GRAY_SPANS 10 +#define FT_MAX_GRAY_SPANS 16 #if defined( _MSC_VER ) /* Visual C++ (and Intel C++) */ @@ -447,28 +491,24 @@ typedef ptrdiff_t FT_PtrDist; { ft_jmp_buf jump_buffer; - TCoord ex, ey; - TCoord min_ex, max_ex; + TCoord min_ex, max_ex; /* min and max integer pixel coordinates */ TCoord min_ey, max_ey; + TCoord count_ey; /* same as (max_ey - min_ey) */ - TArea area; - TCoord cover; - int invalid; + PCell cell; /* current cell */ + PCell cell_free; /* call allocation next free slot */ + PCell cell_null; /* last cell, used as dumpster and limit */ - PCell* ycells; - PCell cells; - FT_PtrDist max_cells; - FT_PtrDist num_cells; + PCell* ycells; /* array of cell linked-lists; one per */ + /* vertical coordinate in the current band */ - TPos x, y; + TPos x, y; /* last point position */ - FT_Outline outline; - TPixmap target; + FT_Outline outline; /* input outline */ + TPixmap target; /* target pixmap */ FT_Raster_Span_Func render_span; void* render_span_data; - FT_Span spans[FT_MAX_GRAY_SPANS]; - int num_spans; } gray_TWorker, *gray_PWorker; @@ -476,17 +516,25 @@ typedef ptrdiff_t FT_PtrDist; #pragma warning( pop ) #endif - #ifndef FT_STATIC_RASTER #define ras (*worker) #else static gray_TWorker ras; #endif + /* The |x| value of the null cell. Must be the largest possible */ + /* integer value stored in a `TCell.x` field. */ +#define CELL_MAX_X_VALUE INT_MAX + + +#define FT_INTEGRATE( ras, a, b ) \ + ras.cell->cover = ADD_INT( ras.cell->cover, a ), \ + ras.cell->area = ADD_INT( ras.cell->area, (a) * (TArea)(b) ) + typedef struct gray_TRaster_ { - void* memory; + void* memory; } gray_TRaster, *gray_PRaster; @@ -508,7 +556,7 @@ typedef ptrdiff_t FT_PtrDist; printf( "%3d:", y ); - for ( ; cell != NULL; cell = cell->next ) + for ( ; cell != ras.cell_null; cell = cell->next ) printf( " (%3d, c:%4d, a:%6d)", cell->x, cell->cover, cell->area ); printf( "\n" ); @@ -520,81 +568,67 @@ typedef ptrdiff_t FT_PtrDist; /************************************************************************** * - * Record the current cell in the linked list. + * Set the current cell to a new position. */ static void - gray_record_cell( RAS_ARG ) + gray_set_cell( RAS_ARG_ TCoord ex, + TCoord ey ) { - PCell *pcell, cell; - TCoord x = ras.ex; - + /* Move the cell pointer to a new position in the linked list. We use */ + /* a dumpster null cell for everything outside of the clipping region */ + /* during the render phase. This means that: */ + /* */ + /* . the new vertical position must be within min_ey..max_ey-1. */ + /* . the new horizontal position must be strictly less than max_ex */ + /* */ + /* Note that if a cell is to the left of the clipping region, it is */ + /* actually set to the (min_ex-1) horizontal position. */ - pcell = &ras.ycells[ras.ey - ras.min_ey]; - while ( ( cell = *pcell ) ) - { - if ( cell->x > x ) - break; + TCoord ey_index = ey - ras.min_ey; - if ( cell->x == x ) - goto Found; - pcell = &cell->next; - } + if ( ey_index < 0 || ey_index >= ras.count_ey || ex >= ras.max_ex ) + ras.cell = ras.cell_null; + else + { + PCell* pcell = ras.ycells + ey_index; + PCell cell; - if ( ras.num_cells >= ras.max_cells ) - ft_longjmp( ras.jump_buffer, 1 ); - /* insert new cell */ - cell = ras.cells + ras.num_cells++; - cell->x = x; - cell->area = ras.area; - cell->cover = ras.cover; + ex = FT_MAX( ex, ras.min_ex - 1 ); - cell->next = *pcell; - *pcell = cell; + while ( 1 ) + { + cell = *pcell; - return; + if ( cell->x > ex ) + break; - Found: - /* update old cell */ - cell->area += ras.area; - cell->cover += ras.cover; - } + if ( cell->x == ex ) + goto Found; + pcell = &cell->next; + } - /************************************************************************** - * - * Set the current cell to a new position. - */ - static void - gray_set_cell( RAS_ARG_ TCoord ex, - TCoord ey ) - { - /* Move the cell pointer to a new position. We set the `invalid' */ - /* flag to indicate that the cell isn't part of those we're interested */ - /* in during the render phase. This means that: */ - /* */ - /* . the new vertical position must be within min_ey..max_ey-1. */ - /* . the new horizontal position must be strictly less than max_ex */ - /* */ - /* Note that if a cell is to the left of the clipping region, it is */ - /* actually set to the (min_ex-1) horizontal position. */ + /* insert new cell */ + cell = ras.cell_free++; + if ( cell >= ras.cell_null ) + ft_longjmp( ras.jump_buffer, 1 ); - /* record the current one if it is valid and substantial */ - if ( !ras.invalid && ( ras.area || ras.cover ) ) - gray_record_cell( RAS_VAR ); + cell->x = ex; + cell->area = 0; + cell->cover = 0; - ras.area = 0; - ras.cover = 0; - ras.ex = FT_MAX( ex, ras.min_ex - 1 ); - ras.ey = ey; + cell->next = *pcell; + *pcell = cell; - ras.invalid = ( ey >= ras.max_ey || ey < ras.min_ey || - ex >= ras.max_ex ); + Found: + ras.cell = cell; + } } -#ifndef FT_LONG64 +#ifndef FT_INT64 /************************************************************************** * @@ -622,8 +656,8 @@ typedef ptrdiff_t FT_PtrDist; return; } - fx1 = FRACT( x1 ); - fx2 = FRACT( x2 ); + fx1 = FRACT( x1 ); + fx2 = FRACT( x2 ); /* everything is located in a single cell. That is easy! */ /* */ @@ -655,10 +689,9 @@ typedef ptrdiff_t FT_PtrDist; /* XXX: y-delta and x-delta below should be related. */ FT_DIV_MOD( TCoord, p, dx, delta, mod ); - ras.area += (TArea)( ( fx1 + first ) * delta ); - ras.cover += delta; - y1 += delta; - ex1 += incr; + FT_INTEGRATE( ras, delta, fx1 + first ); + y1 += delta; + ex1 += incr; gray_set_cell( RAS_VAR_ ex1, ey ); if ( ex1 != ex2 ) @@ -679,10 +712,9 @@ typedef ptrdiff_t FT_PtrDist; delta++; } - ras.area += (TArea)( ONE_PIXEL * delta ); - ras.cover += delta; - y1 += delta; - ex1 += incr; + FT_INTEGRATE( ras, delta, ONE_PIXEL ); + y1 += delta; + ex1 += incr; gray_set_cell( RAS_VAR_ ex1, ey ); } while ( ex1 != ex2 ); } @@ -690,10 +722,7 @@ typedef ptrdiff_t FT_PtrDist; fx1 = ONE_PIXEL - first; End: - dy = y2 - y1; - - ras.area += (TArea)( ( fx1 + fx2 ) * dy ); - ras.cover += dy; + FT_INTEGRATE( ras, y2 - y1, fx1 + fx2 ); } @@ -736,7 +765,6 @@ typedef ptrdiff_t FT_PtrDist; { TCoord ex = TRUNC( ras.x ); TCoord two_fx = FRACT( ras.x ) << 1; - TArea area; if ( dy > 0) @@ -750,27 +778,23 @@ typedef ptrdiff_t FT_PtrDist; incr = -1; } - delta = first - fy1; - ras.area += (TArea)two_fx * delta; - ras.cover += delta; - ey1 += incr; + delta = first - fy1; + FT_INTEGRATE( ras, delta, two_fx); + ey1 += incr; gray_set_cell( RAS_VAR_ ex, ey1 ); delta = first + first - ONE_PIXEL; - area = (TArea)two_fx * delta; while ( ey1 != ey2 ) { - ras.area += area; - ras.cover += delta; - ey1 += incr; + FT_INTEGRATE( ras, delta, two_fx); + ey1 += incr; gray_set_cell( RAS_VAR_ ex, ey1 ); } - delta = fy2 - ONE_PIXEL + first; - ras.area += (TArea)two_fx * delta; - ras.cover += delta; + delta = fy2 - ONE_PIXEL + first; + FT_INTEGRATE( ras, delta, two_fx); goto End; } @@ -883,8 +907,7 @@ typedef ptrdiff_t FT_PtrDist; do { fy2 = ONE_PIXEL; - ras.cover += ( fy2 - fy1 ); - ras.area += ( fy2 - fy1 ) * fx1 * 2; + FT_INTEGRATE( ras, fy2 - fy1, fx1 * 2 ); fy1 = 0; ey1++; gray_set_cell( RAS_VAR_ ex1, ey1 ); @@ -893,8 +916,7 @@ typedef ptrdiff_t FT_PtrDist; do { fy2 = 0; - ras.cover += ( fy2 - fy1 ); - ras.area += ( fy2 - fy1 ) * fx1 * 2; + FT_INTEGRATE( ras, fy2 - fy1, fx1 * 2 ); fy1 = ONE_PIXEL; ey1--; gray_set_cell( RAS_VAR_ ex1, ey1 ); @@ -902,7 +924,7 @@ typedef ptrdiff_t FT_PtrDist; } else /* any other line */ { - TPos prod = dx * (TPos)fy1 - dy * (TPos)fx1; + FT_Int64 prod = dx * (FT_Int64)fy1 - dy * (FT_Int64)fx1; FT_UDIVPREP( ex1 != ex2, dx ); FT_UDIVPREP( ey1 != ey2, dy ); @@ -912,72 +934,308 @@ typedef ptrdiff_t FT_PtrDist; /* also easily updated when moving from one cell to the next. */ do { - if ( prod <= 0 && - prod - dx * ONE_PIXEL > 0 ) /* left */ + if ( prod - dx * ONE_PIXEL > 0 && + prod <= 0 ) /* left */ { fx2 = 0; fy2 = FT_UDIV( -prod, -dx ); prod -= dy * ONE_PIXEL; - ras.cover += ( fy2 - fy1 ); - ras.area += ( fy2 - fy1 ) * ( fx1 + fx2 ); + FT_INTEGRATE( ras, fy2 - fy1, fx1 + fx2 ); fx1 = ONE_PIXEL; fy1 = fy2; ex1--; } - else if ( prod - dx * ONE_PIXEL <= 0 && - prod - dx * ONE_PIXEL + dy * ONE_PIXEL > 0 ) /* up */ + else if ( prod - dx * ONE_PIXEL + dy * ONE_PIXEL > 0 && + prod - dx * ONE_PIXEL <= 0 ) /* up */ { prod -= dx * ONE_PIXEL; fx2 = FT_UDIV( -prod, dy ); fy2 = ONE_PIXEL; - ras.cover += ( fy2 - fy1 ); - ras.area += ( fy2 - fy1 ) * ( fx1 + fx2 ); + FT_INTEGRATE( ras, fy2 - fy1, fx1 + fx2 ); fx1 = fx2; fy1 = 0; ey1++; } - else if ( prod - dx * ONE_PIXEL + dy * ONE_PIXEL <= 0 && - prod + dy * ONE_PIXEL >= 0 ) /* right */ + else if ( prod + dy * ONE_PIXEL >= 0 && + prod - dx * ONE_PIXEL + dy * ONE_PIXEL <= 0 ) /* right */ { prod += dy * ONE_PIXEL; fx2 = ONE_PIXEL; fy2 = FT_UDIV( prod, dx ); - ras.cover += ( fy2 - fy1 ); - ras.area += ( fy2 - fy1 ) * ( fx1 + fx2 ); + FT_INTEGRATE( ras, fy2 - fy1, fx1 + fx2 ); fx1 = 0; fy1 = fy2; ex1++; } - else /* ( prod + dy * ONE_PIXEL < 0 && - prod > 0 ) down */ + else /* ( prod > 0 && + prod + dy * ONE_PIXEL < 0 ) down */ { fx2 = FT_UDIV( prod, -dy ); fy2 = 0; prod += dx * ONE_PIXEL; - ras.cover += ( fy2 - fy1 ); - ras.area += ( fy2 - fy1 ) * ( fx1 + fx2 ); + FT_INTEGRATE( ras, fy2 - fy1, fx1 + fx2 ); fx1 = fx2; fy1 = ONE_PIXEL; ey1--; } gray_set_cell( RAS_VAR_ ex1, ey1 ); + } while ( ex1 != ex2 || ey1 != ey2 ); } fx2 = FRACT( to_x ); fy2 = FRACT( to_y ); - ras.cover += ( fy2 - fy1 ); - ras.area += ( fy2 - fy1 ) * ( fx1 + fx2 ); + FT_INTEGRATE( ras, fy2 - fy1, fx1 + fx2 ); End: - ras.x = to_x; - ras.y = to_y; + ras.x = to_x; + ras.y = to_y; } #endif + /* + * Benchmarking shows that using DDA to flatten the quadratic Bézier arcs + * is slightly faster in the following cases: + * + * - When the host CPU is 64-bit. + * - When SSE2 SIMD registers and instructions are available (even on + * x86). + * + * For other cases, using binary splits is actually slightly faster. + */ +#if defined( __SSE2__ ) || \ + defined( __x86_64__ ) || \ + defined( _M_AMD64 ) || \ + ( defined( _M_IX86_FP ) && _M_IX86_FP >= 2 ) +# define FT_SSE2 1 +#else +# define FT_SSE2 0 +#endif + +#if FT_SSE2 || \ + defined( __aarch64__ ) || \ + defined( _M_ARM64 ) +# define BEZIER_USE_DDA 1 +#else +# define BEZIER_USE_DDA 0 +#endif + + /* + * For now, the code that depends on `BEZIER_USE_DDA` requires `FT_Int64` + * to be defined. If `FT_INT64` is not defined, meaning there is no + * 64-bit type available, disable it to avoid compilation errors. See for + * example https://gitlab.freedesktop.org/freetype/freetype/-/issues/1071. + */ +#if !defined( FT_INT64 ) +# undef BEZIER_USE_DDA +# define BEZIER_USE_DDA 0 +#endif + +#if BEZIER_USE_DDA + +#if FT_SSE2 +# include <emmintrin.h> +#endif + +#define LEFT_SHIFT( a, b ) (FT_Int64)( (FT_UInt64)(a) << (b) ) + + + static void + gray_render_conic( RAS_ARG_ const FT_Vector* control, + const FT_Vector* to ) + { + FT_Vector p0, p1, p2; + TPos ax, ay, bx, by, dx, dy; + int shift; + + FT_Int64 rx, ry; + FT_Int64 qx, qy; + FT_Int64 px, py; + + FT_UInt count; + + + p0.x = ras.x; + p0.y = ras.y; + p1.x = UPSCALE( control->x ); + p1.y = UPSCALE( control->y ); + p2.x = UPSCALE( to->x ); + p2.y = UPSCALE( to->y ); + + /* short-cut the arc that crosses the current band */ + if ( ( TRUNC( p0.y ) >= ras.max_ey && + TRUNC( p1.y ) >= ras.max_ey && + TRUNC( p2.y ) >= ras.max_ey ) || + ( TRUNC( p0.y ) < ras.min_ey && + TRUNC( p1.y ) < ras.min_ey && + TRUNC( p2.y ) < ras.min_ey ) ) + { + ras.x = p2.x; + ras.y = p2.y; + return; + } + + bx = p1.x - p0.x; + by = p1.y - p0.y; + ax = p2.x - p1.x - bx; /* p0.x + p2.x - 2 * p1.x */ + ay = p2.y - p1.y - by; /* p0.y + p2.y - 2 * p1.y */ + + dx = FT_ABS( ax ); + dy = FT_ABS( ay ); + if ( dx < dy ) + dx = dy; + + if ( dx <= ONE_PIXEL / 4 ) + { + gray_render_line( RAS_VAR_ p2.x, p2.y ); + return; + } + + /* We can calculate the number of necessary bisections because */ + /* each bisection predictably reduces deviation exactly 4-fold. */ + /* Even 32-bit deviation would vanish after 16 bisections. */ + shift = 0; + do + { + dx >>= 2; + shift += 1; + + } while ( dx > ONE_PIXEL / 4 ); + + /* + * The (P0,P1,P2) arc equation, for t in [0,1] range: + * + * P(t) = P0*(1-t)^2 + P1*2*t*(1-t) + P2*t^2 + * + * P(t) = P0 + 2*(P1-P0)*t + (P0+P2-2*P1)*t^2 + * = P0 + 2*B*t + A*t^2 + * + * for A = P0 + P2 - 2*P1 + * and B = P1 - P0 + * + * Let's consider the difference when advancing by a small + * parameter h: + * + * Q(h,t) = P(t+h) - P(t) = 2*B*h + A*h^2 + 2*A*h*t + * + * And then its own difference: + * + * R(h,t) = Q(h,t+h) - Q(h,t) = 2*A*h*h = R (constant) + * + * Since R is always a constant, it is possible to compute + * successive positions with: + * + * P = P0 + * Q = Q(h,0) = 2*B*h + A*h*h + * R = 2*A*h*h + * + * loop: + * P += Q + * Q += R + * EMIT(P) + * + * To ensure accurate results, perform computations on 64-bit + * values, after scaling them by 2^32. + * + * h = 1 / 2^N + * + * R << 32 = 2 * A << (32 - N - N) + * = A << (33 - 2*N) + * + * Q << 32 = (2 * B << (32 - N)) + (A << (32 - N - N)) + * = (B << (33 - N)) + (A << (32 - 2*N)) + */ + +#if FT_SSE2 + /* Experience shows that for small shift values, */ + /* SSE2 is actually slower. */ + if ( shift > 2 ) + { + union + { + struct { FT_Int64 ax, ay, bx, by; } i; + struct { __m128i a, b; } vec; + + } u; + + union + { + struct { FT_Int32 px_lo, px_hi, py_lo, py_hi; } i; + __m128i vec; + + } v; + + __m128i a, b; + __m128i r, q, q2; + __m128i p; + + + u.i.ax = ax; + u.i.ay = ay; + u.i.bx = bx; + u.i.by = by; + + a = _mm_load_si128( &u.vec.a ); + b = _mm_load_si128( &u.vec.b ); + + r = _mm_slli_epi64( a, 33 - 2 * shift ); + q = _mm_slli_epi64( b, 33 - shift ); + q2 = _mm_slli_epi64( a, 32 - 2 * shift ); + + q = _mm_add_epi64( q2, q ); + + v.i.px_lo = 0; + v.i.px_hi = p0.x; + v.i.py_lo = 0; + v.i.py_hi = p0.y; + + p = _mm_load_si128( &v.vec ); + + for ( count = 1U << shift; count > 0; count-- ) + { + p = _mm_add_epi64( p, q ); + q = _mm_add_epi64( q, r ); + + _mm_store_si128( &v.vec, p ); + + gray_render_line( RAS_VAR_ v.i.px_hi, v.i.py_hi ); + } + + return; + } +#endif /* FT_SSE2 */ + + rx = LEFT_SHIFT( ax, 33 - 2 * shift ); + ry = LEFT_SHIFT( ay, 33 - 2 * shift ); + + qx = LEFT_SHIFT( bx, 33 - shift ) + LEFT_SHIFT( ax, 32 - 2 * shift ); + qy = LEFT_SHIFT( by, 33 - shift ) + LEFT_SHIFT( ay, 32 - 2 * shift ); + + px = LEFT_SHIFT( p0.x, 32 ); + py = LEFT_SHIFT( p0.y, 32 ); + + for ( count = 1U << shift; count > 0; count-- ) + { + px += qx; + py += qy; + qx += rx; + qy += ry; + + gray_render_line( RAS_VAR_ (FT_Pos)( px >> 32 ), + (FT_Pos)( py >> 32 ) ); + } + } + +#else /* !BEZIER_USE_DDA */ + + /* + * Note that multiple attempts to speed up the function below + * with SSE2 intrinsics, using various data layouts, have turned + * out to be slower than the non-SIMD code below. + */ static void gray_split_conic( FT_Vector* base ) { @@ -1007,7 +1265,7 @@ typedef ptrdiff_t FT_PtrDist; FT_Vector bez_stack[16 * 2 + 1]; /* enough to accommodate bisections */ FT_Vector* arc = bez_stack; TPos dx, dy; - int draw, split; + int draw; arc[0].x = UPSCALE( to->x ); @@ -1050,7 +1308,9 @@ typedef ptrdiff_t FT_PtrDist; /* many times as there are trailing zeros in the counter. */ do { - split = draw & ( -draw ); /* isolate the rightmost 1-bit */ + int split = draw & ( -draw ); /* isolate the rightmost 1-bit */ + + while ( ( split >>= 1 ) ) { gray_split_conic( arc ); @@ -1063,7 +1323,17 @@ typedef ptrdiff_t FT_PtrDist; } while ( --draw ); } +#endif /* !BEZIER_USE_DDA */ + + /* + * For cubic Bézier, binary splits are still faster than DDA + * because the splits are adaptive to how quickly each sub-arc + * approaches their chord trisection points. + * + * It might be useful to experiment with SSE2 to speed up + * `gray_split_cubic`, though. + */ static void gray_split_cubic( FT_Vector* base ) { @@ -1205,125 +1475,133 @@ typedef ptrdiff_t FT_PtrDist; static void - gray_hline( RAS_ARG_ TCoord x, - TCoord y, - TArea coverage, - TCoord acount ) + gray_sweep( RAS_ARG ) { - /* scale the coverage from 0..(ONE_PIXEL*ONE_PIXEL*2) to 0..256 */ - coverage >>= PIXEL_BITS * 2 + 1 - 8; + int fill = ( ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ) ? 0x100 + : INT_MIN; + int coverage; + int y; - /* compute the line's coverage depending on the outline fill rule */ - if ( ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ) - { - coverage &= 511; - if ( coverage >= 256 ) - coverage = 511 - coverage; - } - else /* default non-zero winding rule */ + for ( y = ras.min_ey; y < ras.max_ey; y++ ) { - if ( coverage < 0 ) - coverage = ~coverage; /* the same as -coverage - 1 */ + PCell cell = ras.ycells[y - ras.min_ey]; + TCoord x = ras.min_ex; + TArea cover = 0; - if ( coverage >= 256 ) - coverage = 255; - } + unsigned char* line = ras.target.origin - ras.target.pitch * y; - if ( ras.num_spans >= 0 ) /* for FT_RASTER_FLAG_DIRECT only */ - { - FT_Span* span = ras.spans + ras.num_spans++; + + for ( ; cell != ras.cell_null; cell = cell->next ) + { + TArea area; - span->x = (short)x; - span->len = (unsigned short)acount; - span->coverage = (unsigned char)coverage; + if ( cover != 0 && cell->x > x ) + { + FT_FILL_RULE( coverage, cover, fill ); + FT_GRAY_SET( line + x, coverage, cell->x - x ); + } - if ( ras.num_spans == FT_MAX_GRAY_SPANS ) - { - /* flush the span buffer and reset the count */ - ras.render_span( y, ras.num_spans, ras.spans, ras.render_span_data ); - ras.num_spans = 0; - } - } - else - { - unsigned char* q = ras.target.origin - ras.target.pitch * y + x; - unsigned char c = (unsigned char)coverage; + cover += (TArea)cell->cover * ( ONE_PIXEL * 2 ); + area = cover - cell->area; + if ( area != 0 && cell->x >= ras.min_ex ) + { + FT_FILL_RULE( coverage, area, fill ); + line[cell->x] = (unsigned char)coverage; + } + + x = cell->x + 1; + } - /* For small-spans it is faster to do it by ourselves than - * calling `memset'. This is mainly due to the cost of the - * function call. - */ - switch ( acount ) + if ( cover != 0 ) /* only if cropped */ { - case 7: - *q++ = c; - /* fall through */ - case 6: - *q++ = c; - /* fall through */ - case 5: - *q++ = c; - /* fall through */ - case 4: - *q++ = c; - /* fall through */ - case 3: - *q++ = c; - /* fall through */ - case 2: - *q++ = c; - /* fall through */ - case 1: - *q = c; - /* fall through */ - case 0: - break; - default: - FT_MEM_SET( q, c, acount ); + FT_FILL_RULE( coverage, cover, fill ); + FT_GRAY_SET( line + x, coverage, ras.max_ex - x ); } } } static void - gray_sweep( RAS_ARG ) + gray_sweep_direct( RAS_ARG ) { + int fill = ( ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ) ? 0x100 + : INT_MIN; + int coverage; int y; + FT_Span span[FT_MAX_GRAY_SPANS]; + int n = 0; + for ( y = ras.min_ey; y < ras.max_ey; y++ ) { PCell cell = ras.ycells[y - ras.min_ey]; TCoord x = ras.min_ex; TArea cover = 0; - TArea area; - for ( ; cell != NULL; cell = cell->next ) + for ( ; cell != ras.cell_null; cell = cell->next ) { + TArea area; + + if ( cover != 0 && cell->x > x ) - gray_hline( RAS_VAR_ x, y, cover, cell->x - x ); + { + FT_FILL_RULE( coverage, cover, fill ); + + span[n].coverage = (unsigned char)coverage; + span[n].x = (short)x; + span[n].len = (unsigned short)( cell->x - x ); + + if ( ++n == FT_MAX_GRAY_SPANS ) + { + /* flush the span buffer and reset the count */ + ras.render_span( y, n, span, ras.render_span_data ); + n = 0; + } + } cover += (TArea)cell->cover * ( ONE_PIXEL * 2 ); area = cover - cell->area; if ( area != 0 && cell->x >= ras.min_ex ) - gray_hline( RAS_VAR_ cell->x, y, area, 1 ); + { + FT_FILL_RULE( coverage, area, fill ); + + span[n].coverage = (unsigned char)coverage; + span[n].x = (short)cell->x; + span[n].len = 1; + + if ( ++n == FT_MAX_GRAY_SPANS ) + { + /* flush the span buffer and reset the count */ + ras.render_span( y, n, span, ras.render_span_data ); + n = 0; + } + } x = cell->x + 1; } - if ( cover != 0 ) - gray_hline( RAS_VAR_ x, y, cover, ras.max_ex - x ); + if ( cover != 0 ) /* only if cropped */ + { + FT_FILL_RULE( coverage, cover, fill ); + + span[n].coverage = (unsigned char)coverage; + span[n].x = (short)x; + span[n].len = (unsigned short)( ras.max_ex - x ); + + ++n; + } - if ( ras.num_spans > 0 ) /* for FT_RASTER_FLAG_DIRECT only */ + if ( n ) { /* flush the span buffer and reset the count */ - ras.render_span( y, ras.num_spans, ras.spans, ras.render_span_data ); - ras.num_spans = 0; + ras.render_span( y, n, span, ras.render_span_data ); + n = 0; } } } @@ -1604,7 +1882,7 @@ typedef ptrdiff_t FT_PtrDist; } FT_TRACE5(( "FT_Outline_Decompose: Done\n", n )); - return 0; + return Smooth_Err_Ok; Exit: FT_TRACE5(( "FT_Outline_Decompose: Error 0x%x\n", error )); @@ -1645,18 +1923,15 @@ typedef ptrdiff_t FT_PtrDist; if ( continued ) FT_Trace_Enable(); - if ( !ras.invalid ) - gray_record_cell( RAS_VAR ); - - FT_TRACE7(( "band [%d..%d]: %ld cell%s\n", + FT_TRACE7(( "band [%d..%d]: %ld cell%s remaining/\n", ras.min_ey, ras.max_ey, - ras.num_cells, - ras.num_cells == 1 ? "" : "s" )); + ras.cell_null - ras.cell_free, + ras.cell_null - ras.cell_free == 1 ? "" : "s" )); } else { - error = FT_THROW( Memory_Overflow ); + error = FT_THROW( Raster_Overflow ); FT_TRACE7(( "band [%d..%d]: to be bisected\n", ras.min_ey, ras.max_ey )); @@ -1682,7 +1957,16 @@ typedef ptrdiff_t FT_PtrDist; int continued = 0; + /* Initialize the null cell at the end of the poll. */ + ras.cell_null = buffer + FT_MAX_GRAY_POOL - 1; + ras.cell_null->x = CELL_MAX_X_VALUE; + ras.cell_null->area = 0; + ras.cell_null->cover = 0; + ras.cell_null->next = NULL; + /* set up vertical bands */ + ras.ycells = (PCell*)buffer; + if ( height > n ) { /* two divisions rounded up */ @@ -1690,13 +1974,6 @@ typedef ptrdiff_t FT_PtrDist; height = ( height + n - 1 ) / n; } - /* memory management */ - n = ( height * sizeof ( PCell ) + sizeof ( TCell ) - 1 ) / sizeof ( TCell ); - - ras.cells = buffer + n; - ras.max_cells = (FT_PtrDist)( FT_MAX_GRAY_POOL - n ); - ras.ycells = (PCell*)buffer; - for ( y = yMin; y < yMax; ) { ras.min_ey = y; @@ -1710,27 +1987,37 @@ typedef ptrdiff_t FT_PtrDist; do { TCoord width = band[0] - band[1]; + TCoord w; int error; - FT_MEM_ZERO( ras.ycells, height * sizeof ( PCell ) ); + for ( w = 0; w < width; ++w ) + ras.ycells[w] = ras.cell_null; - ras.num_cells = 0; - ras.invalid = 1; + /* memory management: skip ycells */ + n = ( (size_t)width * sizeof ( PCell ) + sizeof ( TCell ) - 1 ) / + sizeof ( TCell ); + + ras.cell_free = buffer + n; + ras.cell = ras.cell_null; ras.min_ey = band[1]; ras.max_ey = band[0]; + ras.count_ey = width; error = gray_convert_glyph_inner( RAS_VAR, continued ); continued = 1; if ( !error ) { - gray_sweep( RAS_VAR ); + if ( ras.render_span ) /* for FT_RASTER_FLAG_DIRECT only */ + gray_sweep_direct( RAS_VAR ); + else + gray_sweep( RAS_VAR ); band--; continue; } - else if ( error != ErrRaster_Memory_Overflow ) - return 1; + else if ( error != Smooth_Err_Raster_Overflow ) + return error; /* render pool overflow; we will reduce the render band by half */ width >>= 1; @@ -1739,7 +2026,7 @@ typedef ptrdiff_t FT_PtrDist; if ( width == 0 ) { FT_TRACE7(( "gray_convert_glyph: rotten glyph\n" )); - return 1; + return FT_THROW( Raster_Overflow ); } band++; @@ -1748,7 +2035,7 @@ typedef ptrdiff_t FT_PtrDist; } while ( band >= bands ); } - return 0; + return Smooth_Err_Ok; } @@ -1769,14 +2056,14 @@ typedef ptrdiff_t FT_PtrDist; /* this version does not support monochrome rendering */ if ( !( params->flags & FT_RASTER_FLAG_AA ) ) - return FT_THROW( Invalid_Mode ); + return FT_THROW( Cannot_Render_Glyph ); if ( !outline ) return FT_THROW( Invalid_Outline ); /* return immediately if the outline is empty */ if ( outline->n_points == 0 || outline->n_contours <= 0 ) - return 0; + return Smooth_Err_Ok; if ( !outline->contours || !outline->points ) return FT_THROW( Invalid_Outline ); @@ -1790,11 +2077,10 @@ typedef ptrdiff_t FT_PtrDist; if ( params->flags & FT_RASTER_FLAG_DIRECT ) { if ( !params->gray_spans ) - return 0; + return Smooth_Err_Ok; ras.render_span = (FT_Raster_Span_Func)params->gray_spans; ras.render_span_data = params->user; - ras.num_spans = 0; ras.min_ex = params->clip_box.xMin; ras.min_ey = params->clip_box.yMin; @@ -1809,7 +2095,7 @@ typedef ptrdiff_t FT_PtrDist; /* nothing to do */ if ( !target_map->width || !target_map->rows ) - return 0; + return Smooth_Err_Ok; if ( !target_map->buffer ) return FT_THROW( Invalid_Argument ); @@ -1824,7 +2110,6 @@ typedef ptrdiff_t FT_PtrDist; ras.render_span = (FT_Raster_Span_Func)NULL; ras.render_span_data = NULL; - ras.num_spans = -1; /* invalid */ ras.min_ex = 0; ras.min_ey = 0; @@ -1834,7 +2119,7 @@ typedef ptrdiff_t FT_PtrDist; /* exit if nothing to do */ if ( ras.max_ex <= ras.min_ex || ras.max_ey <= ras.min_ey ) - return 0; + return Smooth_Err_Ok; return gray_convert_glyph( RAS_VAR ); } @@ -1871,19 +2156,17 @@ typedef ptrdiff_t FT_PtrDist; #else /* !STANDALONE_ */ static int - gray_raster_new( FT_Memory memory, - FT_Raster* araster ) + gray_raster_new( FT_Memory memory, + gray_PRaster* araster ) { FT_Error error; gray_PRaster raster = NULL; - *araster = 0; - if ( !FT_ALLOC( raster, sizeof ( gray_TRaster ) ) ) - { + if ( !FT_NEW( raster ) ) raster->memory = memory; - *araster = (FT_Raster)raster; - } + + *araster = raster; return error; } diff --git a/src/3rdparty/freetype/src/smooth/ftgrays.h b/src/3rdparty/freetype/src/smooth/ftgrays.h index caba632833..13bf2baaa2 100644 --- a/src/3rdparty/freetype/src/smooth/ftgrays.h +++ b/src/3rdparty/freetype/src/smooth/ftgrays.h @@ -4,7 +4,7 @@ * * FreeType smooth renderer declaration * - * Copyright (C) 1996-2020 by + * Copyright (C) 1996-2022 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, diff --git a/src/3rdparty/freetype/src/smooth/ftsmerrs.h b/src/3rdparty/freetype/src/smooth/ftsmerrs.h index e93f3df9b3..7bc6077988 100644 --- a/src/3rdparty/freetype/src/smooth/ftsmerrs.h +++ b/src/3rdparty/freetype/src/smooth/ftsmerrs.h @@ -4,7 +4,7 @@ * * smooth renderer error codes (specification only). * - * Copyright (C) 2001-2020 by + * Copyright (C) 2001-2022 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, diff --git a/src/3rdparty/freetype/src/smooth/ftsmooth.c b/src/3rdparty/freetype/src/smooth/ftsmooth.c index 5d66bd6fc4..df227c3758 100644 --- a/src/3rdparty/freetype/src/smooth/ftsmooth.c +++ b/src/3rdparty/freetype/src/smooth/ftsmooth.c @@ -4,7 +4,7 @@ * * Anti-aliasing renderer interface (body). * - * Copyright (C) 2000-2020 by + * Copyright (C) 2000-2022 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, diff --git a/src/3rdparty/freetype/src/smooth/ftsmooth.h b/src/3rdparty/freetype/src/smooth/ftsmooth.h index 22a88d54ec..87f09faea4 100644 --- a/src/3rdparty/freetype/src/smooth/ftsmooth.h +++ b/src/3rdparty/freetype/src/smooth/ftsmooth.h @@ -4,7 +4,7 @@ * * Anti-aliasing renderer interface (specification). * - * Copyright (C) 1996-2020 by + * Copyright (C) 1996-2022 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, diff --git a/src/3rdparty/freetype/src/smooth/module.mk b/src/3rdparty/freetype/src/smooth/module.mk index 9b1507f1e0..7623551327 100644 --- a/src/3rdparty/freetype/src/smooth/module.mk +++ b/src/3rdparty/freetype/src/smooth/module.mk @@ -3,7 +3,7 @@ # -# Copyright (C) 1996-2020 by +# Copyright (C) 1996-2022 by # David Turner, Robert Wilhelm, and Werner Lemberg. # # This file is part of the FreeType project, and may only be used, modified, diff --git a/src/3rdparty/freetype/src/smooth/rules.mk b/src/3rdparty/freetype/src/smooth/rules.mk index b08056fac5..d5109313f9 100644 --- a/src/3rdparty/freetype/src/smooth/rules.mk +++ b/src/3rdparty/freetype/src/smooth/rules.mk @@ -3,7 +3,7 @@ # -# Copyright (C) 1996-2020 by +# Copyright (C) 1996-2022 by # David Turner, Robert Wilhelm, and Werner Lemberg. # # This file is part of the FreeType project, and may only be used, modified, diff --git a/src/3rdparty/freetype/src/smooth/smooth.c b/src/3rdparty/freetype/src/smooth/smooth.c index 04b531c087..f341e8f252 100644 --- a/src/3rdparty/freetype/src/smooth/smooth.c +++ b/src/3rdparty/freetype/src/smooth/smooth.c @@ -4,7 +4,7 @@ * * FreeType anti-aliasing rasterer module component (body only). * - * Copyright (C) 1996-2020 by + * Copyright (C) 1996-2022 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, |