summaryrefslogtreecommitdiff
path: root/libs/pixman-0.40.0/pixman/pixman-ssse3.c
diff options
context:
space:
mode:
Diffstat (limited to 'libs/pixman-0.40.0/pixman/pixman-ssse3.c')
-rw-r--r--libs/pixman-0.40.0/pixman/pixman-ssse3.c351
1 files changed, 0 insertions, 351 deletions
diff --git a/libs/pixman-0.40.0/pixman/pixman-ssse3.c b/libs/pixman-0.40.0/pixman/pixman-ssse3.c
deleted file mode 100644
index 680d6b9..0000000
--- a/libs/pixman-0.40.0/pixman/pixman-ssse3.c
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * Copyright © 2013 Soren Sandmann Pedersen
- * Copyright © 2013 Red Hat, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author: Soren Sandmann (soren.sandmann@gmail.com)
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdlib.h>
-#include <mmintrin.h>
-#include <xmmintrin.h>
-#include <emmintrin.h>
-#include <tmmintrin.h>
-#include "pixman-private.h"
-#include "pixman-inlines.h"
-
-typedef struct
-{
- int y;
- uint64_t * buffer;
-} line_t;
-
-typedef struct
-{
- line_t lines[2];
- pixman_fixed_t y;
- pixman_fixed_t x;
- uint64_t data[1];
-} bilinear_info_t;
-
-static void
-ssse3_fetch_horizontal (bits_image_t *image, line_t *line,
- int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
-{
- uint32_t *bits = image->bits + y * image->rowstride;
- __m128i vx = _mm_set_epi16 (
- - (x + 1), x, - (x + 1), x,
- - (x + ux + 1), x + ux, - (x + ux + 1), x + ux);
- __m128i vux = _mm_set_epi16 (
- - 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
- - 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
- __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0);
- __m128i *b = (__m128i *)line->buffer;
- __m128i vrl0, vrl1;
-
- while ((n -= 2) >= 0)
- {
- __m128i vw, vr, s;
-
- vrl1 = _mm_loadl_epi64 (
- (__m128i *)(bits + pixman_fixed_to_int (x + ux)));
- /* vrl1: R1, L1 */
-
- final_pixel:
- vrl0 = _mm_loadl_epi64 (
- (__m128i *)(bits + pixman_fixed_to_int (x)));
- /* vrl0: R0, L0 */
-
- /* The weights are based on vx which is a vector of
- *
- * - (x + 1), x, - (x + 1), x,
- * - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
- *
- * so the 16 bit weights end up like this:
- *
- * iw0, w0, iw0, w0, iw1, w1, iw1, w1
- *
- * and after shifting and packing, we get these bytes:
- *
- * iw0, w0, iw0, w0, iw1, w1, iw1, w1,
- * iw0, w0, iw0, w0, iw1, w1, iw1, w1,
- *
- * which means the first and the second input pixel
- * have to be interleaved like this:
- *
- * la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
- * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
- *
- * before maddubsw can be used.
- */
-
- vw = _mm_add_epi16 (
- vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
- /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1
- */
-
- vw = _mm_packus_epi16 (vw, vw);
- /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
- * iw0, w0, iw0, w0, iw1, w1, iw1, w1
- */
- vx = _mm_add_epi16 (vx, vux);
-
- x += 2 * ux;
-
- vr = _mm_unpacklo_epi16 (vrl1, vrl0);
- /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */
-
- s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
- /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */
-
- vr = _mm_unpackhi_epi8 (vr, s);
- /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
- * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
- */
-
- vr = _mm_maddubs_epi16 (vr, vw);
-
- /* When the weight is 0, the inverse weight is
- * 128 which can't be represented in a signed byte.
- * As a result maddubsw computes the following:
- *
- * r = l * -128 + r * 0
- *
- * rather than the desired
- *
- * r = l * 128 + r * 0
- *
- * We fix this by taking the absolute value of the
- * result.
- */
- vr = _mm_abs_epi16 (vr);
-
- /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */
- _mm_store_si128 (b++, vr);
- }
-
- if (n == -1)
- {
- vrl1 = _mm_setzero_si128();
- goto final_pixel;
- }
-
- line->y = y;
-}
-
-static uint32_t *
-ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
-{
- pixman_fixed_t fx, ux;
- bilinear_info_t *info = iter->data;
- line_t *line0, *line1;
- int y0, y1;
- int32_t dist_y;
- __m128i vw;
- int i;
-
- fx = info->x;
- ux = iter->image->common.transform->matrix[0][0];
-
- y0 = pixman_fixed_to_int (info->y);
- y1 = y0 + 1;
-
- line0 = &info->lines[y0 & 0x01];
- line1 = &info->lines[y1 & 0x01];
-
- if (line0->y != y0)
- {
- ssse3_fetch_horizontal (
- &iter->image->bits, line0, y0, fx, ux, iter->width);
- }
-
- if (line1->y != y1)
- {
- ssse3_fetch_horizontal (
- &iter->image->bits, line1, y1, fx, ux, iter->width);
- }
-
- dist_y = pixman_fixed_to_bilinear_weight (info->y);
- dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS);
-
- vw = _mm_set_epi16 (
- dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
-
- for (i = 0; i + 3 < iter->width; i += 4)
- {
- __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
- __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
- __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2));
- __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2));
- __m128i r0, r1, tmp, p;
-
- r0 = _mm_mulhi_epu16 (
- _mm_sub_epi16 (bot0, top0), vw);
- tmp = _mm_cmplt_epi16 (bot0, top0);
- tmp = _mm_and_si128 (tmp, vw);
- r0 = _mm_sub_epi16 (r0, tmp);
- r0 = _mm_add_epi16 (r0, top0);
- r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
- /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */
- r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
- /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */
-
- r1 = _mm_mulhi_epu16 (
- _mm_sub_epi16 (bot1, top1), vw);
- tmp = _mm_cmplt_epi16 (bot1, top1);
- tmp = _mm_and_si128 (tmp, vw);
- r1 = _mm_sub_epi16 (r1, tmp);
- r1 = _mm_add_epi16 (r1, top1);
- r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS);
- r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1));
- /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */
-
- p = _mm_packus_epi16 (r0, r1);
-
- _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p);
- }
-
- while (i < iter->width)
- {
- __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
- __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
- __m128i r0, tmp, p;
-
- r0 = _mm_mulhi_epu16 (
- _mm_sub_epi16 (bot0, top0), vw);
- tmp = _mm_cmplt_epi16 (bot0, top0);
- tmp = _mm_and_si128 (tmp, vw);
- r0 = _mm_sub_epi16 (r0, tmp);
- r0 = _mm_add_epi16 (r0, top0);
- r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
- /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */
- r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
- /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */
-
- p = _mm_packus_epi16 (r0, r0);
-
- if (iter->width - i == 1)
- {
- *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p);
- i++;
- }
- else
- {
- _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p);
- i += 2;
- }
- }
-
- info->y += iter->image->common.transform->matrix[1][1];
-
- return iter->buffer;
-}
-
-static void
-ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter)
-{
- free (iter->data);
-}
-
-static void
-ssse3_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
-{
- int width = iter->width;
- bilinear_info_t *info;
- pixman_vector_t v;
-
- /* Reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- if (!pixman_transform_point_3d (iter->image->common.transform, &v))
- goto fail;
-
- info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64);
- if (!info)
- goto fail;
-
- info->x = v.vector[0] - pixman_fixed_1 / 2;
- info->y = v.vector[1] - pixman_fixed_1 / 2;
-
-#define ALIGN(addr) \
- ((void *)((((uintptr_t)(addr)) + 15) & (~15)))
-
- /* It is safe to set the y coordinates to -1 initially
- * because COVER_CLIP_BILINEAR ensures that we will only
- * be asked to fetch lines in the [0, height) interval
- */
- info->lines[0].y = -1;
- info->lines[0].buffer = ALIGN (&(info->data[0]));
- info->lines[1].y = -1;
- info->lines[1].buffer = ALIGN (info->lines[0].buffer + width);
-
- iter->get_scanline = ssse3_fetch_bilinear_cover;
- iter->fini = ssse3_bilinear_cover_iter_fini;
-
- iter->data = info;
- return;
-
-fail:
- /* Something went wrong, either a bad matrix or OOM; in such cases,
- * we don't guarantee any particular rendering.
- */
- _pixman_log_error (
- FUNC, "Allocation failure or bad matrix, skipping rendering\n");
-
- iter->get_scanline = _pixman_iter_get_scanline_noop;
- iter->fini = NULL;
-}
-
-static const pixman_iter_info_t ssse3_iters[] =
-{
- { PIXMAN_a8r8g8b8,
- (FAST_PATH_STANDARD_FLAGS |
- FAST_PATH_SCALE_TRANSFORM |
- FAST_PATH_BILINEAR_FILTER |
- FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
- ITER_NARROW | ITER_SRC,
- ssse3_bilinear_cover_iter_init,
- NULL, NULL
- },
-
- { PIXMAN_null },
-};
-
-static const pixman_fast_path_t ssse3_fast_paths[] =
-{
- { PIXMAN_OP_NONE },
-};
-
-pixman_implementation_t *
-_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback)
-{
- pixman_implementation_t *imp =
- _pixman_implementation_create (fallback, ssse3_fast_paths);
-
- imp->iter_info = ssse3_iters;
-
- return imp;
-}