FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hevcdsp_template.c
Go to the documentation of this file.
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "get_bits.h"
24 #include "hevc.h"
25 
26 #include "bit_depth_template.c"
27 #include "hevcdsp.h"
28 
29 
30 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
31  GetBitContext *gb, int pcm_bit_depth)
32 {
33  int x, y;
34  pixel *dst = (pixel *)_dst;
35 
36  stride /= sizeof(pixel);
37 
38  for (y = 0; y < height; y++) {
39  for (x = 0; x < width; x++)
40  dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
41  dst += stride;
42  }
43 }
44 
45 static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs,
46  ptrdiff_t stride)
47 {
48  int x, y;
49  pixel *dst = (pixel *)_dst;
50 
51  stride /= sizeof(pixel);
52 
53  for (y = 0; y < 4; y++) {
54  for (x = 0; x < 4; x++) {
55  dst[x] = av_clip_pixel(dst[x] + *coeffs);
56  coeffs++;
57  }
58  dst += stride;
59  }
60 }
61 
62 static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs,
63  ptrdiff_t stride)
64 {
65  int x, y;
66  pixel *dst = (pixel *)_dst;
67 
68  stride /= sizeof(pixel);
69 
70  for (y = 0; y < 8; y++) {
71  for (x = 0; x < 8; x++) {
72  dst[x] = av_clip_pixel(dst[x] + *coeffs);
73  coeffs++;
74  }
75  dst += stride;
76  }
77 }
78 
79 static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs,
80  ptrdiff_t stride)
81 {
82  int x, y;
83  pixel *dst = (pixel *)_dst;
84 
85  stride /= sizeof(pixel);
86 
87  for (y = 0; y < 16; y++) {
88  for (x = 0; x < 16; x++) {
89  dst[x] = av_clip_pixel(dst[x] + *coeffs);
90  coeffs++;
91  }
92  dst += stride;
93  }
94 }
95 
96 static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs,
97  ptrdiff_t stride)
98 {
99  int x, y;
100  pixel *dst = (pixel *)_dst;
101 
102  stride /= sizeof(pixel);
103 
104  for (y = 0; y < 32; y++) {
105  for (x = 0; x < 32; x++) {
106  dst[x] = av_clip_pixel(dst[x] + *coeffs);
107  coeffs++;
108  }
109  dst += stride;
110  }
111 }
112 
113 
114 static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
115 {
116  int16_t *coeffs = (int16_t *) _coeffs;
117  int x, y;
118  int size = 1 << log2_size;
119 
120  if (mode) {
121  coeffs += size;
122  for (y = 0; y < size - 1; y++) {
123  for (x = 0; x < size; x++)
124  coeffs[x] += coeffs[x - size];
125  coeffs += size;
126  }
127  } else {
128  for (y = 0; y < size; y++) {
129  for (x = 1; x < size; x++)
130  coeffs[x] += coeffs[x - 1];
131  coeffs += size;
132  }
133  }
134 }
135 
136 static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
137 {
138  int shift = 15 - BIT_DEPTH - log2_size;
139  int x, y;
140  int size = 1 << log2_size;
141  int16_t *coeffs = _coeffs;
142 
143 
144  if (shift > 0) {
145  int offset = 1 << (shift - 1);
146  for (y = 0; y < size; y++) {
147  for (x = 0; x < size; x++) {
148  *coeffs = (*coeffs + offset) >> shift;
149  coeffs++;
150  }
151  }
152  } else {
153  for (y = 0; y < size; y++) {
154  for (x = 0; x < size; x++) {
155  *coeffs = *coeffs << -shift;
156  coeffs++;
157  }
158  }
159  }
160 }
161 
/*
 * Store helpers handed to the TR_* macros as their "assign" argument.
 *   SET           - plain assignment.
 *   SCALE         - adds the rounding term, right-shifts and clips to int16.
 *   ADD_AND_SCALE - same scaling, then adds to dst and clips to the pixel
 *                   range; it is not expanded in the visible part of this file.
 * SCALE and ADD_AND_SCALE read the variables `add` and `shift`, which are not
 * macro parameters and must be in scope where the macro is expanded.
 */
162 #define SET(dst, x) (dst) = (x)
163 #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
164 #define ADD_AND_SCALE(dst, x) \
165  (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
166 
/*
 * One 4-point transform pass used by transform_4x4_luma.
 *   dst, src - output and input arrays, read/written at indices 0..3 * step
 *   step     - distance between consecutive samples of the line
 *   assign   - store macro invoked as assign(lvalue, value)
 * The partial sums c0..c3 are computed before any store, and the first store
 * (index 2) reads src before writing, so dst and src may be the same array,
 * as they are in the visible callers.
 */
167 #define TR_4x4_LUMA(dst, src, step, assign) \
168  do { \
169  int c0 = src[0 * step] + src[2 * step]; \
170  int c1 = src[2 * step] + src[3 * step]; \
171  int c2 = src[0 * step] - src[3 * step]; \
172  int c3 = 74 * src[1 * step]; \
173  \
174  assign(dst[2 * step], 74 * (src[0 * step] - \
175  src[2 * step] + \
176  src[3 * step])); \
177  assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
178  assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
179  assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
180  } while (0)
181 
182 static void FUNC(transform_4x4_luma)(int16_t *coeffs)
183 {
184  int i;
185  int shift = 7;
186  int add = 1 << (shift - 1);
187  int16_t *src = coeffs;
188 
189  for (i = 0; i < 4; i++) {
190  TR_4x4_LUMA(src, src, 4, SCALE);
191  src++;
192  }
193 
194  shift = 20 - BIT_DEPTH;
195  add = 1 << (shift - 1);
196  for (i = 0; i < 4; i++) {
197  TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
198  coeffs += 4;
199  }
200 }
201 
202 #undef TR_4x4_LUMA
203 
/*
 * 4-point transform pass.
 *   dst/dstep - output array and distance between output samples
 *   src/sstep - input array and distance between input samples
 *   assign    - store macro invoked as assign(lvalue, value)
 *   end       - accepted for signature parity with TR_8/16/32 but never used
 * e0/e1 combine inputs 0 and 2, o0/o1 combine inputs 1 and 3; all four are
 * computed before the first store, so dst may alias src.
 */
204 #define TR_4(dst, src, dstep, sstep, assign, end) \
205  do { \
206  const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
207  const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
208  const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
209  const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
210  \
211  assign(dst[0 * dstep], e0 + o0); \
212  assign(dst[1 * dstep], e1 + o1); \
213  assign(dst[2 * dstep], e1 - o1); \
214  assign(dst[3 * dstep], e0 - o0); \
215  } while (0)
216 
/*
 * 8-point transform pass.
 * o_8[i] accumulates transform[4 * j][i] * src[j * sstep] over the odd j
 * below `end`; e_8 is produced by TR_4 applied to the even-indexed inputs
 * (input distance doubled). Output i is e_8[i] + o_8[i] and output 7 - i is
 * e_8[i] - o_8[i]. `transform` is a table defined outside the visible part of
 * this file. The macro declares its own i and j, which shadow any outer ones.
 */
217 #define TR_8(dst, src, dstep, sstep, assign, end) \
218  do { \
219  int i, j; \
220  int e_8[4]; \
221  int o_8[4] = { 0 }; \
222  for (i = 0; i < 4; i++) \
223  for (j = 1; j < end; j += 2) \
224  o_8[i] += transform[4 * j][i] * src[j * sstep]; \
225  TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
226  \
227  for (i = 0; i < 4; i++) { \
228  assign(dst[i * dstep], e_8[i] + o_8[i]); \
229  assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
230  } \
231  } while (0)
232 
/*
 * 16-point transform pass.
 * o_16[i] accumulates transform[2 * j][i] * src[j * sstep] over the odd j
 * below `end`; e_16 is produced by TR_8 applied to the even-indexed inputs.
 * Note that the nested TR_8 always receives the literal 8 as its `end`,
 * independent of this macro's own `end`.
 */
233 #define TR_16(dst, src, dstep, sstep, assign, end) \
234  do { \
235  int i, j; \
236  int e_16[8]; \
237  int o_16[8] = { 0 }; \
238  for (i = 0; i < 8; i++) \
239  for (j = 1; j < end; j += 2) \
240  o_16[i] += transform[2 * j][i] * src[j * sstep]; \
241  TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
242  \
243  for (i = 0; i < 8; i++) { \
244  assign(dst[i * dstep], e_16[i] + o_16[i]); \
245  assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
246  } \
247  } while (0)
248 
/*
 * 32-point transform pass.
 * o_32[i] accumulates transform[j][i] * src[j * sstep] over the odd j below
 * `end`; e_32 is produced by TR_16 applied to the even-indexed inputs, with
 * end/2 forwarded as that macro's `end`. `end` is pasted unparenthesised, so
 * callers should pass a simple expression.
 */
249 #define TR_32(dst, src, dstep, sstep, assign, end) \
250  do { \
251  int i, j; \
252  int e_32[16]; \
253  int o_32[16] = { 0 }; \
254  for (i = 0; i < 16; i++) \
255  for (j = 1; j < end; j += 2) \
256  o_32[i] += transform[j][i] * src[j * sstep]; \
257  TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \
258  \
259  for (i = 0; i < 16; i++) { \
260  assign(dst[i * dstep], e_32[i] + o_32[i]); \
261  assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
262  } \
263  } while (0)
264 
/*
 * Per-size local declarations pasted into the IDCT() function bodies.
 * They read `col_limit`, a parameter of the generated function.
 *   limit  = min(col_limit, H)     - `end` bound for the second pass
 *   limit2 = min(col_limit + 4, H) - initial `end` bound for the first pass
 * The 4x4 variant declares only limit2: IDCT() still writes `limit` in its
 * TR_4 invocation, but TR_4 never expands its `end` parameter, so the
 * undeclared name disappears during preprocessing.
 */
265 #define IDCT_VAR4(H) \
266  int limit2 = FFMIN(col_limit + 4, H)
267 #define IDCT_VAR8(H) \
268  int limit = FFMIN(col_limit, H); \
269  int limit2 = FFMIN(col_limit + 4, H)
270 #define IDCT_VAR16(H) IDCT_VAR8(H)
271 #define IDCT_VAR32(H) IDCT_VAR8(H)
272 
/*
 * Generate FUNC(idct_HxH)(coeffs, col_limit): a two-pass in-place transform
 * of an HxH coefficient block built from TR_H and SCALE.
 *   Pass 1 handles one column per iteration (sample distance H) with a shift
 *   of 7 and `limit2` as the TR_H `end` bound; limit2 is lowered by 4 each
 *   time i is a non-zero multiple of 4 while limit2 is still below H.
 *   Pass 2 handles one row per iteration (sample distance 1) with a shift of
 *   20 - BIT_DEPTH and `limit` as the `end` bound.
 * `shift` and `add` are read implicitly by SCALE.
 */
273 #define IDCT(H) \
274 static void FUNC(idct_##H ##x ##H )( \
275  int16_t *coeffs, int col_limit) { \
276  int i; \
277  int shift = 7; \
278  int add = 1 << (shift - 1); \
279  int16_t *src = coeffs; \
280  IDCT_VAR ##H(H); \
281  \
282  for (i = 0; i < H; i++) { \
283  TR_ ## H(src, src, H, H, SCALE, limit2); \
284  if (limit2 < H && i%4 == 0 && !!i) \
285  limit2 -= 4; \
286  src++; \
287  } \
288  \
289  shift = 20 - BIT_DEPTH; \
290  add = 1 << (shift - 1); \
291  for (i = 0; i < H; i++) { \
292  TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
293  coeffs += H; \
294  } \
295 }
296 
/*
 * Generate FUNC(idct_HxH_dc)(coeffs): fill all H*H entries of the block with
 * a single value derived from coeffs[0]:
 *   ((((coeffs[0] + 1) >> 1) + add) >> shift), with shift = 14 - BIT_DEPTH
 *   and add = 1 << (shift - 1).
 * coeffs[0] is read once, before the block is overwritten.
 */
297 #define IDCT_DC(H) \
298 static void FUNC(idct_##H ##x ##H ##_dc)( \
299  int16_t *coeffs) { \
300  int i, j; \
301  int shift = 14 - BIT_DEPTH; \
302  int add = 1 << (shift - 1); \
303  int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \
304  \
305  for (j = 0; j < H; j++) { \
306  for (i = 0; i < H; i++) { \
307  coeffs[i+j*H] = coeff; \
308  } \
309  } \
310 }
311 
/* Instantiate FUNC(idct_HxH) and FUNC(idct_HxH_dc) for H = 4, 8, 16 and 32. */
312 IDCT( 4)
313 IDCT( 8)
314 IDCT(16)
315 IDCT(32)
316 
317 IDCT_DC( 4)
318 IDCT_DC( 8)
319 IDCT_DC(16)
320 IDCT_DC(32)
321 
322 #undef TR_4
323 #undef TR_8
324 #undef TR_16
325 #undef TR_32
326 
327 #undef SET
328 #undef SCALE
329 #undef ADD_AND_SCALE
330 
331 static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src,
332  ptrdiff_t stride, SAOParams *sao,
333  int *borders, int width, int height,
334  int c_idx)
335 {
336  pixel *dst = (pixel *)_dst;
337  pixel *src = (pixel *)_src;
338  int offset_table[32] = { 0 };
339  int k, y, x;
340  int shift = BIT_DEPTH - 5;
341  int *sao_offset_val = sao->offset_val[c_idx];
342  int sao_left_class = sao->band_position[c_idx];
343 
344  stride /= sizeof(pixel);
345 
346  for (k = 0; k < 4; k++)
347  offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
348  for (y = 0; y < height; y++) {
349  for (x = 0; x < width; x++)
350  dst[x] = av_clip_pixel(src[x] + offset_table[av_clip_pixel(src[x] >> shift)]);
351  dst += stride;
352  src += stride;
353  }
354 }
355 
/* Three-way comparison: 1 if a > b, 0 if a == b, -1 otherwise.
 * Both arguments are evaluated more than once. */
356 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
357 
358 static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src,
359  ptrdiff_t stride, SAOParams *sao,
360  int width, int height,
361  int c_idx, int init_x, int init_y) {
362 
363  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
364  static const int8_t pos[4][2][2] = {
365  { { -1, 0 }, { 1, 0 } }, // horizontal
366  { { 0, -1 }, { 0, 1 } }, // vertical
367  { { -1, -1 }, { 1, 1 } }, // 45 degree
368  { { 1, -1 }, { -1, 1 } }, // 135 degree
369  };
370  int *sao_offset_val = sao->offset_val[c_idx];
371  int sao_eo_class = sao->eo_class[c_idx];
372  pixel *dst = (pixel *)_dst;
373  pixel *src = (pixel *)_src;
374 
375  int y_stride = init_y * stride;
376  int pos_0_0 = pos[sao_eo_class][0][0];
377  int pos_0_1 = pos[sao_eo_class][0][1];
378  int pos_1_0 = pos[sao_eo_class][1][0];
379  int pos_1_1 = pos[sao_eo_class][1][1];
380  int x, y;
381 
382  int y_stride_0_1 = (init_y + pos_0_1) * stride;
383  int y_stride_1_1 = (init_y + pos_1_1) * stride;
384  for (y = init_y; y < height; y++) {
385  for (x = init_x; x < width; x++) {
386  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
387  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
388  int offset_val = edge_idx[2 + diff0 + diff1];
389  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
390  }
391  y_stride += stride;
392  y_stride_0_1 += stride;
393  y_stride_1_1 += stride;
394  }
395 }
396 
397 static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
398  ptrdiff_t stride, SAOParams *sao,
399  int *borders, int _width, int _height,
400  int c_idx, uint8_t *vert_edge,
401  uint8_t *horiz_edge, uint8_t *diag_edge)
402 {
403  int x, y;
404  pixel *dst = (pixel *)_dst;
405  pixel *src = (pixel *)_src;
406  int *sao_offset_val = sao->offset_val[c_idx];
407  int sao_eo_class = sao->eo_class[c_idx];
408  int init_x = 0, init_y = 0, width = _width, height = _height;
409 
410  stride /= sizeof(pixel);
411 
412  if (sao_eo_class != SAO_EO_VERT) {
413  if (borders[0]) {
414  int offset_val = sao_offset_val[0];
415  int y_stride = 0;
416  for (y = 0; y < height; y++) {
417  dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
418  y_stride += stride;
419  }
420  init_x = 1;
421  }
422  if (borders[2]) {
423  int offset_val = sao_offset_val[0];
424  int x_stride = width - 1;
425  for (x = 0; x < height; x++) {
426  dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
427  x_stride += stride;
428  }
429  width--;
430  }
431  }
432  if (sao_eo_class != SAO_EO_HORIZ) {
433  if (borders[1]) {
434  int offset_val = sao_offset_val[0];
435  for (x = init_x; x < width; x++)
436  dst[x] = av_clip_pixel(src[x] + offset_val);
437  init_y = 1;
438  }
439  if (borders[3]) {
440  int offset_val = sao_offset_val[0];
441  int y_stride = stride * (height - 1);
442  for (x = init_x; x < width; x++)
443  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
444  height--;
445  }
446  }
447 
448  FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride, sao, width, height, c_idx, init_x, init_y);
449 }
450 
451 static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
452  ptrdiff_t stride, SAOParams *sao,
453  int *borders, int _width, int _height,
454  int c_idx, uint8_t *vert_edge,
455  uint8_t *horiz_edge, uint8_t *diag_edge)
456 {
457  int x, y;
458  pixel *dst = (pixel *)_dst;
459  pixel *src = (pixel *)_src;
460  int *sao_offset_val = sao->offset_val[c_idx];
461  int sao_eo_class = sao->eo_class[c_idx];
462  int init_x = 0, init_y = 0, width = _width, height = _height;
463 
464  stride /= sizeof(pixel);
465 
466  if (sao_eo_class != SAO_EO_VERT) {
467  if (borders[0]) {
468  int offset_val = sao_offset_val[0];
469  int y_stride = 0;
470  for (y = 0; y < height; y++) {
471  dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
472  y_stride += stride;
473  }
474  init_x = 1;
475  }
476  if (borders[2]) {
477  int offset_val = sao_offset_val[0];
478  int x_stride = width - 1;
479  for (x = 0; x < height; x++) {
480  dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
481  x_stride += stride;
482  }
483  width--;
484  }
485  }
486  if (sao_eo_class != SAO_EO_HORIZ) {
487  if (borders[1]) {
488  int offset_val = sao_offset_val[0];
489  for (x = init_x; x < width; x++)
490  dst[x] = av_clip_pixel(src[x] + offset_val);
491  init_y = 1;
492  }
493  if (borders[3]) {
494  int offset_val = sao_offset_val[0];
495  int y_stride = stride * (height - 1);
496  for (x = init_x; x < width; x++)
497  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
498  height--;
499  }
500  }
501 
502  FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride, sao, width, height, c_idx, init_x, init_y);
503 
504  {
505  int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
506  int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2];
507  int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
508  int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3];
509 
510  // Restore pixels that can't be modified
511  if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
512  for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
513  dst[y*stride] = src[y*stride];
514  }
515  if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
516  for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
517  dst[y*stride+width-1] = src[y*stride+width-1];
518  }
519 
520  if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
521  for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
522  dst[x] = src[x];
523  }
524  if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
525  for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
526  dst[(height-1)*stride+x] = src[(height-1)*stride+x];
527  }
528  if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
529  dst[0] = src[0];
530  if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
531  dst[width-1] = src[width-1];
532  if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
533  dst[stride*(height-1)+width-1] = src[stride*(height-1)+width-1];
534  if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
535  dst[stride*(height-1)] = src[stride*(height-1)];
536 
537  }
538 }
539 
540 #undef CMP
541 
542 ////////////////////////////////////////////////////////////////////////////////
543 //
544 ////////////////////////////////////////////////////////////////////////////////
545 static void FUNC(put_hevc_pel_pixels)(int16_t *dst, ptrdiff_t dststride,
546  uint8_t *_src, ptrdiff_t _srcstride,
547  int height, intptr_t mx, intptr_t my, int width)
548 {
549  int x, y;
550  pixel *src = (pixel *)_src;
551  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
552 
553  for (y = 0; y < height; y++) {
554  for (x = 0; x < width; x++)
555  dst[x] = src[x] << (14 - BIT_DEPTH);
556  src += srcstride;
557  dst += dststride;
558  }
559 }
560 
561 static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
562  int height, intptr_t mx, intptr_t my, int width)
563 {
564  int y;
565  pixel *src = (pixel *)_src;
566  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
567  pixel *dst = (pixel *)_dst;
568  ptrdiff_t dststride = _dststride / sizeof(pixel);
569 
570  for (y = 0; y < height; y++) {
571  memcpy(dst, src, width * sizeof(pixel));
572  src += srcstride;
573  dst += dststride;
574  }
575 }
576 
577 static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
578  int16_t *src2, ptrdiff_t src2stride,
579  int height, intptr_t mx, intptr_t my, int width)
580 {
581  int x, y;
582  pixel *src = (pixel *)_src;
583  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
584  pixel *dst = (pixel *)_dst;
585  ptrdiff_t dststride = _dststride / sizeof(pixel);
586 
587  int shift = 14 + 1 - BIT_DEPTH;
588 #if BIT_DEPTH < 14
589  int offset = 1 << (shift - 1);
590 #else
591  int offset = 0;
592 #endif
593 
594  for (y = 0; y < height; y++) {
595  for (x = 0; x < width; x++)
596  dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
597  src += srcstride;
598  dst += dststride;
599  src2 += src2stride;
600  }
601 }
602 
603 static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
604  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
605 {
606  int x, y;
607  pixel *src = (pixel *)_src;
608  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
609  pixel *dst = (pixel *)_dst;
610  ptrdiff_t dststride = _dststride / sizeof(pixel);
611  int shift = denom + 14 - BIT_DEPTH;
612 #if BIT_DEPTH < 14
613  int offset = 1 << (shift - 1);
614 #else
615  int offset = 0;
616 #endif
617 
618  ox = ox * (1 << (BIT_DEPTH - 8));
619  for (y = 0; y < height; y++) {
620  for (x = 0; x < width; x++)
621  dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
622  src += srcstride;
623  dst += dststride;
624  }
625 }
626 
627 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
628  int16_t *src2, ptrdiff_t src2stride,
629  int height, int denom, int wx0, int wx1,
630  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
631 {
632  int x, y;
633  pixel *src = (pixel *)_src;
634  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
635  pixel *dst = (pixel *)_dst;
636  ptrdiff_t dststride = _dststride / sizeof(pixel);
637 
638  int shift = 14 + 1 - BIT_DEPTH;
639  int log2Wd = denom + shift - 1;
640 
641  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
642  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
643  for (y = 0; y < height; y++) {
644  for (x = 0; x < width; x++) {
645  dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
646  }
647  src += srcstride;
648  dst += dststride;
649  src2 += src2stride;
650  }
651 }
652 
653 ////////////////////////////////////////////////////////////////////////////////
654 //
655 ////////////////////////////////////////////////////////////////////////////////
/*
 * 8-tap filter sum around position x of `src`.
 * Taps are applied to the samples at x - 3*stride .. x + 4*stride.
 * The macro reads the variables `filter` (8 coefficients) and `x`, which are
 * not parameters and must be in scope where it is expanded. `stride` is
 * pasted unparenthesised, so callers should pass a simple expression.
 */
656 #define QPEL_FILTER(src, stride) \
657  (filter[0] * src[x - 3 * stride] + \
658  filter[1] * src[x - 2 * stride] + \
659  filter[2] * src[x - stride] + \
660  filter[3] * src[x ] + \
661  filter[4] * src[x + stride] + \
662  filter[5] * src[x + 2 * stride] + \
663  filter[6] * src[x + 3 * stride] + \
664  filter[7] * src[x + 4 * stride])
665 
666 static void FUNC(put_hevc_qpel_h)(int16_t *dst, ptrdiff_t dststride,
667  uint8_t *_src, ptrdiff_t _srcstride,
668  int height, intptr_t mx, intptr_t my, int width)
669 {
670  int x, y;
671  pixel *src = (pixel*)_src;
672  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
673  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
674  for (y = 0; y < height; y++) {
675  for (x = 0; x < width; x++)
676  dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
677  src += srcstride;
678  dst += dststride;
679  }
680 }
681 
682 static void FUNC(put_hevc_qpel_v)(int16_t *dst, ptrdiff_t dststride,
683  uint8_t *_src, ptrdiff_t _srcstride,
684  int height, intptr_t mx, intptr_t my, int width)
685 {
686  int x, y;
687  pixel *src = (pixel*)_src;
688  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
689  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
690  for (y = 0; y < height; y++) {
691  for (x = 0; x < width; x++)
692  dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
693  src += srcstride;
694  dst += dststride;
695  }
696 }
697 
698 static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
699  ptrdiff_t dststride,
700  uint8_t *_src,
701  ptrdiff_t _srcstride,
702  int height, intptr_t mx,
703  intptr_t my, int width)
704 {
705  int x, y;
706  const int8_t *filter;
707  pixel *src = (pixel*)_src;
708  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
709  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
710  int16_t *tmp = tmp_array;
711 
712  src -= QPEL_EXTRA_BEFORE * srcstride;
713  filter = ff_hevc_qpel_filters[mx - 1];
714  for (y = 0; y < height + QPEL_EXTRA; y++) {
715  for (x = 0; x < width; x++)
716  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
717  src += srcstride;
718  tmp += MAX_PB_SIZE;
719  }
720 
721  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
722  filter = ff_hevc_qpel_filters[my - 1];
723  for (y = 0; y < height; y++) {
724  for (x = 0; x < width; x++)
725  dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
726  tmp += MAX_PB_SIZE;
727  dst += dststride;
728  }
729 }
730 
731 static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride,
732  uint8_t *_src, ptrdiff_t _srcstride,
733  int height, intptr_t mx, intptr_t my, int width)
734 {
735  int x, y;
736  pixel *src = (pixel*)_src;
737  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
738  pixel *dst = (pixel *)_dst;
739  ptrdiff_t dststride = _dststride / sizeof(pixel);
740  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
741  int shift = 14 - BIT_DEPTH;
742 
743 #if BIT_DEPTH < 14
744  int offset = 1 << (shift - 1);
745 #else
746  int offset = 0;
747 #endif
748 
749  for (y = 0; y < height; y++) {
750  for (x = 0; x < width; x++)
751  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
752  src += srcstride;
753  dst += dststride;
754  }
755 }
756 
757 static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
758  int16_t *src2, ptrdiff_t src2stride,
759  int height, intptr_t mx, intptr_t my, int width)
760 {
761  int x, y;
762  pixel *src = (pixel*)_src;
763  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
764  pixel *dst = (pixel *)_dst;
765  ptrdiff_t dststride = _dststride / sizeof(pixel);
766 
767  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
768 
769  int shift = 14 + 1 - BIT_DEPTH;
770 #if BIT_DEPTH < 14
771  int offset = 1 << (shift - 1);
772 #else
773  int offset = 0;
774 #endif
775 
776  for (y = 0; y < height; y++) {
777  for (x = 0; x < width; x++)
778  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
779  src += srcstride;
780  dst += dststride;
781  src2 += src2stride;
782  }
783 }
784 
785 static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride,
786  uint8_t *_src, ptrdiff_t _srcstride,
787  int height, intptr_t mx, intptr_t my, int width)
788 {
789  int x, y;
790  pixel *src = (pixel*)_src;
791  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
792  pixel *dst = (pixel *)_dst;
793  ptrdiff_t dststride = _dststride / sizeof(pixel);
794  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
795  int shift = 14 - BIT_DEPTH;
796 
797 #if BIT_DEPTH < 14
798  int offset = 1 << (shift - 1);
799 #else
800  int offset = 0;
801 #endif
802 
803  for (y = 0; y < height; y++) {
804  for (x = 0; x < width; x++)
805  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
806  src += srcstride;
807  dst += dststride;
808  }
809 }
810 
811 
812 static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
813  int16_t *src2, ptrdiff_t src2stride,
814  int height, intptr_t mx, intptr_t my, int width)
815 {
816  int x, y;
817  pixel *src = (pixel*)_src;
818  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
819  pixel *dst = (pixel *)_dst;
820  ptrdiff_t dststride = _dststride / sizeof(pixel);
821 
822  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
823 
824  int shift = 14 + 1 - BIT_DEPTH;
825 #if BIT_DEPTH < 14
826  int offset = 1 << (shift - 1);
827 #else
828  int offset = 0;
829 #endif
830 
831  for (y = 0; y < height; y++) {
832  for (x = 0; x < width; x++)
833  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
834  src += srcstride;
835  dst += dststride;
836  src2 += src2stride;
837  }
838 }
839 
840 static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride,
841  uint8_t *_src, ptrdiff_t _srcstride,
842  int height, intptr_t mx, intptr_t my, int width)
843 {
844  int x, y;
845  const int8_t *filter;
846  pixel *src = (pixel*)_src;
847  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
848  pixel *dst = (pixel *)_dst;
849  ptrdiff_t dststride = _dststride / sizeof(pixel);
850  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
851  int16_t *tmp = tmp_array;
852  int shift = 14 - BIT_DEPTH;
853 
854 #if BIT_DEPTH < 14
855  int offset = 1 << (shift - 1);
856 #else
857  int offset = 0;
858 #endif
859 
860  src -= QPEL_EXTRA_BEFORE * srcstride;
861  filter = ff_hevc_qpel_filters[mx - 1];
862  for (y = 0; y < height + QPEL_EXTRA; y++) {
863  for (x = 0; x < width; x++)
864  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
865  src += srcstride;
866  tmp += MAX_PB_SIZE;
867  }
868 
869  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
870  filter = ff_hevc_qpel_filters[my - 1];
871 
872  for (y = 0; y < height; y++) {
873  for (x = 0; x < width; x++)
874  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
875  tmp += MAX_PB_SIZE;
876  dst += dststride;
877  }
878 }
879 
880 static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
881  int16_t *src2, ptrdiff_t src2stride,
882  int height, intptr_t mx, intptr_t my, int width)
883 {
884  int x, y;
885  const int8_t *filter;
886  pixel *src = (pixel*)_src;
887  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
888  pixel *dst = (pixel *)_dst;
889  ptrdiff_t dststride = _dststride / sizeof(pixel);
890  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
891  int16_t *tmp = tmp_array;
892  int shift = 14 + 1 - BIT_DEPTH;
893 #if BIT_DEPTH < 14
894  int offset = 1 << (shift - 1);
895 #else
896  int offset = 0;
897 #endif
898 
899  src -= QPEL_EXTRA_BEFORE * srcstride;
900  filter = ff_hevc_qpel_filters[mx - 1];
901  for (y = 0; y < height + QPEL_EXTRA; y++) {
902  for (x = 0; x < width; x++)
903  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
904  src += srcstride;
905  tmp += MAX_PB_SIZE;
906  }
907 
908  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
909  filter = ff_hevc_qpel_filters[my - 1];
910 
911  for (y = 0; y < height; y++) {
912  for (x = 0; x < width; x++)
913  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
914  tmp += MAX_PB_SIZE;
915  dst += dststride;
916  src2 += src2stride;
917  }
918 }
919 
920 static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
921  uint8_t *_src, ptrdiff_t _srcstride,
922  int height, int denom, int wx, int ox,
923  intptr_t mx, intptr_t my, int width)
924 {
925  int x, y;
926  pixel *src = (pixel*)_src;
927  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
928  pixel *dst = (pixel *)_dst;
929  ptrdiff_t dststride = _dststride / sizeof(pixel);
930  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
931  int shift = denom + 14 - BIT_DEPTH;
932 #if BIT_DEPTH < 14
933  int offset = 1 << (shift - 1);
934 #else
935  int offset = 0;
936 #endif
937 
938  ox = ox * (1 << (BIT_DEPTH - 8));
939  for (y = 0; y < height; y++) {
940  for (x = 0; x < width; x++)
941  dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
942  src += srcstride;
943  dst += dststride;
944  }
945 }
946 
947 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
948  int16_t *src2, ptrdiff_t src2stride,
949  int height, int denom, int wx0, int wx1,
950  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
951 {
952  int x, y;
953  pixel *src = (pixel*)_src;
954  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
955  pixel *dst = (pixel *)_dst;
956  ptrdiff_t dststride = _dststride / sizeof(pixel);
957 
958  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
959 
960  int shift = 14 + 1 - BIT_DEPTH;
961  int log2Wd = denom + shift - 1;
962 
963  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
964  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
965  for (y = 0; y < height; y++) {
966  for (x = 0; x < width; x++)
967  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
968  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
969  src += srcstride;
970  dst += dststride;
971  src2 += src2stride;
972  }
973 }
974 
975 static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
976  uint8_t *_src, ptrdiff_t _srcstride,
977  int height, int denom, int wx, int ox,
978  intptr_t mx, intptr_t my, int width)
979 {
980  int x, y;
981  pixel *src = (pixel*)_src;
982  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
983  pixel *dst = (pixel *)_dst;
984  ptrdiff_t dststride = _dststride / sizeof(pixel);
985  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
986  int shift = denom + 14 - BIT_DEPTH;
987 #if BIT_DEPTH < 14
988  int offset = 1 << (shift - 1);
989 #else
990  int offset = 0;
991 #endif
992 
993  ox = ox * (1 << (BIT_DEPTH - 8));
994  for (y = 0; y < height; y++) {
995  for (x = 0; x < width; x++)
996  dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
997  src += srcstride;
998  dst += dststride;
999  }
1000 }
1001 
1002 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1003  int16_t *src2, ptrdiff_t src2stride,
1004  int height, int denom, int wx0, int wx1,
1005  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1006 {
1007  int x, y;
1008  pixel *src = (pixel*)_src;
1009  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1010  pixel *dst = (pixel *)_dst;
1011  ptrdiff_t dststride = _dststride / sizeof(pixel);
1012 
1013  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
1014 
1015  int shift = 14 + 1 - BIT_DEPTH;
1016  int log2Wd = denom + shift - 1;
1017 
1018  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1019  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1020  for (y = 0; y < height; y++) {
1021  for (x = 0; x < width; x++)
1022  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1023  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1024  src += srcstride;
1025  dst += dststride;
1026  src2 += src2stride;
1027  }
1028 }
1029 
1030 static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
1031  uint8_t *_src, ptrdiff_t _srcstride,
1032  int height, int denom, int wx, int ox,
1033  intptr_t mx, intptr_t my, int width)
1034 {
1035  int x, y;
1036  const int8_t *filter;
1037  pixel *src = (pixel*)_src;
1038  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1039  pixel *dst = (pixel *)_dst;
1040  ptrdiff_t dststride = _dststride / sizeof(pixel);
1041  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1042  int16_t *tmp = tmp_array;
1043  int shift = denom + 14 - BIT_DEPTH;
1044 #if BIT_DEPTH < 14
1045  int offset = 1 << (shift - 1);
1046 #else
1047  int offset = 0;
1048 #endif
1049 
1050  src -= QPEL_EXTRA_BEFORE * srcstride;
1051  filter = ff_hevc_qpel_filters[mx - 1];
1052  for (y = 0; y < height + QPEL_EXTRA; y++) {
1053  for (x = 0; x < width; x++)
1054  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1055  src += srcstride;
1056  tmp += MAX_PB_SIZE;
1057  }
1058 
1059  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1060  filter = ff_hevc_qpel_filters[my - 1];
1061 
1062  ox = ox * (1 << (BIT_DEPTH - 8));
1063  for (y = 0; y < height; y++) {
1064  for (x = 0; x < width; x++)
1065  dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1066  tmp += MAX_PB_SIZE;
1067  dst += dststride;
1068  }
1069 }
1070 
1071 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1072  int16_t *src2, ptrdiff_t src2stride,
1073  int height, int denom, int wx0, int wx1,
1074  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1075 {
1076  int x, y;
1077  const int8_t *filter;
1078  pixel *src = (pixel*)_src;
1079  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1080  pixel *dst = (pixel *)_dst;
1081  ptrdiff_t dststride = _dststride / sizeof(pixel);
1082  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1083  int16_t *tmp = tmp_array;
1084  int shift = 14 + 1 - BIT_DEPTH;
1085  int log2Wd = denom + shift - 1;
1086 
1087  src -= QPEL_EXTRA_BEFORE * srcstride;
1088  filter = ff_hevc_qpel_filters[mx - 1];
1089  for (y = 0; y < height + QPEL_EXTRA; y++) {
1090  for (x = 0; x < width; x++)
1091  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1092  src += srcstride;
1093  tmp += MAX_PB_SIZE;
1094  }
1095 
1096  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1097  filter = ff_hevc_qpel_filters[my - 1];
1098 
1099  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1100  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1101  for (y = 0; y < height; y++) {
1102  for (x = 0; x < width; x++)
1103  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1104  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1105  tmp += MAX_PB_SIZE;
1106  dst += dststride;
1107  src2 += src2stride;
1108  }
1109 }
1110 
////////////////////////////////////////////////////////////////////////////////
// 4-tap interpolation filter (EPEL)
////////////////////////////////////////////////////////////////////////////////
/*
 * Evaluate the 4-tap filter at index x of src, with the taps placed at
 * offsets -1, 0, +1 and +2 times stride.
 *
 * The expansion reads the identifiers `filter` and `x` from the scope in
 * which the macro is used. Both arguments are parenthesised so that compound
 * expressions (for example a sum passed as stride) expand as intended. Each
 * argument is still evaluated several times, so arguments must not have side
 * effects.
 */
#define EPEL_FILTER(src, stride)                \
    (filter[0] * (src)[x - (stride)] +          \
     filter[1] * (src)[x] +                     \
     filter[2] * (src)[x + (stride)] +          \
     filter[3] * (src)[x + 2 * (stride)])
1119 
1120 static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
1121  uint8_t *_src, ptrdiff_t _srcstride,
1122  int height, intptr_t mx, intptr_t my, int width)
1123 {
1124  int x, y;
1125  pixel *src = (pixel *)_src;
1126  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1127  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1128  for (y = 0; y < height; y++) {
1129  for (x = 0; x < width; x++)
1130  dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1131  src += srcstride;
1132  dst += dststride;
1133  }
1134 }
1135 
1136 static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
1137  uint8_t *_src, ptrdiff_t _srcstride,
1138  int height, intptr_t mx, intptr_t my, int width)
1139 {
1140  int x, y;
1141  pixel *src = (pixel *)_src;
1142  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1143  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1144 
1145  for (y = 0; y < height; y++) {
1146  for (x = 0; x < width; x++)
1147  dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
1148  src += srcstride;
1149  dst += dststride;
1150  }
1151 }
1152 
1153 static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
1154  uint8_t *_src, ptrdiff_t _srcstride,
1155  int height, intptr_t mx, intptr_t my, int width)
1156 {
1157  int x, y;
1158  pixel *src = (pixel *)_src;
1159  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1160  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1161  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1162  int16_t *tmp = tmp_array;
1163 
1164  src -= EPEL_EXTRA_BEFORE * srcstride;
1165 
1166  for (y = 0; y < height + EPEL_EXTRA; y++) {
1167  for (x = 0; x < width; x++)
1168  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1169  src += srcstride;
1170  tmp += MAX_PB_SIZE;
1171  }
1172 
1173  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1174  filter = ff_hevc_epel_filters[my - 1];
1175 
1176  for (y = 0; y < height; y++) {
1177  for (x = 0; x < width; x++)
1178  dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
1179  tmp += MAX_PB_SIZE;
1180  dst += dststride;
1181  }
1182 }
1183 
1184 static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1185  int height, intptr_t mx, intptr_t my, int width)
1186 {
1187  int x, y;
1188  pixel *src = (pixel *)_src;
1189  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1190  pixel *dst = (pixel *)_dst;
1191  ptrdiff_t dststride = _dststride / sizeof(pixel);
1192  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1193  int shift = 14 - BIT_DEPTH;
1194 #if BIT_DEPTH < 14
1195  int offset = 1 << (shift - 1);
1196 #else
1197  int offset = 0;
1198 #endif
1199 
1200  for (y = 0; y < height; y++) {
1201  for (x = 0; x < width; x++)
1202  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
1203  src += srcstride;
1204  dst += dststride;
1205  }
1206 }
1207 
1208 static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1209  int16_t *src2, ptrdiff_t src2stride,
1210  int height, intptr_t mx, intptr_t my, int width)
1211 {
1212  int x, y;
1213  pixel *src = (pixel *)_src;
1214  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1215  pixel *dst = (pixel *)_dst;
1216  ptrdiff_t dststride = _dststride / sizeof(pixel);
1217  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1218  int shift = 14 + 1 - BIT_DEPTH;
1219 #if BIT_DEPTH < 14
1220  int offset = 1 << (shift - 1);
1221 #else
1222  int offset = 0;
1223 #endif
1224 
1225  for (y = 0; y < height; y++) {
1226  for (x = 0; x < width; x++) {
1227  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1228  }
1229  dst += dststride;
1230  src += srcstride;
1231  src2 += src2stride;
1232  }
1233 }
1234 
1235 static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1236  int height, intptr_t mx, intptr_t my, int width)
1237 {
1238  int x, y;
1239  pixel *src = (pixel *)_src;
1240  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1241  pixel *dst = (pixel *)_dst;
1242  ptrdiff_t dststride = _dststride / sizeof(pixel);
1243  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1244  int shift = 14 - BIT_DEPTH;
1245 #if BIT_DEPTH < 14
1246  int offset = 1 << (shift - 1);
1247 #else
1248  int offset = 0;
1249 #endif
1250 
1251  for (y = 0; y < height; y++) {
1252  for (x = 0; x < width; x++)
1253  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
1254  src += srcstride;
1255  dst += dststride;
1256  }
1257 }
1258 
1259 static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1260  int16_t *src2, ptrdiff_t src2stride,
1261  int height, intptr_t mx, intptr_t my, int width)
1262 {
1263  int x, y;
1264  pixel *src = (pixel *)_src;
1265  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1266  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1267  pixel *dst = (pixel *)_dst;
1268  ptrdiff_t dststride = _dststride / sizeof(pixel);
1269  int shift = 14 + 1 - BIT_DEPTH;
1270 #if BIT_DEPTH < 14
1271  int offset = 1 << (shift - 1);
1272 #else
1273  int offset = 0;
1274 #endif
1275 
1276  for (y = 0; y < height; y++) {
1277  for (x = 0; x < width; x++)
1278  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1279  dst += dststride;
1280  src += srcstride;
1281  src2 += src2stride;
1282  }
1283 }
1284 
1285 static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1286  int height, intptr_t mx, intptr_t my, int width)
1287 {
1288  int x, y;
1289  pixel *src = (pixel *)_src;
1290  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1291  pixel *dst = (pixel *)_dst;
1292  ptrdiff_t dststride = _dststride / sizeof(pixel);
1293  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1294  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1295  int16_t *tmp = tmp_array;
1296  int shift = 14 - BIT_DEPTH;
1297 #if BIT_DEPTH < 14
1298  int offset = 1 << (shift - 1);
1299 #else
1300  int offset = 0;
1301 #endif
1302 
1303  src -= EPEL_EXTRA_BEFORE * srcstride;
1304 
1305  for (y = 0; y < height + EPEL_EXTRA; y++) {
1306  for (x = 0; x < width; x++)
1307  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1308  src += srcstride;
1309  tmp += MAX_PB_SIZE;
1310  }
1311 
1312  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1313  filter = ff_hevc_epel_filters[my - 1];
1314 
1315  for (y = 0; y < height; y++) {
1316  for (x = 0; x < width; x++)
1317  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
1318  tmp += MAX_PB_SIZE;
1319  dst += dststride;
1320  }
1321 }
1322 
1323 static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1324  int16_t *src2, ptrdiff_t src2stride,
1325  int height, intptr_t mx, intptr_t my, int width)
1326 {
1327  int x, y;
1328  pixel *src = (pixel *)_src;
1329  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1330  pixel *dst = (pixel *)_dst;
1331  ptrdiff_t dststride = _dststride / sizeof(pixel);
1332  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1333  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1334  int16_t *tmp = tmp_array;
1335  int shift = 14 + 1 - BIT_DEPTH;
1336 #if BIT_DEPTH < 14
1337  int offset = 1 << (shift - 1);
1338 #else
1339  int offset = 0;
1340 #endif
1341 
1342  src -= EPEL_EXTRA_BEFORE * srcstride;
1343 
1344  for (y = 0; y < height + EPEL_EXTRA; y++) {
1345  for (x = 0; x < width; x++)
1346  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1347  src += srcstride;
1348  tmp += MAX_PB_SIZE;
1349  }
1350 
1351  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1352  filter = ff_hevc_epel_filters[my - 1];
1353 
1354  for (y = 0; y < height; y++) {
1355  for (x = 0; x < width; x++)
1356  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
1357  tmp += MAX_PB_SIZE;
1358  dst += dststride;
1359  src2 += src2stride;
1360  }
1361 }
1362 
1363 static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1364  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1365 {
1366  int x, y;
1367  pixel *src = (pixel *)_src;
1368  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1369  pixel *dst = (pixel *)_dst;
1370  ptrdiff_t dststride = _dststride / sizeof(pixel);
1371  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1372  int shift = denom + 14 - BIT_DEPTH;
1373 #if BIT_DEPTH < 14
1374  int offset = 1 << (shift - 1);
1375 #else
1376  int offset = 0;
1377 #endif
1378 
1379  ox = ox * (1 << (BIT_DEPTH - 8));
1380  for (y = 0; y < height; y++) {
1381  for (x = 0; x < width; x++) {
1382  dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1383  }
1384  dst += dststride;
1385  src += srcstride;
1386  }
1387 }
1388 
1389 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1390  int16_t *src2, ptrdiff_t src2stride,
1391  int height, int denom, int wx0, int wx1,
1392  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1393 {
1394  int x, y;
1395  pixel *src = (pixel *)_src;
1396  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1397  pixel *dst = (pixel *)_dst;
1398  ptrdiff_t dststride = _dststride / sizeof(pixel);
1399  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1400  int shift = 14 + 1 - BIT_DEPTH;
1401  int log2Wd = denom + shift - 1;
1402 
1403  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1404  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1405  for (y = 0; y < height; y++) {
1406  for (x = 0; x < width; x++)
1407  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1408  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1409  src += srcstride;
1410  dst += dststride;
1411  src2 += src2stride;
1412  }
1413 }
1414 
1415 static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1416  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1417 {
1418  int x, y;
1419  pixel *src = (pixel *)_src;
1420  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1421  pixel *dst = (pixel *)_dst;
1422  ptrdiff_t dststride = _dststride / sizeof(pixel);
1423  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1424  int shift = denom + 14 - BIT_DEPTH;
1425 #if BIT_DEPTH < 14
1426  int offset = 1 << (shift - 1);
1427 #else
1428  int offset = 0;
1429 #endif
1430 
1431  ox = ox * (1 << (BIT_DEPTH - 8));
1432  for (y = 0; y < height; y++) {
1433  for (x = 0; x < width; x++) {
1434  dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1435  }
1436  dst += dststride;
1437  src += srcstride;
1438  }
1439 }
1440 
1441 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1442  int16_t *src2, ptrdiff_t src2stride,
1443  int height, int denom, int wx0, int wx1,
1444  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1445 {
1446  int x, y;
1447  pixel *src = (pixel *)_src;
1448  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1449  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1450  pixel *dst = (pixel *)_dst;
1451  ptrdiff_t dststride = _dststride / sizeof(pixel);
1452  int shift = 14 + 1 - BIT_DEPTH;
1453  int log2Wd = denom + shift - 1;
1454 
1455  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1456  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1457  for (y = 0; y < height; y++) {
1458  for (x = 0; x < width; x++)
1459  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1460  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1461  src += srcstride;
1462  dst += dststride;
1463  src2 += src2stride;
1464  }
1465 }
1466 
1467 static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1468  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1469 {
1470  int x, y;
1471  pixel *src = (pixel *)_src;
1472  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1473  pixel *dst = (pixel *)_dst;
1474  ptrdiff_t dststride = _dststride / sizeof(pixel);
1475  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1476  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1477  int16_t *tmp = tmp_array;
1478  int shift = denom + 14 - BIT_DEPTH;
1479 #if BIT_DEPTH < 14
1480  int offset = 1 << (shift - 1);
1481 #else
1482  int offset = 0;
1483 #endif
1484 
1485  src -= EPEL_EXTRA_BEFORE * srcstride;
1486 
1487  for (y = 0; y < height + EPEL_EXTRA; y++) {
1488  for (x = 0; x < width; x++)
1489  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1490  src += srcstride;
1491  tmp += MAX_PB_SIZE;
1492  }
1493 
1494  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1495  filter = ff_hevc_epel_filters[my - 1];
1496 
1497  ox = ox * (1 << (BIT_DEPTH - 8));
1498  for (y = 0; y < height; y++) {
1499  for (x = 0; x < width; x++)
1500  dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1501  tmp += MAX_PB_SIZE;
1502  dst += dststride;
1503  }
1504 }
1505 
1506 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1507  int16_t *src2, ptrdiff_t src2stride,
1508  int height, int denom, int wx0, int wx1,
1509  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1510 {
1511  int x, y;
1512  pixel *src = (pixel *)_src;
1513  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1514  pixel *dst = (pixel *)_dst;
1515  ptrdiff_t dststride = _dststride / sizeof(pixel);
1516  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1517  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1518  int16_t *tmp = tmp_array;
1519  int shift = 14 + 1 - BIT_DEPTH;
1520  int log2Wd = denom + shift - 1;
1521 
1522  src -= EPEL_EXTRA_BEFORE * srcstride;
1523 
1524  for (y = 0; y < height + EPEL_EXTRA; y++) {
1525  for (x = 0; x < width; x++)
1526  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1527  src += srcstride;
1528  tmp += MAX_PB_SIZE;
1529  }
1530 
1531  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1532  filter = ff_hevc_epel_filters[my - 1];
1533 
1534  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1535  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1536  for (y = 0; y < height; y++) {
1537  for (x = 0; x < width; x++)
1538  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1539  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1540  tmp += MAX_PB_SIZE;
1541  dst += dststride;
1542  src2 += src2stride;
1543  }
1544 }// line zero
/*
 * Sample accessors used by the loop filters. Each macro expands to an lvalue
 * that indexes `pix` using the `xstride` and `ystride` variables of the scope
 * it is used in: P3..P0 sit at -4..-1 times xstride, Q0..Q3 at 0..3 times
 * xstride.
 */
#define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride]
#define P1 pix[-2 * xstride]
#define P0 pix[-xstride]
#define Q0 pix[0]
#define Q1 pix[xstride]
#define Q2 pix[2 * xstride]
#define Q3 pix[3 * xstride]

// line three (same samples, 3 * ystride further on). used only for deblocking decision
#define TP3 pix[3 * ystride - 4 * xstride]
#define TP2 pix[3 * ystride - 3 * xstride]
#define TP1 pix[3 * ystride - 2 * xstride]
#define TP0 pix[3 * ystride - xstride]
#define TQ0 pix[3 * ystride]
#define TQ1 pix[3 * ystride + xstride]
#define TQ2 pix[3 * ystride + 2 * xstride]
#define TQ3 pix[3 * ystride + 3 * xstride]
1563 
1565  ptrdiff_t _xstride, ptrdiff_t _ystride,
1566  int *_beta, int *_tc,
1567  uint8_t *_no_p, uint8_t *_no_q)
1568 {
1569  int d, j;
1570  pixel *pix = (pixel *)_pix;
1571  ptrdiff_t xstride = _xstride / sizeof(pixel);
1572  ptrdiff_t ystride = _ystride / sizeof(pixel);
1573 
1574  for (j = 0; j < 2; j++) {
1575  const int dp0 = abs(P2 - 2 * P1 + P0);
1576  const int dq0 = abs(Q2 - 2 * Q1 + Q0);
1577  const int dp3 = abs(TP2 - 2 * TP1 + TP0);
1578  const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
1579  const int d0 = dp0 + dq0;
1580  const int d3 = dp3 + dq3;
1581  const int beta = _beta[j] << (BIT_DEPTH - 8);
1582  const int tc = _tc[j] << (BIT_DEPTH - 8);
1583  const int no_p = _no_p[j];
1584  const int no_q = _no_q[j];
1585 
1586  if (d0 + d3 >= beta) {
1587  pix += 4 * ystride;
1588  continue;
1589  } else {
1590  const int beta_3 = beta >> 3;
1591  const int beta_2 = beta >> 2;
1592  const int tc25 = ((tc * 5 + 1) >> 1);
1593 
1594  if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
1595  abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1596  (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1597  // strong filtering
1598  const int tc2 = tc << 1;
1599  for (d = 0; d < 4; d++) {
1600  const int p3 = P3;
1601  const int p2 = P2;
1602  const int p1 = P1;
1603  const int p0 = P0;
1604  const int q0 = Q0;
1605  const int q1 = Q1;
1606  const int q2 = Q2;
1607  const int q3 = Q3;
1608  if (!no_p) {
1609  P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1610  P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1611  P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1612  }
1613  if (!no_q) {
1614  Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1615  Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1616  Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1617  }
1618  pix += ystride;
1619  }
1620  } else { // normal filtering
1621  int nd_p = 1;
1622  int nd_q = 1;
1623  const int tc_2 = tc >> 1;
1624  if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1625  nd_p = 2;
1626  if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1627  nd_q = 2;
1628 
1629  for (d = 0; d < 4; d++) {
1630  const int p2 = P2;
1631  const int p1 = P1;
1632  const int p0 = P0;
1633  const int q0 = Q0;
1634  const int q1 = Q1;
1635  const int q2 = Q2;
1636  int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1637  if (abs(delta0) < 10 * tc) {
1638  delta0 = av_clip(delta0, -tc, tc);
1639  if (!no_p)
1640  P0 = av_clip_pixel(p0 + delta0);
1641  if (!no_q)
1642  Q0 = av_clip_pixel(q0 - delta0);
1643  if (!no_p && nd_p > 1) {
1644  const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1645  P1 = av_clip_pixel(p1 + deltap1);
1646  }
1647  if (!no_q && nd_q > 1) {
1648  const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1649  Q1 = av_clip_pixel(q1 + deltaq1);
1650  }
1651  }
1652  pix += ystride;
1653  }
1654  }
1655  }
1656  }
1657 }
1658 
1659 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1660  ptrdiff_t _ystride, int *_tc,
1661  uint8_t *_no_p, uint8_t *_no_q)
1662 {
1663  int d, j, no_p, no_q;
1664  pixel *pix = (pixel *)_pix;
1665  ptrdiff_t xstride = _xstride / sizeof(pixel);
1666  ptrdiff_t ystride = _ystride / sizeof(pixel);
1667 
1668  for (j = 0; j < 2; j++) {
1669  const int tc = _tc[j] << (BIT_DEPTH - 8);
1670  if (tc <= 0) {
1671  pix += 4 * ystride;
1672  continue;
1673  }
1674  no_p = _no_p[j];
1675  no_q = _no_q[j];
1676 
1677  for (d = 0; d < 4; d++) {
1678  int delta0;
1679  const int p1 = P1;
1680  const int p0 = P0;
1681  const int q0 = Q0;
1682  const int q1 = Q1;
1683  delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
1684  if (!no_p)
1685  P0 = av_clip_pixel(p0 + delta0);
1686  if (!no_q)
1687  Q0 = av_clip_pixel(q0 - delta0);
1688  pix += ystride;
1689  }
1690  }
1691 }
1692 
1693 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1694  int *tc, uint8_t *no_p,
1695  uint8_t *no_q)
1696 {
1697  FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
1698 }
1699 
1700 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1701  int *tc, uint8_t *no_p,
1702  uint8_t *no_q)
1703 {
1704  FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
1705 }
1706 
1707 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1708  int *beta, int *tc, uint8_t *no_p,
1709  uint8_t *no_q)
1710 {
1711  FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
1712  beta, tc, no_p, no_q);
1713 }
1714 
1715 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1716  int *beta, int *tc, uint8_t *no_p,
1717  uint8_t *no_q)
1718 {
1719  FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
1720  beta, tc, no_p, no_q);
1721 }
1722 
/* Remove the line-zero sample accessor macros used by the loop filters. */
#undef Q3
#undef Q2
#undef Q1
#undef Q0
#undef P0
#undef P1
#undef P2
#undef P3

/* Remove the line-three sample accessor macros as well. */
#undef TQ3
#undef TQ2
#undef TQ1
#undef TQ0
#undef TP0
#undef TP1
#undef TP2
#undef TP3