00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "avcodec.h"
00022 #include "dsputil.h"
00023 #include "snow.h"
00024
00025 #include "rangecoder.h"
00026 #include "mathops.h"
00027
00028 #include "mpegvideo.h"
00029
00030 #undef NDEBUG
00031 #include <assert.h>
00032
/*
 * 256-entry lookup table whose entries are all -1, 0 or +1.
 * Indices 0, 1 and 255 hold 0, indices 2..127 hold +1 and
 * indices 128..254 hold -1.
 * NOTE(review): the layout suggests the index is a signed difference
 * masked to 8 bits (128..255 standing for -128..-1) - confirm at the
 * use site, which is not visible here.
 */
static const int8_t quant3[256]={
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
};
/*
 * 256-entry three-level lookup table: index 0 holds 0, indices 1..127
 * hold +1 and indices 128..255 hold -1 (no dead zone around zero apart
 * from index 0 itself).
 * NOTE(review): presumably indexed with a signed value masked to 8 bits -
 * confirm at the use site.
 */
static const int8_t quant3b[256]={
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
/*
 * 256-entry three-level lookup table with an alternating layout:
 * indices 0 and 1 hold 0; from index 2 onwards every even index holds +1
 * and every odd index holds -1.
 * NOTE(review): presumably indexed with a value whose lowest bit carries
 * the sign and whose upper bits carry the magnitude - confirm at the use
 * site, which is not visible here.
 */
static const int8_t quant3bA[256]={
0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
};
/*
 * 256-entry five-level lookup table; entries lie in -2..+2.
 * The lower half (indices 0..127) is non-negative and non-decreasing,
 * the upper half (indices 128..255) is negative and rises towards -1 at
 * index 255.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits -
 * confirm at the use site.
 */
static const int8_t quant5[256]={
0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
/*
 * 256-entry seven-level lookup table; entries lie in -3..+3.
 * The lower half (indices 0..127) is non-negative and non-decreasing,
 * the upper half (indices 128..255) is negative and rises towards -1 at
 * index 255.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits -
 * confirm at the use site.
 */
static const int8_t quant7[256]={
0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
};
/*
 * 256-entry nine-level lookup table; entries lie in -4..+4.
 * The lower half (indices 0..127) is non-negative and non-decreasing,
 * the upper half (indices 128..255) is negative and rises towards -1 at
 * index 255.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits -
 * confirm at the use site.
 */
static const int8_t quant9[256]={
0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
};
/*
 * 256-entry eleven-level lookup table; entries lie in -5..+5.
 * The lower half (indices 0..127) is non-negative and non-decreasing,
 * the upper half (indices 128..255) is negative and rises towards -1 at
 * index 255.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits -
 * confirm at the use site.
 */
static const int8_t quant11[256]={
0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
/*
 * 256-entry thirteen-level lookup table; entries lie in -6..+6.
 * The lower half (indices 0..127) is non-negative and non-decreasing,
 * the upper half (indices 128..255) is negative and rises towards -1 at
 * index 255.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits -
 * confirm at the use site.
 */
static const int8_t quant13[256]={
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
};
00177
/*
 * Overlapped-block weighting windows, linear variant ("64*linear").
 *
 * This used to be a three-way #if selecting between cubic, linear and
 * cosine windows; only the linear branch was ever compiled (#if 0 /
 * #elif 1 / #else), so the two dead alternatives have been removed.
 *
 * obmc32 is a 32x32 window and obmc16 a 16x16 window, both stored row by
 * row. For an NxN window the four weights at (x,y), (x+N/2,y), (x,y+N/2)
 * and (x+N/2,y+N/2) add up to 256 for every 0 <= x,y < N/2.
 */
static const uint8_t obmc32[1024]={
0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
};
static const uint8_t obmc16[256]={
0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
};
00344
00345
/*
 * 8x8 weighting window stored row by row (64 entries).
 * The four weights at (x,y), (x+4,y), (x,y+4) and (x+4,y+4) add up to 256
 * for every 0 <= x,y < 4, and the window is symmetric in both directions.
 */
static const uint8_t obmc8[64]={
4, 12, 20, 28, 28, 20, 12, 4,
12, 36, 60, 84, 84, 60, 36, 12,
20, 60,100,140,140,100, 60, 20,
28, 84,140,196,196,140, 84, 28,
28, 84,140,196,196,140, 84, 28,
20, 60,100,140,140,100, 60, 20,
12, 36, 60, 84, 84, 60, 36, 12,
4, 12, 20, 28, 28, 20, 12, 4,

};
00357
00358
/*
 * 4x4 weighting window stored row by row (16 entries).
 * The four weights at (x,y), (x+2,y), (x,y+2) and (x+2,y+2) add up to 256
 * for every 0 <= x,y < 2.
 */
static const uint8_t obmc4[16]={
16, 48, 48, 16,
48,144,144, 48,
48,144,144, 48,
16, 48, 48, 16,

};
00366
/*
 * Window selection table: entry i points at the window whose edge length
 * is 32>>i (obmc32, obmc16, obmc8, obmc4 in that order).
 */
static const uint8_t * const obmc_tab[4]={
obmc32, obmc16, obmc8, obmc4
};
00370
/*
 * File-scope MAX_REF_FRAMES x MAX_REF_FRAMES integer table, zero-initialised
 * as a static object.
 * NOTE(review): presumably holds motion-vector scale factors between pairs
 * of reference frames - confirm where it is filled in (not visible here).
 */
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
00372
/*
 * Description of a single block.
 * NOTE(review): field meanings below are inferred from the names only and
 * should be confirmed against the code that reads and writes them.
 */
typedef struct BlockNode{
int16_t mx; // presumably the horizontal motion vector component - confirm
int16_t my; // presumably the vertical motion vector component - confirm
uint8_t ref; // presumably the index of the reference frame - confirm
uint8_t color[3]; // three 8-bit components; null_block sets each to 128
uint8_t type; // presumably a combination of the BLOCK_* values below - confirm

// Values defined for use with the type field (semantics not visible here).
#define BLOCK_INTRA 1
#define BLOCK_OPT 2

uint8_t level;
}BlockNode;
00385
/*
 * Constant default block: all three colour components are 128, and mx, my,
 * ref, type and level are all zero.
 */
static const BlockNode null_block= {
.color= {128,128,128},
.mx= 0,
.my= 0,
.ref= 0,
.type= 0,
.level= 0,
};
00394
// Base-2 logarithm of MB_SIZE.
#define LOG2_MB_SIZE 4
// 1<<4 = 16. NOTE(review): presumably the block edge length in pixels - confirm.
#define MB_SIZE (1<<LOG2_MB_SIZE)
// NOTE(review): presumably extra precision bits used on the encoder side - confirm at the use site.
#define ENCODER_EXTRA_BITS 4
// Upper bound used to size the hcoeff arrays in Plane (HTAPS_MAX/2 entries each).
#define HTAPS_MAX 8
00399
/*
 * Pair of a signed 16-bit x value and an unsigned 16-bit coefficient.
 * NOTE(review): presumably one entry of a sparse per-row coefficient list
 * (see SubBand.x_coeff) - confirm against the code that fills it.
 */
typedef struct x_and_coeff{
int16_t x;
uint16_t coeff;
} x_and_coeff;
00404
/*
 * State of one subband. Plane holds MAX_DECOMPOSITIONS x 4 of these.
 * NOTE(review): the per-field notes are inferred from names and types
 * only; confirm against the code that initialises the structure.
 */
typedef struct SubBand{
int level;
int stride;
int width;
int height;
int qlog; // presumably a log-scale quantiser value for this band - confirm
DWTELEM *buf; // coefficient storage using the DWTELEM type
IDWTELEM *ibuf; // coefficient storage using the IDWTELEM type
int buf_x_offset;
int buf_y_offset;
int stride_line; // presumably the stride expressed in lines - confirm
x_and_coeff * x_coeff;
struct SubBand *parent; // pointer to another SubBand; which one is not visible here
uint8_t state[ 7 + 512][32]; // 519 rows of 32 bytes each
}SubBand;
00420
/*
 * Per-plane state: dimensions, the subbands of every decomposition level
 * and a set of filter parameters, each paired with a "last_" copy.
 * NOTE(review): the "last_" fields presumably hold the values of the
 * previous frame - confirm.
 */
typedef struct Plane{
int width;
int height;
SubBand band[MAX_DECOMPOSITIONS][4]; // four subbands per decomposition level

int htaps;
int8_t hcoeff[HTAPS_MAX/2]; // HTAPS_MAX/2 = 4 signed 8-bit coefficients
int diag_mc;
int fast_mc;

int last_htaps;
int8_t last_hcoeff[HTAPS_MAX/2];
int last_diag_mc;
}Plane;
00435
/*
 * Top-level codec state. It bundles the owning AVCodecContext, the range
 * coder, the DSP context, the picture buffers, the per-plane data, the
 * block array, a motion-estimation cache, the slice buffer and an embedded
 * MpegEncContext.
 * NOTE(review): many fields come in current/"last_" pairs; the "last_"
 * members presumably keep the value used for the previous frame - confirm.
 */
typedef struct SnowContext{


AVCodecContext *avctx;
RangeCoder c;
DSPContext dsp;
AVFrame new_picture;
AVFrame input_picture;
AVFrame current_picture;
AVFrame last_picture[MAX_REF_FRAMES];
uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
AVFrame mconly_picture;

uint8_t header_state[32];
uint8_t block_state[128 + 32*128];
int keyframe;
int always_reset;
int version;
int spatial_decomposition_type;
int last_spatial_decomposition_type;
int temporal_decomposition_type;
int spatial_decomposition_count;
int last_spatial_decomposition_count;
int temporal_decomposition_count;
int max_ref_frames;
int ref_frames;
int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; // per reference frame: pointer to an array of int16_t pairs
uint32_t *ref_scores[MAX_REF_FRAMES];
DWTELEM *spatial_dwt_buffer;
IDWTELEM *spatial_idwt_buffer;
int colorspace_type;
int chroma_h_shift;
int chroma_v_shift;
int spatial_scalability;
int qlog;
int last_qlog;
int lambda;
int lambda2;
int pass1_rc;
int mv_scale;
int last_mv_scale;
int qbias;
int last_qbias;
// NOTE(review): presumably the number of fractional bits of qbias - confirm at the use site.
#define QBIAS_SHIFT 3
int b_width;
int b_height;
int block_max_depth;
int last_block_max_depth;
Plane plane[MAX_PLANES];
BlockNode *block;
// Number of entries in me_cache.
#define ME_CACHE_SIZE 1024
int me_cache[ME_CACHE_SIZE];
int me_cache_generation;
slice_buffer sb;

MpegEncContext m; // NOTE(review): presumably only needed for motion estimation - confirm

uint8_t *scratchbuf;
}SnowContext;
00495
/*
 * Four IDWTELEM line pointers plus an integer row position.
 * NOTE(review): presumably the sliding-window state of an inverse-DWT
 * compose pass - confirm against the functions that take a DWTCompose.
 */
typedef struct {
IDWTELEM *b0;
IDWTELEM *b1;
IDWTELEM *b2;
IDWTELEM *b3;
int y;
} DWTCompose;
00503
/*
 * Fast path for fetching a line: yields (slice_buf)->line[line_num] when it
 * is already set and calls slice_buffer_load_line() otherwise.
 * Both arguments are expanded more than once, so they must not have side
 * effects.
 */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
00505
00506
// Forward declaration; the definition is not in this part of the file.
static void iterative_me(SnowContext *s);
00508
00509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
00510 {
00511 int i;
00512
00513 buf->base_buffer = base_buffer;
00514 buf->line_count = line_count;
00515 buf->line_width = line_width;
00516 buf->data_count = max_allocated_lines;
00517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
00518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
00519
00520 for(i = 0; i < max_allocated_lines; i++){
00521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
00522 }
00523
00524 buf->data_stack_top = max_allocated_lines - 1;
00525 }
00526
00527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
00528 {
00529 int offset;
00530 IDWTELEM * buffer;
00531
00532 assert(buf->data_stack_top >= 0);
00533
00534 if (buf->line[line])
00535 return buf->line[line];
00536
00537 offset = buf->line_width * line;
00538 buffer = buf->data_stack[buf->data_stack_top];
00539 buf->data_stack_top--;
00540 buf->line[line] = buffer;
00541
00542 return buffer;
00543 }
00544
00545 static void slice_buffer_release(slice_buffer * buf, int line)
00546 {
00547 int offset;
00548 IDWTELEM * buffer;
00549
00550 assert(line >= 0 && line < buf->line_count);
00551 assert(buf->line[line]);
00552
00553 offset = buf->line_width * line;
00554 buffer = buf->line[line];
00555 buf->data_stack_top++;
00556 buf->data_stack[buf->data_stack_top] = buffer;
00557 buf->line[line] = NULL;
00558 }
00559
00560 static void slice_buffer_flush(slice_buffer * buf)
00561 {
00562 int i;
00563 for(i = 0; i < buf->line_count; i++){
00564 if (buf->line[i])
00565 slice_buffer_release(buf, i);
00566 }
00567 }
00568
00569 static void slice_buffer_destroy(slice_buffer * buf)
00570 {
00571 int i;
00572 slice_buffer_flush(buf);
00573
00574 for(i = buf->data_count - 1; i >= 0; i--){
00575 av_freep(&buf->data_stack[i]);
00576 }
00577 av_freep(&buf->data_stack);
00578 av_freep(&buf->line);
00579 }
00580
#ifdef __sgi
// NOTE(review): presumably removes a qexp macro from the SGI system headers that would clash with the table below - confirm
#undef qexp
#endif
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// QROOT-entry byte table; it is filled in elsewhere (not visible here).
static uint8_t qexp[QROOT];
00587
/**
 * Reflect an index back into the range [0, m].
 *
 * Values outside the range are folded at 0 and at m (without repeating the
 * edge sample) until they fall inside.
 *
 * @param v index to fold
 * @param m largest valid index
 * @return the folded index
 *
 * With m == 0 the only valid index is 0; it is returned directly because
 * the folding loop below would never terminate for a non-zero v.
 */
static inline int mirror(int v, int m){
    if(!m)
        return 0;

    /* the unsigned comparison catches both v < 0 and v > m */
    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
00595
/*
 * Write an integer to the range coder using adaptive binary contexts.
 *
 * Layout of the emitted bits:
 *   - one "is zero" flag using state[0] (1 for v == 0, 0 otherwise);
 *   - for non-zero v: e = av_log2(|v|) coded in unary (e ones followed by
 *     a zero), then the e bits of |v| below its leading one, most
 *     significant first, then, if is_signed is set, one bit that is 1 for
 *     negative v.
 *
 * c         range coder to write to
 * state     context array; indices 0..31 are used
 * v         value to write
 * is_signed non-zero to emit the sign bit; otherwise only |v| is written
 *
 * Only the "#if 1" branch is compiled; the "#else" branch is a disabled
 * alternative formulation.
 */
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
int i;

if(v){
const int a= FFABS(v);
const int e= av_log2(a);
#if 1
const int el= FFMIN(e, 10);
put_rac(c, state+0, 0);

// unary exponent: contexts 1..10, later positions all share context 10
for(i=0; i<el; i++){
put_rac(c, state+1+i, 1);
}
for(; i<e; i++){
put_rac(c, state+1+9, 1);
}
// terminating zero of the unary code
put_rac(c, state+1+FFMIN(i,9), 0);

// mantissa bits: contexts 22..31, bit positions above 9 share context 31
for(i=e-1; i>=el; i--){
put_rac(c, state+22+9, (a>>i)&1);
}
for(; i>=0; i--){
put_rac(c, state+22+i, (a>>i)&1);
}

// sign bit: contexts 11..21, selected by the clamped exponent
if(is_signed)
put_rac(c, state+11 + el, v < 0);
#else

put_rac(c, state+0, 0);
if(e<=9){
for(i=0; i<e; i++){
put_rac(c, state+1+i, 1);
}
put_rac(c, state+1+i, 0);

for(i=e-1; i>=0; i--){
put_rac(c, state+22+i, (a>>i)&1);
}

if(is_signed)
put_rac(c, state+11 + e, v < 0);
}else{
for(i=0; i<e; i++){
put_rac(c, state+1+FFMIN(i,9), 1);
}
put_rac(c, state+1+FFMIN(i,9), 0);

for(i=e-1; i>=0; i--){
put_rac(c, state+22+FFMIN(i,9), (a>>i)&1);
}

if(is_signed)
put_rac(c, state+11 + FFMIN(e,10), v < 0);
}
#endif
}else{
put_rac(c, state+0, 1);
}
}
00656
00657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
00658 if(get_rac(c, state+0))
00659 return 0;
00660 else{
00661 int i, e, a;
00662 e= 0;
00663 while(get_rac(c, state+1 + FFMIN(e,9))){
00664 e++;
00665 }
00666
00667 a= 1;
00668 for(i=e-1; i>=0; i--){
00669 a += a + get_rac(c, state+22 + FFMIN(i,9));
00670 }
00671
00672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10)))
00673 return -a;
00674 else
00675 return a;
00676 }
00677 }
00678
00679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
00680 int i;
00681 int r= log2>=0 ? 1<<log2 : 1;
00682
00683 assert(v>=0);
00684 assert(log2>=-4);
00685
00686 while(v >= r){
00687 put_rac(c, state+4+log2, 1);
00688 v -= r;
00689 log2++;
00690 if(log2>0) r+=r;
00691 }
00692 put_rac(c, state+4+log2, 0);
00693
00694 for(i=log2-1; i>=0; i--){
00695 put_rac(c, state+31-i, (v>>i)&1);
00696 }
00697 }
00698
00699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
00700 int i;
00701 int r= log2>=0 ? 1<<log2 : 1;
00702 int v=0;
00703
00704 assert(log2>=-4);
00705
00706 while(get_rac(c, state+4+log2)){
00707 v+= r;
00708 log2++;
00709 if(log2>0) r+=r;
00710 }
00711
00712 for(i=log2-1; i>=0; i--){
00713 v+= get_rac(c, state+31-i)<<i;
00714 }
00715
00716 return v;
00717 }
00718
00719 static av_always_inline void
00720 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00721 int dst_step, int src_step, int ref_step,
00722 int width, int mul, int add, int shift,
00723 int highpass, int inverse){
00724 const int mirror_left= !highpass;
00725 const int mirror_right= (width&1) ^ highpass;
00726 const int w= (width>>1) - 1 + (highpass & width);
00727 int i;
00728
00729 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00730 if(mirror_left){
00731 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00732 dst += dst_step;
00733 src += src_step;
00734 }
00735
00736 for(i=0; i<w; i++){
00737 dst[i*dst_step] =
00738 LIFT(src[i*src_step],
00739 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00740 inverse);
00741 }
00742
00743 if(mirror_right){
00744 dst[w*dst_step] =
00745 LIFT(src[w*src_step],
00746 ((mul*2*ref[w*ref_step]+add)>>shift),
00747 inverse);
00748 }
00749 }
00750
00751 static av_always_inline void
00752 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00753 int dst_step, int src_step, int ref_step,
00754 int width, int mul, int add, int shift,
00755 int highpass, int inverse){
00756 const int mirror_left= !highpass;
00757 const int mirror_right= (width&1) ^ highpass;
00758 const int w= (width>>1) - 1 + (highpass & width);
00759 int i;
00760
00761 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00762 if(mirror_left){
00763 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00764 dst += dst_step;
00765 src += src_step;
00766 }
00767
00768 for(i=0; i<w; i++){
00769 dst[i*dst_step] =
00770 LIFT(src[i*src_step],
00771 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00772 inverse);
00773 }
00774
00775 if(mirror_right){
00776 dst[w*dst_step] =
00777 LIFT(src[w*src_step],
00778 ((mul*2*ref[w*ref_step]+add)>>shift),
00779 inverse);
00780 }
00781 }
00782
00783 #ifndef liftS
00784 static av_always_inline void
00785 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00786 int dst_step, int src_step, int ref_step,
00787 int width, int mul, int add, int shift,
00788 int highpass, int inverse){
00789 const int mirror_left= !highpass;
00790 const int mirror_right= (width&1) ^ highpass;
00791 const int w= (width>>1) - 1 + (highpass & width);
00792 int i;
00793
00794 assert(shift == 4);
00795 #define LIFTS(src, ref, inv) \
00796 ((inv) ? \
00797 (src) + (((ref) + 4*(src))>>shift): \
00798 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00799 if(mirror_left){
00800 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00801 dst += dst_step;
00802 src += src_step;
00803 }
00804
00805 for(i=0; i<w; i++){
00806 dst[i*dst_step] =
00807 LIFTS(src[i*src_step],
00808 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00809 inverse);
00810 }
00811
00812 if(mirror_right){
00813 dst[w*dst_step] =
00814 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00815 }
00816 }
00817 static av_always_inline void
00818 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00819 int dst_step, int src_step, int ref_step,
00820 int width, int mul, int add, int shift,
00821 int highpass, int inverse){
00822 const int mirror_left= !highpass;
00823 const int mirror_right= (width&1) ^ highpass;
00824 const int w= (width>>1) - 1 + (highpass & width);
00825 int i;
00826
00827 assert(shift == 4);
00828 #define LIFTS(src, ref, inv) \
00829 ((inv) ? \
00830 (src) + (((ref) + 4*(src))>>shift): \
00831 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00832 if(mirror_left){
00833 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00834 dst += dst_step;
00835 src += src_step;
00836 }
00837
00838 for(i=0; i<w; i++){
00839 dst[i*dst_step] =
00840 LIFTS(src[i*src_step],
00841 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00842 inverse);
00843 }
00844
00845 if(mirror_right){
00846 dst[w*dst_step] =
00847 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00848 }
00849 }
00850 #endif
00851
00852 static void horizontal_decompose53i(DWTELEM *b, int width){
00853 DWTELEM temp[width];
00854 const int width2= width>>1;
00855 int x;
00856 const int w2= (width+1)>>1;
00857
00858 for(x=0; x<width2; x++){
00859 temp[x ]= b[2*x ];
00860 temp[x+w2]= b[2*x + 1];
00861 }
00862 if(width&1)
00863 temp[x ]= b[2*x ];
00864 #if 0
00865 {
00866 int A1,A2,A3,A4;
00867 A2= temp[1 ];
00868 A4= temp[0 ];
00869 A1= temp[0+width2];
00870 A1 -= (A2 + A4)>>1;
00871 A4 += (A1 + 1)>>1;
00872 b[0+width2] = A1;
00873 b[0 ] = A4;
00874 for(x=1; x+1<width2; x+=2){
00875 A3= temp[x+width2];
00876 A4= temp[x+1 ];
00877 A3 -= (A2 + A4)>>1;
00878 A2 += (A1 + A3 + 2)>>2;
00879 b[x+width2] = A3;
00880 b[x ] = A2;
00881
00882 A1= temp[x+1+width2];
00883 A2= temp[x+2 ];
00884 A1 -= (A2 + A4)>>1;
00885 A4 += (A1 + A3 + 2)>>2;
00886 b[x+1+width2] = A1;
00887 b[x+1 ] = A4;
00888 }
00889 A3= temp[width-1];
00890 A3 -= A2;
00891 A2 += (A1 + A3 + 2)>>2;
00892 b[width -1] = A3;
00893 b[width2-1] = A2;
00894 }
00895 #else
00896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
00897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
00898 #endif
00899 }
00900
00901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00902 int i;
00903
00904 for(i=0; i<width; i++){
00905 b1[i] -= (b0[i] + b2[i])>>1;
00906 }
00907 }
00908
00909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00910 int i;
00911
00912 for(i=0; i<width; i++){
00913 b1[i] += (b0[i] + b2[i] + 2)>>2;
00914 }
00915 }
00916
00917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
00918 int y;
00919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
00920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
00921
00922 for(y=-2; y<height; y+=2){
00923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
00924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
00925
00926 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
00927 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
00928
00929 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
00930 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
00931
00932 b0=b2;
00933 b1=b3;
00934 }
00935 }
00936
00937 static void horizontal_decompose97i(DWTELEM *b, int width){
00938 DWTELEM temp[width];
00939 const int w2= (width+1)>>1;
00940
00941 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
00942 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
00943 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
00944 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
00945 }
00946
00947
00948 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00949 int i;
00950
00951 for(i=0; i<width; i++){
00952 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
00953 }
00954 }
00955
00956 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00957 int i;
00958
00959 for(i=0; i<width; i++){
00960 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
00961 }
00962 }
00963
00964 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00965 int i;
00966
00967 for(i=0; i<width; i++){
00968 #ifdef liftS
00969 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
00970 #else
00971 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
00972 #endif
00973 }
00974 }
00975
00976 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00977 int i;
00978
00979 for(i=0; i<width; i++){
00980 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
00981 }
00982 }
00983
00984 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
00985 int y;
00986 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
00987 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
00988 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
00989 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
00990
00991 for(y=-4; y<height; y+=2){
00992 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
00993 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
00994
00995 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
00996 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
00997
00998 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
00999 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
01000 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
01001 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
01002
01003 b0=b2;
01004 b1=b3;
01005 b2=b4;
01006 b3=b5;
01007 }
01008 }
01009
01010 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01011 int level;
01012
01013 for(level=0; level<decomposition_count; level++){
01014 switch(type){
01015 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
01016 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
01017 }
01018 }
01019 }
01020
01021 static void horizontal_compose53i(IDWTELEM *b, int width){
01022 IDWTELEM temp[width];
01023 const int width2= width>>1;
01024 const int w2= (width+1)>>1;
01025 int x;
01026
01027 #if 0
01028 int A1,A2,A3,A4;
01029 A2= temp[1 ];
01030 A4= temp[0 ];
01031 A1= temp[0+width2];
01032 A1 -= (A2 + A4)>>1;
01033 A4 += (A1 + 1)>>1;
01034 b[0+width2] = A1;
01035 b[0 ] = A4;
01036 for(x=1; x+1<width2; x+=2){
01037 A3= temp[x+width2];
01038 A4= temp[x+1 ];
01039 A3 -= (A2 + A4)>>1;
01040 A2 += (A1 + A3 + 2)>>2;
01041 b[x+width2] = A3;
01042 b[x ] = A2;
01043
01044 A1= temp[x+1+width2];
01045 A2= temp[x+2 ];
01046 A1 -= (A2 + A4)>>1;
01047 A4 += (A1 + A3 + 2)>>2;
01048 b[x+1+width2] = A1;
01049 b[x+1 ] = A4;
01050 }
01051 A3= temp[width-1];
01052 A3 -= A2;
01053 A2 += (A1 + A3 + 2)>>2;
01054 b[width -1] = A3;
01055 b[width2-1] = A2;
01056 #else
01057 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
01058 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
01059 #endif
01060 for(x=0; x<width2; x++){
01061 b[2*x ]= temp[x ];
01062 b[2*x + 1]= temp[x+w2];
01063 }
01064 if(width&1)
01065 b[2*x ]= temp[x ];
01066 }
01067
01068 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01069 int i;
01070
01071 for(i=0; i<width; i++){
01072 b1[i] += (b0[i] + b2[i])>>1;
01073 }
01074 }
01075
01076 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01077 int i;
01078
01079 for(i=0; i<width; i++){
01080 b1[i] -= (b0[i] + b2[i] + 2)>>2;
01081 }
01082 }
01083
01084 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
01085 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
01086 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
01087 cs->y = -1;
01088 }
01089
01090 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
01091 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
01092 cs->b1 = buffer + mirror(-1 , height-1)*stride;
01093 cs->y = -1;
01094 }
01095
01096 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
01097 int y= cs->y;
01098
01099 IDWTELEM *b0= cs->b0;
01100 IDWTELEM *b1= cs->b1;
01101 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
01102 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
01103
01104 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01105 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01106
01107 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01108 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01109
01110 cs->b0 = b2;
01111 cs->b1 = b3;
01112 cs->y += 2;
01113 }
01114
01115 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
01116 int y= cs->y;
01117 IDWTELEM *b0= cs->b0;
01118 IDWTELEM *b1= cs->b1;
01119 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
01120 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
01121
01122 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01123 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01124
01125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01127
01128 cs->b0 = b2;
01129 cs->b1 = b3;
01130 cs->y += 2;
01131 }
01132
01133 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
01134 DWTCompose cs;
01135 spatial_compose53i_init(&cs, buffer, height, stride);
01136 while(cs.y <= height)
01137 spatial_compose53i_dy(&cs, buffer, width, height, stride);
01138 }
01139
01140
01141 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
01142 IDWTELEM temp[width];
01143 const int w2= (width+1)>>1;
01144
01145 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
01146 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
01147 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
01148 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
01149 }
01150
01151 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01152 int i;
01153
01154 for(i=0; i<width; i++){
01155 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01156 }
01157 }
01158
01159 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01160 int i;
01161
01162 for(i=0; i<width; i++){
01163 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
01164 }
01165 }
01166
01167 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01168 int i;
01169
01170 for(i=0; i<width; i++){
01171 #ifdef liftS
01172 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
01173 #else
01174 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
01175 #endif
01176 }
01177 }
01178
01179 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01180 int i;
01181
01182 for(i=0; i<width; i++){
01183 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
01184 }
01185 }
01186
01187 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
01188 int i;
01189
01190 for(i=0; i<width; i++){
01191 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
01192 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
01193 #ifdef liftS
01194 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
01195 #else
01196 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
01197 #endif
01198 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01199 }
01200 }
01201
01202 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
01203 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
01204 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
01205 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
01206 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
01207 cs->y = -3;
01208 }
01209
01210 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
01211 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
01212 cs->b1 = buffer + mirror(-3 , height-1)*stride;
01213 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
01214 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
01215 cs->y = -3;
01216 }
01217
01218 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
01219 int y = cs->y;
01220
01221 IDWTELEM *b0= cs->b0;
01222 IDWTELEM *b1= cs->b1;
01223 IDWTELEM *b2= cs->b2;
01224 IDWTELEM *b3= cs->b3;
01225 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
01226 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
01227
01228 if(y>0 && y+4<height){
01229 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
01230 }else{
01231 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01232 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01233 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01234 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01235 }
01236
01237 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
01238 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
01239
01240 cs->b0=b2;
01241 cs->b1=b3;
01242 cs->b2=b4;
01243 cs->b3=b5;
01244 cs->y += 2;
01245 }
01246
01247 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
01248 int y = cs->y;
01249 IDWTELEM *b0= cs->b0;
01250 IDWTELEM *b1= cs->b1;
01251 IDWTELEM *b2= cs->b2;
01252 IDWTELEM *b3= cs->b3;
01253 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
01254 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
01255
01256 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01257 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01258 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01259 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01260
01261 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
01262 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
01263
01264 cs->b0=b2;
01265 cs->b1=b3;
01266 cs->b2=b4;
01267 cs->b3=b5;
01268 cs->y += 2;
01269 }
01270
01271 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
01272 DWTCompose cs;
01273 spatial_compose97i_init(&cs, buffer, height, stride);
01274 while(cs.y <= height)
01275 spatial_compose97i_dy(&cs, buffer, width, height, stride);
01276 }
01277
01278 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
01279 int level;
01280 for(level=decomposition_count-1; level>=0; level--){
01281 switch(type){
01282 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01283 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01284 }
01285 }
01286 }
01287
01288 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01289 int level;
01290 for(level=decomposition_count-1; level>=0; level--){
01291 switch(type){
01292 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
01293 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
01294 }
01295 }
01296 }
01297
01298 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
01299 const int support = type==1 ? 3 : 5;
01300 int level;
01301 if(type==2) return;
01302
01303 for(level=decomposition_count-1; level>=0; level--){
01304 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01305 switch(type){
01306 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01307 break;
01308 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01309 break;
01310 }
01311 }
01312 }
01313 }
01314
01315 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
01316 const int support = type==1 ? 3 : 5;
01317 int level;
01318 if(type==2) return;
01319
01320 for(level=decomposition_count-1; level>=0; level--){
01321 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01322 switch(type){
01323 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01324 break;
01325 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01326 break;
01327 }
01328 }
01329 }
01330 }
01331
01332 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01333 DWTCompose cs[MAX_DECOMPOSITIONS];
01334 int y;
01335 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
01336 for(y=0; y<height; y+=4)
01337 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
01338 }
01339
01340 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01341 const int w= b->width;
01342 const int h= b->height;
01343 int x, y;
01344
01345 if(1){
01346 int run=0;
01347 int runs[w*h];
01348 int run_index=0;
01349 int max_index;
01350
01351 for(y=0; y<h; y++){
01352 for(x=0; x<w; x++){
01353 int v, p=0;
01354 int l=0, lt=0, t=0, rt=0;
01355 v= src[x + y*stride];
01356
01357 if(y){
01358 t= src[x + (y-1)*stride];
01359 if(x){
01360 lt= src[x - 1 + (y-1)*stride];
01361 }
01362 if(x + 1 < w){
01363 rt= src[x + 1 + (y-1)*stride];
01364 }
01365 }
01366 if(x){
01367 l= src[x - 1 + y*stride];
01368
01369
01370
01371
01372 }
01373 if(parent){
01374 int px= x>>1;
01375 int py= y>>1;
01376 if(px<b->parent->width && py<b->parent->height)
01377 p= parent[px + py*2*stride];
01378 }
01379 if(!(l|lt|t|rt|p)){
01380 if(v){
01381 runs[run_index++]= run;
01382 run=0;
01383 }else{
01384 run++;
01385 }
01386 }
01387 }
01388 }
01389 max_index= run_index;
01390 runs[run_index++]= run;
01391 run_index=0;
01392 run= runs[run_index++];
01393
01394 put_symbol2(&s->c, b->state[30], max_index, 0);
01395 if(run_index <= max_index)
01396 put_symbol2(&s->c, b->state[1], run, 3);
01397
01398 for(y=0; y<h; y++){
01399 if(s->c.bytestream_end - s->c.bytestream < w*40){
01400 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
01401 return -1;
01402 }
01403 for(x=0; x<w; x++){
01404 int v, p=0;
01405 int l=0, lt=0, t=0, rt=0;
01406 v= src[x + y*stride];
01407
01408 if(y){
01409 t= src[x + (y-1)*stride];
01410 if(x){
01411 lt= src[x - 1 + (y-1)*stride];
01412 }
01413 if(x + 1 < w){
01414 rt= src[x + 1 + (y-1)*stride];
01415 }
01416 }
01417 if(x){
01418 l= src[x - 1 + y*stride];
01419
01420
01421
01422
01423 }
01424 if(parent){
01425 int px= x>>1;
01426 int py= y>>1;
01427 if(px<b->parent->width && py<b->parent->height)
01428 p= parent[px + py*2*stride];
01429 }
01430 if(l|lt|t|rt|p){
01431 int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01432
01433 put_rac(&s->c, &b->state[0][context], !!v);
01434 }else{
01435 if(!run){
01436 run= runs[run_index++];
01437
01438 if(run_index <= max_index)
01439 put_symbol2(&s->c, b->state[1], run, 3);
01440 assert(v);
01441 }else{
01442 run--;
01443 assert(!v);
01444 }
01445 }
01446 if(v){
01447 int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01448 int l2= 2*FFABS(l) + (l<0);
01449 int t2= 2*FFABS(t) + (t<0);
01450
01451 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
01452 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
01453 }
01454 }
01455 }
01456 }
01457 return 0;
01458 }
01459
01460 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01461
01462
01463 return encode_subband_c0run(s, b, src, parent, stride, orientation);
01464
01465 }
01466
01467 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
01468 const int w= b->width;
01469 const int h= b->height;
01470 int x,y;
01471
01472 if(1){
01473 int run, runs;
01474 x_and_coeff *xc= b->x_coeff;
01475 x_and_coeff *prev_xc= NULL;
01476 x_and_coeff *prev2_xc= xc;
01477 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
01478 x_and_coeff *prev_parent_xc= parent_xc;
01479
01480 runs= get_symbol2(&s->c, b->state[30], 0);
01481 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
01482 else run= INT_MAX;
01483
01484 for(y=0; y<h; y++){
01485 int v=0;
01486 int lt=0, t=0, rt=0;
01487
01488 if(y && prev_xc->x == 0){
01489 rt= prev_xc->coeff;
01490 }
01491 for(x=0; x<w; x++){
01492 int p=0;
01493 const int l= v;
01494
01495 lt= t; t= rt;
01496
01497 if(y){
01498 if(prev_xc->x <= x)
01499 prev_xc++;
01500 if(prev_xc->x == x + 1)
01501 rt= prev_xc->coeff;
01502 else
01503 rt=0;
01504 }
01505 if(parent_xc){
01506 if(x>>1 > parent_xc->x){
01507 parent_xc++;
01508 }
01509 if(x>>1 == parent_xc->x){
01510 p= parent_xc->coeff;
01511 }
01512 }
01513 if(l|lt|t|rt|p){
01514 int context= av_log2(3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
01515
01516 v=get_rac(&s->c, &b->state[0][context]);
01517 if(v){
01518 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
01519 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
01520
01521 xc->x=x;
01522 (xc++)->coeff= v;
01523 }
01524 }else{
01525 if(!run){
01526 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
01527 else run= INT_MAX;
01528 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
01529 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
01530
01531 xc->x=x;
01532 (xc++)->coeff= v;
01533 }else{
01534 int max_run;
01535 run--;
01536 v=0;
01537
01538 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
01539 else max_run= FFMIN(run, w-x-1);
01540 if(parent_xc)
01541 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
01542 x+= max_run;
01543 run-= max_run;
01544 }
01545 }
01546 }
01547 (xc++)->x= w+1;
01548 prev_xc= prev2_xc;
01549 prev2_xc= xc;
01550
01551 if(parent_xc){
01552 if(y&1){
01553 while(parent_xc->x != parent->width+1)
01554 parent_xc++;
01555 parent_xc++;
01556 prev_parent_xc= parent_xc;
01557 }else{
01558 parent_xc= prev_parent_xc;
01559 }
01560 }
01561 }
01562
01563 (xc++)->x= w+1;
01564 }
01565 }
01566
01567 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
01568 const int w= b->width;
01569 int y;
01570 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
01571 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
01572 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
01573 int new_index = 0;
01574
01575 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
01576 qadd= 0;
01577 qmul= 1<<QEXPSHIFT;
01578 }
01579
01580
01581 if (start_y != 0)
01582 new_index = save_state[0];
01583
01584
01585 for(y=start_y; y<h; y++){
01586 int x = 0;
01587 int v;
01588 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
01589 memset(line, 0, b->width*sizeof(IDWTELEM));
01590 v = b->x_coeff[new_index].coeff;
01591 x = b->x_coeff[new_index++].x;
01592 while(x < w){
01593 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
01594 register int u= -(v&1);
01595 line[x] = (t^u) - u;
01596
01597 v = b->x_coeff[new_index].coeff;
01598 x = b->x_coeff[new_index++].x;
01599 }
01600 }
01601
01602
01603 save_state[0] = new_index;
01604
01605 return;
01606 }
01607
01608 static void reset_contexts(SnowContext *s){
01609 int plane_index, level, orientation;
01610
01611 for(plane_index=0; plane_index<3; plane_index++){
01612 for(level=0; level<MAX_DECOMPOSITIONS; level++){
01613 for(orientation=level ? 1:0; orientation<4; orientation++){
01614 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
01615 }
01616 }
01617 }
01618 memset(s->header_state, MID_STATE, sizeof(s->header_state));
01619 memset(s->block_state, MID_STATE, sizeof(s->block_state));
01620 }
01621
01622 static int alloc_blocks(SnowContext *s){
01623 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
01624 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
01625
01626 s->b_width = w;
01627 s->b_height= h;
01628
01629 av_free(s->block);
01630 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
01631 return 0;
01632 }
01633
01634 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
01635 uint8_t *bytestream= d->bytestream;
01636 uint8_t *bytestream_start= d->bytestream_start;
01637 *d= *s;
01638 d->bytestream= bytestream;
01639 d->bytestream_start= bytestream_start;
01640 }
01641
01642
/**
 * Sum of all pixels in a w x w square.
 *
 * @param pix       top-left pixel of the square
 * @param line_size distance in bytes between the starts of two rows
 * @param w         side length of the square
 * @return the pixel sum
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
01657
01658
01659 static int pix_norm1(uint8_t * pix, int line_size, int w)
01660 {
01661 int s, i, j;
01662 uint32_t *sq = ff_squareTbl + 256;
01663
01664 s = 0;
01665 for (i = 0; i < w; i++) {
01666 for (j = 0; j < w; j ++) {
01667 s += sq[pix[0]];
01668 pix ++;
01669 }
01670 pix += line_size - w;
01671 }
01672 return s;
01673 }
01674
01675 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
01676 const int w= s->b_width << s->block_max_depth;
01677 const int rem_depth= s->block_max_depth - level;
01678 const int index= (x + y*w) << rem_depth;
01679 const int block_w= 1<<rem_depth;
01680 BlockNode block;
01681 int i,j;
01682
01683 block.color[0]= l;
01684 block.color[1]= cb;
01685 block.color[2]= cr;
01686 block.mx= mx;
01687 block.my= my;
01688 block.ref= ref;
01689 block.type= type;
01690 block.level= level;
01691
01692 for(j=0; j<block_w; j++){
01693 for(i=0; i<block_w; i++){
01694 s->block[index + i + j*w]= block;
01695 }
01696 }
01697 }
01698
01699 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
01700 const int offset[3]= {
01701 y*c-> stride + x,
01702 ((y*c->uvstride + x)>>1),
01703 ((y*c->uvstride + x)>>1),
01704 };
01705 int i;
01706 for(i=0; i<3; i++){
01707 c->src[0][i]= src [i];
01708 c->ref[0][i]= ref [i] + offset[i];
01709 }
01710 assert(!ref_index);
01711 }
01712
01713 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
01714 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
01715 if(s->ref_frames == 1){
01716 *mx = mid_pred(left->mx, top->mx, tr->mx);
01717 *my = mid_pred(left->my, top->my, tr->my);
01718 }else{
01719 const int *scale = scale_mv_ref[ref];
01720 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
01721 (top ->mx * scale[top ->ref] + 128) >>8,
01722 (tr ->mx * scale[tr ->ref] + 128) >>8);
01723 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
01724 (top ->my * scale[top ->ref] + 128) >>8,
01725 (tr ->my * scale[tr ->ref] + 128) >>8);
01726 }
01727 }
01728
01729
/* Shorthand for rows of a local predictor table "int P[10][2]"; element [0]
 * holds the x component and [1] the y component of a motion vector. */
01730 #define P_LEFT P[1]
01731 #define P_TOP P[2]
01732 #define P_TOPRIGHT P[3]
01733 #define P_MEDIAN P[4]
01734 #define P_MV1 P[9]
01735 #define FLAG_QPEL 1 //must be 1
01736
01737 static int encode_q_branch(SnowContext *s, int level, int x, int y){
01738 uint8_t p_buffer[1024];
01739 uint8_t i_buffer[1024];
01740 uint8_t p_state[sizeof(s->block_state)];
01741 uint8_t i_state[sizeof(s->block_state)];
01742 RangeCoder pc, ic;
01743 uint8_t *pbbak= s->c.bytestream;
01744 uint8_t *pbbak_start= s->c.bytestream_start;
01745 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
01746 const int w= s->b_width << s->block_max_depth;
01747 const int h= s->b_height << s->block_max_depth;
01748 const int rem_depth= s->block_max_depth - level;
01749 const int index= (x + y*w) << rem_depth;
01750 const int block_w= 1<<(LOG2_MB_SIZE - level);
01751 int trx= (x+1)<<rem_depth;
01752 int try= (y+1)<<rem_depth;
01753 const BlockNode *left = x ? &s->block[index-1] : &null_block;
01754 const BlockNode *top = y ? &s->block[index-w] : &null_block;
01755 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
01756 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
01757 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
01758 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
01759 int pl = left->color[0];
01760 int pcb= left->color[1];
01761 int pcr= left->color[2];
01762 int pmx, pmy;
01763 int mx=0, my=0;
01764 int l,cr,cb;
01765 const int stride= s->current_picture.linesize[0];
01766 const int uvstride= s->current_picture.linesize[1];
01767 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
01768 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
01769 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
01770 int P[10][2];
01771 int16_t last_mv[3][2];
01772 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL);
01773 const int shift= 1+qpel;
01774 MotionEstContext *c= &s->m.me;
01775 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
01776 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
01777 int my_context= av_log2(2*FFABS(left->my - top->my));
01778 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
01779 int ref, best_ref, ref_score, ref_mx, ref_my;
01780
01781 assert(sizeof(s->block_state) >= 256);
01782 if(s->keyframe){
01783 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
01784 return 0;
01785 }
01786
01787
01788
01789 P_LEFT[0]= left->mx;
01790 P_LEFT[1]= left->my;
01791 P_TOP [0]= top->mx;
01792 P_TOP [1]= top->my;
01793 P_TOPRIGHT[0]= tr->mx;
01794 P_TOPRIGHT[1]= tr->my;
01795
01796 last_mv[0][0]= s->block[index].mx;
01797 last_mv[0][1]= s->block[index].my;
01798 last_mv[1][0]= right->mx;
01799 last_mv[1][1]= right->my;
01800 last_mv[2][0]= bottom->mx;
01801 last_mv[2][1]= bottom->my;
01802
01803 s->m.mb_stride=2;
01804 s->m.mb_x=
01805 s->m.mb_y= 0;
01806 c->skip= 0;
01807
01808 assert(c-> stride == stride);
01809 assert(c->uvstride == uvstride);
01810
01811 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
01812 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
01813 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
01814 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
01815
01816 c->xmin = - x*block_w - 16+2;
01817 c->ymin = - y*block_w - 16+2;
01818 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
01819 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
01820
01821 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
01822 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
01823 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
01824 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
01825 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
01826 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
01827 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
01828
01829 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
01830 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
01831
01832 if (!y) {
01833 c->pred_x= P_LEFT[0];
01834 c->pred_y= P_LEFT[1];
01835 } else {
01836 c->pred_x = P_MEDIAN[0];
01837 c->pred_y = P_MEDIAN[1];
01838 }
01839
01840 score= INT_MAX;
01841 best_ref= 0;
01842 for(ref=0; ref<s->ref_frames; ref++){
01843 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
01844
01845 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, 0, last_mv,
01846 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
01847
01848 assert(ref_mx >= c->xmin);
01849 assert(ref_mx <= c->xmax);
01850 assert(ref_my >= c->ymin);
01851 assert(ref_my <= c->ymax);
01852
01853 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
01854 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
01855 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
01856 if(s->ref_mvs[ref]){
01857 s->ref_mvs[ref][index][0]= ref_mx;
01858 s->ref_mvs[ref][index][1]= ref_my;
01859 s->ref_scores[ref][index]= ref_score;
01860 }
01861 if(score > ref_score){
01862 score= ref_score;
01863 best_ref= ref;
01864 mx= ref_mx;
01865 my= ref_my;
01866 }
01867 }
01868
01869
01870
01871 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
01872 pc= s->c;
01873 pc.bytestream_start=
01874 pc.bytestream= p_buffer;
01875 memcpy(p_state, s->block_state, sizeof(s->block_state));
01876
01877 if(level!=s->block_max_depth)
01878 put_rac(&pc, &p_state[4 + s_context], 1);
01879 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
01880 if(s->ref_frames > 1)
01881 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
01882 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
01883 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
01884 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
01885 p_len= pc.bytestream - pc.bytestream_start;
01886 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
01887
01888 block_s= block_w*block_w;
01889 sum = pix_sum(current_data[0], stride, block_w);
01890 l= (sum + block_s/2)/block_s;
01891 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
01892
01893 block_s= block_w*block_w>>2;
01894 sum = pix_sum(current_data[1], uvstride, block_w>>1);
01895 cb= (sum + block_s/2)/block_s;
01896
01897 sum = pix_sum(current_data[2], uvstride, block_w>>1);
01898 cr= (sum + block_s/2)/block_s;
01899
01900
01901 ic= s->c;
01902 ic.bytestream_start=
01903 ic.bytestream= i_buffer;
01904 memcpy(i_state, s->block_state, sizeof(s->block_state));
01905 if(level!=s->block_max_depth)
01906 put_rac(&ic, &i_state[4 + s_context], 1);
01907 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
01908 put_symbol(&ic, &i_state[32], l-pl , 1);
01909 put_symbol(&ic, &i_state[64], cb-pcb, 1);
01910 put_symbol(&ic, &i_state[96], cr-pcr, 1);
01911 i_len= ic.bytestream - ic.bytestream_start;
01912 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
01913
01914
01915 assert(iscore < 255*255*256 + s->lambda2*10);
01916 assert(iscore >= 0);
01917 assert(l>=0 && l<=255);
01918 assert(pl>=0 && pl<=255);
01919
01920 if(level==0){
01921 int varc= iscore >> 8;
01922 int vard= score >> 8;
01923 if (vard <= 64 || vard < varc)
01924 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
01925 else
01926 c->scene_change_score+= s->m.qscale;
01927 }
01928
01929 if(level!=s->block_max_depth){
01930 put_rac(&s->c, &s->block_state[4 + s_context], 0);
01931 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
01932 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
01933 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
01934 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
01935 score2+= s->lambda2>>FF_LAMBDA_SHIFT;
01936
01937 if(score2 < score && score2 < iscore)
01938 return score2;
01939 }
01940
01941 if(iscore < score){
01942 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
01943 memcpy(pbbak, i_buffer, i_len);
01944 s->c= ic;
01945 s->c.bytestream_start= pbbak_start;
01946 s->c.bytestream= pbbak + i_len;
01947 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
01948 memcpy(s->block_state, i_state, sizeof(s->block_state));
01949 return iscore;
01950 }else{
01951 memcpy(pbbak, p_buffer, p_len);
01952 s->c= pc;
01953 s->c.bytestream_start= pbbak_start;
01954 s->c.bytestream= pbbak + p_len;
01955 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
01956 memcpy(s->block_state, p_state, sizeof(s->block_state));
01957 return score;
01958 }
01959 }
01960
01961 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
01962 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
01963 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
01964 }else{
01965 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
01966 }
01967 }
01968
01969 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
01970 const int w= s->b_width << s->block_max_depth;
01971 const int rem_depth= s->block_max_depth - level;
01972 const int index= (x + y*w) << rem_depth;
01973 int trx= (x+1)<<rem_depth;
01974 BlockNode *b= &s->block[index];
01975 const BlockNode *left = x ? &s->block[index-1] : &null_block;
01976 const BlockNode *top = y ? &s->block[index-w] : &null_block;
01977 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
01978 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
01979 int pl = left->color[0];
01980 int pcb= left->color[1];
01981 int pcr= left->color[2];
01982 int pmx, pmy;
01983 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
01984 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
01985 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
01986 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
01987
01988 if(s->keyframe){
01989 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
01990 return;
01991 }
01992
01993 if(level!=s->block_max_depth){
01994 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
01995 put_rac(&s->c, &s->block_state[4 + s_context], 1);
01996 }else{
01997 put_rac(&s->c, &s->block_state[4 + s_context], 0);
01998 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
01999 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
02000 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
02001 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
02002 return;
02003 }
02004 }
02005 if(b->type & BLOCK_INTRA){
02006 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
02007 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
02008 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
02009 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
02010 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
02011 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
02012 }else{
02013 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
02014 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
02015 if(s->ref_frames > 1)
02016 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
02017 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
02018 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
02019 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
02020 }
02021 }
02022
02023 static void decode_q_branch(SnowContext *s, int level, int x, int y){
02024 const int w= s->b_width << s->block_max_depth;
02025 const int rem_depth= s->block_max_depth - level;
02026 const int index= (x + y*w) << rem_depth;
02027 int trx= (x+1)<<rem_depth;
02028 const BlockNode *left = x ? &s->block[index-1] : &null_block;
02029 const BlockNode *top = y ? &s->block[index-w] : &null_block;
02030 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
02031 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
02032 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02033
02034 if(s->keyframe){
02035 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
02036 return;
02037 }
02038
02039 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
02040 int type, mx, my;
02041 int l = left->color[0];
02042 int cb= left->color[1];
02043 int cr= left->color[2];
02044 int ref = 0;
02045 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02046 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
02047 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
02048
02049 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
02050
02051 if(type){
02052 pred_mv(s, &mx, &my, 0, left, top, tr);
02053 l += get_symbol(&s->c, &s->block_state[32], 1);
02054 cb+= get_symbol(&s->c, &s->block_state[64], 1);
02055 cr+= get_symbol(&s->c, &s->block_state[96], 1);
02056 }else{
02057 if(s->ref_frames > 1)
02058 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
02059 pred_mv(s, &mx, &my, ref, left, top, tr);
02060 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
02061 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
02062 }
02063 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
02064 }else{
02065 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
02066 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
02067 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
02068 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
02069 }
02070 }
02071
02072 static void encode_blocks(SnowContext *s, int search){
02073 int x, y;
02074 int w= s->b_width;
02075 int h= s->b_height;
02076
02077 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
02078 iterative_me(s);
02079
02080 for(y=0; y<h; y++){
02081 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){
02082 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
02083 return;
02084 }
02085 for(x=0; x<w; x++){
02086 if(s->avctx->me_method == ME_ITER || !search)
02087 encode_q_branch2(s, 0, x, y);
02088 else
02089 encode_q_branch (s, 0, x, y);
02090 }
02091 }
02092 }
02093
02094 static void decode_blocks(SnowContext *s){
02095 int x, y;
02096 int w= s->b_width;
02097 int h= s->b_height;
02098
02099 for(y=0; y<h; y++){
02100 for(x=0; x<w; x++){
02101 decode_q_branch(s, 0, x, y);
02102 }
02103 }
02104 }
02105
/*
 * Sub-pel motion compensation of one b_w x b_h block.
 *
 * p      - plane whose filter settings (diag_mc, fast_mc, hcoeff[]) are used;
 *          NULL selects the fixed (20,-5,1) filter and the table-driven
 *          two-plane blend
 * dst    - output block, rows are `stride` bytes apart
 * src    - source pixels, rows `stride` bytes apart; the filters read from
 *          src itself and the pointer is advanced by HTAPS_MAX/2-1 columns
 *          and rows before the unfiltered plane is taken, so the caller must
 *          pass a pointer that far up-left of the block
 * tmp    - not referenced in this function
 * dx, dy - fractional position, asserted to be < 16
 */
02106 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
/* weight (out of 8) given to hpel[l] in the two-plane blend, indexed by (dx&7) + 8*(dy&7); hpel[r] gets 8 minus it */
02107 static const uint8_t weight[64]={
02108 8,7,6,5,4,3,2,1,
02109 7,7,0,0,0,0,0,1,
02110 6,0,6,0,0,0,2,0,
02111 5,0,0,5,0,3,0,0,
02112 4,0,0,0,4,0,0,0,
02113 3,0,0,5,0,3,0,0,
02114 2,0,6,0,0,0,2,0,
02115 1,7,0,0,0,0,0,1,
02116 };
02117
/*
 * Indexed by dx + 16*dy. The high nibble is hpel[] index `l`, the low nibble
 * is hpel[] index `r`. Entries 0xcc give l = r = 12, for which needs[] is 15,
 * i.e. the four-plane bilinear path below.
 */
02118 static const uint8_t brane[256]={
02119 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
02120 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
02121 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
02122 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
02123 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
02124 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
02125 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
02126 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
02127 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
02128 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
02129 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
02130 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
02131 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
02132 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
02133 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
02134 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
02135 };
02136
/*
 * Indexed by an hpel[] index: bitmask of the filter passes that must run to
 * produce that plane. It is tested below as b&5 (horizontal pass),
 * b&2 (vertical pass) and b&4 (vertical pass over the horizontal output).
 */
02137 static const uint8_t needs[16]={
02138 0,1,0,0,
02139 2,4,2,0,
02140 0,1,0,0,
02141 15
02142 };
02143
02144 int x, y, b, r, l;
/* tmpIt: unnormalised horizontal filter output, 64 entries per row */
02145 int16_t tmpIt [64*(32+HTAPS_MAX)];
/* tmp2t[0]: horizontally filtered, tmp2t[1]: vertically filtered, tmp2t[2]: filtered both ways; rows `stride` apart */
02146 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
02147 int16_t *tmpI= tmpIt;
02148 uint8_t *tmp2= tmp2t[0];
02149 const uint8_t *hpel[11];
02150 assert(dx<16 && dy<16);
02151 r= brane[dx + 16*dy]&15;
02152 l= brane[dx + 16*dy]>>4;
02153
02154 b= needs[l] | needs[r];
/* a plane without diag_mc always runs every pass and takes the bilinear path */
02155 if(p && !p->diag_mc)
02156 b= 15;
02157
/* horizontal pass over b_h+HTAPS_MAX-1 rows; a_1..a6 are eight consecutive pixels of the row */
02158 if(b&5){
02159 for(y=0; y < b_h+HTAPS_MAX-1; y++){
02160 for(x=0; x < b_w; x++){
02161 int a_1=src[x + HTAPS_MAX/2-4];
02162 int a0= src[x + HTAPS_MAX/2-3];
02163 int a1= src[x + HTAPS_MAX/2-2];
02164 int a2= src[x + HTAPS_MAX/2-1];
02165 int a3= src[x + HTAPS_MAX/2+0];
02166 int a4= src[x + HTAPS_MAX/2+1];
02167 int a5= src[x + HTAPS_MAX/2+2];
02168 int a6= src[x + HTAPS_MAX/2+3];
02169 int am=0;
/* fixed six-tap filter (normalised by >>5) or the plane's hcoeff[] eight-tap filter (>>6); the raw sum is kept in tmpI */
02170 if(!p || p->fast_mc){
02171 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
02172 tmpI[x]= am;
02173 am= (am+16)>>5;
02174 }else{
02175 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
02176 tmpI[x]= am;
02177 am= (am+32)>>6;
02178 }
02179
/* clamp to 0..255: negative gives 0, above 255 gives -1 which is stored as 255 (assumes arithmetic >> on 32-bit int) */
02180 if(am&(~255)) am= ~(am>>31);
02181 tmp2[x]= am;
02182 }
02183 tmpI+= 64;
02184 tmp2+= stride;
02185 src += stride;
02186 }
/* rewind src to the first row */
02187 src -= stride*y;
02188 }
02189 src += HTAPS_MAX/2 - 1;
02190 tmp2= tmp2t[1];
02191
/* vertical pass on the source; one extra column is produced because hpel[6] reads tmp2t[1] + 1 */
02192 if(b&2){
02193 for(y=0; y < b_h; y++){
02194 for(x=0; x < b_w+1; x++){
02195 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
02196 int a0= src[x + (HTAPS_MAX/2-3)*stride];
02197 int a1= src[x + (HTAPS_MAX/2-2)*stride];
02198 int a2= src[x + (HTAPS_MAX/2-1)*stride];
02199 int a3= src[x + (HTAPS_MAX/2+0)*stride];
02200 int a4= src[x + (HTAPS_MAX/2+1)*stride];
02201 int a5= src[x + (HTAPS_MAX/2+2)*stride];
02202 int a6= src[x + (HTAPS_MAX/2+3)*stride];
02203 int am=0;
02204 if(!p || p->fast_mc)
02205 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
02206 else
02207 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
02208
02209 if(am&(~255)) am= ~(am>>31);
02210 tmp2[x]= am;
02211 }
02212 src += stride;
02213 tmp2+= stride;
02214 }
02215 src -= stride*y;
02216 }
02217 src += stride*(HTAPS_MAX/2 - 1);
02218 tmp2= tmp2t[2];
02219 tmpI= tmpIt;
/* vertical pass over the raw horizontal sums in tmpI; the shift (>>10 or >>12) normalises both passes at once */
02220 if(b&4){
02221 for(y=0; y < b_h; y++){
02222 for(x=0; x < b_w; x++){
02223 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
02224 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
02225 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
02226 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
02227 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
02228 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
02229 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
02230 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
02231 int am=0;
02232 if(!p || p->fast_mc)
02233 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
02234 else
02235 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
02236 if(am&(~255)) am= ~(am>>31);
02237 tmp2[x]= am;
02238 }
02239 tmpI+= 64;
02240 tmp2+= stride;
02241 }
02242 }
02243
/*
 * Table of planes, laid out as a grid with 4 entries per row: index + 1 is
 * the next plane to the right, index + 4 the next plane below. Entries 3 and
 * 7 are never assigned.
 */
02244 hpel[ 0]= src;
02245 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
02246 hpel[ 2]= src + 1;
02247
02248 hpel[ 4]= tmp2t[1];
02249 hpel[ 5]= tmp2t[2];
02250 hpel[ 6]= tmp2t[1] + 1;
02251
02252 hpel[ 8]= src + stride;
02253 hpel[ 9]= hpel[1] + stride;
02254 hpel[10]= hpel[8] + 1;
02255
/* b == 15: bilinear blend of the four planes around the position, with weights from dx&7 and dy&7 (sum 64) */
02256 if(b==15){
02257 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
02258 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
02259 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
02260 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
02261 dx&=7;
02262 dy&=7;
02263 for(y=0; y < b_h; y++){
02264 for(x=0; x < b_w; x++){
02265 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
02266 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
02267 }
02268 src1+=stride;
02269 src2+=stride;
02270 src3+=stride;
02271 src4+=stride;
02272 dst +=stride;
02273 }
/* otherwise: weighted blend of the two planes picked by brane[] (weights sum to 8); the local `b` shadows the outer mask */
02274 }else{
02275 const uint8_t *src1= hpel[l];
02276 const uint8_t *src2= hpel[r];
02277 int a= weight[((dx&7) + (8*(dy&7)))];
02278 int b= 8-a;
02279 for(y=0; y < b_h; y++){
02280 for(x=0; x < b_w; x++){
02281 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
02282 }
02283 src1+=stride;
02284 src2+=stride;
02285 dst +=stride;
02286 }
02287 }
02288 }
02289
02290 #define mca(dx,dy,b_w)\
02291 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
02292 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
02293 assert(h==b_w);\
02294 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
02295 }
02296
02297 mca( 0, 0,16)
02298 mca( 8, 0,16)
02299 mca( 0, 8,16)
02300 mca( 8, 8,16)
02301 mca( 0, 0,8)
02302 mca( 8, 0,8)
02303 mca( 0, 8,8)
02304 mca( 8, 8,8)
02305
02306 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
02307 if(block->type & BLOCK_INTRA){
02308 int x, y;
02309 const int color = block->color[plane_index];
02310 const int color4= color*0x01010101;
02311 if(b_w==32){
02312 for(y=0; y < b_h; y++){
02313 *(uint32_t*)&dst[0 + y*stride]= color4;
02314 *(uint32_t*)&dst[4 + y*stride]= color4;
02315 *(uint32_t*)&dst[8 + y*stride]= color4;
02316 *(uint32_t*)&dst[12+ y*stride]= color4;
02317 *(uint32_t*)&dst[16+ y*stride]= color4;
02318 *(uint32_t*)&dst[20+ y*stride]= color4;
02319 *(uint32_t*)&dst[24+ y*stride]= color4;
02320 *(uint32_t*)&dst[28+ y*stride]= color4;
02321 }
02322 }else if(b_w==16){
02323 for(y=0; y < b_h; y++){
02324 *(uint32_t*)&dst[0 + y*stride]= color4;
02325 *(uint32_t*)&dst[4 + y*stride]= color4;
02326 *(uint32_t*)&dst[8 + y*stride]= color4;
02327 *(uint32_t*)&dst[12+ y*stride]= color4;
02328 }
02329 }else if(b_w==8){
02330 for(y=0; y < b_h; y++){
02331 *(uint32_t*)&dst[0 + y*stride]= color4;
02332 *(uint32_t*)&dst[4 + y*stride]= color4;
02333 }
02334 }else if(b_w==4){
02335 for(y=0; y < b_h; y++){
02336 *(uint32_t*)&dst[0 + y*stride]= color4;
02337 }
02338 }else{
02339 for(y=0; y < b_h; y++){
02340 for(x=0; x < b_w; x++){
02341 dst[x + y*stride]= color;
02342 }
02343 }
02344 }
02345 }else{
02346 uint8_t *src= s->last_picture[block->ref].data[plane_index];
02347 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
02348 int mx= block->mx*scale;
02349 int my= block->my*scale;
02350 const int dx= mx&15;
02351 const int dy= my&15;
02352 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
02353 sx += (mx>>4) - (HTAPS_MAX/2-1);
02354 sy += (my>>4) - (HTAPS_MAX/2-1);
02355 src += sx + sy*stride;
02356 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
02357 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
02358 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
02359 src= tmp + MB_SIZE;
02360 }
02361
02362
02363 assert(b_w>1 && b_h>1);
02364 assert((tab_index>=0 && tab_index<4) || b_w==32);
02365 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
02366 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
02367 else if(b_w==32){
02368 int y;
02369 for(y=0; y<b_h; y+=16){
02370 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
02371 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
02372 }
02373 }else if(b_w==b_h)
02374 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
02375 else if(b_w==2*b_h){
02376 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
02377 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
02378 }else{
02379 assert(2*b_w==b_h);
02380 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
02381 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
02382 }
02383 }
02384 }
02385
02386 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
02387 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
02388 int y, x;
02389 IDWTELEM * dst;
02390 for(y=0; y<b_h; y++){
02391
02392 const uint8_t *obmc1= obmc + y*obmc_stride;
02393 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02394 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02395 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02396 dst = slice_buffer_get_line(sb, src_y + y);
02397 for(x=0; x<b_w; x++){
02398 int v= obmc1[x] * block[3][x + y*src_stride]
02399 +obmc2[x] * block[2][x + y*src_stride]
02400 +obmc3[x] * block[1][x + y*src_stride]
02401 +obmc4[x] * block[0][x + y*src_stride];
02402
02403 v <<= 8 - LOG2_OBMC_MAX;
02404 if(FRAC_BITS != 8){
02405 v >>= 8 - FRAC_BITS;
02406 }
02407 if(add){
02408 v += dst[x + src_x];
02409 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02410 if(v&(~255)) v= ~(v>>31);
02411 dst8[x + y*src_stride] = v;
02412 }else{
02413 dst[x + src_x] -= v;
02414 }
02415 }
02416 }
02417 }
02418
02419
02420 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
02421 const int b_width = s->b_width << s->block_max_depth;
02422 const int b_height= s->b_height << s->block_max_depth;
02423 const int b_stride= b_width;
02424 BlockNode *lt= &s->block[b_x + b_y*b_stride];
02425 BlockNode *rt= lt+1;
02426 BlockNode *lb= lt+b_stride;
02427 BlockNode *rb= lb+1;
02428 uint8_t *block[4];
02429 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
02430 uint8_t *tmp = s->scratchbuf;
02431 uint8_t *ptmp;
02432 int x,y;
02433
02434 if(b_x<0){
02435 lt= rt;
02436 lb= rb;
02437 }else if(b_x + 1 >= b_width){
02438 rt= lt;
02439 rb= lb;
02440 }
02441 if(b_y<0){
02442 lt= lb;
02443 rt= rb;
02444 }else if(b_y + 1 >= b_height){
02445 lb= lt;
02446 rb= rt;
02447 }
02448
02449 if(src_x<0){
02450 obmc -= src_x;
02451 b_w += src_x;
02452 if(!sliced && !offset_dst)
02453 dst -= src_x;
02454 src_x=0;
02455 }else if(src_x + b_w > w){
02456 b_w = w - src_x;
02457 }
02458 if(src_y<0){
02459 obmc -= src_y*obmc_stride;
02460 b_h += src_y;
02461 if(!sliced && !offset_dst)
02462 dst -= src_y*dst_stride;
02463 src_y=0;
02464 }else if(src_y + b_h> h){
02465 b_h = h - src_y;
02466 }
02467
02468 if(b_w<=0 || b_h<=0) return;
02469
02470 assert(src_stride > 2*MB_SIZE + 5);
02471
02472 if(!sliced && offset_dst)
02473 dst += src_x + src_y*dst_stride;
02474 dst8+= src_x + src_y*src_stride;
02475
02476
02477 ptmp= tmp + 3*tmp_step;
02478 block[0]= ptmp;
02479 ptmp+=tmp_step;
02480 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
02481
02482 if(same_block(lt, rt)){
02483 block[1]= block[0];
02484 }else{
02485 block[1]= ptmp;
02486 ptmp+=tmp_step;
02487 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
02488 }
02489
02490 if(same_block(lt, lb)){
02491 block[2]= block[0];
02492 }else if(same_block(rt, lb)){
02493 block[2]= block[1];
02494 }else{
02495 block[2]= ptmp;
02496 ptmp+=tmp_step;
02497 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
02498 }
02499
02500 if(same_block(lt, rb) ){
02501 block[3]= block[0];
02502 }else if(same_block(rt, rb)){
02503 block[3]= block[1];
02504 }else if(same_block(lb, rb)){
02505 block[3]= block[2];
02506 }else{
02507 block[3]= ptmp;
02508 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
02509 }
02510 #if 0
02511 for(y=0; y<b_h; y++){
02512 for(x=0; x<b_w; x++){
02513 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
02514 if(add) dst[x + y*dst_stride] += v;
02515 else dst[x + y*dst_stride] -= v;
02516 }
02517 }
02518 for(y=0; y<b_h; y++){
02519 uint8_t *obmc2= obmc + (obmc_stride>>1);
02520 for(x=0; x<b_w; x++){
02521 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
02522 if(add) dst[x + y*dst_stride] += v;
02523 else dst[x + y*dst_stride] -= v;
02524 }
02525 }
02526 for(y=0; y<b_h; y++){
02527 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02528 for(x=0; x<b_w; x++){
02529 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
02530 if(add) dst[x + y*dst_stride] += v;
02531 else dst[x + y*dst_stride] -= v;
02532 }
02533 }
02534 for(y=0; y<b_h; y++){
02535 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02536 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02537 for(x=0; x<b_w; x++){
02538 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
02539 if(add) dst[x + y*dst_stride] += v;
02540 else dst[x + y*dst_stride] -= v;
02541 }
02542 }
02543 #else
02544 if(sliced){
02545 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
02546 }else{
02547 for(y=0; y<b_h; y++){
02548
02549 const uint8_t *obmc1= obmc + y*obmc_stride;
02550 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02551 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02552 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02553 for(x=0; x<b_w; x++){
02554 int v= obmc1[x] * block[3][x + y*src_stride]
02555 +obmc2[x] * block[2][x + y*src_stride]
02556 +obmc3[x] * block[1][x + y*src_stride]
02557 +obmc4[x] * block[0][x + y*src_stride];
02558
02559 v <<= 8 - LOG2_OBMC_MAX;
02560 if(FRAC_BITS != 8){
02561 v >>= 8 - FRAC_BITS;
02562 }
02563 if(add){
02564 v += dst[x + y*dst_stride];
02565 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02566 if(v&(~255)) v= ~(v>>31);
02567 dst8[x + y*src_stride] = v;
02568 }else{
02569 dst[x + y*dst_stride] -= v;
02570 }
02571 }
02572 }
02573 }
02574 #endif
02575 }
02576
02577 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
02578 Plane *p= &s->plane[plane_index];
02579 const int mb_w= s->b_width << s->block_max_depth;
02580 const int mb_h= s->b_height << s->block_max_depth;
02581 int x, y, mb_x;
02582 int block_size = MB_SIZE >> s->block_max_depth;
02583 int block_w = plane_index ? block_size/2 : block_size;
02584 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02585 int obmc_stride= plane_index ? block_size : 2*block_size;
02586 int ref_stride= s->current_picture.linesize[plane_index];
02587 uint8_t *dst8= s->current_picture.data[plane_index];
02588 int w= p->width;
02589 int h= p->height;
02590
02591 if(s->keyframe || (s->avctx->debug&512)){
02592 if(mb_y==mb_h)
02593 return;
02594
02595 if(add){
02596 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02597
02598 IDWTELEM * line = sb->line[y];
02599 for(x=0; x<w; x++){
02600
02601 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02602 v >>= FRAC_BITS;
02603 if(v&(~255)) v= ~(v>>31);
02604 dst8[x + y*ref_stride]= v;
02605 }
02606 }
02607 }else{
02608 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02609
02610 IDWTELEM * line = sb->line[y];
02611 for(x=0; x<w; x++){
02612 line[x] -= 128 << FRAC_BITS;
02613
02614 }
02615 }
02616 }
02617
02618 return;
02619 }
02620
02621 for(mb_x=0; mb_x<=mb_w; mb_x++){
02622 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
02623 block_w*mb_x - block_w/2,
02624 block_w*mb_y - block_w/2,
02625 block_w, block_w,
02626 w, h,
02627 w, ref_stride, obmc_stride,
02628 mb_x - 1, mb_y - 1,
02629 add, 0, plane_index);
02630 }
02631 }
02632
02633 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
02634 Plane *p= &s->plane[plane_index];
02635 const int mb_w= s->b_width << s->block_max_depth;
02636 const int mb_h= s->b_height << s->block_max_depth;
02637 int x, y, mb_x;
02638 int block_size = MB_SIZE >> s->block_max_depth;
02639 int block_w = plane_index ? block_size/2 : block_size;
02640 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02641 const int obmc_stride= plane_index ? block_size : 2*block_size;
02642 int ref_stride= s->current_picture.linesize[plane_index];
02643 uint8_t *dst8= s->current_picture.data[plane_index];
02644 int w= p->width;
02645 int h= p->height;
02646
02647 if(s->keyframe || (s->avctx->debug&512)){
02648 if(mb_y==mb_h)
02649 return;
02650
02651 if(add){
02652 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02653 for(x=0; x<w; x++){
02654 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02655 v >>= FRAC_BITS;
02656 if(v&(~255)) v= ~(v>>31);
02657 dst8[x + y*ref_stride]= v;
02658 }
02659 }
02660 }else{
02661 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02662 for(x=0; x<w; x++){
02663 buf[x + y*w]-= 128<<FRAC_BITS;
02664 }
02665 }
02666 }
02667
02668 return;
02669 }
02670
02671 for(mb_x=0; mb_x<=mb_w; mb_x++){
02672 add_yblock(s, 0, NULL, buf, dst8, obmc,
02673 block_w*mb_x - block_w/2,
02674 block_w*mb_y - block_w/2,
02675 block_w, block_w,
02676 w, h,
02677 w, ref_stride, obmc_stride,
02678 mb_x - 1, mb_y - 1,
02679 add, 1, plane_index);
02680 }
02681 }
02682
02683 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
02684 const int mb_h= s->b_height << s->block_max_depth;
02685 int mb_y;
02686 for(mb_y=0; mb_y<=mb_h; mb_y++)
02687 predict_slice(s, buf, plane_index, add, mb_y);
02688 }
02689
02690 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
02691 int i, x2, y2;
02692 Plane *p= &s->plane[plane_index];
02693 const int block_size = MB_SIZE >> s->block_max_depth;
02694 const int block_w = plane_index ? block_size/2 : block_size;
02695 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02696 const int obmc_stride= plane_index ? block_size : 2*block_size;
02697 const int ref_stride= s->current_picture.linesize[plane_index];
02698 uint8_t *src= s-> input_picture.data[plane_index];
02699 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
02700 const int b_stride = s->b_width << s->block_max_depth;
02701 const int w= p->width;
02702 const int h= p->height;
02703 int index= mb_x + mb_y*b_stride;
02704 BlockNode *b= &s->block[index];
02705 BlockNode backup= *b;
02706 int ab=0;
02707 int aa=0;
02708
02709 b->type|= BLOCK_INTRA;
02710 b->color[plane_index]= 0;
02711 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
02712
02713 for(i=0; i<4; i++){
02714 int mb_x2= mb_x + (i &1) - 1;
02715 int mb_y2= mb_y + (i>>1) - 1;
02716 int x= block_w*mb_x2 + block_w/2;
02717 int y= block_w*mb_y2 + block_w/2;
02718
02719 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
02720 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
02721
02722 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
02723 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
02724 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
02725 int obmc_v= obmc[index];
02726 int d;
02727 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
02728 if(x<0) obmc_v += obmc[index + block_w];
02729 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
02730 if(x+block_w>w) obmc_v += obmc[index - block_w];
02731
02732
02733 d = -dst[index] + (1<<(FRAC_BITS-1));
02734 dst[index] = d;
02735 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
02736 aa += obmc_v * obmc_v;
02737 }
02738 }
02739 }
02740 *b= backup;
02741
02742 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255);
02743 }
02744
02745 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
02746 const int b_stride = s->b_width << s->block_max_depth;
02747 const int b_height = s->b_height<< s->block_max_depth;
02748 int index= x + y*b_stride;
02749 const BlockNode *b = &s->block[index];
02750 const BlockNode *left = x ? &s->block[index-1] : &null_block;
02751 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
02752 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
02753 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
02754 int dmx, dmy;
02755
02756
02757
02758 if(x<0 || x>=b_stride || y>=b_height)
02759 return 0;
02760
02761
02762
02763
02764
02765
02766
02767
02768
02769 if(b->type & BLOCK_INTRA){
02770 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
02771 + av_log2(2*FFABS(left->color[1] - b->color[1]))
02772 + av_log2(2*FFABS(left->color[2] - b->color[2])));
02773 }else{
02774 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
02775 dmx-= b->mx;
02776 dmy-= b->my;
02777 return 2*(1 + av_log2(2*FFABS(dmx))
02778 + av_log2(2*FFABS(dmy))
02779 + av_log2(2*b->ref));
02780 }
02781 }
02782
/**
 * Compute a rate-distortion score for block (mb_x, mb_y) of one plane.
 *
 * The block is rendered with pred_block() into s->scratchbuf, blended with
 * the values in the OBMC scratchpad using the obmc_edged weights, and the
 * result is written into current_picture. Distortion is then measured
 * against input_picture over the 2*block_w square around the block. For
 * plane 0 a bit estimate from get_block_bits() is added.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @param obmc_edged  weight table with row stride obmc_stride
 * @return distortion + rate * penalty_factor
 */
02783 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
02784 Plane *p= &s->plane[plane_index];
02785 const int block_size = MB_SIZE >> s->block_max_depth;
02786 const int block_w = plane_index ? block_size/2 : block_size;
02787 const int obmc_stride= plane_index ? block_size : 2*block_size;
02788 const int ref_stride= s->current_picture.linesize[plane_index];
02789 uint8_t *dst= s->current_picture.data[plane_index];
02790 uint8_t *src= s-> input_picture.data[plane_index];
02791 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
02792 uint8_t *cur = s->scratchbuf;
/* variable-length array on the stack; its size scales with ref_stride */
02793 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
02794 const int b_stride = s->b_width << s->block_max_depth;
02795 const int b_height = s->b_height<< s->block_max_depth;
02796 const int w= p->width;
02797 const int h= p->height;
02798 int distortion;
02799 int rate= 0;
02800 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
/* (sx, sy): top-left of the 2*block_w window, half a block before the block itself */
02801 int sx= block_w*mb_x - block_w/2;
02802 int sy= block_w*mb_y - block_w/2;
/* [x0,x1) x [y0,y1): part of the window that lies inside the plane */
02803 int x0= FFMAX(0,-sx);
02804 int y0= FFMAX(0,-sy);
02805 int x1= FFMIN(block_w*2, w-sx);
02806 int y1= FFMIN(block_w*2, h-sy);
02807 int i,x,y;
02808
02809 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
02810
/* Blend: weight the rendered block, add the scratchpad value, scale down by
 * FRAC_BITS and clamp to 0..255 before storing into the current picture. */
02811 for(y=y0; y<y1; y++){
02812 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
02813 const IDWTELEM *pred1 = pred + y*obmc_stride;
02814 uint8_t *cur1 = cur + y*ref_stride;
02815 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
02816 for(x=x0; x<x1; x++){
02817 #if FRAC_BITS >= LOG2_OBMC_MAX
02818 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
02819 #else
02820 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
02821 #endif
02822 v = (v + pred1[x]) >> FRAC_BITS;
02823 if(v&(~255)) v= ~(v>>31);
02824 dst1[x] = v;
02825 }
02826 }
02827
02828
/* Picture-corner blocks (only when LOG2_OBMC_MAX == 8): the outer quadrant is
 * copied straight from the rendered block instead of using the blended value.
 * NOTE(review): presumably because the summed edge weight there does not fit
 * in the uint8_t weight table -- confirm. */
02829 if(LOG2_OBMC_MAX == 8
02830 && (mb_x == 0 || mb_x == b_stride-1)
02831 && (mb_y == 0 || mb_y == b_height-1)){
02832 if(mb_x == 0)
02833 x1 = block_w;
02834 else
02835 x0 = block_w;
02836 if(mb_y == 0)
02837 y1 = block_w;
02838 else
02839 y0 = block_w;
02840 for(y=y0; y<y1; y++)
02841 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
02842 }
02843
/* Distortion over the full 2*block_w window. A 32x32 window uses the wavelet
 * comparators for FF_CMP_W97 / FF_CMP_W53, otherwise four 16x16 me_cmp calls;
 * the only other supported window is 16x16 (block_w == 8). */
02844 if(block_w==16){
02845
02846
02847
02848
02849
02850
02851 if(s->avctx->me_cmp == FF_CMP_W97)
02852 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02853 else if(s->avctx->me_cmp == FF_CMP_W53)
02854 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02855 else{
02856 distortion = 0;
02857 for(i=0; i<4; i++){
02858 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
02859 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
02860 }
02861 }
02862 }else{
02863 assert(block_w==8);
02864 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
02865 }
02866
/* Rate is only estimated for plane 0: the block itself plus the blocks at
 * offsets (+1,0), (-1,+1) and (0,+1); get_block_bits() returns 0 for
 * positions outside the grid. */
02867 if(plane_index==0){
02868 for(i=0; i<4; i++){
02869
02870
02871
02872
02873 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
02874 }
/* in the second-to-last column the block at (+1,+1) is counted as well */
02875 if(mb_x == b_stride-2)
02876 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
02877 }
02878 return distortion + rate*penalty_factor;
02879 }
02880
/**
 * Compute a rate-distortion score for the 2x2 block group whose top-left
 * block is (mb_x, mb_y).
 *
 * The 3x3 arrangement of block-sized regions around the group is rebuilt in
 * current_picture with add_yblock(), and the distortion of each region
 * against input_picture is summed. For plane 0 a bit estimate from
 * get_block_bits() is added; if all four blocks are equal according to
 * same_block() they are costed as one block of width 2.
 *
 * @param s           codec context
 * @param mb_x        block column of the top-left block of the group
 * @param mb_y        block row of the top-left block of the group
 * @param plane_index plane to evaluate
 * @return distortion + rate * penalty_factor
 */
02881 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
02882 int i, y2;
02883 Plane *p= &s->plane[plane_index];
02884 const int block_size = MB_SIZE >> s->block_max_depth;
02885 const int block_w = plane_index ? block_size/2 : block_size;
02886 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02887 const int obmc_stride= plane_index ? block_size : 2*block_size;
02888 const int ref_stride= s->current_picture.linesize[plane_index];
02889 uint8_t *dst= s->current_picture.data[plane_index];
02890 uint8_t *src= s-> input_picture.data[plane_index];
02891
02892
/* Buffer handed to add_yblock() with a stride of 0; it is static and not
 * const, so it is shared by every call of this function. */
02893 static IDWTELEM zero_dst[4096];
02894 const int b_stride = s->b_width << s->block_max_depth;
02895 const int w= p->width;
02896 const int h= p->height;
02897 int distortion= 0;
02898 int rate= 0;
02899 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02900
/* Visit the 3x3 block-sized regions starting half a block inside (mb_x-1, mb_y-1). */
02901 for(i=0; i<9; i++){
02902 int mb_x2= mb_x + (i%3) - 1;
02903 int mb_y2= mb_y + (i/3) - 1;
02904 int x= block_w*mb_x2 + block_w/2;
02905 int y= block_w*mb_y2 + block_w/2;
02906
02907 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
02908 x, y, block_w, block_w, w, h, 0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
02909
02910
/* Parts of the region outside the plane are copied from the input picture,
 * so they match src in the comparison below. */
02911 for(y2= y; y2<0; y2++)
02912 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02913 for(y2= h; y2<y+block_w; y2++)
02914 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02915 if(x<0){
02916 for(y2= y; y2<y+block_w; y2++)
02917 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
02918 }
02919 if(x+block_w > w){
02920 for(y2= y; y2<y+block_w; y2++)
02921 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
02922 }
02923
/* me_cmp[1] is used for 8-wide regions, me_cmp[0] for 16-wide ones */
02924 assert(block_w== 8 || block_w==16);
02925 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
02926 }
02927
02928 if(plane_index==0){
02929 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
02930 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
02931
02932
02933
02934
02935
02936
/* A merged group is costed once with width 2; the first four dxy entries
 * (the group's own blocks) are then skipped and only the neighbours to the
 * right of and below the group are added. */
02937 if(merged)
02938 rate = get_block_bits(s, mb_x, mb_y, 2);
02939 for(i=merged?4:0; i<9; i++){
02940 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
02941 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
02942 }
02943 }
02944 return distortion + rate*penalty_factor;
02945 }
02946
02947 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
02948 const int b_stride= s->b_width << s->block_max_depth;
02949 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
02950 BlockNode backup= *block;
02951 int rd, index, value;
02952
02953 assert(mb_x>=0 && mb_y>=0);
02954 assert(mb_x<b_stride);
02955
02956 if(intra){
02957 block->color[0] = p[0];
02958 block->color[1] = p[1];
02959 block->color[2] = p[2];
02960 block->type |= BLOCK_INTRA;
02961 }else{
02962 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
02963 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
02964 if(s->me_cache[index] == value)
02965 return 0;
02966 s->me_cache[index]= value;
02967
02968 block->mx= p[0];
02969 block->my= p[1];
02970 block->type &= ~BLOCK_INTRA;
02971 }
02972
02973 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
02974
02975
02976 if(rd < *best_rd){
02977 *best_rd= rd;
02978 return 1;
02979 }else{
02980 *block= backup;
02981 return 0;
02982 }
02983 }
02984
02985
02986
02987 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
02988 int p[2] = {p0, p1};
02989 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
02990 }
02991
02992 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
02993 const int b_stride= s->b_width << s->block_max_depth;
02994 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
02995 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
02996 int rd, index, value;
02997
02998 assert(mb_x>=0 && mb_y>=0);
02999 assert(mb_x<b_stride);
03000 assert(((mb_x|mb_y)&1) == 0);
03001
03002 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
03003 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
03004 if(s->me_cache[index] == value)
03005 return 0;
03006 s->me_cache[index]= value;
03007
03008 block->mx= p0;
03009 block->my= p1;
03010 block->ref= ref;
03011 block->type &= ~BLOCK_INTRA;
03012 block[1]= block[b_stride]= block[b_stride+1]= *block;
03013
03014 rd= get_4block_rd(s, mb_x, mb_y, 0);
03015
03016
03017 if(rd < *best_rd){
03018 *best_rd= rd;
03019 return 1;
03020 }else{
03021 block[0]= backup[0];
03022 block[1]= backup[1];
03023 block[b_stride]= backup[2];
03024 block[b_stride+1]= backup[3];
03025 return 0;
03026 }
03027 }
03028
03029 static void iterative_me(SnowContext *s){
03030 int pass, mb_x, mb_y;
03031 const int b_width = s->b_width << s->block_max_depth;
03032 const int b_height= s->b_height << s->block_max_depth;
03033 const int b_stride= b_width;
03034 int color[3];
03035
03036 {
03037 RangeCoder r = s->c;
03038 uint8_t state[sizeof(s->block_state)];
03039 memcpy(state, s->block_state, sizeof(s->block_state));
03040 for(mb_y= 0; mb_y<s->b_height; mb_y++)
03041 for(mb_x= 0; mb_x<s->b_width; mb_x++)
03042 encode_q_branch(s, 0, mb_x, mb_y);
03043 s->c = r;
03044 memcpy(s->block_state, state, sizeof(s->block_state));
03045 }
03046
03047 for(pass=0; pass<25; pass++){
03048 int change= 0;
03049
03050 for(mb_y= 0; mb_y<b_height; mb_y++){
03051 for(mb_x= 0; mb_x<b_width; mb_x++){
03052 int dia_change, i, j, ref;
03053 int best_rd= INT_MAX, ref_rd;
03054 BlockNode backup, ref_b;
03055 const int index= mb_x + mb_y * b_stride;
03056 BlockNode *block= &s->block[index];
03057 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
03058 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
03059 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
03060 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
03061 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
03062 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
03063 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
03064 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
03065 const int b_w= (MB_SIZE >> s->block_max_depth);
03066 uint8_t obmc_edged[b_w*2][b_w*2];
03067
03068 if(pass && (block->type & BLOCK_OPT))
03069 continue;
03070 block->type |= BLOCK_OPT;
03071
03072 backup= *block;
03073
03074 if(!s->me_cache_generation)
03075 memset(s->me_cache, 0, sizeof(s->me_cache));
03076 s->me_cache_generation += 1<<22;
03077
03078
03079 {
03080 int x, y;
03081 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
03082 if(mb_x==0)
03083 for(y=0; y<b_w*2; y++)
03084 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
03085 if(mb_x==b_stride-1)
03086 for(y=0; y<b_w*2; y++)
03087 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
03088 if(mb_y==0){
03089 for(x=0; x<b_w*2; x++)
03090 obmc_edged[0][x] += obmc_edged[b_w-1][x];
03091 for(y=1; y<b_w; y++)
03092 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
03093 }
03094 if(mb_y==b_height-1){
03095 for(x=0; x<b_w*2; x++)
03096 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
03097 for(y=b_w; y<b_w*2-1; y++)
03098 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
03099 }
03100 }
03101
03102
03103 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
03104 uint8_t *src= s-> input_picture.data[0];
03105 uint8_t *dst= s->current_picture.data[0];
03106 const int stride= s->current_picture.linesize[0];
03107 const int block_w= MB_SIZE >> s->block_max_depth;
03108 const int sx= block_w*mb_x - block_w/2;
03109 const int sy= block_w*mb_y - block_w/2;
03110 const int w= s->plane[0].width;
03111 const int h= s->plane[0].height;
03112 int y;
03113
03114 for(y=sy; y<0; y++)
03115 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03116 for(y=h; y<sy+block_w*2; y++)
03117 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03118 if(sx<0){
03119 for(y=sy; y<sy+block_w*2; y++)
03120 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
03121 }
03122 if(sx+block_w*2 > w){
03123 for(y=sy; y<sy+block_w*2; y++)
03124 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
03125 }
03126 }
03127
03128
03129 for(i=0; i<3; i++)
03130 color[i]= get_dc(s, mb_x, mb_y, i);
03131
03132
03133 if(pass > 0 && (block->type&BLOCK_INTRA)){
03134 int color0[3]= {block->color[0], block->color[1], block->color[2]};
03135 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
03136 }else
03137 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
03138
03139 ref_b= *block;
03140 ref_rd= best_rd;
03141 for(ref=0; ref < s->ref_frames; ref++){
03142 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
03143 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2)
03144 continue;
03145 block->ref= ref;
03146 best_rd= INT_MAX;
03147
03148 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
03149 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
03150 if(tb)
03151 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
03152 if(lb)
03153 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
03154 if(rb)
03155 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
03156 if(bb)
03157 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
03158
03159
03160
03161 do{
03162 dia_change=0;
03163 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
03164 for(j=0; j<i; j++){
03165 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03166 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03167 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03168 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03169 }
03170 }
03171 }while(dia_change);
03172
03173 do{
03174 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
03175 dia_change=0;
03176 for(i=0; i<8; i++)
03177 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
03178 }while(dia_change);
03179
03180
03181 mvr[0][0]= block->mx;
03182 mvr[0][1]= block->my;
03183 if(ref_rd > best_rd){
03184 ref_rd= best_rd;
03185 ref_b= *block;
03186 }
03187 }
03188 best_rd= ref_rd;
03189 *block= ref_b;
03190 #if 1
03191 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
03192
03193 #endif
03194 if(!same_block(block, &backup)){
03195 if(tb ) tb ->type &= ~BLOCK_OPT;
03196 if(lb ) lb ->type &= ~BLOCK_OPT;
03197 if(rb ) rb ->type &= ~BLOCK_OPT;
03198 if(bb ) bb ->type &= ~BLOCK_OPT;
03199 if(tlb) tlb->type &= ~BLOCK_OPT;
03200 if(trb) trb->type &= ~BLOCK_OPT;
03201 if(blb) blb->type &= ~BLOCK_OPT;
03202 if(brb) brb->type &= ~BLOCK_OPT;
03203 change ++;
03204 }
03205 }
03206 }
03207 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
03208 if(!change)
03209 break;
03210 }
03211
03212 if(s->block_max_depth == 1){
03213 int change= 0;
03214 for(mb_y= 0; mb_y<b_height; mb_y+=2){
03215 for(mb_x= 0; mb_x<b_width; mb_x+=2){
03216 int i;
03217 int best_rd, init_rd;
03218 const int index= mb_x + mb_y * b_stride;
03219 BlockNode *b[4];
03220
03221 b[0]= &s->block[index];
03222 b[1]= b[0]+1;
03223 b[2]= b[0]+b_stride;
03224 b[3]= b[2]+1;
03225 if(same_block(b[0], b[1]) &&
03226 same_block(b[0], b[2]) &&
03227 same_block(b[0], b[3]))
03228 continue;
03229
03230 if(!s->me_cache_generation)
03231 memset(s->me_cache, 0, sizeof(s->me_cache));
03232 s->me_cache_generation += 1<<22;
03233
03234 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
03235
03236
03237 check_4block_inter(s, mb_x, mb_y,
03238 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
03239 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
03240
03241 for(i=0; i<4; i++)
03242 if(!(b[i]->type&BLOCK_INTRA))
03243 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
03244
03245 if(init_rd != best_rd)
03246 change++;
03247 }
03248 }
03249 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
03250 }
03251 }
03252
03253 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
03254 const int w= b->width;
03255 const int h= b->height;
03256 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03257 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
03258 int x,y, thres1, thres2;
03259
03260 if(s->qlog == LOSSLESS_QLOG){
03261 for(y=0; y<h; y++)
03262 for(x=0; x<w; x++)
03263 dst[x + y*stride]= src[x + y*stride];
03264 return;
03265 }
03266
03267 bias= bias ? 0 : (3*qmul)>>3;
03268 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
03269 thres2= 2*thres1;
03270
03271 if(!bias){
03272 for(y=0; y<h; y++){
03273 for(x=0; x<w; x++){
03274 int i= src[x + y*stride];
03275
03276 if((unsigned)(i+thres1) > thres2){
03277 if(i>=0){
03278 i<<= QEXPSHIFT;
03279 i/= qmul;
03280 dst[x + y*stride]= i;
03281 }else{
03282 i= -i;
03283 i<<= QEXPSHIFT;
03284 i/= qmul;
03285 dst[x + y*stride]= -i;
03286 }
03287 }else
03288 dst[x + y*stride]= 0;
03289 }
03290 }
03291 }else{
03292 for(y=0; y<h; y++){
03293 for(x=0; x<w; x++){
03294 int i= src[x + y*stride];
03295
03296 if((unsigned)(i+thres1) > thres2){
03297 if(i>=0){
03298 i<<= QEXPSHIFT;
03299 i= (i + bias) / qmul;
03300 dst[x + y*stride]= i;
03301 }else{
03302 i= -i;
03303 i<<= QEXPSHIFT;
03304 i= (i + bias) / qmul;
03305 dst[x + y*stride]= -i;
03306 }
03307 }else
03308 dst[x + y*stride]= 0;
03309 }
03310 }
03311 }
03312 }
03313
03314 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
03315 const int w= b->width;
03316 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03317 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03318 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03319 int x,y;
03320
03321 if(s->qlog == LOSSLESS_QLOG) return;
03322
03323 for(y=start_y; y<end_y; y++){
03324
03325 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03326 for(x=0; x<w; x++){
03327 int i= line[x];
03328 if(i<0){
03329 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT));
03330 }else if(i>0){
03331 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
03332 }
03333 }
03334 }
03335 }
03336
03337 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
03338 const int w= b->width;
03339 const int h= b->height;
03340 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03341 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03342 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03343 int x,y;
03344
03345 if(s->qlog == LOSSLESS_QLOG) return;
03346
03347 for(y=0; y<h; y++){
03348 for(x=0; x<w; x++){
03349 int i= src[x + y*stride];
03350 if(i<0){
03351 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT));
03352 }else if(i>0){
03353 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
03354 }
03355 }
03356 }
03357 }
03358
03359 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03360 const int w= b->width;
03361 const int h= b->height;
03362 int x,y;
03363
03364 for(y=h-1; y>=0; y--){
03365 for(x=w-1; x>=0; x--){
03366 int i= x + y*stride;
03367
03368 if(x){
03369 if(use_median){
03370 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03371 else src[i] -= src[i - 1];
03372 }else{
03373 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03374 else src[i] -= src[i - 1];
03375 }
03376 }else{
03377 if(y) src[i] -= src[i - stride];
03378 }
03379 }
03380 }
03381 }
03382
03383 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
03384 const int w= b->width;
03385 int x,y;
03386
03387 IDWTELEM * line=0;
03388 IDWTELEM * prev;
03389
03390 if (start_y != 0)
03391 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03392
03393 for(y=start_y; y<end_y; y++){
03394 prev = line;
03395
03396 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03397 for(x=0; x<w; x++){
03398 if(x){
03399 if(use_median){
03400 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
03401 else line[x] += line[x - 1];
03402 }else{
03403 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
03404 else line[x] += line[x - 1];
03405 }
03406 }else{
03407 if(y) line[x] += prev[x];
03408 }
03409 }
03410 }
03411 }
03412
03413 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03414 const int w= b->width;
03415 const int h= b->height;
03416 int x,y;
03417
03418 for(y=0; y<h; y++){
03419 for(x=0; x<w; x++){
03420 int i= x + y*stride;
03421
03422 if(x){
03423 if(use_median){
03424 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03425 else src[i] += src[i - 1];
03426 }else{
03427 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03428 else src[i] += src[i - 1];
03429 }
03430 }else{
03431 if(y) src[i] += src[i - stride];
03432 }
03433 }
03434 }
03435 }
03436
03437 static void encode_qlogs(SnowContext *s){
03438 int plane_index, level, orientation;
03439
03440 for(plane_index=0; plane_index<2; plane_index++){
03441 for(level=0; level<s->spatial_decomposition_count; level++){
03442 for(orientation=level ? 1:0; orientation<4; orientation++){
03443 if(orientation==2) continue;
03444 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
03445 }
03446 }
03447 }
03448 }
03449
03450 static void encode_header(SnowContext *s){
03451 int plane_index, i;
03452 uint8_t kstate[32];
03453
03454 memset(kstate, MID_STATE, sizeof(kstate));
03455
03456 put_rac(&s->c, kstate, s->keyframe);
03457 if(s->keyframe || s->always_reset){
03458 reset_contexts(s);
03459 s->last_spatial_decomposition_type=
03460 s->last_qlog=
03461 s->last_qbias=
03462 s->last_mv_scale=
03463 s->last_block_max_depth= 0;
03464 for(plane_index=0; plane_index<2; plane_index++){
03465 Plane *p= &s->plane[plane_index];
03466 p->last_htaps=0;
03467 p->last_diag_mc=0;
03468 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
03469 }
03470 }
03471 if(s->keyframe){
03472 put_symbol(&s->c, s->header_state, s->version, 0);
03473 put_rac(&s->c, s->header_state, s->always_reset);
03474 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
03475 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
03476 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03477 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
03478 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
03479 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
03480 put_rac(&s->c, s->header_state, s->spatial_scalability);
03481
03482 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
03483
03484 encode_qlogs(s);
03485 }
03486
03487 if(!s->keyframe){
03488 int update_mc=0;
03489 for(plane_index=0; plane_index<2; plane_index++){
03490 Plane *p= &s->plane[plane_index];
03491 update_mc |= p->last_htaps != p->htaps;
03492 update_mc |= p->last_diag_mc != p->diag_mc;
03493 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03494 }
03495 put_rac(&s->c, s->header_state, update_mc);
03496 if(update_mc){
03497 for(plane_index=0; plane_index<2; plane_index++){
03498 Plane *p= &s->plane[plane_index];
03499 put_rac(&s->c, s->header_state, p->diag_mc);
03500 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
03501 for(i= p->htaps/2; i; i--)
03502 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
03503 }
03504 }
03505 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
03506 put_rac(&s->c, s->header_state, 1);
03507 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03508 encode_qlogs(s);
03509 }else
03510 put_rac(&s->c, s->header_state, 0);
03511 }
03512
03513 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
03514 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
03515 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
03516 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
03517 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
03518
03519 }
03520
03521 static void update_last_header_values(SnowContext *s){
03522 int plane_index;
03523
03524 if(!s->keyframe){
03525 for(plane_index=0; plane_index<2; plane_index++){
03526 Plane *p= &s->plane[plane_index];
03527 p->last_diag_mc= p->diag_mc;
03528 p->last_htaps = p->htaps;
03529 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03530 }
03531 }
03532
03533 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
03534 s->last_qlog = s->qlog;
03535 s->last_qbias = s->qbias;
03536 s->last_mv_scale = s->mv_scale;
03537 s->last_block_max_depth = s->block_max_depth;
03538 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
03539 }
03540
03541 static void decode_qlogs(SnowContext *s){
03542 int plane_index, level, orientation;
03543
03544 for(plane_index=0; plane_index<3; plane_index++){
03545 for(level=0; level<s->spatial_decomposition_count; level++){
03546 for(orientation=level ? 1:0; orientation<4; orientation++){
03547 int q;
03548 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
03549 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
03550 else q= get_symbol(&s->c, s->header_state, 1);
03551 s->plane[plane_index].band[level][orientation].qlog= q;
03552 }
03553 }
03554 }
03555 }
03556
03557 static int decode_header(SnowContext *s){
03558 int plane_index, tmp;
03559 uint8_t kstate[32];
03560
03561 memset(kstate, MID_STATE, sizeof(kstate));
03562
03563 s->keyframe= get_rac(&s->c, kstate);
03564 if(s->keyframe || s->always_reset){
03565 reset_contexts(s);
03566 s->spatial_decomposition_type=
03567 s->qlog=
03568 s->qbias=
03569 s->mv_scale=
03570 s->block_max_depth= 0;
03571 }
03572 if(s->keyframe){
03573 s->version= get_symbol(&s->c, s->header_state, 0);
03574 if(s->version>0){
03575 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
03576 return -1;
03577 }
03578 s->always_reset= get_rac(&s->c, s->header_state);
03579 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
03580 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03581 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03582 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
03583 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
03584 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
03585 s->spatial_scalability= get_rac(&s->c, s->header_state);
03586
03587 tmp= get_symbol(&s->c, s->header_state, 0)+1;
03588 if(tmp < 1 || tmp > MAX_REF_FRAMES){
03589 av_log(s->avctx, AV_LOG_ERROR, "reference frame count is %d\n", tmp);
03590 return -1;
03591 }
03592 s->max_ref_frames= tmp;
03593
03594 decode_qlogs(s);
03595 }
03596
03597 if(!s->keyframe){
03598 if(get_rac(&s->c, s->header_state)){
03599 for(plane_index=0; plane_index<2; plane_index++){
03600 int htaps, i, sum=0;
03601 Plane *p= &s->plane[plane_index];
03602 p->diag_mc= get_rac(&s->c, s->header_state);
03603 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
03604 if((unsigned)htaps > HTAPS_MAX || htaps==0)
03605 return -1;
03606 p->htaps= htaps;
03607 for(i= htaps/2; i; i--){
03608 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
03609 sum += p->hcoeff[i];
03610 }
03611 p->hcoeff[0]= 32-sum;
03612 }
03613 s->plane[2].diag_mc= s->plane[1].diag_mc;
03614 s->plane[2].htaps = s->plane[1].htaps;
03615 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
03616 }
03617 if(get_rac(&s->c, s->header_state)){
03618 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03619 decode_qlogs(s);
03620 }
03621 }
03622
03623 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
03624 if(s->spatial_decomposition_type > 1){
03625 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
03626 return -1;
03627 }
03628
03629 s->qlog += get_symbol(&s->c, s->header_state, 1);
03630 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
03631 s->qbias += get_symbol(&s->c, s->header_state, 1);
03632 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
03633 if(s->block_max_depth > 1 || s->block_max_depth < 0){
03634 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
03635 s->block_max_depth= 0;
03636 return -1;
03637 }
03638
03639 return 0;
03640 }
03641
03642 static void init_qexp(void){
03643 int i;
03644 double v=128;
03645
03646 for(i=0; i<QROOT; i++){
03647 qexp[i]= lrintf(v);
03648 v *= pow(2, 1.0 / QROOT);
03649 }
03650 }
03651
03652 static av_cold int common_init(AVCodecContext *avctx){
03653 SnowContext *s = avctx->priv_data;
03654 int width, height;
03655 int i, j;
03656
03657 s->avctx= avctx;
03658 s->max_ref_frames=1;
03659
03660 dsputil_init(&s->dsp, avctx);
03661
03662 #define mcf(dx,dy)\
03663 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
03664 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
03665 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
03666 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
03667 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
03668 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
03669
03670 mcf( 0, 0)
03671 mcf( 4, 0)
03672 mcf( 8, 0)
03673 mcf(12, 0)
03674 mcf( 0, 4)
03675 mcf( 4, 4)
03676 mcf( 8, 4)
03677 mcf(12, 4)
03678 mcf( 0, 8)
03679 mcf( 4, 8)
03680 mcf( 8, 8)
03681 mcf(12, 8)
03682 mcf( 0,12)
03683 mcf( 4,12)
03684 mcf( 8,12)
03685 mcf(12,12)
03686
03687 #define mcfh(dx,dy)\
03688 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
03689 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
03690 mc_block_hpel ## dx ## dy ## 16;\
03691 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
03692 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
03693 mc_block_hpel ## dx ## dy ## 8;
03694
03695 mcfh(0, 0)
03696 mcfh(8, 0)
03697 mcfh(0, 8)
03698 mcfh(8, 8)
03699
03700 if(!qexp[0])
03701 init_qexp();
03702
03703
03704
03705 width= s->avctx->width;
03706 height= s->avctx->height;
03707
03708 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
03709 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
03710
03711 for(i=0; i<MAX_REF_FRAMES; i++)
03712 for(j=0; j<MAX_REF_FRAMES; j++)
03713 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
03714
03715 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
03716 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
03717
03718 return 0;
03719 }
03720
03721 static int common_init_after_header(AVCodecContext *avctx){
03722 SnowContext *s = avctx->priv_data;
03723 int plane_index, level, orientation;
03724
03725 for(plane_index=0; plane_index<3; plane_index++){
03726 int w= s->avctx->width;
03727 int h= s->avctx->height;
03728
03729 if(plane_index){
03730 w>>= s->chroma_h_shift;
03731 h>>= s->chroma_v_shift;
03732 }
03733 s->plane[plane_index].width = w;
03734 s->plane[plane_index].height= h;
03735
03736 for(level=s->spatial_decomposition_count-1; level>=0; level--){
03737 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03738 SubBand *b= &s->plane[plane_index].band[level][orientation];
03739
03740 b->buf= s->spatial_dwt_buffer;
03741 b->level= level;
03742 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
03743 b->width = (w + !(orientation&1))>>1;
03744 b->height= (h + !(orientation>1))>>1;
03745
03746 b->stride_line = 1 << (s->spatial_decomposition_count - level);
03747 b->buf_x_offset = 0;
03748 b->buf_y_offset = 0;
03749
03750 if(orientation&1){
03751 b->buf += (w+1)>>1;
03752 b->buf_x_offset = (w+1)>>1;
03753 }
03754 if(orientation>1){
03755 b->buf += b->stride>>1;
03756 b->buf_y_offset = b->stride_line >> 1;
03757 }
03758 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
03759
03760 if(level)
03761 b->parent= &s->plane[plane_index].band[level-1][orientation];
03762
03763 av_freep(&b->x_coeff);
03764 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
03765 }
03766 w= (w+1)>>1;
03767 h= (h+1)>>1;
03768 }
03769 }
03770
03771 return 0;
03772 }
03773
03774 static int qscale2qlog(int qscale){
03775 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
03776 + 61*QROOT/8;
03777 }
03778
03779 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
03780 {
03781
03782
03783
03784 uint32_t coef_sum= 0;
03785 int level, orientation, delta_qlog;
03786
03787 for(level=0; level<s->spatial_decomposition_count; level++){
03788 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03789 SubBand *b= &s->plane[0].band[level][orientation];
03790 IDWTELEM *buf= b->ibuf;
03791 const int w= b->width;
03792 const int h= b->height;
03793 const int stride= b->stride;
03794 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
03795 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03796 const int qdiv= (1<<16)/qmul;
03797 int x, y;
03798
03799 for(y=0; y<h; y++)
03800 for(x=0; x<w; x++)
03801 buf[x+y*stride]= b->buf[x+y*stride];
03802 if(orientation==0)
03803 decorrelate(s, b, buf, stride, 1, 0);
03804 for(y=0; y<h; y++)
03805 for(x=0; x<w; x++)
03806 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
03807 }
03808 }
03809
03810
03811 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
03812 assert(coef_sum < INT_MAX);
03813
03814 if(pict->pict_type == FF_I_TYPE){
03815 s->m.current_picture.mb_var_sum= coef_sum;
03816 s->m.current_picture.mc_mb_var_sum= 0;
03817 }else{
03818 s->m.current_picture.mc_mb_var_sum= coef_sum;
03819 s->m.current_picture.mb_var_sum= 0;
03820 }
03821
03822 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
03823 if (pict->quality < 0)
03824 return INT_MIN;
03825 s->lambda= pict->quality * 3/2;
03826 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
03827 s->qlog+= delta_qlog;
03828 return delta_qlog;
03829 }
03830
03831 static void calculate_visual_weight(SnowContext *s, Plane *p){
03832 int width = p->width;
03833 int height= p->height;
03834 int level, orientation, x, y;
03835
03836 for(level=0; level<s->spatial_decomposition_count; level++){
03837 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03838 SubBand *b= &p->band[level][orientation];
03839 IDWTELEM *ibuf= b->ibuf;
03840 int64_t error=0;
03841
03842 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
03843 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
03844 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
03845 for(y=0; y<height; y++){
03846 for(x=0; x<width; x++){
03847 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
03848 error += d*d;
03849 }
03850 }
03851
03852 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
03853 }
03854 }
03855 }
03856
03857 #define QUANTIZE2 0
03858
03859 #if QUANTIZE2==1
03860 #define Q2_STEP 8
03861
03862 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
03863 SubBand *b= &p->band[level][orientation];
03864 int x, y;
03865 int xo=0;
03866 int yo=0;
03867 int step= 1 << (s->spatial_decomposition_count - level);
03868
03869 if(orientation&1)
03870 xo= step>>1;
03871 if(orientation&2)
03872 yo= step>>1;
03873
03874
03875
03876 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
03877 for(y=0; y<p->height; y++){
03878 for(x=0; x<p->width; x++){
03879 int sx= (x-xo + step/2) / step / Q2_STEP;
03880 int sy= (y-yo + step/2) / step / Q2_STEP;
03881 int v= r0[x + y*p->width] - r1[x + y*p->width];
03882 assert(sx>=0 && sy>=0 && sx < score_stride);
03883 v= ((v+8)>>4)<<4;
03884 score[sx + sy*score_stride] += v*v;
03885 assert(score[sx + sy*score_stride] >= 0);
03886 }
03887 }
03888 }
03889
03890 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
03891 int level, orientation;
03892
03893 for(level=0; level<s->spatial_decomposition_count; level++){
03894 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03895 SubBand *b= &p->band[level][orientation];
03896 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
03897
03898 dequantize(s, b, dst, b->stride);
03899 }
03900 }
03901 }
03902
03903 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
03904 int level, orientation, ys, xs, x, y, pass;
03905 IDWTELEM best_dequant[height * stride];
03906 IDWTELEM idwt2_buffer[height * stride];
03907 const int score_stride= (width + 10)/Q2_STEP;
03908 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP];
03909 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP];
03910 int threshold= (s->m.lambda * s->m.lambda) >> 6;
03911
03912
03913
03914
03915 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
03916
03917 for(level=0; level<s->spatial_decomposition_count; level++){
03918 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03919 SubBand *b= &p->band[level][orientation];
03920 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
03921 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
03922 assert(src == b->buf);
03923
03924 quantize(s, b, dst, src, b->stride, s->qbias);
03925 }
03926 }
03927 for(pass=0; pass<1; pass++){
03928 if(s->qbias == 0)
03929 continue;
03930 for(level=0; level<s->spatial_decomposition_count; level++){
03931 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03932 SubBand *b= &p->band[level][orientation];
03933 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
03934 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
03935
03936 for(ys= 0; ys<Q2_STEP; ys++){
03937 for(xs= 0; xs<Q2_STEP; xs++){
03938 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
03939 dequantize_all(s, p, idwt2_buffer, width, height);
03940 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
03941 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
03942 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
03943 for(y=ys; y<b->height; y+= Q2_STEP){
03944 for(x=xs; x<b->width; x+= Q2_STEP){
03945 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
03946 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
03947
03948 }
03949 }
03950 dequantize_all(s, p, idwt2_buffer, width, height);
03951 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
03952 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
03953 for(y=ys; y<b->height; y+= Q2_STEP){
03954 for(x=xs; x<b->width; x+= Q2_STEP){
03955 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
03956 if(score[score_idx] <= best_score[score_idx] + threshold){
03957 best_score[score_idx]= score[score_idx];
03958 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
03959 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
03960
03961 }
03962 }
03963 }
03964 }
03965 }
03966 }
03967 }
03968 }
03969 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
03970 }
03971
03972 #endif
03973
03974 static av_cold int encode_init(AVCodecContext *avctx)
03975 {
03976 SnowContext *s = avctx->priv_data;
03977 int plane_index;
03978
03979 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
03980 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
03981 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
03982 return -1;
03983 }
03984
03985 if(avctx->prediction_method == DWT_97
03986 && (avctx->flags & CODEC_FLAG_QSCALE)
03987 && avctx->global_quality == 0){
03988 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
03989 return -1;
03990 }
03991
03992 s->spatial_decomposition_type= avctx->prediction_method;
03993
03994 s->chroma_h_shift= 1;
03995 s->chroma_v_shift= 1;
03996
03997 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
03998 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
03999
04000 for(plane_index=0; plane_index<3; plane_index++){
04001 s->plane[plane_index].diag_mc= 1;
04002 s->plane[plane_index].htaps= 6;
04003 s->plane[plane_index].hcoeff[0]= 40;
04004 s->plane[plane_index].hcoeff[1]= -10;
04005 s->plane[plane_index].hcoeff[2]= 2;
04006 s->plane[plane_index].fast_mc= 1;
04007 }
04008
04009 common_init(avctx);
04010 alloc_blocks(s);
04011
04012 s->version=0;
04013
04014 s->m.avctx = avctx;
04015 s->m.flags = avctx->flags;
04016 s->m.bit_rate= avctx->bit_rate;
04017
04018 s->m.me.temp =
04019 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
04020 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
04021 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
04022 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
04023 h263_encode_init(&s->m);
04024
04025 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
04026
04027 if(avctx->flags&CODEC_FLAG_PASS1){
04028 if(!avctx->stats_out)
04029 avctx->stats_out = av_mallocz(256);
04030 }
04031 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
04032 if(ff_rate_control_init(&s->m) < 0)
04033 return -1;
04034 }
04035 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
04036
04037 avctx->coded_frame= &s->current_picture;
04038 switch(avctx->pix_fmt){
04039
04040
04041 case PIX_FMT_YUV420P:
04042 case PIX_FMT_GRAY8:
04043
04044
04045 s->colorspace_type= 0;
04046 break;
04047
04048
04049
04050 default:
04051 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
04052 return -1;
04053 }
04054
04055 s->chroma_h_shift= 1;
04056 s->chroma_v_shift= 1;
04057
04058 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
04059 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
04060
04061 s->avctx->get_buffer(s->avctx, &s->input_picture);
04062
04063 if(s->avctx->me_method == ME_ITER){
04064 int i;
04065 int size= s->b_width * s->b_height << 2*s->block_max_depth;
04066 for(i=0; i<s->max_ref_frames; i++){
04067 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
04068 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
04069 }
04070 }
04071
04072 return 0;
04073 }
04074
04075 #define USE_HALFPEL_PLANE 0
04076
04077 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
04078 int p,x,y;
04079
04080 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
04081
04082 for(p=0; p<3; p++){
04083 int is_chroma= !!p;
04084 int w= s->avctx->width >>is_chroma;
04085 int h= s->avctx->height >>is_chroma;
04086 int ls= frame->linesize[p];
04087 uint8_t *src= frame->data[p];
04088
04089 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04090 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04091 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04092
04093 halfpel[0][p]= src;
04094 for(y=0; y<h; y++){
04095 for(x=0; x<w; x++){
04096 int i= y*ls + x;
04097
04098 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
04099 }
04100 }
04101 for(y=0; y<h; y++){
04102 for(x=0; x<w; x++){
04103 int i= y*ls + x;
04104
04105 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
04106 }
04107 }
04108 src= halfpel[1][p];
04109 for(y=0; y<h; y++){
04110 for(x=0; x<w; x++){
04111 int i= y*ls + x;
04112
04113 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
04114 }
04115 }
04116
04117
04118 }
04119 }
04120
04121 static int frame_start(SnowContext *s){
04122 AVFrame tmp;
04123 int w= s->avctx->width;
04124 int h= s->avctx->height;
04125
04126 if(s->current_picture.data[0]){
04127 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
04128 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
04129 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
04130 }
04131
04132 tmp= s->last_picture[s->max_ref_frames-1];
04133 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
04134 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
04135 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
04136 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
04137 s->last_picture[0]= s->current_picture;
04138 s->current_picture= tmp;
04139
04140 if(s->keyframe){
04141 s->ref_frames= 0;
04142 }else{
04143 int i;
04144 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
04145 if(i && s->last_picture[i-1].key_frame)
04146 break;
04147 s->ref_frames= i;
04148 }
04149
04150 s->current_picture.reference= 1;
04151 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
04152 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
04153 return -1;
04154 }
04155
04156 s->current_picture.key_frame= s->keyframe;
04157
04158 return 0;
04159 }
04160
04161 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
04162 SnowContext *s = avctx->priv_data;
04163 RangeCoder * const c= &s->c;
04164 AVFrame *pict = data;
04165 const int width= s->avctx->width;
04166 const int height= s->avctx->height;
04167 int level, orientation, plane_index, i, y;
04168 uint8_t rc_header_bak[sizeof(s->header_state)];
04169 uint8_t rc_block_bak[sizeof(s->block_state)];
04170
04171 ff_init_range_encoder(c, buf, buf_size);
04172 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04173
04174 for(i=0; i<3; i++){
04175 int shift= !!i;
04176 for(y=0; y<(height>>shift); y++)
04177 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
04178 &pict->data[i][y * pict->linesize[i]],
04179 width>>shift);
04180 }
04181 s->new_picture = *pict;
04182
04183 s->m.picture_number= avctx->frame_number;
04184 if(avctx->flags&CODEC_FLAG_PASS2){
04185 s->m.pict_type =
04186 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
04187 s->keyframe= pict->pict_type==FF_I_TYPE;
04188 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
04189 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
04190 if (pict->quality < 0)
04191 return -1;
04192 }
04193 }else{
04194 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
04195 s->m.pict_type=
04196 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
04197 }
04198
04199 if(s->pass1_rc && avctx->frame_number == 0)
04200 pict->quality= 2*FF_QP2LAMBDA;
04201 if(pict->quality){
04202 s->qlog= qscale2qlog(pict->quality);
04203 s->lambda = pict->quality * 3/2;
04204 }
04205 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
04206 s->qlog= LOSSLESS_QLOG;
04207 s->lambda = 0;
04208 }
04209
04210 frame_start(s);
04211
04212 s->m.current_picture_ptr= &s->m.current_picture;
04213 if(pict->pict_type == FF_P_TYPE){
04214 int block_width = (width +15)>>4;
04215 int block_height= (height+15)>>4;
04216 int stride= s->current_picture.linesize[0];
04217
04218 assert(s->current_picture.data[0]);
04219 assert(s->last_picture[0].data[0]);
04220
04221 s->m.avctx= s->avctx;
04222 s->m.current_picture.data[0]= s->current_picture.data[0];
04223 s->m. last_picture.data[0]= s->last_picture[0].data[0];
04224 s->m. new_picture.data[0]= s-> input_picture.data[0];
04225 s->m. last_picture_ptr= &s->m. last_picture;
04226 s->m.linesize=
04227 s->m. last_picture.linesize[0]=
04228 s->m. new_picture.linesize[0]=
04229 s->m.current_picture.linesize[0]= stride;
04230 s->m.uvlinesize= s->current_picture.linesize[1];
04231 s->m.width = width;
04232 s->m.height= height;
04233 s->m.mb_width = block_width;
04234 s->m.mb_height= block_height;
04235 s->m.mb_stride= s->m.mb_width+1;
04236 s->m.b8_stride= 2*s->m.mb_width+1;
04237 s->m.f_code=1;
04238 s->m.pict_type= pict->pict_type;
04239 s->m.me_method= s->avctx->me_method;
04240 s->m.me.scene_change_score=0;
04241 s->m.flags= s->avctx->flags;
04242 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
04243 s->m.out_format= FMT_H263;
04244 s->m.unrestricted_mv= 1;
04245
04246 s->m.lambda = s->lambda;
04247 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
04248 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
04249
04250 s->m.dsp= s->dsp;
04251 ff_init_me(&s->m);
04252 s->dsp= s->m.dsp;
04253 }
04254
04255 if(s->pass1_rc){
04256 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
04257 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
04258 }
04259
04260 redo_frame:
04261
04262 if(pict->pict_type == FF_I_TYPE)
04263 s->spatial_decomposition_count= 5;
04264 else
04265 s->spatial_decomposition_count= 5;
04266
04267 s->m.pict_type = pict->pict_type;
04268 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
04269
04270 common_init_after_header(avctx);
04271
04272 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
04273 for(plane_index=0; plane_index<3; plane_index++){
04274 calculate_visual_weight(s, &s->plane[plane_index]);
04275 }
04276 }
04277
04278 encode_header(s);
04279 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
04280 encode_blocks(s, 1);
04281 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
04282
04283 for(plane_index=0; plane_index<3; plane_index++){
04284 Plane *p= &s->plane[plane_index];
04285 int w= p->width;
04286 int h= p->height;
04287 int x, y;
04288
04289
04290 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
04291
04292 if(pict->data[plane_index])
04293 for(y=0; y<h; y++){
04294 for(x=0; x<w; x++){
04295 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
04296 }
04297 }
04298 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
04299
04300 if( plane_index==0
04301 && pict->pict_type == FF_P_TYPE
04302 && !(avctx->flags&CODEC_FLAG_PASS2)
04303 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
04304 ff_init_range_encoder(c, buf, buf_size);
04305 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04306 pict->pict_type= FF_I_TYPE;
04307 s->keyframe=1;
04308 s->current_picture.key_frame=1;
04309 goto redo_frame;
04310 }
04311
04312 if(s->qlog == LOSSLESS_QLOG){
04313 for(y=0; y<h; y++){
04314 for(x=0; x<w; x++){
04315 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
04316 }
04317 }
04318 }else{
04319 for(y=0; y<h; y++){
04320 for(x=0; x<w; x++){
04321 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
04322 }
04323 }
04324 }
04325
04326
04327
04328
04329 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
04330
04331 if(s->pass1_rc && plane_index==0){
04332 int delta_qlog = ratecontrol_1pass(s, pict);
04333 if (delta_qlog <= INT_MIN)
04334 return -1;
04335 if(delta_qlog){
04336
04337 ff_init_range_encoder(c, buf, buf_size);
04338 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
04339 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
04340 encode_header(s);
04341 encode_blocks(s, 0);
04342 }
04343 }
04344
04345 for(level=0; level<s->spatial_decomposition_count; level++){
04346 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04347 SubBand *b= &p->band[level][orientation];
04348
04349 if(!QUANTIZE2)
04350 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
04351 if(orientation==0)
04352 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
04353 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
04354 assert(b->parent==NULL || b->parent->stride == b->stride*2);
04355 if(orientation==0)
04356 correlate(s, b, b->ibuf, b->stride, 1, 0);
04357 }
04358 }
04359
04360 for(level=0; level<s->spatial_decomposition_count; level++){
04361 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04362 SubBand *b= &p->band[level][orientation];
04363
04364 dequantize(s, b, b->ibuf, b->stride);
04365 }
04366 }
04367
04368 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
04369 if(s->qlog == LOSSLESS_QLOG){
04370 for(y=0; y<h; y++){
04371 for(x=0; x<w; x++){
04372 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
04373 }
04374 }
04375 }
04376 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04377 }else{
04378
04379 if(pict->pict_type == FF_I_TYPE){
04380 for(y=0; y<h; y++){
04381 for(x=0; x<w; x++){
04382 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
04383 pict->data[plane_index][y*pict->linesize[plane_index] + x];
04384 }
04385 }
04386 }else{
04387 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
04388 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04389 }
04390 }
04391 if(s->avctx->flags&CODEC_FLAG_PSNR){
04392 int64_t error= 0;
04393
04394 if(pict->data[plane_index])
04395 for(y=0; y<h; y++){
04396 for(x=0; x<w; x++){
04397 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
04398 error += d*d;
04399 }
04400 }
04401 s->avctx->error[plane_index] += error;
04402 s->current_picture.error[plane_index] = error;
04403 }
04404
04405 }
04406
04407 update_last_header_values(s);
04408
04409 if(s->last_picture[s->max_ref_frames-1].data[0]){
04410 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
04411 for(i=0; i<9; i++)
04412 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
04413 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
04414 }
04415
04416 s->current_picture.coded_picture_number = avctx->frame_number;
04417 s->current_picture.pict_type = pict->pict_type;
04418 s->current_picture.quality = pict->quality;
04419 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
04420 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
04421 s->m.current_picture.display_picture_number =
04422 s->m.current_picture.coded_picture_number = avctx->frame_number;
04423 s->m.current_picture.quality = pict->quality;
04424 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
04425 if(s->pass1_rc)
04426 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
04427 return -1;
04428 if(avctx->flags&CODEC_FLAG_PASS1)
04429 ff_write_pass1_stats(&s->m);
04430 s->m.last_pict_type = s->m.pict_type;
04431 avctx->frame_bits = s->m.frame_bits;
04432 avctx->mv_bits = s->m.mv_bits;
04433 avctx->misc_bits = s->m.misc_bits;
04434 avctx->p_tex_bits = s->m.p_tex_bits;
04435
04436 emms_c();
04437
04438 return ff_rac_terminate(c);
04439 }
04440
04441 static av_cold void common_end(SnowContext *s){
04442 int plane_index, level, orientation, i;
04443
04444 av_freep(&s->spatial_dwt_buffer);
04445 av_freep(&s->spatial_idwt_buffer);
04446
04447 s->m.me.temp= NULL;
04448 av_freep(&s->m.me.scratchpad);
04449 av_freep(&s->m.me.map);
04450 av_freep(&s->m.me.score_map);
04451 av_freep(&s->m.obmc_scratchpad);
04452
04453 av_freep(&s->block);
04454 av_freep(&s->scratchbuf);
04455
04456 for(i=0; i<MAX_REF_FRAMES; i++){
04457 av_freep(&s->ref_mvs[i]);
04458 av_freep(&s->ref_scores[i]);
04459 if(s->last_picture[i].data[0])
04460 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
04461 }
04462
04463 for(plane_index=0; plane_index<3; plane_index++){
04464 for(level=s->spatial_decomposition_count-1; level>=0; level--){
04465 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04466 SubBand *b= &s->plane[plane_index].band[level][orientation];
04467
04468 av_freep(&b->x_coeff);
04469 }
04470 }
04471 }
04472 }
04473
04474 static av_cold int encode_end(AVCodecContext *avctx)
04475 {
04476 SnowContext *s = avctx->priv_data;
04477
04478 common_end(s);
04479 av_free(avctx->stats_out);
04480
04481 return 0;
04482 }
04483
04484 static av_cold int decode_init(AVCodecContext *avctx)
04485 {
04486 avctx->pix_fmt= PIX_FMT_YUV420P;
04487
04488 common_init(avctx);
04489
04490 return 0;
04491 }
04492
04493 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
04494 SnowContext *s = avctx->priv_data;
04495 RangeCoder * const c= &s->c;
04496 int bytes_read;
04497 AVFrame *picture = data;
04498 int level, orientation, plane_index, i;
04499
04500 ff_init_range_decoder(c, buf, buf_size);
04501 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04502
04503 s->current_picture.pict_type= FF_I_TYPE;
04504 if(decode_header(s)<0)
04505 return -1;
04506 common_init_after_header(avctx);
04507
04508
04509 slice_buffer_destroy(&s->sb);
04510 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
04511
04512 for(plane_index=0; plane_index<3; plane_index++){
04513 Plane *p= &s->plane[plane_index];
04514 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
04515 && p->hcoeff[1]==-10
04516 && p->hcoeff[2]==2;
04517 }
04518
04519 alloc_blocks(s);
04520
04521 frame_start(s);
04522
04523 if(avctx->debug&FF_DEBUG_PICT_INFO)
04524 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
04525
04526 decode_blocks(s);
04527
04528 for(plane_index=0; plane_index<3; plane_index++){
04529 Plane *p= &s->plane[plane_index];
04530 int w= p->width;
04531 int h= p->height;
04532 int x, y;
04533 int decode_state[MAX_DECOMPOSITIONS][4][1];
04534
04535 if(s->avctx->debug&2048){
04536 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
04537 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04538
04539 for(y=0; y<h; y++){
04540 for(x=0; x<w; x++){
04541 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
04542 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
04543 }
04544 }
04545 }
04546
04547 {
04548 for(level=0; level<s->spatial_decomposition_count; level++){
04549 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04550 SubBand *b= &p->band[level][orientation];
04551 unpack_coeffs(s, b, b->parent, orientation);
04552 }
04553 }
04554 }
04555
04556 {
04557 const int mb_h= s->b_height << s->block_max_depth;
04558 const int block_size = MB_SIZE >> s->block_max_depth;
04559 const int block_w = plane_index ? block_size/2 : block_size;
04560 int mb_y;
04561 DWTCompose cs[MAX_DECOMPOSITIONS];
04562 int yd=0, yq=0;
04563 int y;
04564 int end_y;
04565
04566 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
04567 for(mb_y=0; mb_y<=mb_h; mb_y++){
04568
04569 int slice_starty = block_w*mb_y;
04570 int slice_h = block_w*(mb_y+1);
04571 if (!(s->keyframe || s->avctx->debug&512)){
04572 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
04573 slice_h -= (block_w >> 1);
04574 }
04575
04576 for(level=0; level<s->spatial_decomposition_count; level++){
04577 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04578 SubBand *b= &p->band[level][orientation];
04579 int start_y;
04580 int end_y;
04581 int our_mb_start = mb_y;
04582 int our_mb_end = (mb_y + 1);
04583 const int extra= 3;
04584 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
04585 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
04586 if (!(s->keyframe || s->avctx->debug&512)){
04587 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
04588 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
04589 }
04590 start_y = FFMIN(b->height, start_y);
04591 end_y = FFMIN(b->height, end_y);
04592
04593 if (start_y != end_y){
04594 if (orientation == 0){
04595 SubBand * correlate_band = &p->band[0][0];
04596 int correlate_end_y = FFMIN(b->height, end_y + 1);
04597 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
04598 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
04599 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
04600 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
04601 }
04602 else
04603 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
04604 }
04605 }
04606 }
04607
04608 for(; yd<slice_h; yd+=4){
04609 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
04610 }
04611
04612 if(s->qlog == LOSSLESS_QLOG){
04613 for(; yq<slice_h && yq<h; yq++){
04614 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
04615 for(x=0; x<w; x++){
04616 line[x] <<= FRAC_BITS;
04617 }
04618 }
04619 }
04620
04621 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
04622
04623 y = FFMIN(p->height, slice_starty);
04624 end_y = FFMIN(p->height, slice_h);
04625 while(y < end_y)
04626 slice_buffer_release(&s->sb, y++);
04627 }
04628
04629 slice_buffer_flush(&s->sb);
04630 }
04631
04632 }
04633
04634 emms_c();
04635
04636 if(s->last_picture[s->max_ref_frames-1].data[0]){
04637 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
04638 for(i=0; i<9; i++)
04639 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
04640 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
04641 }
04642
04643 if(!(s->avctx->debug&2048))
04644 *picture= s->current_picture;
04645 else
04646 *picture= s->mconly_picture;
04647
04648 *data_size = sizeof(AVFrame);
04649
04650 bytes_read= c->bytestream - c->bytestream_start;
04651 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
04652
04653 return bytes_read;
04654 }
04655
04656 static av_cold int decode_end(AVCodecContext *avctx)
04657 {
04658 SnowContext *s = avctx->priv_data;
04659
04660 slice_buffer_destroy(&s->sb);
04661
04662 common_end(s);
04663
04664 return 0;
04665 }
04666
/*
 * Codec table entry for the Snow decoder.
 *
 * The first ten members are initialised positionally, so their meaning is
 * fixed by the member order of AVCodec, which is not declared in this part of
 * the file. The per-line notes below are inferred from the values and from
 * the parallel snow_encoder entry; confirm them against the AVCodec
 * declaration before relying on them.
 */
04667 AVCodec snow_decoder = {
04668 "snow",
04669 CODEC_TYPE_VIDEO,
04670 CODEC_ID_SNOW,
04671 sizeof(SnowContext), /* decode_end() reads avctx->priv_data as a SnowContext */
04672 decode_init,
04673 NULL, /* same position as encode_frame in snow_encoder; nothing installed here */
04674 decode_end,
04675 decode_frame,
04676 0 , /* NOTE(review): presumably the capability flags -- confirm */
04677 NULL, /* NOTE(review): meaning of this slot is not visible here -- confirm */
04678 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
04679 };
04680
/*
 * Codec table entry for the Snow encoder; only present when
 * CONFIG_SNOW_ENCODER evaluates to non-zero.
 *
 * The first seven members are initialised positionally, so their meaning is
 * fixed by the member order of AVCodec, which is not declared in this part of
 * the file. Members that are not listed are zero-initialised by the language.
 */
04681 #if CONFIG_SNOW_ENCODER
04682 AVCodec snow_encoder = {
04683 "snow",
04684 CODEC_TYPE_VIDEO,
04685 CODEC_ID_SNOW,
04686 sizeof(SnowContext), /* NOTE(review): presumably the private context size -- confirm */
04687 encode_init,
04688 encode_frame, /* same position that snow_decoder leaves NULL */
04689 encode_end,
04690 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
04691 };
04692 #endif
04693
04694
#ifdef TEST
#undef malloc
#undef free
#undef printf
#undef random

/**
 * Stand-alone self test, compiled only when TEST is defined.
 *
 * Steps, in order:
 *  - round-trip random data through ff_spatial_dwt()/ff_spatial_idwt() with
 *    decomposition type 1 (announced as "5/3") and require an exact match;
 *  - repeat the round trip with type 0 (announced as "9/7"), tolerating an
 *    absolute difference of up to 20 per sample;
 *  - put a single coefficient of value 256*256 into each subband, inverse
 *    transform it and print the rounded norms, divided by their common GCD,
 *    as a C table named visual_weight;
 *  - print the centre of the forward transform of a 2x2-periodic pattern.
 *
 * @return 0 when neither round-trip check reported a sample, 1 otherwise
 */
int main(void){
    int width=256;
    int height=256;
    int buffer[2][width*height];
    SnowContext s;
    int i;
    int failures=0; /* samples reported by the two round-trip checks */

    s.spatial_decomposition_count=6;
    s.spatial_decomposition_type=1;

    printf("testing 5/3 DWT\n");
    for(i=0; i<width*height; i++)
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;

    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);

    /* buffer[1] still holds the untouched input; this pass must match exactly */
    for(i=0; i<width*height; i++){
        if(buffer[0][i]!= buffer[1][i]){
            printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
            failures++;
        }
    }

    printf("testing 9/7 DWT\n");
    s.spatial_decomposition_type=0;
    for(i=0; i<width*height; i++)
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;

    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);

    /* this pass only has to come back within +-20 of the input */
    for(i=0; i<width*height; i++){
        if(FFABS(buffer[0][i] - buffer[1][i])>20){
            printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
            failures++;
        }
    }

#if 0
    /* disabled coder round trip, kept as it was */
    printf("testing AC coder\n");
    memset(s.header_state, 0, sizeof(s.header_state));
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);

    for(i=-256; i<256; i++){
        put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
    }
    ff_rac_terminate(&s.c);

    memset(s.header_state, 0, sizeof(s.header_state));
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);

    for(i=-256; i<256; i++){
        int j;
        j= get_symbol(&s.c, s.header_state, 1);
        if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
    }
#endif
    {
        int level, orientation, x, y;
        int64_t errors[8][4];
        int64_t g=0; /* running GCD of all norms measured so far */

        memset(errors, 0, sizeof(errors));
        s.spatial_decomposition_count=3;
        s.spatial_decomposition_type=0;
        for(level=0; level<s.spatial_decomposition_count; level++){
            /* orientation 0 is only visited at level 0 */
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
                int w= width >> (s.spatial_decomposition_count-level);
                int h= height >> (s.spatial_decomposition_count-level);
                int stride= width << (s.spatial_decomposition_count-level);
                DWTELEM *buf= buffer[0];
                int64_t error=0;

                /* move buf to the first coefficient of the selected band */
                if(orientation&1) buf+=w;
                if(orientation>1) buf+=stride>>1;

                /* a lone coefficient in the middle of the band, all else zero */
                memset(buffer[0], 0, sizeof(int)*width*height);
                buf[w/2 + h/2*stride]= 256*256;
                ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
                for(y=0; y<height; y++){
                    for(x=0; x<width; x++){
                        int64_t d= buffer[0][x + y*width];
                        error += d*d;
                        if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
                    }
                    if(FFABS(height/2-y)<9 && level==2) printf("\n");
                }
                /* rounded square root of the summed squares of the response */
                error= (int)(sqrt(error)+0.5);
                errors[level][orientation]= error;
                if(g) g=av_gcd(g, error);
                else g= error;
            }
        }

        /* g stays 0 only if every norm was 0; dividing by 1 then prints the
         * zeros unchanged instead of dividing by zero */
        if(!g)
            g= 1;

        printf("static int const visual_weight[][4]={\n");
        for(level=0; level<s.spatial_decomposition_count; level++){
            printf(" {");
            for(orientation=0; orientation<4; orientation++){
                printf("%8"PRId64",", errors[level][orientation]/g);
            }
            printf("},\n");
        }
        printf("};\n");

        memset(buffer[0], 0, sizeof(int)*width*height);
#if 1
        /* 2x2-periodic pattern, pushed through the forward transform */
        for(y=0; y<height; y++){
            for(x=0; x<width; x++){
                int tab[4]={0,2,3,1};
                buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
            }
        }
        ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
#else
        /* alternative input: constant values written into two bands of
         * level 2, then inverse transformed; the locals it needs live here
         * so that the branch compiles when it is switched on */
        {
            int level=2;
            int w= width >> (s.spatial_decomposition_count-level);
            int h= height >> (s.spatial_decomposition_count-level);
            int stride= width << (s.spatial_decomposition_count-level);
            DWTELEM *buf= buffer[0];

            buf+=w;
            buf+=stride>>1;

            for(y=0; y<h; y++){
                for(x=0; x<w; x++){
                    buf[x + y*stride ]=169;
                    buf[x + y*stride-w]=64;
                }
            }
        }
        ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
#endif
        /* print the 17x17 window around the centre of the result */
        for(y=0; y<height; y++){
            for(x=0; x<width; x++){
                int64_t d= buffer[0][x + y*width];
                if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
            }
            if(FFABS(height/2-y)<9) printf("\n");
        }
    }

    /* let the caller see whether a round-trip check complained */
    return failures ? 1 : 0;
}
#endif