00001
00002
00003
00004
00005
00006
00007
00008 #ifndef ZenLib_MemoryUtilsH
00009 #define ZenLib_MemoryUtilsH
00010
00011
00012
#include "ZenLib/Conf.h"
#include "ZenLib/Conf.h"


#include <cstdlib>
#include <cstring>
#ifdef ZENLIB_MEMUTILS_SSE2
#include <emmintrin.h>
#endif //ZENLIB_MEMUTILS_SSE2
00021
00022 namespace ZenLib
00023 {
00024
00025 #ifndef ZENLIB_MEMUTILS_SSE2
00026
00027
// Memory alloc/free (non-SSE2 build: forwarded to plain malloc/free).
// No whitespace may separate the macro name from '(': with a space the
// preprocessor defines an object-like macro whose replacement text starts
// with "(size)", and every use expands to invalid code.
#define malloc_Aligned128(size) \
    malloc (size)
#define free_Aligned128(ptr) \
    free (ptr)
00032
00033
00034
// Non-SSE2 build: every copy helper is a plain alias of memcpy, so each one
// is called as (destination, source, size in bytes).

// Arbitrary size
#define memcpy_Unaligned_Unaligned memcpy
#define memcpy_Aligned128_Unaligned memcpy
#define memcpy_Unaligned_Aligned128 memcpy
#define memcpy_Aligned128_Aligned128 memcpy

// One 128-bit block
// NOTE(review): the SSE2 implementations of the two *_Once128 helpers take
// only (destination, source), while these aliases still need memcpy's third
// argument, so a call site cannot compile unchanged in both builds.
#define memcpy_Unaligned_Unaligned_Once128 memcpy
#define memcpy_Aligned128_Aligned128_Once128 memcpy

// One 1024-bit block
#define memcpy_Unaligned_Unaligned_Once1024 memcpy
#define memcpy_Aligned128_Aligned128_Once1024 memcpy

// Size expressed in bytes, whole 128-bit blocks.
// The SSE2 build defines all four of these; keep the two lists in sync.
#define memcpy_Unaligned_Unaligned_Size128 memcpy
#define memcpy_Aligned128_Unaligned_Size128 memcpy
#define memcpy_Unaligned_Aligned128_Size128 memcpy
#define memcpy_Aligned128_Aligned128_Size128 memcpy
00063
00064 #else // ZENLIB_MEMUTILS_SSE2
00065
00066
00067
00068
// Allocates size bytes aligned on a 128-bit (16-byte) boundary.
// Returns a null pointer when the allocation fails.
// The block must be released with free_Aligned128(), never with free().
// _mm_malloc comes with the SSE intrinsics headers pulled in by
// <emmintrin.h>, so unlike _aligned_malloc it is not tied to the MSVC CRT.
inline void* malloc_Aligned128 (size_t size)
{
    return _mm_malloc (size, 16);
}

// Releases a block obtained from malloc_Aligned128().
inline void free_Aligned128 ( void *ptr )
{
    _mm_free (ptr);
}
00078
00079
00080
00081
// Copies num bytes from source to destination; neither pointer needs any
// particular alignment (unaligned SSE2 loads and stores).
// The num/16 whole 128-bit blocks are moved first, then the remaining
// num%16 bytes are copied one at a time.
inline void memcpy_Unaligned_Unaligned (void* destination, const void* source, size_t num)
{
    const size_t blockCount=num>>4; // Whole 128-bit blocks
    const size_t tailCount=num&0xF; // Bytes left after the last whole block
    __m128i* out=(__m128i*)destination;
    const __m128i* in=(const __m128i*)source;

    for (size_t block=0; block<blockCount; block++)
        _mm_storeu_si128 (out+block, _mm_loadu_si128(in+block));

    char* outTail=(char*)(out+blockCount);
    const char* inTail=(const char*)(in+blockCount);
    for (size_t byte=0; byte<tailCount; byte++)
        outTail[byte]=inTail[byte];
}
00097
// Copies num bytes from a 128-bit aligned source to a destination of any
// alignment (aligned SSE2 loads, unaligned stores).
// Whole 128-bit blocks go first, the last num%16 bytes are copied bytewise.
inline void memcpy_Aligned128_Unaligned (void* destination, const void* source, size_t num)
{
    size_t blocksLeft=num>>4;
    size_t bytesLeft=num&0xF;
    __m128i* writePos=(__m128i*)destination;
    const __m128i* readPos=(const __m128i*)source;

    for (; blocksLeft; blocksLeft--)
    {
        const __m128i block=_mm_load_si128(readPos); // source must be 16-byte aligned
        _mm_storeu_si128 (writePos, block);
        readPos++;
        writePos++;
    }

    char* writeByte=(char*)writePos;
    const char* readByte=(const char*)readPos;
    for (; bytesLeft; bytesLeft--)
        *writeByte++=*readByte++;
}
00113
00114
00115
00116
// Copies num bytes from a source of any alignment to a 128-bit aligned
// destination (unaligned SSE2 loads, non-temporal _mm_stream_si128 stores).
// Whole 128-bit blocks go first, the last num%16 bytes are copied bytewise.
inline void memcpy_Unaligned_Aligned128 (void* destination, const void* source, size_t num)
{
    const size_t blockCount=num>>4;
    const size_t tailCount=num&0xF;
    __m128i* out=(__m128i*)destination;
    const __m128i* in=(const __m128i*)source;

    size_t block=0;
    while (block<blockCount)
    {
        _mm_stream_si128 (out+block, _mm_loadu_si128(in+block)); // destination must be 16-byte aligned
        block++;
    }

    char* outTail=(char*)(out+blockCount);
    const char* inTail=(const char*)(in+blockCount);
    size_t byte=0;
    while (byte<tailCount)
    {
        outTail[byte]=inTail[byte];
        byte++;
    }
}
00132
00133
00134
00135
// Copies exactly one 128-bit block (16 bytes) from source to destination;
// neither pointer needs any particular alignment.
inline void memcpy_Unaligned_Unaligned_Once128 (void* destination, const void* source)
{
    const __m128i block=_mm_loadu_si128((const __m128i*)source);
    _mm_storeu_si128 ((__m128i*)destination, block);
}
00140
00141
00142
00143
// Copies num bytes between two 128-bit aligned buffers (aligned SSE2 loads,
// non-temporal _mm_stream_si128 stores).
// Whole 128-bit blocks go first, the last num%16 bytes are copied bytewise.
inline void memcpy_Aligned128_Aligned128 (void* destination, const void* source, size_t num)
{
    const size_t blockCount=num>>4; // Whole 128-bit blocks
    const size_t tailCount=num&0xF; // Bytes left after the last whole block
    __m128i* const outBlocks=(__m128i*)destination;
    const __m128i* const inBlocks=(const __m128i*)source;

    for (size_t block=0; block!=blockCount; ++block)
    {
        const __m128i value=_mm_load_si128(inBlocks+block);
        _mm_stream_si128 (outBlocks+block, value);
    }

    char* const outBytes=(char*)(outBlocks+blockCount);
    const char* const inBytes=(const char*)(inBlocks+blockCount);
    for (size_t byte=0; byte!=tailCount; ++byte)
        outBytes[byte]=inBytes[byte];
}
00159
// Copies num/16 whole 128-bit blocks between two 128-bit aligned buffers
// (aligned loads, non-temporal stores). num is given in bytes; any
// remainder of num%16 bytes is NOT copied.
inline void memcpy_Aligned128_Aligned128_Size128 (void* destination, const void* source, size_t num)
{
    const size_t blockCount=num>>4;
    __m128i* out=(__m128i*)destination;
    const __m128i* in=(const __m128i*)source;

    for (size_t block=0; block<blockCount; block++)
        _mm_stream_si128 (out+block, _mm_load_si128(in+block));
}
00169
00170
00171
00172
// Copies exactly 1024 bits (8 blocks of 128 bits, i.e. 128 bytes) from
// source to destination; neither pointer needs any particular alignment.
// The third parameter is ignored (presumably kept so that calls have the
// same shape as the memcpy alias used by the non-SSE2 build).
inline void memcpy_Unaligned_Unaligned_Once1024 (void* destination, const void* source, size_t)
{
    __m128i* destination16=(__m128i*)destination;
    const __m128i* source16=(const __m128i*)source;

    for (size_t block=0; block<8; block++)
        _mm_storeu_si128 (destination16+block, _mm_loadu_si128(source16+block));
}

// Same as above for two 128-bit aligned buffers (aligned loads, non-temporal
// stores). The non-SSE2 build provides this name as a memcpy alias, so it
// has to exist here too for callers to compile in both configurations.
// The third parameter is ignored.
inline void memcpy_Aligned128_Aligned128_Once1024 (void* destination, const void* source, size_t)
{
    __m128i* destination16=(__m128i*)destination;
    const __m128i* source16=(const __m128i*)source;

    for (size_t block=0; block<8; block++)
        _mm_stream_si128 (destination16+block, _mm_load_si128(source16+block));
}
00182
00183
00184
00185
// Copies exactly one 128-bit block (16 bytes) between two 128-bit aligned
// addresses (aligned load, non-temporal _mm_stream_si128 store).
inline void memcpy_Aligned128_Aligned128_Once128 (void* destination, const void* source)
{
    const __m128i block=_mm_load_si128((const __m128i*)source);
    _mm_stream_si128 ((__m128i*)destination, block);
}
00190
00191
00192
00193
// Copies num/16 whole 128-bit blocks from source to destination; neither
// pointer needs any particular alignment. num is given in bytes; any
// remainder of num%16 bytes is NOT copied.
inline void memcpy_Unaligned_Unaligned_Size128 (void* destination, const void* source, size_t num)
{
    const size_t blockCount=num>>4;
    __m128i* out=(__m128i*)destination;
    const __m128i* in=(const __m128i*)source;

    for (size_t block=0; block<blockCount; block++)
        _mm_storeu_si128 (out+block, _mm_loadu_si128(in+block));
}
00203
// Copies num/16 whole 128-bit blocks from a 128-bit aligned source to a
// destination of any alignment (aligned loads, unaligned stores).
// num is given in bytes; any remainder of num%16 bytes is NOT copied.
inline void memcpy_Aligned128_Unaligned_Size128 (void* destination, const void* source, size_t num)
{
    size_t blocksLeft=num>>4;
    __m128i* writePos=(__m128i*)destination;
    const __m128i* readPos=(const __m128i*)source;

    for (; blocksLeft; blocksLeft--)
    {
        _mm_storeu_si128 (writePos, _mm_load_si128(readPos));
        writePos++;
        readPos++;
    }
}
00213
00214
00215
00216
// Copies num/16 whole 128-bit blocks from a source of any alignment to a
// 128-bit aligned destination (unaligned loads, non-temporal stores).
// num is given in bytes; any remainder of num%16 bytes is NOT copied.
inline void memcpy_Unaligned_Aligned128_Size128 (void* destination, const void* source, size_t num)
{
    const size_t blockCount=num>>4;
    __m128i* const outBlocks=(__m128i*)destination;
    const __m128i* const inBlocks=(const __m128i*)source;

    size_t block=0;
    while (block!=blockCount)
    {
        const __m128i value=_mm_loadu_si128(inBlocks+block);
        _mm_stream_si128 (outBlocks+block, value);
        ++block;
    }
}
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258 #endif // ZENLIB_MEMUTILS_SSE2
00259
00260 }
00261
00262 #endif