diff --git a/CMakeLists.txt b/CMakeLists.txt index ee7db5d5c..a21010e8a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,11 @@ add_subdirectory(vgui2/vgui_controls) add_subdirectory(vgui2/vgui_surfacelib) add_subdirectory(soundsystem/lowlevel) add_subdirectory(thirdparty/quickhull) -add_subdirectory(thirdparty/gperftools-2.8.1) #We include this version instead of distro-pkg because there is a false positive in ASAN +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "e2k") + # We use system gperftools-2.5 on OS Elbrus +else() + add_subdirectory(thirdparty/gperftools-2.8.1) #We include this version instead of distro-pkg because there is a false positive in ASAN +endif() add_subdirectory(thirdparty/protobuf-2.5.0/cmake) add_subdirectory(utils/bzip2) add_subdirectory(utils/jpeglib) diff --git a/cmake/source_exe_posix_base.cmake b/cmake/source_exe_posix_base.cmake index 62df57e10..551f6ea13 100644 --- a/cmake/source_exe_posix_base.cmake +++ b/cmake/source_exe_posix_base.cmake @@ -42,7 +42,11 @@ if( LINUXALL AND NOT DEDICATED ) if( LINUX64 ) #target_link_libraries(${OUTBINNAME} "${SRCDIR}/thirdparty/gperftools-2.0/.libs/x86_64/libtcmalloc_minimal.so")# [$LINUX64] #SWITCH BACK to a new version in /thirdparty. Unfortunately ASAN detects a false positive in this library and we need to edit the source. 
- target_link_libraries(${OUTBINNAME} tcmalloc_minimal) + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "e2k") + target_link_libraries(${OUTBINNAME} "/usr/lib/libtcmalloc_minimal.so.4.3.0") # use system gperftools-2.5 on OS Elbrus + else() + target_link_libraries(${OUTBINNAME} tcmalloc_minimal) + endif() else() #$ImpLibExternal "$SRCDIR/thirdparty/gperftools-2.0/.libs/tcmalloc_minimal" [$LINUX32] message(FATAL_ERROR "linux32 not supported in cmake") diff --git a/cmake/source_posix_base.cmake b/cmake/source_posix_base.cmake index 57baf2d73..1140ce425 100644 --- a/cmake/source_posix_base.cmake +++ b/cmake/source_posix_base.cmake @@ -43,7 +43,12 @@ else() message("^^ Not Setting -O for Target") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${LINUX_DEBUG_FLAGS} ${LINUX_FLAGS_COMMON}") else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 ${LINUX_DEBUG_FLAGS} ${LINUX_FLAGS_COMMON}") + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "e2k") + # -O3 on mcst-lcc is approximately equal to -O2 with gcc on x86/ARM + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 ${LINUX_FLAGS_COMMON}") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 ${LINUX_DEBUG_FLAGS} ${LINUX_FLAGS_COMMON}") + endif() endif() endif() endif() diff --git a/engine/engine_inc.cmake b/engine/engine_inc.cmake index 88251372d..e5cafb550 100644 --- a/engine/engine_inc.cmake +++ b/engine/engine_inc.cmake @@ -50,7 +50,11 @@ if( LINUXALL AND (NOT DEDICATED) ) target_link_libraries(${OUTBINNAME} SDL2 rt openal) endif() if( LINUXALL ) - target_link_options(${OUTBINNAME} PRIVATE -L/usr/lib32 -L/usr/lib) + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "e2k") + target_link_options(${OUTBINNAME} PRIVATE -L/usr/lib) + else() + target_link_options(${OUTBINNAME} PRIVATE -L/usr/lib32 -L/usr/lib) + endif() target_compile_options(${OUTBINNAME} PRIVATE -Wno-narrowing -fpermissive) #downgrade some errors to fix build endif() diff --git a/ivp/ivp_collision/ivp_compact_ledge.hxx b/ivp/ivp_collision/ivp_compact_ledge.hxx index 176a35c82..4239d56a3 100755 --- 
a/ivp/ivp_collision/ivp_compact_ledge.hxx +++ b/ivp/ivp_collision/ivp_compact_ledge.hxx @@ -255,7 +255,7 @@ const IVP_Compact_Triangle *IVP_Compact_Edge::get_triangle() const //lwss - x64 fixes ( original is x86 ) #if defined(__i386__) return (IVP_Compact_Triangle *)(((unsigned int)this) & 0xfffffff0); -#elif defined( __x86_64__ ) +#elif defined(__x86_64__) || defined(__e2k__) return (IVP_Compact_Triangle *)(((unsigned long int)this) & 0xFFFFFFFFFFFFFFF0); #else #error fix this for your platform diff --git a/materialsystem/shadersystem.cpp b/materialsystem/shadersystem.cpp index f5c7a9927..bfe6297df 100644 --- a/materialsystem/shadersystem.cpp +++ b/materialsystem/shadersystem.cpp @@ -315,14 +315,14 @@ void CShaderSystem::LoadAllShaderDLLs( ) #if defined( _PS3 ) || defined( _OSX ) LoadShaderDLL( "stdshader_dx9" DLL_EXT_STRING ); #else // _PS3 || _OSX - +#ifndef __e2k__ // Don't load stdshader_dbg module on Elbrus (prevent "Module stdshader_dbg failed to load! Error: ((null))" message) // 360 has the the debug shaders in its dx9 dll if ( IsPC() || !IsX360() ) { // Always need the debug shaders LoadShaderDLL( "stdshader_dbg" ); } - +#endif // Load up standard shader DLLs... 
int dxSupportLevel = HardwareConfig()->GetMaxDXSupportLevel(); Assert( dxSupportLevel >= 60 ); diff --git a/mathlib/sse.cpp b/mathlib/sse.cpp index 73b2343c8..1f42cda82 100644 --- a/mathlib/sse.cpp +++ b/mathlib/sse.cpp @@ -80,7 +80,54 @@ void __cdecl _SSE_VectorMA( const float *start, float scale, const float *direc // SSE implementations of optimized routines: //----------------------------------------------------------------------------- +#ifdef POSIX +const __m128 f3 = _mm_set_ss(3.0f); // 3 as SSE value +const __m128 f05 = _mm_set_ss(0.5f); // 0.5 as SSE value +#endif + +float _SSE_RSqrtAccurate(float a) +{ + +#ifdef _WIN32 + float x; + float half = 0.5f; + float three = 3.f; + + __asm + { + movss xmm3, a; + movss xmm1, half; + movss xmm2, three; + rsqrtss xmm0, xmm3; + + mulss xmm3, xmm0; + mulss xmm1, xmm0; + mulss xmm3, xmm0; + subss xmm2, xmm3; + mulss xmm1, xmm2; + + movss x, xmm1; + } + return x; +#elif POSIX + __m128 xx = _mm_load_ss( &a ); + __m128 xr = _mm_rsqrt_ss( xx ); + __m128 xt; + + xt = _mm_mul_ss( xr, xr ); + xt = _mm_mul_ss( xt, xx ); + xt = _mm_sub_ss( f3, xt ); + xt = _mm_mul_ss( xt, f05 ); + xr = _mm_mul_ss( xr, xt ); + + _mm_store_ss( &a, xr ); + return a; +#else + #error "Not Implemented" +#endif + +} float FASTCALL _SSE_VectorNormalize (Vector& vec) { @@ -91,7 +138,7 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec) #ifdef _WIN32 __declspec(align(16)) float result[4]; #elif POSIX - float result[4] __attribute__((aligned(16))); + float result[4] __attribute__((aligned(16))); #endif float *v = &vec[0]; @@ -133,7 +180,11 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec) r[ 0 ] = vec.x * recipSqrt; r[ 1 ] = vec.y * recipSqrt; r[ 2 ] = vec.z * recipSqrt; - +#elif defined __e2k__ + float rsqrt = _SSE_RSqrtAccurate( v[0] * v[0] + v[1] * v[1] + v[2] * v[2] ); + r[0] = v[0] * rsqrt; + r[1] = v[1] * rsqrt; + r[2] = v[2] * rsqrt; #elif POSIX __asm__ __volatile__( #ifdef ALIGNED_VECTOR @@ -451,7 +502,7 @@ float FastCos( float x ) movss x, 
xmm0 } -#elif defined( _WIN64 ) +#elif defined( _WIN64 ) || defined( __e2k__ ) return cosf( x ); #elif POSIX diff --git a/public/glmgr/glmgrbasics.h b/public/glmgr/glmgrbasics.h index 3d1de7989..2e0e15c8d 100644 --- a/public/glmgr/glmgrbasics.h +++ b/public/glmgr/glmgrbasics.h @@ -178,11 +178,13 @@ float GLMKnobToggle( char *knobname ); #if GLMDEBUG inline void GLMDebugger( void ) { +#ifndef __e2k__ if (GLMDebugChannelMask() & (1<<eDebugger)) { asm ( "int $3" ); } - +#endif // ifndef __e2k__ + if (GLMDebugChannelMask() & (1<<eGLProfiler)) { // we call an obscure GL function which we know has been breakpointed in the OGLP function list diff --git a/public/localize/ilocalize.h b/public/localize/ilocalize.h index a1ca1a93a..0f3d20137 100644 --- a/public/localize/ilocalize.h +++ b/public/localize/ilocalize.h @@ -17,7 +17,7 @@ // unicode character type // for more unicode manipulation functions #include <wchar.h> -#if !defined( _WCHAR_T_DEFINED ) && !defined( _PS3 ) && !defined(__clang__) +#if !defined( _WCHAR_T_DEFINED ) && !defined( _PS3 ) && !defined(__clang__) && !defined(__e2k__) typedef unsigned short wchar_t; #define _WCHAR_T_DEFINED #endif diff --git a/public/materialsystem/imesh.h b/public/materialsystem/imesh.h index 3168572e5..291355b69 100644 --- a/public/materialsystem/imesh.h +++ b/public/materialsystem/imesh.h @@ -1324,7 +1324,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex ) movntps [edi + 16], xmm1 movntps [edi + 32], xmm2 } -#elif defined(GNUC) +#elif defined(GNUC) && !defined(__e2k__) const void *pRead = &vertex; void *pCurrPos = m_pCurrPosition; __asm__ __volatile__ ( @@ -1335,7 +1335,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex ) "movntps %%xmm0, (%1)\n" "movntps %%xmm1, 16(%1)\n" "movntps %%xmm2, 32(%1)\n" - "movntps %%xmm3, 48(%1)\n" + "movntps %%xmm3, 48(%1)\n" :: "r" (pRead), "r" (pCurrPos) : "memory"); #else Error( "Implement CMeshBuilder::FastVertexSSE((dx8)" ); diff --git 
a/public/mathlib/mathlib.h b/public/mathlib/mathlib.h index 48cb49988..95378ea46 100644 --- a/public/mathlib/mathlib.h +++ b/public/mathlib/mathlib.h @@ -532,7 +532,7 @@ void inline SinCos( float radians, float * RESTRICT sine, float * RESTRICT cosin fstp DWORD PTR [edx] fstp DWORD PTR [eax] } -#elif defined( GNUC ) +#elif defined( GNUC ) && !defined( __e2k__ ) //lwss - remove 'register' keyword //register double __cosr, __sinr; double __cosr, __sinr; @@ -1682,7 +1682,7 @@ FORCEINLINE int RoundFloatToInt(float f) fld f fistp nResult } -#elif GNUC +#elif defined( GNUC ) && !defined( __e2k__ ) __asm __volatile__ ( "fistpl %0;": "=m" (nResult): "t" (f) : "st" ); @@ -1729,7 +1729,7 @@ FORCEINLINE unsigned char RoundFloatToByte(float f) fld f fistp nResult } -#elif GNUC +#elif defined( GNUC ) && !defined( __e2k__ ) __asm __volatile__ ( "fistpl %0;": "=m" (nResult): "t" (f) : "st" ); @@ -1767,7 +1767,7 @@ FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f) return __fctiw( f ); #endif #else // !X360 - + #if defined( COMPILER_MSVC32 ) unsigned char nResult[8]; __asm @@ -1776,7 +1776,7 @@ FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f) fistp qword ptr nResult } return *((unsigned long*)nResult); -#elif defined( COMPILER_GCC ) +#elif defined( COMPILER_GCC ) && !defined( __e2k__ ) unsigned char nResult[8]; __asm __volatile__ ( "fistpl %0;": "=m" (nResult): "t" (f) : "st" diff --git a/public/saverestoretypes.h b/public/saverestoretypes.h index 7eff091a0..699606f52 100644 --- a/public/saverestoretypes.h +++ b/public/saverestoretypes.h @@ -64,7 +64,7 @@ class CSaveRestoreSegment const char *StringFromSymbol( int token ); private: -#ifndef _WIN32 +#if !defined _WIN32 && !defined __e2k__ unsigned _rotr ( unsigned val, int shift); #endif unsigned int HashString( const char *pszToken ); @@ -521,7 +521,7 @@ inline const char *CSaveRestoreSegment::StringFromSymbol( int token ) return "<<illegal>>"; } -#ifndef _WIN32 +#if !defined _WIN32 && !defined __e2k__ inline 
unsigned CSaveRestoreSegment::_rotr ( unsigned val, int shift) { register unsigned lobit; /* non-zero means lo bit set */ diff --git a/public/steam/steamtypes.h b/public/steam/steamtypes.h index f74df1a5b..aa55fa069 100644 --- a/public/steam/steamtypes.h +++ b/public/steam/steamtypes.h @@ -24,7 +24,7 @@ typedef unsigned char uint8; #define POSIX 1 #endif -#if defined(__x86_64__) || defined(_WIN64) +#if defined(__x86_64__) || defined(_WIN64) || defined(__e2k__) #define X64BITS #endif diff --git a/public/tier0/hardware_clock_fast.h b/public/tier0/hardware_clock_fast.h index 6706877e1..1a92405dd 100644 --- a/public/tier0/hardware_clock_fast.h +++ b/public/tier0/hardware_clock_fast.h @@ -4,7 +4,7 @@ #include "tier0/platform.h" -#ifdef GNUC +#if defined GNUC && !defined __e2k__ inline int GetHardwareClockFast( void ) { unsigned long long int nRet; @@ -35,15 +35,18 @@ inline int GetHardwareClockFast() } #else +#ifdef __e2k__ +#include <x86intrin.h> +#else #include <intrin.h> - +#endif // ifdef __e2k__ inline int GetHardwareClockFast() { return __rdtsc(); } -#endif +#endif // ifdef _X360 -#endif +#endif // defined GNUC && !defined __e2k__ -#endif \ No newline at end of file +#endif // ifndef TIER0_HARDWARE_TIMER \ No newline at end of file diff --git a/public/tier0/microprofiler.h b/public/tier0/microprofiler.h index 3f75c89a7..e7d80ed11 100644 --- a/public/tier0/microprofiler.h +++ b/public/tier0/microprofiler.h @@ -26,8 +26,11 @@ PLATFORM_INTERFACE int64 GetHardwareClockReliably(); #include <intrin.h> // get __rdtsc #endif +#ifdef __e2k__ +#include <x86intrin.h> +#endif -#if defined(_LINUX) || defined( OSX ) +#if (defined(_LINUX) || defined( OSX )) && !defined(__e2k__) inline unsigned long long GetTimebaseRegister( void ) { #ifdef PLATFORM_64BITS diff --git a/public/tier0/platform.h b/public/tier0/platform.h index d54c70991..7ac05b0df 100644 --- a/public/tier0/platform.h +++ b/public/tier0/platform.h @@ -9,10 +9,14 @@ #ifndef PLATFORM_H #define PLATFORM_H -#if 
defined(__x86_64__) || defined(_WIN64) +#if defined(__x86_64__) || defined(_WIN64) || defined(__e2k__) #define PLATFORM_64BITS 1 #endif +#if defined(__e2k__) +#define PLATFORM_E2K 1 +#endif + #if defined( LINUX ) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406 // based on some Jonathan Wakely macros on the net... #define GCC_DIAG_STR(s) #s @@ -1154,7 +1158,7 @@ typedef void * HINSTANCE; #endif #elif defined( OSX ) #define DebuggerBreak() if ( Plat_IsInDebugSession() ) asm( "int3" ); else { raise(SIGTRAP); } - #elif defined( PLATFORM_CYGWIN ) || defined( PLATFORM_POSIX ) + #elif ( defined( PLATFORM_CYGWIN ) || defined( PLATFORM_POSIX ) ) && !defined( __e2k__ ) #define DebuggerBreak() __asm__( "int $0x3;") #else #define DebuggerBreak() raise(SIGTRAP) @@ -1386,7 +1390,7 @@ typedef int socklen_t; // Works for PS3 inline void SetupFPUControlWord() { -#ifdef _PS3 +#if defined ( _PS3 ) || defined ( __e2k__ ) // TODO: PS3 compiler spits out the following errors: // C:/tmp/ccIN0aaa.s: Assembler messages: // C:/tmp/ccIN0aaa.s(80): Error: Unrecognized opcode: `fnstcw' @@ -1829,6 +1833,10 @@ extern "C" unsigned __int64 __rdtsc(); #pragma intrinsic(__rdtsc) #endif +#if defined( __e2k__ ) +#include <x86intrin.h> // get __rdtsc +#endif + inline uint64 Plat_Rdtsc() { #if defined( _X360 ) @@ -1850,6 +1858,8 @@ inline uint64 Plat_Rdtsc() uint32 lo, hi; __asm__ __volatile__ ( "rdtsc" : "=a" (lo), "=d" (hi)); return ( ( ( uint64 )hi ) << 32 ) | lo; +#elif defined( __e2k__ ) + return ( uint64 )__rdtsc(); #else #error #endif diff --git a/public/tier0/threadtools.h b/public/tier0/threadtools.h index b543b5687..69c2f07df 100644 --- a/public/tier0/threadtools.h +++ b/public/tier0/threadtools.h @@ -235,7 +235,11 @@ inline void ThreadPause() #if defined( COMPILER_PS3 ) __db16cyc(); #elif defined( COMPILER_GCC ) - __asm __volatile( "pause" ); + #ifdef __e2k__ + __asm__ __volatile__ ("nop" : : ); + #else + __asm __volatile( "pause" ); + #endif #elif defined ( COMPILER_MSVC64 ) _mm_pause(); #elif 
defined( COMPILER_MSVC32 ) @@ -306,6 +310,9 @@ inline int32 ThreadInterlockedDecrement( int32 volatile *p ) inline int32 ThreadInterlockedExchange( int32 volatile *p, int32 value ) { Assert( (size_t)p % 4 == 0 ); +#ifdef __e2k__ + return __atomic_exchange_n( p, value, __ATOMIC_SEQ_CST ); // full barrier like x86 XCHG; __sync_lock_test_and_set is acquire-only +#else int32 nRet; // Note: The LOCK instruction prefix is assumed on the XCHG instruction and GCC gets very confused on the Mac when we use it. @@ -315,6 +322,7 @@ inline int32 ThreadInterlockedExchange( int32 volatile *p, int32 value ) : "r" (p), "0" (value) : "memory"); return nRet; +#endif // ifdef __e2k__ } inline int32 ThreadInterlockedExchangeAdd( int32 volatile *p, int32 value ) diff --git a/public/tier0/tslist.h b/public/tier0/tslist.h index 09c031772..ff4b0ecef 100644 --- a/public/tier0/tslist.h +++ b/public/tier0/tslist.h @@ -34,9 +34,9 @@ //----------------------------------------------------------------------------- -#if defined( PLATFORM_64BITS ) +#if defined( PLATFORM_64BITS ) && !defined( PLATFORM_E2K ) -#if defined (PLATFORM_WINDOWS) +#if defined (PLATFORM_WINDOWS) //typedef __m128i int128; //inline int128 int128_zero() { return _mm_setzero_si128(); } #else // PLATFORM_WINDOWS @@ -129,7 +129,7 @@ union TSLIST_HEAD_ALIGN TSLHead_t int16 Depth; int16 Sequence; #endif -#ifdef PLATFORM_64BITS +#if defined( PLATFORM_64BITS ) && !defined( PLATFORM_E2K ) int32 Padding; #endif } value; @@ -140,7 +140,7 @@ union TSLIST_HEAD_ALIGN TSLHead_t int32 DepthAndSequence; } value32; -#ifdef PLATFORM_64BITS +#if defined( PLATFORM_64BITS ) && !defined( PLATFORM_E2K ) int128 value64x128; #else int64 value64x128; @@ -194,7 +194,7 @@ class CTSListBase #ifdef USE_NATIVE_SLIST InitializeSListHead( &m_Head ); -#elif defined(PLATFORM_64BITS) +#elif defined(PLATFORM_64BITS) && !defined(PLATFORM_E2K) m_Head.value64x128 = int128_zero(); #else m_Head.value64x128 = (int64)0; @@ -231,7 +231,7 @@ class CTSListBase __lwsync(); // write-release barrier #endif -#ifdef PLATFORM_64BITS +#if defined( PLATFORM_64BITS 
) && !defined( PLATFORM_E2K ) newHead.value.Padding = 0; #endif for ( ;; ) @@ -268,7 +268,7 @@ class CTSListBase TSLHead_t oldHead; TSLHead_t newHead; -#ifdef PLATFORM_64BITS +#if defined( PLATFORM_64BITS ) && !defined( PLATFORM_E2K ) newHead.value.Padding = 0; #endif for ( ;; ) @@ -307,7 +307,7 @@ class CTSListBase TSLHead_t oldHead; TSLHead_t newHead; -#ifdef PLATFORM_64BITS +#if defined( PLATFORM_64BITS ) && !defined( PLATFORM_E2K ) newHead.value.Padding = 0; #endif do @@ -719,7 +719,7 @@ class TSLIST_HEAD_ALIGN CTSQueue intp sequence; } value; -#ifdef PLATFORM_64BITS +#if defined( PLATFORM_64BITS ) && !defined( PLATFORM_E2K ) int128 value64x128; #else int64 value64x128; diff --git a/studiorender/studiorender.h b/studiorender/studiorender.h index a85dba1d8..2af99450f 100644 --- a/studiorender/studiorender.h +++ b/studiorender/studiorender.h @@ -929,7 +929,7 @@ class CWorldLightAngleWrapper NO_DEFAULT; } -#ifdef _PS3 +#if defined _PS3 || defined __e2k__ Assert( false ); // PS3 doesn't have true __assume (used in NO_DEFAULT), so a return value is expected return 0.0f; #endif @@ -979,7 +979,7 @@ class CWorldLightAngleWrapperConstDirectional NO_DEFAULT; } -#ifdef _PS3 +#if defined _PS3 || defined __e2k__ Assert( false ); // PS3 doesn't have true __assume (used in NO_DEFAULT), so a return value is expected return 0.0f; #endif @@ -996,7 +996,7 @@ inline float CStudioRender::R_WorldLightAngle( const LightDesc_t *wl, const Vect case MATERIAL_LIGHT_SPOT: return CWorldLightAngleWrapper<MATERIAL_LIGHT_SPOT>::WorldLightAngle( wl, lnormal, snormal, delta ); NO_DEFAULT; } -#ifdef _PS3 +#if defined _PS3 || defined __e2k__ Assert( false ); // PS3 doesn't have true __assume (used in NO_DEFAULT), so a return value is expected return 0.0f; #endif diff --git a/thirdparty/RmlUi/CMakeLists.txt b/thirdparty/RmlUi/CMakeLists.txt index 6d9a466e1..f03456e41 100644 --- a/thirdparty/RmlUi/CMakeLists.txt +++ b/thirdparty/RmlUi/CMakeLists.txt @@ -316,13 +316,16 @@ foreach(library 
${LIBRARIES}) #Kisak-Strike memoverride.cpp target_compile_definitions(${NAME} PRIVATE -DLINUX -D_LINUX -DPOSIX -DCOMPILER_GCC -DBASE -DNDEBUG -DGNUC) target_include_directories(${NAME} PRIVATE ../../public) - target_sources(${NAME} PRIVATE ../../public/tier0/memoverride.cpp) + # e2k workaround + if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "e2k") + target_sources(${NAME} PRIVATE ../../public/tier0/memoverride.cpp) + endif() #end set_target_properties(${NAME} PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR} ) - + if (MSVC) target_compile_options(${NAME} PUBLIC "/MP") endif(MSVC) diff --git a/tier0/cpu.cpp b/tier0/cpu.cpp index 67d71c039..47ee5c994 100644 --- a/tier0/cpu.cpp +++ b/tier0/cpu.cpp @@ -45,7 +45,7 @@ struct CpuIdResult_t static bool cpuid( unsigned long function, CpuIdResult_t &out ) { -#if defined( _X360 ) || defined( _PS3 ) +#if defined( _X360 ) || defined( _PS3 ) || defined( __e2k__ ) return false; #elif defined(GNUC) unsigned long out_eax,out_ebx,out_ecx,out_edx; @@ -124,7 +124,7 @@ static bool cpuid( unsigned long function, CpuIdResult_t &out ) static bool cpuidex( unsigned long function, unsigned long subfunction, CpuIdResult_t &out ) { -#if defined( _X360 ) || defined( _PS3 ) +#if defined( _X360 ) || defined( _PS3 ) || defined( __e2k__ ) return false; #elif defined(GNUC) unsigned long out_eax, out_ebx, out_ecx, out_edx; @@ -218,7 +218,7 @@ static CpuIdResult_t cpuidex( unsigned long function, unsigned long subfunction //----------------------------------------------------------------------------- static bool IsWin98OrOlder() { -#if defined( _X360 ) || defined( _PS3 ) || defined( POSIX ) +#if defined( _X360 ) || defined( _PS3 ) || defined( __e2k__ ) || defined( POSIX ) return false; #else bool retval = false; @@ -260,18 +260,38 @@ static bool IsWin98OrOlder() #endif } +static bool CheckMMXTechnology(void) +{ +#if defined( _X360 ) || defined( _PS3 ) + return false; +#elif defined( __e2k__ ) + #if defined( __MMX__ ) + return true; + 
#else + return false; + #endif +#else + return ( cpuid( 1 ).edx & 0x800000 ) != 0; // bit 23 of EDX +#endif +} static bool CheckSSETechnology(void) { #if defined( _X360 ) || defined( _PS3 ) return true; +#elif defined( __e2k__ ) + #if defined( __SSE__ ) + return true; + #else + return false; + #endif #else if ( IsWin98OrOlder() ) { return false; } - return ( cpuid( 1 ).edx & 0x2000000L ) != 0; + return ( cpuid( 1 ).edx & 0x2000000L ) != 0; // bit 25 of EDX #endif } @@ -279,8 +299,14 @@ static bool CheckSSE2Technology(void) { #if defined( _X360 ) || defined( _PS3 ) return false; +#elif defined( __e2k__ ) + #if defined( __SSE2__ ) + return true; + #else + return false; + #endif #else - return ( cpuid( 1 ).edx & 0x04000000 ) != 0; + return ( cpuid( 1 ).edx & 0x04000000 ) != 0; // bit 26 of EDX #endif } @@ -288,6 +314,12 @@ bool CheckSSE3Technology(void) { #if defined( _X360 ) || defined( _PS3 ) return false; +#elif defined(__e2k__ ) + #if defined( __SSE3__ ) + return true; + #else + return false; + #endif #else return ( cpuid( 1 ).ecx & 0x00000001 ) != 0; // bit 1 of ECX #endif @@ -297,6 +329,12 @@ bool CheckSSSE3Technology(void) { #if defined( _X360 ) || defined( _PS3 ) return false; +#elif defined( __e2k__ ) + #if defined( __SSSE3__ ) + return true; + #else + return false; + #endif #else // SSSE 3 is implemented by both Intel and AMD // detection is done the same way for both vendors @@ -308,6 +346,12 @@ bool CheckSSE41Technology(void) { #if defined( _X360 ) || defined( _PS3 ) return false; +#elif defined( __e2k__ ) + #if defined( __SSE4_1__ ) + return true; + #else + return false; + #endif #else // SSE 4.1 is implemented by both Intel and AMD // detection is done the same way for both vendors @@ -320,6 +364,12 @@ bool CheckSSE42Technology(void) { #if defined( _X360 ) || defined( _PS3 ) return false; +#elif defined( __e2k__ ) + #if defined( __SSE4_2__ ) + return true; + #else + return false; + #endif #else // SSE4.2 is an Intel-only feature @@ -331,11 +381,31 @@ 
bool CheckSSE42Technology(void) #endif } +bool CheckAVXTechnology(void) +{ +#if defined( _X360 ) || defined( _PS3 ) + return false; +#elif defined( __e2k__ ) + #if defined( __AVX__ ) + return true; + #else + return false; + #endif +#else + return ( cpuid( 1 ).ecx & ( 1 << 28 ) ) != 0; // bit 28 of ECX +#endif +} -bool CheckSSE4aTechnology( void ) +bool CheckSSE4aTechnology(void) { #if defined( _X360 ) || defined( _PS3 ) return false; +#elif defined( __e2k__ ) + #if defined( __SSE4A__ ) + return true; + #else + return false; + #endif #else // SSE 4a is an AMD-only feature @@ -347,11 +417,16 @@ bool CheckSSE4aTechnology( void ) #endif } - static bool Check3DNowTechnology(void) { #if defined( _X360 ) || defined( _PS3 ) return false; +#elif defined( __e2k__ ) + #if defined( __3dNOW__ ) + return true; + #else + return false; + #endif #else if ( cpuid( 0x80000000 ).eax > 0x80000000L ) { @@ -361,9 +436,9 @@ static bool Check3DNowTechnology(void) #endif } -static bool CheckCMOVTechnology() +static bool CheckCMOVTechnology(void) { -#if defined( _X360 ) || defined( _PS3 ) +#if defined( _X360 ) || defined( _PS3 ) || defined( __e2k__ ) return false; #else return ( cpuid( 1 ).edx & ( 1 << 15 ) ) != 0; @@ -372,7 +447,7 @@ static bool CheckCMOVTechnology() static bool CheckFCMOVTechnology(void) { -#if defined( _X360 ) || defined( _PS3 ) +#if defined( _X360 ) || defined( _PS3 ) || defined( __e2k__ ) return false; #else return ( cpuid( 1 ).edx & ( 1 << 16 ) ) != 0; @@ -383,6 +458,8 @@ static bool CheckRDTSCTechnology(void) { #if defined( _X360 ) || defined( _PS3 ) return false; +#elif defined( __e2k__ ) + return true; #else return ( cpuid( 1 ).edx & 0x10 ) != 0; #endif @@ -407,6 +484,8 @@ const tchar* GetProcessorVendorId() { #if defined( _X360 ) || defined( _PS3 ) return "PPC"; +#elif defined( __e2k__ ) + return "MCST"; #else if ( s_bCpuVendorIdInitialized ) { @@ -448,6 +527,9 @@ const tchar* GetProcessorBrand() return "Xenon"; #elif defined( _PS3 ) return "Cell Broadband Engine"; 
+#elif defined( __e2k__ ) + return __builtin_cpu_name(); + // e.g. "elbrus-8c" #else if ( s_bCpuBrandInitialized ) { @@ -483,6 +565,8 @@ static bool HTSupported(void) // not entirtely sure about the semantic of HT support, it being an intel name // are we asking about HW threads or HT? return true; +#elif defined( __e2k__ ) + return false; #else enum { HT_BIT = 0x10000000, // EDX[28] - Bit 28 set indicates Hyper-Threading Technology is supported in hardware. @@ -515,6 +599,27 @@ static uint8 LogicalProcessorsPerPackage(void) { #if defined( _X360 ) return 2; +#elif defined( __e2k__ ) + if( __builtin_cpu_is("elbrus-16c") ) + { + return 16; + } + else if( __builtin_cpu_is("elbrus-12c") ) + { + return 12; + } + else if( __builtin_cpu_is("elbrus-8c") || __builtin_cpu_is("elbrus-8c2") ) + { + return 8; + } + else if( __builtin_cpu_is("elbrus-2c3") ) + { + return 2; + } + else + { + return 1; + } #else // EBX[23:16] indicate number of logical processors per package const unsigned NUM_LOGICAL_BITS = 0x00FF0000; @@ -540,7 +645,7 @@ static int64 CalculateClockSpeed() #if defined( _X360 ) || defined(_PS3) // Xbox360 and PS3 have the same clock speed and share a lot of characteristics on PPU return 3200000000LL; -#else +#else #if defined( _WIN32 ) LARGE_INTEGER waitTime, startCount, curCount; CCycleCount start, end; @@ -745,6 +850,21 @@ const CPUInformation& GetCPUInformation() pi.m_nLogicalProcessors = 1; } #elif defined(LINUX) +#if defined(__e2k__) // MCST Elbrus 2000 + pi.m_nLogicalProcessors = 1; + pi.m_nPhysicalProcessors = 1; + // e2k CPU don't have "core id" and "physical id" in "/proc/cpuinfo" (but have "processor") + // and don't have Hyper-Threading (HT) technology + // used sysconf() to count CPU cores + //pi.m_nLogicalProcessors = sysconf( _SC_NPROCESSORS_CONF ); // _SC_NPROCESSORS_ONLN may not be reliable on ARM/Android + //pi.m_nPhysicalProcessors = pi.m_nLogicalProcessors; // hack for CPU without Hyper-Threading (HT) technology + + // FIXME + // have to use 
m_nLogicalProcessors = 1 and m_nPhysicalProcessors = 1 (no matter how many core e2k CPU has) + // because otherwise there will be problems with creating threads (CThreadSafeMultiMemoryPool::Alloc(unsigned int)) + // and with render (CMeshBuilder::Begin(IMesh*, MaterialPrimitiveType_t, int, int, MeshBuffersAllocationSettings_t*)) + +#else // x86/x86-64 pi.m_nLogicalProcessors = 0; pi.m_nPhysicalProcessors = 0; const int k_cMaxProcessors = 256; @@ -798,7 +918,7 @@ const CPUInformation& GetCPUInformation() pi.m_nPhysicalProcessors = 1; Assert( !"couldn't read cpu information from /proc/cpuinfo" ); } - +#endif // ifdef __e2k__ #elif defined(OSX) int num_phys_cpu = 1, num_log_cpu = 1; @@ -810,6 +930,27 @@ const CPUInformation& GetCPUInformation() #endif +#if defined(__e2k__) + // e2k CPU don't have CPUID + + // Determine Processor Features: + pi.m_bRDTSC = CheckRDTSCTechnology(); + pi.m_bCMOV = CheckCMOVTechnology(); + pi.m_bFCMOV = CheckFCMOVTechnology(); + pi.m_bMMX = CheckMMXTechnology(); + pi.m_bSSE = CheckSSETechnology(); + pi.m_bSSE2 = CheckSSE2Technology(); + pi.m_bSSE3 = CheckSSE3Technology(); + pi.m_bSSSE3 = CheckSSSE3Technology(); + pi.m_bSSE4a = CheckSSE4aTechnology(); + pi.m_bSSE41 = CheckSSE41Technology(); + pi.m_bSSE42 = CheckSSE42Technology(); + pi.m_b3DNow = Check3DNowTechnology(); + pi.m_bAVX = CheckAVXTechnology(); + pi.m_szProcessorID = ( tchar* )GetProcessorVendorId(); // MCST + pi.m_szProcessorBrand = ( tchar* )GetProcessorBrand(); // e.g. "elbrus-8c" + pi.m_bHT = HTSupported(); +#else // x86/x86-64 CpuIdResult_t cpuid0 = cpuid( 0 ); if ( cpuid0.eax >= 1 ) { @@ -920,6 +1061,7 @@ const CPUInformation& GetCPUInformation() pi.m_nL2CacheSizeKb = ( cpuid( 0x80000006 ).ecx >> 16 ); } } +#endif return pi; } diff --git a/tier1/pathmatch.cpp b/tier1/pathmatch.cpp index 9746841f0..9fcbeca5e 100644 --- a/tier1/pathmatch.cpp +++ b/tier1/pathmatch.cpp @@ -66,7 +66,11 @@ static bool s_bShowDiag; #define DEBUG_MSG( ... 
) if ( s_bShowDiag ) fprintf( stderr, ##__VA_ARGS__ ) -#define DEBUG_BREAK() __asm__ __volatile__ ( "int $3" ) +#ifdef __e2k__ + #define DEBUG_BREAK() raise(SIGTRAP) +#else + #define DEBUG_BREAK() __asm__ __volatile__ ( "int $3" ) +#endif #define _COMPILE_TIME_ASSERT(pred) switch(0){case 0:case pred:;} #define WRAP( fn, ret, ... ) \ diff --git a/tier1/processor_detect_linux.cpp b/tier1/processor_detect_linux.cpp index 0421b7eb3..c9709cae0 100644 --- a/tier1/processor_detect_linux.cpp +++ b/tier1/processor_detect_linux.cpp @@ -16,6 +16,7 @@ // Turn off memdbg macros (turned on up top) since this is included like a header #include "tier0/memdbgoff.h" +#ifndef __e2k__ // e2k CPU don't have CPUID static void cpuid(uint32 function, uint32& out_eax, uint32& out_ebx, uint32& out_ecx, uint32& out_edx) { #if defined(PLATFORM_64BITS) @@ -40,33 +41,65 @@ static void cpuid(uint32 function, uint32& out_eax, uint32& out_ebx, uint32& out ); #endif } +#endif // ifndef __e2k__ bool CheckMMXTechnology(void) { +#if defined(__e2k__) + #if defined(__MMX__) + return true; + #else + return false; + #endif +#else uint32 eax,ebx,edx,unused; cpuid(1,eax,ebx,unused,edx); return edx & 0x800000; +#endif } bool CheckSSETechnology(void) { +#if defined(__e2k__) + #if defined(__SSE__) + return true; + #else + return false; + #endif +#else uint32 eax,ebx,edx,unused; cpuid(1,eax,ebx,unused,edx); return edx & 0x2000000L; +#endif } bool CheckSSE2Technology(void) { +#if defined(__e2k__) + #if defined(__SSE2__) + return true; + #else + return false; + #endif +#else uint32 eax,ebx,edx,unused; cpuid(1,eax,ebx,unused,edx); return edx & 0x04000000; +#endif } bool Check3DNowTechnology(void) { +#if defined(__e2k__) + #if defined(__3dNOW__) + return true; + #else + return false; + #endif +#else uint32 eax, unused; cpuid(0x80000000,eax,unused,unused,unused); @@ -76,5 +109,5 @@ bool Check3DNowTechnology(void) return ( eax & 1<<31 ); } return false; +#endif } -