tl updates

gliptic · Aug 4, 2016 · f7b8c3a · f7b8c3a
1 parent 7954259
commit f7b8c3a
Show file tree

Hide file tree

Showing 47 changed files with 3,636 additions and 1,077 deletions.
diff --git a/_build/Minimal size.props b/_build/Minimal size.props
@@ -38,13 +38,19 @@
       <SmallerTypeCheck>false</SmallerTypeCheck>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <BufferSecurityCheck>false</BufferSecurityCheck>
+      <ControlFlowGuard>false</ControlFlowGuard>
+      <ExceptionHandling>false</ExceptionHandling>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DisableSpecificWarnings>4577</DisableSpecificWarnings>
     </ClCompile>
     <Link>
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
     <Link>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <AdditionalOptions>/MERGE:.rdata=.text %(AdditionalOptions)</AdditionalOptions>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <IgnoreAllDefaultLibraries>true</IgnoreAllDefaultLibraries>
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup />

diff --git a/_build/Tl Standard Config.props b/_build/Tl Standard Config.props
@@ -12,6 +12,10 @@
       <PreprocessorDefinitions>_CRT_NONSTDC_NO_WARNINGS;_SECURE_SCL=0;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ObjectFileName>$(IntDir)\%(Directory)</ObjectFileName>
       <FloatingPointModel>Precise</FloatingPointModel>
+      <WarningLevel>Level4</WarningLevel>
+      <ExceptionHandling>false</ExceptionHandling>
+      <RuntimeTypeInfo>false</RuntimeTypeInfo>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
     </ClCompile>
   </ItemDefinitionGroup>
   <ItemGroup />

diff --git a/_build/msvcrt6.lib b/_build/msvcrt6.lib
diff --git a/_build/msvcrt64.lib b/_build/msvcrt64.lib
diff --git a/_build/tl.vcxproj b/_build/tl.vcxproj
diff --git a/_build/tl.vcxproj.filters b/_build/tl.vcxproj.filters
@@ -224,6 +224,18 @@
     <ClCompile Include="..\windows\runtime_vcpp.cpp">
       <Filter>windows</Filter>
     </ClCompile>
+    <ClCompile Include="..\image.cpp" />
+    <ClCompile Include="..\windows\stream.cpp">
+      <Filter>windows</Filter>
+    </ClCompile>
+    <ClCompile Include="..\stream.cpp" />
+    <ClCompile Include="..\approxmath\sincos.cpp">
+      <Filter>approxmath</Filter>
+    </ClCompile>
+    <ClCompile Include="..\string_set.cpp" />
+    <ClCompile Include="..\vec.cpp" />
+    <ClCompile Include="..\string.cpp" />
+    <ClCompile Include="..\bits.c" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\inflate.h" />
@@ -288,11 +300,25 @@
     <ClInclude Include="..\utf8.h" />
     <ClInclude Include="..\strscan.h" />
     <ClInclude Include="..\char.h" />
-    <ClInclude Include="..\vec.hpp" />
-    <ClInclude Include="..\vector.hpp" />
     <ClInclude Include="..\stream.h" />
     <ClInclude Include="..\stream.hpp" />
     <ClInclude Include="..\vector.h" />
+    <ClInclude Include="..\image.hpp" />
+    <ClInclude Include="..\windows\win.hpp">
+      <Filter>windows</Filter>
+    </ClInclude>
+    <ClInclude Include="..\rect.hpp" />
+    <ClInclude Include="..\string.hpp" />
+    <ClInclude Include="..\approxmath\am.hpp">
+      <Filter>approxmath</Filter>
+    </ClInclude>
+    <ClInclude Include="..\rand.hpp" />
+    <ClInclude Include="..\string_set.hpp" />
+    <ClInclude Include="..\shared_ptr.hpp" />
+    <ClInclude Include="..\vector_old.hpp" />
+    <ClInclude Include="..\vec.hpp" />
+    <ClInclude Include="..\vector.hpp" />
+    <ClInclude Include="..\filesystem.hpp" />
   </ItemGroup>
   <ItemGroup>
     <Filter Include="fdlibm">
@@ -313,4 +339,9 @@
       <Filter>codec</Filter>
     </None>
   </ItemGroup>
+  <ItemGroup>
+    <MASM Include="..\windows\allmul.asm">
+      <Filter>windows</Filter>
+    </MASM>
+  </ItemGroup>
 </Project>
diff --git a/approxmath/am.hpp b/approxmath/am.hpp
@@ -0,0 +1,16 @@
+#ifndef TL_APPROXMATH_HPP
+#define TL_APPROXMATH_HPP 1
+// Declarations for the approxmath routines: two circle constants and sincos().
+#include "../vector.hpp"
+
+namespace tl {
+// Circle constants; `static` gives every translation unit that includes this header its own copy.
+static f64 const pi = 3.1415926535897932384626433832795; // pi
+static f64 const pi2 = 6.283185307179586476925286766559; // 2 * pi
+// Declaration only. NOTE(review): presumably returns sin(x) and cos(x) packed in a VectorD2 -- confirm component order and accuracy at the definition.
+VectorD2 sincos(double x);
+
+} // namespace tl
+
+#endif // TL_APPROXMATH_HPP
+
diff --git a/approxmath/sincos.c b/approxmath/sincos.c
@@ -3,6 +3,187 @@
 #include "../bits.h"
 #include <stdint.h>
 
+
+// am_sinf: approximate sine of x on the low SSE lane -- split off the sign, reduce the scaled magnitude, evaluate a cubic in t*t, re-apply the sign.
+float am_sinf(float x) {
+	uint32_t a, c, d;
+	__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
+	xmm0 = _mm_load_ss(&x);
+	xmm1 = _mm_load_ss((float const*)_ps_am_inv_sign_mask);
+	a = tl_ftourep(x); // presumably the raw bit pattern of x; only bit 31 is kept below
+	xmm0 = _mm_mul_ss(xmm0, _mm_load_ss(_ps_am_2_o_pi)); // scale x (constant is presumably 2/pi -- confirm)
+	xmm0 = _mm_and_ps(xmm0, xmm1); // apply the mask (presumably clears the sign bit -- confirm)
+	a &= 0x80000000; // a = bit 31 of the input representation
+
+	c = _mm_cvttss_si32(xmm0); // truncated integer part of the scaled value
+	xmm1 = _mm_load_ss(_ps_am_1);
+	d = c;
+	d <<= (31 - 1); // move bit 1 of the integer part up to bit 31
+	xmm2 = _mm_cvtsi32_ss(_mm_setzero_ps(), c); // fix: was seeded from the still-uninitialized xmm2; only the low lane is used afterwards
+	c &= 1; // bit 0 selects the entry of _sincos_masks
+	d &= 0x80000000; // d = sign flip derived from bit 1 of the integer part
+
+	xmm0 = _mm_sub_ss(xmm0, xmm2); // f = scaled value minus its integer part
+	xmm6 = _mm_load_ss((float*)&_sincos_masks[c]);
+	xmm0 = _mm_min_ss(xmm0, xmm1); // clamp f to the value loaded from _ps_am_1
+
+	xmm5 = _mm_load_ss(_ps_sincos_p3);
+	xmm1 = _mm_sub_ss(xmm1, xmm0); // (_ps_am_1 value) - f
+
+	xmm1 = _mm_and_ps(xmm1, xmm6); // branch-free select: the difference where mask bits are set ...
+	xmm6 = _mm_andnot_ps(xmm6, xmm0); // ... and f where they are clear
+	xmm1 = _mm_or_ps(xmm1, xmm6);
+	xmm4 = _mm_load_ss(_ps_sincos_p2);
+	xmm0 = xmm1; // t = reduced argument
+
+	xmm1 = _mm_mul_ss(xmm1, xmm1); // t2 = t * t
+	xmm7 = _mm_load_ss(_ps_sincos_p1);
+	a ^= d; // combine the input sign with the flip from the integer part
+	xmm2 = xmm1; // keep t2 for the Horner steps
+	xmm1 = _mm_mul_ss(xmm1, xmm5); // Horner: ((p3*t2 + p2)*t2 + p1)*t2 + p0
+	xmm5 = _mm_load_ss(_ps_sincos_p0);
+	xmm1 = _mm_add_ss(xmm1, xmm4);
+	xmm1 = _mm_mul_ss(xmm1, xmm2);
+	xmm3 = _mm_load_ss((float const*)&a); // reinterpret the sign word as a float lane
+	xmm1 = _mm_add_ss(xmm1, xmm7);
+	xmm1 = _mm_mul_ss(xmm1, xmm2);
+	xmm0 = _mm_or_ps(xmm0, xmm3); // OR the sign bit into t
+	xmm1 = _mm_add_ss(xmm1, xmm5);
+	xmm0 = _mm_mul_ss(xmm0, xmm1); // result = signed t * polynomial
+
+	_mm_store_ss(&x, xmm0); // the by-value parameter doubles as the output slot
+	return x;
+}
+
+#if 0 // NOT Finished: disabled draft -- an am_sincosf that is actually compiled appears further down under #if TL_X86
+am_pair am_sincosf(float x)
+{
+	am_pair r;
+	float temp1, temp2;
+	// a, c, d, s take the values the asm below keeps in eax, ecx, edx, esi.
+	uint32_t a, c, d, s;
+	__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
+	// C restatement of the asm instructions tagged "// ok" below.
+	xmm0 = _mm_load_ss(&x);
+	xmm1 = _mm_load_ss((float const*)_ps_am_inv_sign_mask);
+	a = tl_ftourep(x);
+	xmm0 = _mm_mul_ss(xmm0, _mm_load_ss(_ps_am_2_o_pi));
+	xmm0 = _mm_and_ps(xmm0, xmm1);
+	a &= 0x80000000;
+	// C restatement of the next asm group (cvttss2si .. shl).
+	d = _mm_cvttss_si32(xmm0);
+	c = d;
+	s = d;
+	++d;
+	c <<= (31 - 1);
+	d <<= (31 - 1);
+	// NOTE(review): the asm still contains the instructions restated above and never loads x into xmm0 itself -- finish the port before enabling.
+	__asm
+	{
+		movss	xmm1, _ps_am_inv_sign_mask // ok
+		mov		eax, x // ok
+		mulss	xmm0, _ps_am_2_o_pi // ok
+		andps	xmm0, xmm1 // ok
+		and		eax, 0x80000000 // ok
+
+		cvttss2si	edx, xmm0 // d = _mm_cvttss_si32(xmm0)
+		mov		ecx, edx // c = d
+		mov		esi, edx // s = d
+		add		edx, 0x1 // ++d
+		shl		ecx, (31 - 1) // c <<= (31 - 1)
+		shl		edx, (31 - 1) // d <<= (31 - 1)
+
+		movss	xmm4, _ps_am_1
+		cvtsi2ss	xmm3, esi
+		mov		temp1, eax
+		and		esi, 0x1
+
+		subss	xmm0, xmm3
+		movss	xmm3, _sincos_inv_masks[esi * 4]
+		minss	xmm0, xmm4
+
+		subss	xmm4, xmm0
+
+		movss	xmm6, xmm4
+		andps	xmm4, xmm3
+		and		ecx, 0x80000000
+		movss	xmm2, xmm3
+		andnps	xmm3, xmm0
+		and		edx, 0x80000000
+		movss	xmm7, temp1
+		andps	xmm0, xmm2
+		mov		temp1, ecx
+		mov		temp2, edx
+		orps	xmm4, xmm3
+
+		andnps	xmm2, xmm6
+		orps	xmm0, xmm2
+
+		movss	xmm2, temp1
+		movss	xmm1, xmm0
+		movss	xmm5, xmm4
+		xorps	xmm7, xmm2
+		movss	xmm3, _ps_sincos_p3
+		mulss	xmm0, xmm0
+		mulss	xmm4, xmm4
+		movss	xmm2, xmm0
+		movss	xmm6, xmm4
+		orps	xmm1, xmm7
+		movss	xmm7, _ps_sincos_p2
+		mulss	xmm0, xmm3
+		mulss	xmm4, xmm3
+		movss	xmm3, _ps_sincos_p1
+		addss	xmm0, xmm7
+		addss	xmm4, xmm7
+		movss	xmm7, _ps_sincos_p0
+		mulss	xmm0, xmm2
+		mulss	xmm4, xmm6
+		addss	xmm0, xmm3
+		addss	xmm4, xmm3
+		movss	xmm3, temp2
+		mulss	xmm0, xmm2
+		mulss	xmm4, xmm6
+		orps	xmm5, xmm3
+		addss	xmm0, xmm7
+		addss	xmm4, xmm7
+		mulss	xmm0, xmm1
+		mulss	xmm4, xmm5
+
+		movss r.first, xmm0
+		movss r.second, xmm4
+
+		//ret		16 + 4 + 4 + 8
+		//ret
+	}
+	return r;
+}
+
+#endif
+
+/*
+float am_sinf(float x)
+{
+	float r;
+	_mm_store_ss(&r, am_sin_ss(_mm_load_ss(&x)));
+	return r;
+}*/
+
+// am_sinf_2: float-in/float-out wrapper that passes x through am_sin_ess on the low SSE lane.
+float am_sinf_2(float x) {
+	__m128 const lane = am_sin_ess(_mm_load_ss(&x)); // NOTE(review): am_sin_ess is defined below inside #if TL_X86 but this wrapper is not -- confirm non-x86 builds
+	_mm_store_ss(&x, lane); // the by-value parameter doubles as the output slot
+	return x;
+}
+
+// am_cosf: float-in/float-out wrapper that passes x through am_cos_ss on the low SSE lane.
+float am_cosf(float x) {
+	__m128 const lane = am_cos_ss(_mm_load_ss(&x)); // NOTE(review): am_cos_ss is not defined in the visible part of this file -- confirm it exists wherever this wrapper is built
+	_mm_store_ss(&x, lane); // the by-value parameter doubles as the output slot
+	return x;
+}
+
+#if TL_X86
+
 am_pair am_sincosf(float x)
 {
 	am_pair r;
@@ -141,80 +322,6 @@ float am_sinf_inline(float x)
 	return x;
 }
 
-float am_sinf(float x)
-{
-	uint32_t a, c, d;
-	__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
-	xmm0 = _mm_load_ss(&x);
-	xmm1 = _mm_load_ss((float*)_ps_am_inv_sign_mask);
-	a = tl_ftourep(x);
-	xmm0 = _mm_mul_ss(xmm0, _mm_load_ss(_ps_am_2_o_pi));
-	xmm0 = _mm_and_ps(xmm0, xmm1);
-	a &= 0x80000000;
-
-	c = _mm_cvttss_si32(xmm0);
-	xmm1 = _mm_load_ss(_ps_am_1);
-	d = c;
-	d <<= (31 - 1);
-	xmm2 = _mm_cvtsi32_ss(xmm2, c);
-	c &= 1;
-	d &= 0x80000000;
-
-	xmm0 = _mm_sub_ss(xmm0, xmm2);
-	xmm6 = _mm_load_ss((float*)&_sincos_masks[c]);
-	xmm0 = _mm_min_ss(xmm0, xmm1);
-
-	xmm5 = _mm_load_ss(_ps_sincos_p3);
-	xmm1 = _mm_sub_ss(xmm1, xmm0);
-
-	xmm1 = _mm_and_ps(xmm1, xmm6);
-	xmm6 = _mm_andnot_ps(xmm6, xmm0);
-	xmm1 = _mm_or_ps(xmm1, xmm6);
-	xmm4 = _mm_load_ss(_ps_sincos_p2);
-	xmm0 = xmm1;
-
-	xmm1 = _mm_mul_ss(xmm1, xmm1);
-	xmm7 = _mm_load_ss(_ps_sincos_p1);
-	a ^= d;
-	xmm2 = xmm1;
-	xmm1 = _mm_mul_ss(xmm1, xmm5);
-	xmm5 = _mm_load_ss(_ps_sincos_p0);
-	xmm1 = _mm_add_ss(xmm1, xmm4);
-	xmm1 = _mm_mul_ss(xmm1, xmm2);
-	xmm3 = _mm_load_ss((float const*)&a);
-	xmm1 = _mm_add_ss(xmm1, xmm7);
-	xmm1 = _mm_mul_ss(xmm1, xmm2);
-	xmm0 = _mm_or_ps(xmm0, xmm3);
-	xmm1 = _mm_add_ss(xmm1, xmm5);
-	xmm0 = _mm_mul_ss(xmm0, xmm1);
-
-	_mm_store_ss(&x, xmm0);
-
-	return x;
-}
-
-/*
-float am_sinf(float x)
-{
-	float r;
-	_mm_store_ss(&r, am_sin_ss(_mm_load_ss(&x)));
-	return r;
-}*/
-
-float am_sinf_2(float x)
-{
-	float r;
-	_mm_store_ss(&r, am_sin_ess(_mm_load_ss(&x)));
-	return r;
-}
-
-float am_cosf(float x)
-{
-	float r;
-	_mm_store_ss(&r, am_cos_ss(_mm_load_ss(&x)));
-	return r;
-}
-
 __m128 __declspec(naked) __cdecl am_sin_ss(__m128 x)
 {
 	__asm
@@ -367,4 +474,6 @@ __m128 __declspec(naked) __cdecl am_sin_ess(__m128 x)
 
 		ret
 	}
-}
+}
+
+#endif