timeliner_feature.cpp
#include "timeliner_feature.h"
#include "timeliner_diagnostics.h"
#include "timeliner_util.h"
#include "timeliner_util_threads.h"
#include <cstring>
#include <cstdio>
#include <cmath>
float gpuMBavailable()
{
// glerror GL_OUT_OF_MEMORY ?
// kernel: [4391985.748987] NVRM: VM: nv_vm_malloc_pages: failed to allocate contiguous memory
// http://developer.download.nvidia.com/opengl/specs/GL_NVX_gpu_memory_info.txt
// AMD/ATI equivalent: WGL_AMD_gpu_association wglGetGPUIDsAMD wglGetGPUInfoAMD
// www.opengl.org/registry/specs/ATI/meminfo.txt
#if 0
#define GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX 0x9047
GLint total_mem_kb = 0;
glGetIntegerv(GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX, &total_mem_kb);
#endif
#define GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX 0x9049
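// On drivers lacking GL_NVX_gpu_memory_info this enum is unrecognized, so glGetIntegerv
// reports GL_INVALID_ENUM and leaves cur_avail_mem_kb at 0; the constructor below then
// warns that no dedicated graphics RAM was found.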
GLint cur_avail_mem_kb = 0;
glGetIntegerv(GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb);
return cur_avail_mem_kb/1000.0F;
// A feature of width 65536 (almost 11 minutes at 100 samples/sec)
// uses ~145 MB texture RAM.
// That's 8 chunks of width 8192.
// 1 chunk is 8192x1 texels * 2 for mipmap * 3 for GL_RGB * 62 vectorsize = 3047424 B = 2.9MB;
// 8 chunks is 23 MB. But 145MB is used?! (RGBA not just RGB?)
}
Feature::Feature(int /*iColormap*/, const std::string& filename, const std::string& dirname): m_fValid(false) {
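// mb (presumably a class-wide flag) caches this graphics-RAM probe, so the GL query and its warning run only for the first Feature constructed.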
if (mb == mbUnknown) {
mb = gpuMBavailable() > 0.0f ? mbPositive : mbZero;
if (!hasGraphicsRAM())
warn("Found no dedicated graphics RAM. Might run slowly.");
}
const Mmap marshaled_file(dirname + "/" + filename);
if (!marshaled_file.valid())
return;
binaryload(marshaled_file.pch(), marshaled_file.cch()); // stuff many member variables
makeMipmaps();
m_fValid = true;
// ~Mmap closes file
}
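// Unmarshal a feature. As read here, the layout is: the feature's name, then four floats
// (colormap index, period, slice count, vector size), then the raw float samples that m_pz
// points into for the remainder of the file.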
void Feature::binaryload(const char* pch, long cch) {
assert(4 == sizeof(float));
strcpy(m_name, pch); pch += strlen(m_name);
m_iColormap = int(*(float*)pch); pch += sizeof(float);
m_period = *(float*)pch; pch += sizeof(float);
const int slices = int(*(float*)pch); pch += sizeof(float);
m_vectorsize = int(*(float*)pch); pch += sizeof(float);
m_pz = (const float*)pch; // m_cz floats. Not doubles.
m_cz = (cch - long(strlen(m_name) + 4*sizeof(float))) / 4;
#ifndef NDEBUG
printf("debugging feature: name %s, colormap %d, period %f, slices %d, width %d, cz %lu.\n", m_name, m_iColormap, m_period, slices, m_vectorsize, m_cz);
#endif
if (m_iColormap<0 || m_period<=0.0 || slices<=0 || m_vectorsize<=0) {
quit("binaryload: feature '" + std::string(m_name) + "' has corrupt data");
}
assert(slices*m_vectorsize == m_cz);
#ifdef NDEBUG
_unused(slices);
#else
#ifndef _MSC_VER
// VS2013 only got std::isnormal and std::fpclassify in July 2013:
// http://blogs.msdn.com/b/vcblog/archive/2013/07/19/c99-library-support-in-visual-studio-2013.aspx
for (int i=0; i<m_cz; ++i) {
if (!std::isnormal(m_pz[i]) && m_pz[i] != 0.0) {
printf("binaryload: feature's float %d of %ld is bogus: class %d, value %f\n", i, m_cz, std::fpclassify(m_pz[i]), m_pz[i]);
quit("");
}
}
#endif
#endif
}
void prepTextureMipmap(const GLuint t)
{
glBindTexture(GL_TEXTURE_1D, t);
assert(glIsTexture(t) == GL_TRUE);
// Prevent bleeding onto opposite edges like a torus.
#ifndef GL_CLAMP_TO_EDGE
#define GL_CLAMP_TO_EDGE 0x812F
#endif
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
//needed? glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_PRIORITY, 0.99);
}
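// lockQueue guards queueChunk, which worker threads append to concurrently in makeTextureMipmapChunk.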
arLock lockQueue;
std::vector<QueueElement> queueChunk; // ;;;; rename queue to vector
void Feature::makeMipmaps() {
// Adaptive subsample is too tricky, until I can better predict GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX.
// (Adapt the prediction itself?? Allocate a few textures of various sizes, and measure reported GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX. But implement this only after getting 2 or 3 different PCs to test it on.)
// Subsampling to coarser than 100 Hz would be pretty limiting.
const char* pch = getenv("timeliner_zoom");
const int zoom = pch ? atoi(pch) : 1;
unsigned subsample = zoom < 1 ? 1 : unsigned(zoom); // Clamp before converting, so a nonpositive value can't wrap to a huge unsigned.
if (subsample > 1)
printf("Subsampling %ux from environment variable timeliner_zoom.\n", subsample);
const int csample = samples();
// Smallest power of two that is at least the (subsampled) number of samples.
unsigned width = 1;
while (width < csample/subsample)
width *= 2;
//printf("feature has %d samples, for tex-chunks' width %d.\n", csample, width);
// Minimize cchunk to conserve RAM and increase FPS.
{
GLint widthLim; // often 2048..8192, rarely 16384, never greater.
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &widthLim);
assert(widthLim >= 0); // because width is unsigned
if (width > unsigned(widthLim)) assert(width%widthLim==0); // everything is a power of two
cchunk = width<unsigned(widthLim) ? 1 : width/widthLim;
//printf("width = %u, cchunk = %d, widthLim = %d\n", width, cchunk, widthLim);
if (width >= unsigned(widthLim))
assert(GLint(width/cchunk) == widthLim);
}
rgTex.resize(cchunk);
for (int ichunk=0; ichunk<cchunk; ++ichunk) {
glGenTextures(m_vectorsize, rgTex[ichunk].tex);
for (int j=0; j < m_vectorsize; ++j)
prepTextureMipmap(rgTex[ichunk].tex[j]);
}
const CHello cacheHTK(m_pz, m_cz, 1.0f/m_period, subsample, m_vectorsize);
glEnable(GL_TEXTURE_1D);
const float mb0 = hasGraphicsRAM() ? gpuMBavailable() : 0.0f;
// One pool for ALL Features would be slightly faster, but risks running out of memory.
// However, any worker pool at all uses more memory.
{
WorkerPool pool;
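// This scope (presumably) joins the pool's workers at the closing brace, so every chunk is queued before the GL uploads in finishMipmap below.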
for (unsigned level=0; (width/cchunk)>>level >= 1; ++level) {
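// One pass per mipmap level: full chunk width, then half, and so on down to one texel.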
//printf(" computing feature's mipmap level %d.\n", level);
makeTextureMipmap(cacheHTK, level, width >> level);
}
}
printf("finishing %lu chunks\n\n\n", queueChunk.size());
for (std::vector<QueueElement>::iterator it = queueChunk.begin(); it != queueChunk.end(); ++it) {
finishMipmap(*it);
}
if (hasGraphicsRAM()) {
const float mb1 = gpuMBavailable();
printf("Feature used %.1f graphics MB; %.0f MB remaining.\n", mb0-mb1, mb1);
if (mb1 < 50.0) {
#ifdef _MSC_VER
warn("Running out of graphics RAM. Try increasing the environment variable timeliner_zoom to 15 or so.");
#else
// Less than 50 MB free may hang X. Mouse responsive, Xorg 100% cpu, network up, console frozen.
// Or, the next request may "go negative", mb0-mb1<0.
warn("Running out of graphics RAM. Try export timeliner_zoom=15.");
#endif
}
}
}
extern double tShowBound[2];
const void Feature::makeTextureMipmapChunk(const CHello& cacheHTK, const int mipmaplevel, const int width, const int ichunk) const {
unsigned char* bufByte = new unsigned char[vectorsize()*width];
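// Ownership of bufByte passes to queueChunk below; finishMipmap uploads it later on the GL thread (its delete is still commented out, so the buffer currently leaks).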
const double chunkL = ichunk / double(cchunk); // e.g., 5/8
const double chunkR = (ichunk+1) / double(cchunk); // e.g., 6/8
printf("makeTextureMipmapChunk 0, vectorsize %d\n", vectorsize());
cacheHTK.getbatchByte(bufByte,
lerp(chunkL, tShowBound[0], tShowBound[1]),
lerp(chunkR, tShowBound[0], tShowBound[1]),
vectorsize(), width, m_iColormap);
arGuard _(lockQueue);
queueChunk.push_back( QueueElement(bufByte, ichunk, width, mipmaplevel) );
}
#include <GL/glx.h>
#include <X11/Xlib.h>
// Multithreaded OpenGL is tricky, brittle, poorly documented.
// So we call OpenGL not from the worker pool but only afterwards.
void Feature::finishMipmap(const QueueElement& arg) {
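// arg.bufByte holds vectorsize() rows of arg.width bytes; each row becomes one mipmap level of the corresponding 1-D texture of this chunk.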
for (int j=0; j<vectorsize(); ++j) {
printf("finishMipmap\n");
//;;;;FAILS assert(glIsTexture(rgTex[arg.ichunk].tex[j]) == GL_TRUE);
glBindTexture(GL_TEXTURE_1D, rgTex[arg.ichunk].tex[j]);
glTexImage1D(GL_TEXTURE_1D, arg.mipmaplevel, GL_INTENSITY8, arg.width, 0, GL_RED, GL_UNSIGNED_BYTE, arg.bufByte + arg.width*j);
}
//;;;;?last one FAILS delete [] arg.bufByte;
}
extern WorkerPool* pool;
const void Feature::makeTextureMipmap(const CHello& cacheHTK, const int mipmaplevel, int width) const {
assert(vectorsize() <= vecLim);
assert(width % cchunk == 0);
width /= cchunk;
//if (mipmaplevel<3) printf(" Computing %d mipmaps of width %d.\n", cchunk, width);
//printf("vectorsize %d\n", vectorsize());
#if 1
for (int ichunk=0; ichunk<cchunk; ++ichunk) {
pool->task(new WorkerArgs(*this, cacheHTK, mipmaplevel, width, ichunk));
}
#else
unsigned char* bufByte = new unsigned char[vectorsize()*width];
for (int ichunk=0; ichunk<cchunk; ++ichunk) {
const double chunkL = ichunk / double(cchunk); // e.g., 5/8
const double chunkR = (ichunk+1) / double(cchunk); // e.g., 6/8
printf("makeTextureMipmapChunk 0, vectorsize %d\n", vectorsize());
cacheHTK.getbatchByte(bufByte,
lerp(chunkL, tShowBound[0], tShowBound[1]),
lerp(chunkR, tShowBound[0], tShowBound[1]),
vectorsize(), width, m_iColormap);
for (int j=0; j<vectorsize(); ++j) {
assert(glIsTexture(rgTex[ichunk].tex[j]) == GL_TRUE);
glBindTexture(GL_TEXTURE_1D, rgTex[ichunk].tex[j]);
glTexImage1D(GL_TEXTURE_1D, mipmaplevel, GL_INTENSITY8, width, 0, GL_RED, GL_UNSIGNED_BYTE, bufByte + width*j);
}
}
delete [] bufByte;
#endif
}