@@ -10,17 +10,20 @@ import {
10
10
ClickHouseEvent ,
11
11
EventDefinitionType ,
12
12
EventPropertyType ,
13
+ GroupTypeToColumnIndex ,
13
14
Hub ,
14
15
PluginServerService ,
15
16
PropertyDefinitionType ,
16
17
PropertyDefinitionTypeEnum ,
17
18
PropertyType ,
18
19
RawClickHouseEvent ,
19
- ResolvedGroups ,
20
+ TeamId ,
20
21
} from '../types'
21
22
import { parseRawClickHouseEvent } from '../utils/event'
22
23
import { status } from '../utils/status'
23
24
import { UUIDT } from '../utils/utils'
25
+ import { GroupTypeManager } from '../worker/ingestion/group-type-manager'
26
+ import { TeamManager } from '../worker/ingestion/team-manager'
24
27
import { PropertyDefsDB } from './services/property-defs-db'
25
28
import {
26
29
getPropertyType ,
@@ -32,6 +35,8 @@ import {
32
35
// Must require as `tsc` strips unused `import` statements and just requiring this seems to init some globals
33
36
require ( '@sentry/tracing' )
34
37
38
+ const BATCH_SIZE = 100
39
+
35
40
// TODO(eli): wire up LOTS more metrics ASAP!
36
41
37
42
const propertyDefTypesCounter = new Counter ( {
@@ -59,8 +64,8 @@ const propDefDroppedCounter = new Counter({
59
64
export type CollectedPropertyDefinitions = {
60
65
// looked up prior to event/prop extraction
61
66
knownTeamIds : Set < number >
62
- // looked up prior to event/prop extraction
63
- resolvedTeamGroups : Record < number , ResolvedGroups >
67
+ // looked up prior to event/prop extraction. map of project_id => group_type => group_index
68
+ resolvedTeamGroups : Record < number , GroupTypeToColumnIndex >
64
69
// known team ID => deduped event definitions (inner record is keyed by string)
65
70
eventDefinitionsById : Record < number , Record < string , EventDefinitionType > >
66
71
// known team ID => deduped properties
@@ -79,6 +84,8 @@ export class PropertyDefsConsumer {
79
84
80
85
private batchConsumer ?: BatchConsumer
81
86
private propertyDefsDB : PropertyDefsDB
87
+ private teamManager : TeamManager
88
+ private groupTypeManager : GroupTypeManager
82
89
private isStopping = false
83
90
protected heartbeat = ( ) => { }
84
91
protected promises : Set < Promise < any > > = new Set ( )
@@ -87,6 +94,8 @@ export class PropertyDefsConsumer {
87
94
this . groupId = hub . PROPERTY_DEFS_CONSUMER_GROUP_ID
88
95
this . topic = hub . PROPERTY_DEFS_CONSUMER_CONSUME_TOPIC
89
96
this . propertyDefsDB = new PropertyDefsDB ( hub )
97
+ this . teamManager = new TeamManager ( hub . postgres )
98
+ this . groupTypeManager = new GroupTypeManager ( hub . postgres , this . teamManager )
90
99
}
91
100
92
101
public get service ( ) : PluginServerService {
@@ -133,7 +142,9 @@ export class PropertyDefsConsumer {
133
142
}
134
143
135
144
public async handleKafkaBatch ( messages : Message [ ] ) {
136
- const parsedMessages = await this . runInstrumented ( 'parseKafkaMessages' , ( ) => this . parseKafkaBatch ( messages ) )
145
+ const parsedMessages : ClickHouseEvent [ ] = await this . runInstrumented ( 'parseKafkaMessages' , ( ) =>
146
+ this . parseKafkaBatch ( messages )
147
+ )
137
148
138
149
// used to filter and dedup to minimum batch of writable records
139
150
const collected : CollectedPropertyDefinitions = {
@@ -144,18 +155,18 @@ export class PropertyDefsConsumer {
144
155
eventPropertiesById : { } ,
145
156
}
146
157
147
- const teamsInBatch = this . extractTeamIds ( parsedMessages )
148
- const groupTeamsInBatch = this . extractGroupTeamIds ( parsedMessages , collected . knownTeamIds )
158
+ const eventTeamIds = parsedMessages . map ( ( msg ) => msg . team_id as TeamId )
159
+ const groupTeamIds = parsedMessages . filter ( ( msg ) => msg . event == '$groupidentify' ) . map ( ( msg ) => msg . team_id )
149
160
150
- const [ knownTeamIds , resolvedTeamGroups ] = await Promise . all ( [
151
- this . runInstrumented ( 'resolveTeams' , ( ) => this . resolveTeams ( this . propertyDefsDB , teamsInBatch ) ) ,
152
- this . runInstrumented ( 'resolveGroupsForTeams ' , ( ) =>
153
- this . resolveGroupsForTeams ( this . propertyDefsDB , groupTeamsInBatch )
161
+ const [ knownTeamIds , resolvedProjectGroups ] = await Promise . all ( [
162
+ this . runInstrumented ( 'resolveTeams' , ( ) => this . teamManager . validateTeamIds ( eventTeamIds ) ) ,
163
+ this . runInstrumented ( 'resolveProjectGroupTypeIndices ' , ( ) =>
164
+ this . groupTypeManager . fetchGroupTypesIndicesForTeams ( groupTeamIds )
154
165
) ,
155
166
] )
156
167
157
- collected . knownTeamIds = knownTeamIds
158
- collected . resolvedTeamGroups = resolvedTeamGroups
168
+ collected . knownTeamIds = new Set ( knownTeamIds )
169
+ collected . resolvedTeamGroups = resolvedProjectGroups
159
170
160
171
console . log ( '🔁' , `Event batch teams and group indices resolved` )
161
172
@@ -167,38 +178,47 @@ export class PropertyDefsConsumer {
167
178
console . log ( '🔁' , `Property definitions collected` , JSON . stringify ( collected , null , 2 ) )
168
179
169
180
// Write the collected event definitions, per team, in chunks of at most BATCH_SIZE records.
for (const knownTeamId in collected.eventDefinitionsById) {
    let buffer: EventDefinitionType[] = []
    for (const key in collected.eventDefinitionsById[knownTeamId]) {
        const eventDef = collected.eventDefinitionsById[knownTeamId][key]
        buffer.push(eventDef)
        eventDefTypesCounter.inc()

        if (buffer.length === BATCH_SIZE) {
            status.info('🔁', `Writing event definition batch of size ${buffer.length}`)
            void this.scheduleWork(this.propertyDefsDB.writeEventDefinitionsBatch(buffer))
            // Start a fresh array: the scheduled write still holds a reference to the old one.
            buffer = []
        }
    }

    // Flush the remainder. Without this, any team contributing fewer than BATCH_SIZE
    // event definitions (or the tail of a larger set) would never be written.
    if (buffer.length > 0) {
        status.info('🔁', `Writing event definition batch of size ${buffer.length}`)
        void this.scheduleWork(this.propertyDefsDB.writeEventDefinitionsBatch(buffer))
    }
}
180
194
181
195
// Write the collected property definitions, per team, in chunks of at most BATCH_SIZE records.
for (const knownTeamId in collected.propertyDefinitionsById) {
    let buffer: PropertyDefinitionType[] = []
    for (const key in collected.propertyDefinitionsById[knownTeamId]) {
        const propDef: PropertyDefinitionType = collected.propertyDefinitionsById[knownTeamId][key]
        buffer.push(propDef)
        propertyDefTypesCounter.inc({ type: propDef.property_type?.toString() ?? 'unknown' })

        if (buffer.length === BATCH_SIZE) {
            status.info('🔁', `Writing property definitions batch of size ${buffer.length}`)
            void this.scheduleWork(this.propertyDefsDB.writePropertyDefinitionsBatch(buffer))
            // Start a fresh array: the scheduled write still holds a reference to the old one.
            buffer = []
        }
    }

    // Flush the remainder. Without this, any team contributing fewer than BATCH_SIZE
    // property definitions (or the tail of a larger set) would never be written.
    if (buffer.length > 0) {
        status.info('🔁', `Writing property definitions batch of size ${buffer.length}`)
        void this.scheduleWork(this.propertyDefsDB.writePropertyDefinitionsBatch(buffer))
    }
}
192
209
193
210
// Write the collected event properties, per team, in chunks of at most BATCH_SIZE records.
for (const knownTeamId in collected.eventPropertiesById) {
    let buffer: EventPropertyType[] = []
    for (const key in collected.eventPropertiesById[knownTeamId]) {
        const eventProp = collected.eventPropertiesById[knownTeamId][key]
        eventPropTypesCounter.inc()
        buffer.push(eventProp)

        if (buffer.length === BATCH_SIZE) {
            status.info('🔁', `Writing event properties batch of size ${buffer.length}`)
            void this.scheduleWork(this.propertyDefsDB.writeEventPropertiesBatch(buffer))
            // Start a fresh array: the scheduled write still holds a reference to the old one.
            buffer = []
        }
    }

    // Flush the remainder. Without this, any team contributing fewer than BATCH_SIZE
    // event properties (or the tail of a larger set) would never be written.
    if (buffer.length > 0) {
        status.info('🔁', `Writing event properties batch of size ${buffer.length}`)
        void this.scheduleWork(this.propertyDefsDB.writeEventPropertiesBatch(buffer))
    }
}
204
224
@@ -207,45 +227,6 @@ export class PropertyDefsConsumer {
207
227
status . debug ( '🔁' , `Processed batch` )
208
228
}
209
229
210
- private async resolveTeams ( db : PropertyDefsDB , teamIdsInBatch : Set < number > ) : Promise < Set < number > > {
211
- const teamsFound = await db . findTeamIds ( Array . from ( teamIdsInBatch ) )
212
- return new Set ( teamsFound . filter ( ( row ) => ! teamIdsInBatch . has ( row . teamId ) ) . map ( ( row ) => row . teamId ) )
213
- }
214
-
215
- private async resolveGroupsForTeams (
216
- db : PropertyDefsDB ,
217
- knownTeamIdsWithGroup : Set < number >
218
- ) : Promise < Record < number , ResolvedGroups > > {
219
- const result = await db . resolveGroupsForTeams ( Array . from ( knownTeamIdsWithGroup ) )
220
-
221
- const out : Record < number , ResolvedGroups > = { }
222
- result . forEach ( ( row ) => {
223
- let resolved : ResolvedGroups
224
- if ( out [ row . teamId ] ) {
225
- resolved = out [ row . teamId ]
226
- } else {
227
- resolved = { }
228
- }
229
-
230
- resolved [ row . groupName ] = row . groupIndex
231
- out [ row . teamId ] = resolved
232
- } )
233
-
234
- return out
235
- }
236
-
237
- private extractTeamIds ( events : ClickHouseEvent [ ] ) : Set < number > {
238
- return new Set ( events . map ( ( event ) => event . team_id ) )
239
- }
240
-
241
- private extractGroupTeamIds ( events : ClickHouseEvent [ ] , knownTeamIds : Set < number > ) : Set < number > {
242
- return new Set (
243
- events
244
- . filter ( ( event ) => event . event == '$groupidentify' && knownTeamIds . has ( event . team_id ) )
245
- . map ( ( event ) => event . team_id )
246
- )
247
- }
248
-
249
230
private extractPropertyDefinitions ( events : ClickHouseEvent [ ] , collected : CollectedPropertyDefinitions ) {
250
231
for ( const event of events ) {
251
232
if ( ! collected . knownTeamIds . has ( event . team_id ) ) {
0 commit comments