Skip to content

Commit

Permalink
Open-sourcing Timelines Aggregation Framework
Browse files Browse the repository at this point in the history
Open sourcing Aggregation Framework, a config-driven Summingbird based framework for generating real-time and batch aggregate features to be consumed by ML models.
  • Loading branch information
twitter-team committed Apr 28, 2023
1 parent b5e849b commit 197bf2c
Show file tree
Hide file tree
Showing 146 changed files with 16,429 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Product surfaces at Twitter are built on a shared set of data, models, and softw
| | [topic-social-proof](topic-social-proof/README.md) | Identifies topics related to individual Tweets. |
| Software framework | [navi](navi/README.md) | High performance, machine learning model serving written in Rust. |
| | [product-mixer](product-mixer/README.md) | Software framework for building feeds of content. |
| | [timelines-aggregation-framework](timelines/data_processing/ml_util/aggregation_framework/README.md) | Framework for generating aggregate features in batch or real time. |
| | [twml](twml/README.md) | Legacy machine learning framework built on TensorFlow v1. |

The product surface currently included in this repository is the For You Timeline.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package com.twitter.timelines.prediction.common.aggregates

import com.twitter.ml.api.Feature
import com.twitter.ml.api.FeatureContext
import com.twitter.ml.api.ITransform
import com.twitter.ml.api.constant.SharedFeatures
import java.lang.{Double => JDouble}

import com.twitter.timelines.prediction.common.adapters.AdapterConsumer
import com.twitter.timelines.prediction.common.adapters.EngagementLabelFeaturesDataRecordUtils
import com.twitter.ml.api.DataRecord
import com.twitter.ml.api.RichDataRecord
import com.twitter.timelines.suggests.common.engagement.thriftscala.EngagementType
import com.twitter.timelines.suggests.common.engagement.thriftscala.Engagement
import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures
import com.twitter.timelines.prediction.features.common.CombinedFeatures

/**
 * Transforms BCE (behavior client events) UUA data records that carry only a continuous
 * dwell time into data records that also carry the corresponding binary label features.
 *
 * An incoming UUA data record is expected to have USER_ID, SOURCE_TWEET_ID, TIMESTAMP and
 * zero or one of the TWEET_DETAIL_DWELL_TIME_MS, PROFILE_DWELL_TIME_MS and
 * FULLSCREEN_VIDEO_DWELL_TIME_MS features. Whichever dwell time feature is present tells the
 * engagements apart; the labels themselves are written by
 * `EngagementLabelFeaturesDataRecordUtils.setDwellTimeFeatures`.
 */
object BCELabelTransformFromUUADataRecord extends ITransform {

  /** Pairs each supported dwell time feature with the engagement type it stands for. */
  val dwellTimeFeatureToEngagementMap = Map(
    TimelinesSharedFeatures.TWEET_DETAIL_DWELL_TIME_MS -> EngagementType.TweetDetailDwell,
    TimelinesSharedFeatures.PROFILE_DWELL_TIME_MS -> EngagementType.ProfileDwell,
    TimelinesSharedFeatures.FULLSCREEN_VIDEO_DWELL_TIME_MS -> EngagementType.FullscreenVideoDwell
  )

  /**
   * Builds an [[Engagement]] out of a single dwell time feature of a record.
   *
   * @param rdr              record to read the dwell time and `SharedFeatures.TIMESTAMP` from
   * @param dwellTimeFeature dwell time feature to look up in `rdr`
   * @param engagementType   engagement type to assign to the resulting engagement
   * @return `None` when `rdr` does not have `dwellTimeFeature`; otherwise an engagement whose
   *         timestamp is the record's TIMESTAMP value and whose weight is the dwell time value
   */
  def dwellFeatureToEngagement(
    rdr: RichDataRecord,
    dwellTimeFeature: Feature[JDouble],
    engagementType: EngagementType
  ): Option[Engagement] =
    if (!rdr.hasFeature(dwellTimeFeature)) {
      None
    } else {
      val engagement = Engagement(
        engagementType = engagementType,
        timestampMs = rdr.getFeatureValue(SharedFeatures.TIMESTAMP),
        weight = Some(rdr.getFeatureValue(dwellTimeFeature))
      )
      Some(engagement)
    }

  /**
   * Adds the tweet detail, profile and fullscreen video dwell engagement features from
   * [[CombinedFeatures]] to the given feature context.
   */
  override def transformContext(featureContext: FeatureContext): FeatureContext = {
    val dwellEngagementFeatures =
      CombinedFeatures.TweetDetailDwellEngagements ++
        CombinedFeatures.ProfileDwellEngagements ++
        CombinedFeatures.FullscreenVideoDwellEngagements

    featureContext.addFeatures(dwellEngagementFeatures.toSeq: _*)
  }

  /**
   * Collects one engagement per dwell time feature present on `record` and hands them to
   * `EngagementLabelFeaturesDataRecordUtils.setDwellTimeFeatures`.
   */
  override def transform(record: DataRecord): Unit = {
    val richRecord = new RichDataRecord(record)

    // Features missing from the record yield None and are dropped by flatMap.
    val dwellEngagements = dwellTimeFeatureToEngagementMap.toSeq.flatMap {
      case (feature, engagementKind) =>
        dwellFeatureToEngagement(richRecord, feature, engagementKind)
    }

    // Go through the same BCE (behavior client events) label conversion that
    // EngagementTypeConverter relies on, so that the labels stay aligned with the BCE labels
    // generated for offline training data.
    EngagementLabelFeaturesDataRecordUtils.setDwellTimeFeatures(
      richRecord,
      Some(dwellEngagements),
      AdapterConsumer.Combined
    )
  }
}
Loading

0 comments on commit 197bf2c

Please sign in to comment.