Skip to content

Commit

Permalink
Enabled and refactored compute changed files to have all files settin…
Browse files Browse the repository at this point in the history
…g to true and get all files

Needs to be tested in CI, and tests related to computing all files need to be added.
  • Loading branch information
Rd4dev committed Aug 7, 2024
1 parent e20782c commit 2535777
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 102 deletions.
20 changes: 10 additions & 10 deletions .github/workflows/code_coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@ on:
- develop

jobs:
# check_unit_tests_completed:
# name: Check unit test completed
# runs-on: ubuntu-latest
# steps:
# - name: Wait for unit tests to checks
# uses: ArcticLampyrid/[email protected]
# with:
# workflow: unit_tests.yml
# sha: auto
check_unit_tests_completed:
name: Check unit test completed
runs-on: ubuntu-latest
steps:
- name: Wait for unit tests to checks
uses: ArcticLampyrid/[email protected]
with:
workflow: unit_tests.yml
sha: auto

compute_changed_files:
name: Compute changed files
# needs: check_unit_tests_completed
needs: check_unit_tests_completed
runs-on: ubuntu-20.04
outputs:
matrix: ${{ steps.compute-file-matrix.outputs.matrix }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,20 +127,15 @@ class ComputeAffectedTests(
} else computeAffectedTargetsForNonDevelopBranch(gitClient, bazelClient, rootDirectory)

val filteredTestTargets = filterTargets(affectedTestTargets)
println()
println("Affected test targets:")
println(filteredTestTargets.joinToString(separator = "\n") { "- $it" })

// Bucket the targets & then shuffle them so that shards are run in different orders each time
// (to avoid situations where the longest/most expensive tests are run last).
val affectedTestBuckets = bucketTargets(filteredTestTargets)
println("Affected Test Buckets: $affectedTestBuckets")
val encodedTestBucketEntries =
affectedTestBuckets.associateBy { it.toCompressedBase64() }.entries.shuffled()
println("Encoded Test Buckets: $encodedTestBucketEntries")
File(pathToOutputFile).printWriter().use { writer ->
encodedTestBucketEntries.forEachIndexed { index, (encoded, bucket) ->
println("Shard index: $index, encoded: $encoded")
writer.println("${bucket.cacheBucketName}-shard$index;$encoded")
}
}
Expand Down Expand Up @@ -225,7 +220,6 @@ class ComputeAffectedTests(
keySelector = { checkNotNull(it.key).groupingStrategy },
valueTransform = { checkNotNull(it.key) to it.value }
).mapValues { (_, bucketLists) -> bucketLists.toMap() }
println("Grouped Buckets: $groupedBuckets")

// Next, properly segment buckets by splitting out individual ones and collecting like one:
// 5. Convert to: Map<String, Map<TestBucket, List<String>>>
Expand All @@ -245,7 +239,6 @@ class ComputeAffectedTests(
GroupingStrategy.BUCKET_GENERICALLY -> listOf(GENERIC_TEST_BUCKET_NAME to buckets)
}
}.toMap()
println("Partitioned Buckets: $partitionedBuckets")

// Next, collapse the test bucket lists & partition them based on the common sharding strategy
// for each group:
Expand All @@ -267,7 +260,6 @@ class ComputeAffectedTests(
// Use randomization to encourage cache breadth & potentially improve workflow performance.
allPartitionTargets.shuffled().chunked(maxTestCountPerShard)
}
println("Sharded Buckets: $shardedBuckets")

// Finally, compile into a list of protos:
// 7. Convert to List<AffectedTestsBucket>
Expand Down
142 changes: 58 additions & 84 deletions scripts/src/java/org/oppia/android/scripts/ci/ComputeChangedFiles.kt
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ fun main(args: Array<String>) {
ScriptBackgroundCoroutineDispatcher().use { scriptBgDispatcher ->
ComputeChangedFiles(scriptBgDispatcher)
.compute(pathToRoot, pathToOutputFile, baseCommit, computeAllFilesSetting)
// .compute(pathToRoot, pathToOutputFile, baseCommit)
}
}

Expand Down Expand Up @@ -115,60 +114,73 @@ class ComputeChangedFiles(
println("Most recent common commit: ${gitClient.branchMergeBase}.")

val currentBranch = gitClient.currentBranch.lowercase(Locale.US)
val changedFilesAll: List<File>? = if (computeAllFilesSetting || currentBranch == "develop") {
// computeAllFiles()
val testFileExemptiontextProto = "scripts/assets/test_file_exemptions"
val testFileExemptionList = loadTestFileExemptionsProto(testFileExemptiontextProto)
.testFileExemptionList
.filter { it.testFileNotRequired }
.map { it.exemptedFilePath }

val searchFiles = RepositoryFile.collectSearchFiles(
repoPath = pathToRoot,
expectedExtension = ".kt",
exemptionsList = testFileExemptionList
)
val changedFiles = if (computeAllFilesSetting || currentBranch == "develop") {
computeAllFiles(rootDirectory, pathToRoot)
} else computeChangedFilesForNonDevelopBranch(gitClient, rootDirectory)

// A list of all the prod files present in the repo.
searchFiles.filter { file -> !file.name.endsWith("Test.kt") }
} else {null}
val ktFiles = changedFiles.filter { it.endsWith(".kt") }
val filteredFiles = filterFiles(ktFiles)

println("Changed Files: $changedFilesAll")
val changedFileBuckets = bucketFiles(filteredFiles)
val encodedFileBucketEntries = changedFileBuckets
.associateBy { it.toCompressedBase64() }
.entries.shuffled()

val changedFiles = computeChangedFilesForNonDevelopBranch(gitClient, rootDirectory)
println("\nChanged Files: $changedFiles")
val ktFiles = changedFiles.filter { it.endsWith(".kt") }
println("\nKt file: $ktFiles")
File(pathToOutputFile).printWriter().use { writer ->
encodedFileBucketEntries.forEachIndexed { index, (encoded, bucket) ->
writer.println("${bucket.cacheBucketName}-shard$index;$encoded")
}
}
}

val filteredFiles = filterFiles(ktFiles)
println("\nFilter: Files: $filteredFiles")
/**
 * Returns the root-relative paths of the non-test Kotlin files found under [pathToRoot].
 *
 * Entries of the test file exemption proto that are marked as not requiring a test file are
 * forwarded to the repository search as its exemptions list. Files whose names end in "Test.kt"
 * are dropped from the result.
 *
 * @param rootDirectory the directory against which each returned path is relativized
 * @param pathToRoot the repository path handed to the file search
 * @return the matching file paths, expressed relative to [rootDirectory]
 */
private fun computeAllFiles(
  rootDirectory: File,
  pathToRoot: String
): List<String> {
  val exemptionsProtoPath = "scripts/assets/test_file_exemptions"
  val exemptedPaths = loadTestFileExemptionsProto(exemptionsProtoPath)
    .testFileExemptionList
    .filter { exemption -> exemption.testFileNotRequired }
    .map { exemption -> exemption.exemptedFilePath }

  val candidateFiles = RepositoryFile.collectSearchFiles(
    repoPath = pathToRoot,
    expectedExtension = ".kt",
    exemptionsList = exemptedPaths
  )

  // Keep only Kotlin sources that are not tests, converting each one to a root-relative path.
  return candidateFiles.mapNotNull { file ->
    val fileName = file.name
    if (fileName.endsWith(".kt") && !fileName.endsWith("Test.kt")) {
      rootDirectory.toPath().relativize(file.toPath()).toString()
    } else null
  }
}

/**
 * Returns the files reported as changed by [gitClient] that still exist under [rootDirectory].
 *
 * Each reported path is resolved against [rootDirectory]; paths whose file no longer exists are
 * skipped, and the remainder are returned relative to [rootDirectory].
 *
 * @param gitClient the client whose changed files are inspected
 * @param rootDirectory the directory used both to resolve and to relativize the paths
 * @return the root-relative paths of the changed files that exist on disk
 */
private fun computeChangedFilesForNonDevelopBranch(
  gitClient: GitClient,
  rootDirectory: File
): List<String> =
  gitClient.changedFiles.mapNotNull { changedPath ->
    File(rootDirectory, changedPath)
      .takeIf { candidate -> candidate.exists() }
      ?.let { existing -> rootDirectory.toPath().relativize(existing.toPath()).toString() }
  }

/**
 * Returns [files] without the entries that must be ignored.
 *
 * A path is ignored when it starts, case-insensitively, with the instrumentation player
 * directory prefix. The relative order of the remaining paths is preserved.
 *
 * @param files the candidate file paths
 * @return the paths that do not fall under the ignored prefix
 */
private fun filterFiles(files: List<String>): List<String> {
  val ignoredPathPrefix =
    "instrumentation/src/javatests/org/oppia/android/instrumentation/player"
  return files.filterNot { path -> path.startsWith(ignoredPathPrefix, ignoreCase = true) }
}

// create and move this to bucketFiles()
private fun bucketFiles(filteredFiles: List<String>): List<ChangedFilesBucket> {
val groupedBuckets = filteredFiles.groupBy { FileBucket.retrieveCorrespondingFileBucket(it) }
.entries.groupBy(
keySelector = { checkNotNull(it.key).groupingStrategy },
valueTransform = { checkNotNull(it.key) to it.value }
).mapValues { (_, fileLists) -> fileLists.toMap() }
println("\nGrouped Buckets: $groupedBuckets")

/*val groupedBuckets2 = ktFiles.groupBy { FileBucket.retrieveCorrespondingFileBucket(it) }
.entries.groupBy { it.key.groupingStrategy }
.mapValues { (_, buckets) -> buckets.associate { it.key to it.value } }
println("\n********************")
println("\nGrouped Buckets: $groupedBuckets2")*/

/*val partitionedBuckets: Map<String, Map<FileBucket, List<String>>> =
groupedBuckets.entries.flatMap { (strategy, buckets) ->
return@flatMap when (strategy) {
GroupingStrategy.BUCKET_SEPARATELY -> {
buckets.mapValues { (fileBucket, targets) -> mapOf(fileBucket to targets) }
.mapKeys { (fileBucket, _) -> fileBucket.cacheBucketName }
.entries.map { (cacheName, bucket) -> cacheName to bucket }
}
GroupingStrategy.BUCKET_GENERICALLY -> listOf(GENERIC_FILE_BUCKET_NAME to buckets)
}
}.toMap()
println("\nPartitioned Buckets: $partitionedBuckets")*/

val partitionedBuckets = groupedBuckets.flatMap { (strategy, buckets) ->
when (strategy) {
Expand All @@ -178,7 +190,6 @@ class ComputeChangedFiles(
GroupingStrategy.BUCKET_GENERICALLY -> listOf(GENERIC_FILE_BUCKET_NAME to buckets)
}
}.toMap()
println("\nPartitioned Buckets: $partitionedBuckets")

val shardedBuckets: Map<String, List<List<String>>> =
partitionedBuckets.mapValues { (_, bucketMap) ->
Expand All @@ -197,52 +208,15 @@ class ComputeChangedFiles(
// Use randomization to encourage cache breadth & potentially improve workflow performance.
allPartitionFiles.shuffled().chunked(maxFileCountPerShard)
}
println("\nSharded Buckets: $shardedBuckets")

val computedBuckets = shardedBuckets.entries.flatMap { (bucketName, shardedFiles) ->
return shardedBuckets.entries.flatMap { (bucketName, shardedFiles) ->
shardedFiles.map { files ->
ChangedFilesBucket.newBuilder().apply {
cacheBucketName = bucketName
addAllChangedFiles(files)
}.build()
}
}
println("\nComputed Buckets: $computedBuckets")

val encodedFileBucketEntries = computedBuckets
.associateBy { it.toCompressedBase64() }
.entries.shuffled()
println("\nEncoded File Bucket Entries: $encodedFileBucketEntries")

File(pathToOutputFile).printWriter().use { writer ->
encodedFileBucketEntries.forEachIndexed { index, (encoded, bucket) ->
writer.println("${bucket.cacheBucketName}-shard$index;$encoded")
}
}
}

/**
 * Returns the distinct files reported as changed by [gitClient] that exist under
 * [rootDirectory], logging the resulting set and its size to standard output.
 *
 * @param gitClient the client whose changed files are inspected
 * @param rootDirectory the directory against which each reported path is resolved
 * @return the de-duplicated changed file paths, exactly as reported by [gitClient]
 */
private fun computeChangedFilesForNonDevelopBranch(
  gitClient: GitClient,
  rootDirectory: File
): List<String> {
  // TODO: update later (carried over from the original implementation).
  val existingPaths = linkedSetOf<String>()
  gitClient.changedFiles.filterTo(existingPaths) { path ->
    File(rootDirectory, path).exists()
  }
  println("Changed files (per Git, ${existingPaths.size} total): $existingPaths")

  return existingPaths.toList()
}

/**
 * Drops the file paths that should be ignored from [files].
 *
 * Any path beginning with the instrumentation player directory prefix (compared without regard
 * to case) is removed; all other paths are returned in their original order.
 *
 * @param files the file paths to screen
 * @return the paths that remain after removing the ignored ones
 */
private fun filterFiles(files: List<String>): List<String> {
  val retainedFiles = mutableListOf<String>()
  for (candidate in files) {
    val isIgnored = candidate.startsWith(
      "instrumentation/src/javatests/org/oppia/android/instrumentation/player",
      ignoreCase = true
    )
    if (!isIgnored) retainedFiles += candidate
  }
  return retainedFiles
}

private enum class FileBucket(
Expand Down

0 comments on commit 2535777

Please sign in to comment.