Skip to content

Commit

Permalink
SONARPY-2303 Stop running V1 type inference during the indexing phase
Browse files Browse the repository at this point in the history
  • Loading branch information
guillaume-dequenne committed Nov 5, 2024
1 parent 459f178 commit 6e6e789
Show file tree
Hide file tree
Showing 11 changed files with 119 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,14 @@ public void removeModule(String packageName, String fileName) {
}

public void addModule(FileInput fileInput, String packageName, PythonFile pythonFile) {
SymbolTableBuilder symbolTableBuilder = new SymbolTableBuilder(packageName, pythonFile);
fileInput.accept(symbolTableBuilder);

String fullyQualifiedModuleName = SymbolUtils.fullyQualifiedModuleName(packageName, pythonFile.fileName());

var symbolTable = new SymbolTableBuilderV2(fileInput).build();
var typeInferenceV2 = new TypeInferenceV2(new BasicTypeTable(), pythonFile, symbolTable, packageName);
var typesBySymbol = typeInferenceV2.inferTypes(fileInput);

importsByModule.put(fullyQualifiedModuleName, typeInferenceV2.importedModulesFQN());

var moduleDescriptors = typesBySymbol.entrySet()
.stream()
.filter(entry -> entry.getValue().stream().noneMatch(UnknownType.UnresolvedImportType.class::isInstance))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
Expand All @@ -33,6 +34,7 @@
import org.slf4j.LoggerFactory;
import org.sonar.plugins.python.api.ProjectPythonVersion;
import org.sonar.plugins.python.api.PythonVersionUtils;
import org.sonar.plugins.python.api.symbols.Symbol;
import org.sonar.python.index.ClassDescriptor;
import org.sonar.python.index.Descriptor;
import org.sonar.python.index.ModuleDescriptor;
Expand Down Expand Up @@ -94,6 +96,16 @@ public Map<String, Descriptor> descriptorsForModule(String moduleName) {
return cachedDescriptors.computeIfAbsent(moduleName, this::searchTypeShedForModule);
}

/**
 * Returns the fully qualified names of all Typeshed stub modules that have been
 * resolved so far, i.e. every cached module whose descriptor map is non-empty.
 * Modules looked up but not found (empty descriptor maps) are excluded.
 */
public Set<String> stubModules() {
  Set<String> resolvedModules = new HashSet<>();
  cachedDescriptors.forEach((moduleName, descriptors) -> {
    if (!descriptors.isEmpty()) {
      resolvedModules.add(moduleName);
    }
  });
  return resolvedModules;
}

//================================================================================
// Private methods
//================================================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -739,11 +739,12 @@ void class_having_another_class_with_same_name_should_not_trigger_error() {
"class A:",
" class B(B): pass"
);
globalSymbols(fileInput, "mod");
ClassDef outerClassDef = (ClassDef) fileInput.statements().statements().get(1);
ClassDef innerClassDef = (ClassDef) outerClassDef.body().statements().get(0);
// SONARPY-1350: Parent should be external.B
assertThat(TreeUtils.getParentClassesFQN(innerClassDef)).containsExactly("mod.mod.A.B");
Set<Symbol> symbols = globalSymbols(fileInput, "mod");
// SONARPY-1829: Parent should be external.B
ClassSymbol outerClassSymbol = ((ClassSymbol) symbols.stream().findFirst().get());
ClassSymbol innerClassSymbol = (ClassSymbol) outerClassSymbol.resolveMember("B").get();
assertThat(innerClassSymbol.superClasses()).isEmpty();
assertThat(innerClassSymbol.hasUnresolvedTypeHierarchy()).isTrue();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ private boolean checkRequiresParsingOfImpactedFile(PythonInputFile inputFile, Py

@Override
public void endOfAnalysis() {
indexer.postAnalysis(context);
checks.all().stream()
.filter(EndOfAnalysis.class::isInstance)
.map(EndOfAnalysis.class::cast)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ void addFile(PythonInputFile inputFile) throws IOException {

public abstract void buildOnce(SensorContext context);

public abstract void postAnalysis(SensorContext context);

public void setSonarLintCache(@Nullable SonarLintCache sonarLintCache) {
// no op by default
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ public void buildOnce(SensorContext context) {
globalSymbolsStep.execute(files, context);
}

@Override
public void postAnalysis(SensorContext context) {
  // no op: this indexer has no end-of-analysis work to perform.
  // NOTE(review): the SonarQube counterpart uses this hook to persist used
  // Typeshed stub modules to the analysis cache; presumably that caching does
  // not apply in this (SonarLint) context — confirm.
}

// SonarLintCache has to be set lazily because SonarLintPythonIndex is injected in the PythonSensor
@Override
public void setSonarLintCache(@Nullable SonarLintCache sonarLintCache) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.sonar.python.index.Descriptor;
import org.sonar.python.semantic.DependencyGraph;
import org.sonar.python.semantic.SymbolUtils;
import org.sonar.python.semantic.v2.typeshed.TypeShedDescriptorsProvider;
import org.sonar.python.types.TypeShed;
import org.sonarsource.performance.measure.PerformanceMeasure;

Expand Down Expand Up @@ -82,6 +83,16 @@ public void buildOnce(SensorContext context) {
duration.stop();
}

/**
 * End-of-analysis hook: persists the set of Typeshed stub modules that were
 * resolved during this analysis into the analysis cache, so a later analysis
 * can preload them. Does nothing when caching is disabled or no stub module
 * was resolved.
 */
@Override
public void postAnalysis(SensorContext context) {
  if (!caching.isCacheEnabled()) {
    return;
  }
  Set<String> stubModules = projectLevelSymbolTable().typeShedDescriptorsProvider().stubModules();
  if (stubModules.isEmpty()) {
    return;
  }
  caching.writeTypeshedModules(stubModules);
}

private boolean shouldOptimizeAnalysis(SensorContext context) {
return caching.isCacheEnabled()
&& (context.canSkipUnchangedFiles() || context.config().getBoolean(SONAR_CAN_SKIP_UNCHANGED_FILES_KEY).orElse(false))
Expand All @@ -90,6 +101,7 @@ private boolean shouldOptimizeAnalysis(SensorContext context) {

private void computeGlobalSymbolsUsingCache(SensorContext context) {
loadTypeshedSymbols();
projectLevelSymbolTable().typeShedDescriptorsProvider();
LOG.info("Using cached data to retrieve global symbols.");
Set<String> currentProjectModulesFQNs = new HashSet<>(inputFileToFQN.values());
Set<String> deletedModulesFQNs = deletedModulesFQNs(currentProjectModulesFQNs);
Expand Down Expand Up @@ -130,9 +142,9 @@ private void computeGlobalSymbolsUsingCache(SensorContext context) {
* For that reason, we load all symbols that were used in the previous analysis upfront, even if the file using them will not be parsed.
*/
private void loadTypeshedSymbols() {
TypeShed.builtinSymbols();
TypeShedDescriptorsProvider typeshedReader = projectLevelSymbolTable().typeShedDescriptorsProvider();
Set<String> typeShedModules = caching.readTypeshedModules();
typeShedModules.forEach(TypeShed::symbolsForModule);
typeShedModules.forEach(typeshedReader::descriptorsForModule);
}

private boolean tryToUseCache(Map<String, Set<String>> importsByModule, PythonInputFile inputFile, String currFQN) {
Expand Down Expand Up @@ -175,10 +187,7 @@ public void computeGlobalSymbols(List<PythonInputFile> files, SensorContext cont
if (caching.isCacheEnabled()) {
saveGlobalSymbolsInCache(files);
saveMainFilesListInCache(new HashSet<>(inputFileToFQN.values()));
Set<String> stubModules = TypeShed.stubModules();
if (!stubModules.isEmpty()) {
caching.writeTypeshedModules(stubModules);
}
// Saving information on used Typeshed stubs must happen at the end of the analysis, as it is no longer computed during indexing
caching.writeCacheVersion();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
Expand Down Expand Up @@ -119,6 +120,7 @@
import static org.sonar.plugins.python.caching.Caching.CPD_TOKENS_STRING_TABLE_KEY_PREFIX;
import static org.sonar.plugins.python.caching.Caching.IMPORTS_MAP_CACHE_KEY_PREFIX;
import static org.sonar.plugins.python.caching.Caching.PROJECT_SYMBOL_TABLE_CACHE_KEY_PREFIX;
import static org.sonar.plugins.python.caching.Caching.TYPESHED_MODULES_KEY;
import static org.sonar.plugins.python.caching.Caching.fileContentHashCacheKey;
import static org.sonar.plugins.python.caching.Caching.importsMapCacheKey;
import static org.sonar.plugins.python.caching.Caching.projectSymbolTableCacheKey;
Expand All @@ -128,6 +130,7 @@ class PythonSensorTest {

private static final String FILE_1 = "file1.py";
private static final String FILE_2 = "file2.py";
private static final String FILE_USING_TYPESHED = "uses_typeshed.py";
private static final String FILE_QUICKFIX = "file_quickfix.py";
private static final String FILE_TEST_FILE = "test_file.py";
private static final String FILE_INVALID_SYNTAX = "invalid_syntax.py";
Expand Down Expand Up @@ -862,6 +865,71 @@ void test_using_cache() throws IOException {
.contains("The Python analyzer was able to leverage cached data from previous analyses for 1 out of 1 files. These files were not parsed.");
}

@Test
void test_typeshed_stubs_information_is_saved_to_cache() {
// Activate a single rule so the analysis of the fixture yields exactly one deterministic issue.
activeRules = new ActiveRulesBuilder()
.addRule(new NewActiveRule.Builder()
.setRuleKey(RuleKey.of(CheckList.REPOSITORY_KEY, ONE_STATEMENT_PER_LINE_RULE_KEY))
.build())
.build();

// The fixture imports a Typeshed-backed module, so analyzing it resolves at least one stub.
PythonInputFile inputFile = inputFile(FILE_USING_TYPESHED, Type.MAIN, InputFile.Status.CHANGED);
TestReadCache readCache = getValidReadCache();
TestWriteCache writeCache = new TestWriteCache();
writeCache.bind(readCache);

// Enable caching so the sensor's post-analysis step writes the used stub modules.
context.setPreviousCache(readCache);
context.setNextCache(writeCache);
context.setCacheEnabled(true);
context.setSettings(new MapSettings().setProperty("sonar.python.skipUnchanged", true));
sensor().execute(context);

// Sanity check: the file was actually analyzed and raised the expected issue.
assertThat(context.allIssues()).hasSize(1);
Issue issue = context.allIssues().iterator().next();
assertThat(issue.primaryLocation().inputComponent()).isEqualTo(inputFile.wrappedFile());
assertThat(issue.ruleKey().rule()).isEqualTo(ONE_STATEMENT_PER_LINE_RULE_KEY);

// The resolved stub modules are serialized as a ";"-separated list under TYPESHED_MODULES_KEY.
byte[] bytes = writeCache.getData().get(TYPESHED_MODULES_KEY);
Set<String> resolvedTypeshedModules = new HashSet<>(Arrays.asList(new String(bytes, StandardCharsets.UTF_8).split(";")));
assertThat(resolvedTypeshedModules).containsExactlyInAnyOrder("math");
}

@Test
void test_typeshed_stub_cache_information_is_propagated() throws IOException {
// Activate a single rule; the file is UNCHANGED so no issue is expected to be raised.
activeRules = new ActiveRulesBuilder()
.addRule(new NewActiveRule.Builder()
.setRuleKey(RuleKey.of(CheckList.REPOSITORY_KEY, ONE_STATEMENT_PER_LINE_RULE_KEY))
.build())
.build();

// Status.SAME + a fully populated read cache lets the sensor skip parsing this file entirely.
PythonInputFile inputFile = inputFile(FILE_USING_TYPESHED, Type.MAIN, InputFile.Status.SAME);
TestReadCache readCache = getValidReadCache();
TestWriteCache writeCache = new TestWriteCache();
writeCache.bind(readCache);

// Pre-populate every cache entry the sensor needs to scan without parsing:
// imports map, Typeshed modules, project symbol table, CPD tokens, and content hash.
byte[] serializedSymbolTable = toProtobufModuleDescriptor(Set.of(new VariableDescriptor("x", "main.x", null))).toByteArray();
CpdSerializer.SerializationResult cpdTokens = CpdSerializer.serialize(Collections.emptyList());
readCache.put(importsMapCacheKey(inputFile.wrappedFile().key()), String.join(";", Collections.emptyList()).getBytes(StandardCharsets.UTF_8));
readCache.put(TYPESHED_MODULES_KEY, String.join(";", List.of("math")).getBytes(StandardCharsets.UTF_8));
readCache.put(projectSymbolTableCacheKey(inputFile.wrappedFile().key()), serializedSymbolTable);
readCache.put(CPD_TOKENS_CACHE_KEY_PREFIX + inputFile.wrappedFile().key(), cpdTokens.data);
readCache.put(CPD_TOKENS_STRING_TABLE_KEY_PREFIX + inputFile.wrappedFile().key(), cpdTokens.stringTable);
readCache.put(fileContentHashCacheKey(inputFile.wrappedFile().key()), inputFile.wrappedFile().md5Hash().getBytes(UTF_8));

context.setPreviousCache(readCache);
context.setNextCache(writeCache);
context.setCacheEnabled(true);
context.setSettings(new MapSettings().setProperty("sonar.python.skipUnchanged", true));
sensor().execute(context);

// The file was served from cache: no issues, and the log confirms it was not parsed.
assertThat(context.allIssues()).isEmpty();
assertThat(logTester.logs(Level.INFO))
.contains("The Python analyzer was able to leverage cached data from previous analyses for 1 out of 1 files. These files were not parsed.");
// Even though the file was not re-analyzed, the stub-module information read from the
// previous cache must be written back to the next cache so it survives across analyses.
byte[] bytes = writeCache.getData().get(TYPESHED_MODULES_KEY);
Set<String> resolvedTypeshedModules = new HashSet<>(Arrays.asList(new String(bytes, StandardCharsets.UTF_8).split(";")));
assertThat(resolvedTypeshedModules).containsExactlyInAnyOrder("math");
}

@Test
void test_scan_without_parsing_test_file() {
activeRules = new ActiveRulesBuilder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.List;
import java.util.Set;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
import org.slf4j.event.Level;
Expand All @@ -39,6 +40,7 @@
import org.sonar.api.batch.sensor.internal.SensorContextTester;
import org.sonar.api.testfixtures.log.LogTesterJUnit5;
import org.sonar.plugins.python.PythonInputFile;
import org.sonar.plugins.python.PythonScanner;
import org.sonar.plugins.python.api.caching.PythonReadCache;
import org.sonar.plugins.python.api.caching.PythonWriteCache;
import org.sonar.plugins.python.caching.TestReadCache;
Expand Down Expand Up @@ -408,22 +410,6 @@ void test_disabled_cache() {
assertThat(logTester.logs(Level.INFO)).doesNotContain("Using cached data to retrieve global symbols.");
}

@Test
void test_typeshed_modules_cached() {
file1 = createInputFile(baseDir, "uses_typeshed.py", InputFile.Status.CHANGED, InputFile.Type.MAIN);

List<PythonInputFile> inputFiles = new ArrayList<>(List.of(file1));

pythonIndexer = new SonarQubePythonIndexer(inputFiles, cacheContext, context);
pythonIndexer.buildOnce(context);

assertThat(pythonIndexer.canBePartiallyScannedWithoutParsing(file1)).isFalse();

byte[] bytes = writeCache.getData().get(TYPESHED_MODULES_KEY);
Set<String> resolvedTypeshedModules = new HashSet<>(Arrays.asList(new String(bytes, StandardCharsets.UTF_8).split(";")));
assertThat(resolvedTypeshedModules).containsExactlyInAnyOrder("math");
}

@Test
void test_typeshed_modules_not_cached_if_empty() {
file1 = createInputFile(baseDir, "main.py", InputFile.Status.CHANGED, InputFile.Type.MAIN);
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import math

x = 42

foo();bar()

0 comments on commit 6e6e789

Please sign in to comment.