Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Giovanni Rosa committed Feb 2, 2021
0 parents commit dff121b
Show file tree
Hide file tree
Showing 66 changed files with 2,185 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
issues
temp
out
160 changes: 160 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@

# Created by https://www.toptal.com/developers/gitignore/api/pycharm+all,code,intellij+all
# Edit at https://www.toptal.com/developers/gitignore?templates=pycharm+all,code,intellij+all

### Code ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace

### Intellij+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### Intellij+all Patch ###
# Ignores the whole .idea folder and all .iml files
# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360

.idea/

# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023

*.iml
modules.xml
.idea/misc.xml
*.ipr

# Sonarlint plugin
.idea/sonarlint

### PyCharm+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff

# Generated files

# Sensitive or high-churn files

# Gradle

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake

# Mongo Explorer plugin

# File-based project format

# IntelliJ

# mpeltonen/sbt-idea plugin

# JIRA plugin

# Cursive Clojure plugin

# Crashlytics plugin (for Android Studio and IntelliJ)

# Editor-based Rest Client

# Android studio 3.1+ serialized cache file

### PyCharm+all Patch ###
# Ignores the whole .idea folder and all .iml files
# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360


# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023


# Sonarlint plugin

# End of https://www.toptal.com/developers/gitignore/api/pycharm+all,code,intellij+all

temp/
__pycache__
20 changes: 20 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Image that runs PySZZ (main.py + the szz package) on Python 3.7 / Debian buster.
FROM python:3.7-slim-buster

# System packages.
#   - wget: only used below to download the srcML package.
#   - libarchive13, libcurl4, libxml2: presumably the shared libraries the
#     srcML .deb depends on (dpkg -i does not resolve dependencies itself) --
#     TODO confirm against the package's control file.
#   - python-magic: NOTE(review): on buster this apt package is the Python 2
#     binding, which the /usr/local Python 3.7 interpreter cannot import; it is
#     presumably kept because it pulls in libmagic1 for a pip-installed
#     python-magic. Confirm against requirements.txt and consider installing
#     libmagic1 directly.
#   - git: taken from buster-backports because the README requires git >= 2.23
#     and buster's stock git is older.
# The apt lists are removed in the same layer so they do not bloat the image.
RUN echo "deb http://deb.debian.org/debian buster-backports main" >> /etc/apt/sources.list && \
    apt-get update && apt-get upgrade -y --no-install-recommends && \
    apt-get install -y --no-install-recommends wget libarchive13 libcurl4 libxml2 python-magic && \
    apt-get -t buster-backports install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# Install srcML 1.0.0 (the `srcml` command must be on the PATH).
# The .deb is downloaded to /tmp and deleted in the same layer; leaving it
# behind would keep the installer in the image for no benefit.
# NOTE(review): the package is fetched over plain HTTP from a bare IP address
# with no integrity check. Pin a SHA-256 of the .deb and verify it with
# `sha256sum -c` before running dpkg once a trusted digest is available.
RUN wget -O /tmp/srcml_1.0.0-1_ubuntu18.04.deb \
        http://131.123.42.38/lmcrs/v1.0.0/srcml_1.0.0-1_ubuntu18.04.deb && \
    dpkg -i /tmp/srcml_1.0.0-1_ubuntu18.04.deb && \
    rm -f /tmp/srcml_1.0.0-1_ubuntu18.04.deb

WORKDIR /usr/src/app

# Copy the dependency manifest on its own first so the pip layer is reused
# from cache until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code goes last: it changes most often.
COPY szz szz
COPY main.py .

# -u keeps stdout/stderr unbuffered so output shows up immediately in
# `docker logs`. Arguments given to `docker run` are appended to this command.
ENTRYPOINT ["python", "-u", "main.py"]
44 changes: 44 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# PySZZ
This is an open-source implementation of several versions of the SZZ algorithm for detecting bug-inducing commits.

## Requirements
To run PySZZ you need:

- Python 3
- srcML (https://www.srcml.org/) (i.e., the `srcml` command should be in the path)
- git >= 2.23

## Setup
Run the following command to install the required python dependencies:
```
pip3 install --no-cache-dir -r requirements.txt
```

## Run
To run the tool, simply execute the following command:

```
python3 main.py /path/to/bug-fixes.json /path/to/configuration-file.yml /path/to/repo-directory
```
where:

- `bug-fixes.json` contains a list of information about bug-fixing commits and (optionally) issues
- `configuration-file.yml` is one of the following, depending on the SZZ variant you want to run:
  - `conf/agszz.yml`: runs AG-SZZ
  - `conf/bszz.yml`: runs B-SZZ
  - `conf/lszz.yml`: runs L-SZZ
  - `conf/rszz.yml`: runs R-SZZ
  - `conf/maszz.yml`: runs MA-SZZ
  - `conf/raszz.yml`: runs RA-SZZ
- `repo-directory` is a folder which contains all the repositories that are required by `bug-fixes.json`

To have different run configurations, just create or edit the configuration files. The available parameters are described in each yml file.

## Input data
The `data` dir contains two sub-folders:
- `data/langs_only` contains the json files extracted from the dataset, filtered only by the languages defined for the experiment.
- `data/with_whitelist` contains the json files extracted from the dataset filtered by the file extensions defined in the [whitelist csv](https://gitlab.reveal.si.usi.ch/gbavota/icse2021-szz-oracle/-/blob/master/database/langs.csv).

The input json files are the following:
- `bugfix_commits_no_issues.json`: contains only the fix commits that have no issue references.
- `bugfix_commits_issues_only.json`: contains only the fix commits that reference one or more issues; the `earliest_issue_date` field is the earliest creation date among the referenced issues.
- `bugfix_commits_all.json`: contains all the fix commits. If a commit has no referenced issues, the `best_scenario_issue_date` field is the earliest creation date among the linked bug commits, with a time offset of 60 seconds; otherwise, the `earliest_issue_date` field is the earliest issue creation date.
22 changes: 22 additions & 0 deletions conf/agszz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
### szz implementation
szz_name: ag

### obtain the diff only for the specified file extensions
# file_ext_to_parse:
# - c
# - cpp
# - cs
# - h
# - java
# - js
# - py

### detect in diff deleted lines only, otherwise detect only the lines that are both deleted and added
only_deleted_lines: true
issue_date_filter: false

### ignore during blame all the commits specified in revs file
# ignore_revs_file_path: /path/to/revs/file

### ignores commits with a change size higher than the specified value during blame
max_change_size: 20
19 changes: 19 additions & 0 deletions conf/bszz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
### szz implementation
szz_name: b

### obtain the diff only for the specified file extensions
# file_ext_to_parse:
# - c
# - cpp
# - cs
# - h
# - java
# - js
# - py

### detect in diff deleted lines only, otherwise detect only the lines that are both deleted and added
only_deleted_lines: true
issue_date_filter: false

### ignore during blame all the commits specified in revs file
# ignore_revs_file_path: /path/to/revs/file
28 changes: 28 additions & 0 deletions conf/lszz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
### szz implementation
szz_name: l

### obtain the diff only for the specified file extensions
# file_ext_to_parse:
# - c
# - cpp
# - cs
# - h
# - java
# - js
# - py

### detect in diff deleted lines only, otherwise detect only the lines that are both deleted and added
only_deleted_lines: true
issue_date_filter: true

### ignore during blame all the commits specified in revs file
# ignore_revs_file_path: /path/to/revs/file

### ignores commits during blame with a change size higher than the specified value
max_change_size: 20

### set -C param for blame to detect line moves/copies across:
## SAME_COMMIT = 1
## PARENT_COMMIT = 2
## ANY_COMMIT = 3
detect_move_from_other_files: 1
28 changes: 28 additions & 0 deletions conf/maszz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
### szz implementation
szz_name: ma

### obtain the diff only for the specified file extensions
# file_ext_to_parse:
# - c
# - cpp
# - cs
# - h
# - java
# - js
# - py

### detect in diff deleted lines only, otherwise detect only the lines that are both deleted and added
only_deleted_lines: true
issue_date_filter: false

### ignore during blame all the commits specified in revs file
# ignore_revs_file_path: /path/to/revs/file

### ignores commits during blame with a change size higher than the specified value
max_change_size: 20

### set -C param for blame to detect line moves/copies across:
## SAME_COMMIT = 1
## PARENT_COMMIT = 2
## ANY_COMMIT = 3
detect_move_from_other_files: 1
22 changes: 22 additions & 0 deletions conf/raszz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
### szz implementation
szz_name: ra

### obtain the diff only for the specified file extensions
file_ext_to_parse:
- java

### detect in diff deleted lines only, otherwise detect only the lines that are both deleted and added
only_deleted_lines: true

### ignore during blame all the commits specified in revs file
# ignore_revs_file_path: /path/to/revs/file

### ignores commits during blame with a change size higher than the specified value
max_change_size: 20
issue_date_filter: false

### set -C param for blame to detect line moves/copies across:
## SAME_COMMIT = 1
## PARENT_COMMIT = 2
## ANY_COMMIT = 3
detect_move_from_other_files: 1
28 changes: 28 additions & 0 deletions conf/rszz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
### szz implementation
szz_name: r

### obtain the diff only for the specified file extensions
# file_ext_to_parse:
# - c
# - cpp
# - cs
# - h
# - java
# - js
# - py

### detect in diff deleted lines only, otherwise detect only the lines that are both deleted and added
only_deleted_lines: true
issue_date_filter: true

### ignore during blame all the commits specified in revs file
# ignore_revs_file_path: /path/to/revs/file

### ignores commits during blame with a change size higher than the specified value
max_change_size: 20

### set -C param for blame to detect line moves/copies across:
## SAME_COMMIT = 1
## PARENT_COMMIT = 2
## ANY_COMMIT = 3
detect_move_from_other_files: 1
Loading

0 comments on commit dff121b

Please sign in to comment.