Skip to content
This repository has been archived by the owner on Jun 9, 2023. It is now read-only.

Commit

Permalink
Add statistics aggregation for daily data (#364)
Browse files Browse the repository at this point in the history
* Add statistics aggregation for daily data

* Add option --merge, remove others, fix data saving
  • Loading branch information
xtuchyna authored Apr 7, 2021
1 parent cdd8c41 commit 01973ff
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 3 deletions.
17 changes: 16 additions & 1 deletion srcopsmetrics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,13 @@ def get_entities_as_list(entities_raw: Optional[str]) -> List[str]:
@click.option(
"--metrics", "-m", is_flag=True, required=False, help=f"""Launch Metrics Calculation for specified repository.""",
)
@click.option(
"--merge",
"-m",
is_flag=True,
required=False,
help=f"""Merge all of the aggregated data under given KNOWLEDGE_PATH.""",
)
def cli(
repository: Optional[str],
organization: Optional[str],
Expand All @@ -129,6 +136,7 @@ def cli(
knowledge_path: str,
thoth: bool,
metrics: bool,
merge: bool,
):
"""Command Line Interface for SrcOpsMetrics."""
os.environ["IS_LOCAL"] = "True" if is_local else "False"
Expand All @@ -151,7 +159,8 @@ def cli(
for project in repos:
os.environ["PROJECT"] = project

if thoth:
if thoth:
if repository and not merge:
kebechet_metrics = KebechetMetrics(repository=repos[0], today=True, is_local=is_local)
kebechet_metrics.evaluate_and_store_kebechet_metrics()

Expand All @@ -170,6 +179,12 @@ def cli(
path = Path(f"./srcopsmetrics/metrics/{repos[0]}/issue_scores.json")
KnowledgeStorage(is_local=is_local).save_knowledge(file_path=path, data=scores_issues)

if merge:
if thoth:
KebechetMetrics.merge_kebechet_metrics_today(is_local=is_local)
else:
raise NotImplementedError


if __name__ == "__main__":
cli(auto_envvar_prefix="MI")
45 changes: 45 additions & 0 deletions srcopsmetrics/entities/tools/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,51 @@ def get_ceph_store(self) -> CephStore:
s3.connect()
return s3

def save_data(self, file_path: Path, data: Dict[str, Any]):
    """Save data as json.

    Arguments:
        file_path {Path} -- where the knowledge should be saved
        data {Dict[str, Any]} -- collected knowledge. Should be json compatible
    """
    # Fix: pass lazy %-style arguments to the logger instead of pre-formatting
    # with the % operator — avoids formatting work when the level is disabled
    # and lets the logging machinery validate argument counts.
    # NOTE: len(data) counts top-level keys, not serialized byte size.
    _LOGGER.info("Saving knowledge file %s of size %d", os.path.basename(file_path), len(data))

    if not self.is_local:
        # Ceph object keys are relative paths without a leading "./".
        ceph_filename = os.path.relpath(file_path).replace("./", "")
        s3 = self.get_ceph_store()
        s3.store_document(data, ceph_filename)
        _LOGGER.info("Saved on CEPH at %s/%s%s", s3.bucket, s3.prefix, ceph_filename)
    else:
        with open(file_path, "w") as f:
            json.dump(data, f)
        _LOGGER.info("Saved locally at %s", file_path)

def load_data(self, file_path: Optional[Path] = None) -> Dict[str, Any]:
    """Load previously collected repo knowledge. If a repo was not inspected before, create its directory.

    Arguments:
        file_path {Optional[Path]} -- path to previously stored knowledge from
            inspected github repository. If None is passed, the used path will
            be :value:`~enums.StoragePath.DEFAULT`
    Returns:
        Dict[str, Any] -- previously collected knowledge.
            Empty dict if the knowledge does not exist.
    """
    if file_path is None:
        raise ValueError("Filepath has to be specified.")

    results = self.load_locally(file_path) if self.is_local else self.load_remotely(file_path)

    # Missing file is an expected condition: callers receive an empty dict.
    if results is None:
        _LOGGER.info("File does not exist.")
        return {}

    # Fix: the original format string had no argument, so the literal "%s"
    # was logged instead of the file path.
    _LOGGER.info("Data from file %s loaded", file_path)
    return results

@staticmethod
def load_locally(file_path: Path) -> Optional[Dict[str, Any]]:
"""Load knowledge file from local storage."""
Expand Down
41 changes: 39 additions & 2 deletions srcopsmetrics/kebechet_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from srcopsmetrics import utils
from srcopsmetrics.entities.issue import Issue
from srcopsmetrics.entities.pull_request import PullRequest
from srcopsmetrics.storage import KnowledgeStorage
from srcopsmetrics.entities.tools.storage import KnowledgeStorage

BOT_NAMES = {"sesheta"}

Expand Down Expand Up @@ -226,7 +226,44 @@ def evaluate_and_store_kebechet_metrics(self):
file_name += f"_{str(curr_day)}"
file_name += ".json"

KnowledgeStorage(is_local=self.is_local).save_knowledge(file_path=path.joinpath(file_name), data=stats)
KnowledgeStorage(is_local=self.is_local).save_data(file_path=path.joinpath(file_name), data=stats)

@staticmethod
def merge_kebechet_metrics_today(is_local: bool = False):
    """Merge all the collected metrics under given parent directory.

    Walks every per-repository daily stats file for today's date under
    _ROOT_DIR, sums the "daily" counters across repositories, takes the
    median of the per-repository median time-to-merge values, and stores
    the aggregate as a single overall_* file via KnowledgeStorage.

    NOTE(review): indentation was reconstructed from an indentation-less
    diff; the per-manager scoping of the aggregation below is assumed from
    file_name being manager-specific — confirm against the original file.
    """
    today = str(datetime.now().date())

    # Aggregated counters across all repositories for today's date.
    overall_today = {
        "created_pull_requests": 0,
        "rejected": 0,
        "rejected_by_kebechet_bot": 0,
        "rejected_by_other": 0,
        "merged": 0,
        "merged_by_kebechet_bot": 0,
        "merged_by_other": 0,
    }
    # Per-repository median time-to-merge values; reduced to one median below.
    ttms = []

    ks = KnowledgeStorage(is_local=is_local)
    for manager_name in ["update_manager"]:  # currently only this manager is merged

        file_name = f"kebechet_{manager_name}_{today}.json"

        # Every repository's daily stats file for this manager, anywhere under the root.
        for path in Path(Path(f"./{_ROOT_DIR}/")).rglob(f"*{file_name}"):
            # Skip a previously produced aggregate so it is not double-counted.
            if path.name == f"overall_{file_name}":
                continue
            data = ks.load_data(file_path=path)
            for k in data["daily"]:
                if k == "median_ttm":
                    ttms.append(data["daily"][k])
                else:
                    # Assumes every non-median key already exists in overall_today;
                    # an unknown key would raise KeyError — TODO confirm schema.
                    overall_today[k] += data["daily"][k]

        # Median of medians; np.nanmedian yields NaN for an empty/all-NaN input,
        # which is mapped to None so the JSON stays clean.
        ttm_median = np.nanmedian(ttms)
        overall_today["median_ttm"] = ttm_median if not np.isnan(ttm_median) else None

        path = Path(f"./{_ROOT_DIR}/overall_{file_name}")
        ks.save_data(path, overall_today)

def update_manager(self):
"""Calculate and store update manager metrics."""
Expand Down

0 comments on commit 01973ff

Please sign in to comment.