Skip to content

Commit ef88aac

Browse files
authored
Merge pull request #55 from github/copilot/fix-d2184ad1-8c09-4dca-99dd-740c3fae4b09
feat: Replace print() statements with proper logging framework
2 parents 88bf4a7 + 841ba11 commit ef88aac

File tree

7 files changed

+250
-47
lines changed

7 files changed

+250
-47
lines changed

config.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import os
1414
from os.path import dirname, join
1515

16+
from constants import DEFAULT_CHUNK_SIZE, MIN_CHUNK_SIZE
1617
from dotenv import load_dotenv
1718

1819

@@ -233,14 +234,14 @@ def get_env_vars(test: bool = False) -> EnvVars:
233234
rate_limit_bypass = get_bool_env_var("RATE_LIMIT_BYPASS", False)
234235

235236
# Get the chunk size for processing data in batches (for memory efficiency)
236-
chunk_size_str = os.getenv("CHUNK_SIZE", "100")
237+
chunk_size_str = os.getenv("CHUNK_SIZE", str(DEFAULT_CHUNK_SIZE))
237238
try:
238239
chunk_size = int(chunk_size_str)
239240
# Ensure a reasonable minimum chunk size
240-
chunk_size = max(chunk_size, 10)
241+
chunk_size = max(chunk_size, MIN_CHUNK_SIZE)
241242
except ValueError:
242-
# Default to 100 if not a valid integer
243-
chunk_size = 100
243+
# Default to DEFAULT_CHUNK_SIZE if not a valid integer
244+
chunk_size = DEFAULT_CHUNK_SIZE
244245

245246
return EnvVars(
246247
gh_app_id,

constants.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"""Constants used throughout the InnerSource measurement tool.

Shared literal values live here so that other modules can import a named
constant instead of repeating a magic number.
"""

# Upper bound on the number of characters allowed in a GitHub issue body.
GITHUB_ISSUE_BODY_MAX_CHARS = 65_535

# Batch size used when no (valid) CHUNK_SIZE is supplied.
DEFAULT_CHUNK_SIZE = 100

# Smallest chunk size that will be accepted; lower values are raised to this.
MIN_CHUNK_SIZE = 10

logging_config.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""Logging configuration and utilities for the InnerSource measurement tool.
2+
3+
This module provides centralized logging configuration to replace
4+
print statements with proper logging levels.
5+
"""
6+
7+
import logging
8+
import sys
9+
10+
11+
# Single source of truth for the application logger's name, shared by
# setup_logging() and get_logger() so the two can never drift apart.
_LOGGER_NAME = "innersource_measure"


def setup_logging(level: str = "INFO") -> logging.Logger:
    """Configure and return a logger for the InnerSource measurement tool.

    The first call attaches a single stdout handler with a timestamped
    format and disables propagation to the root logger. Later calls return
    the already-configured logger unchanged; in that case ``level`` is
    ignored.

    Args:
        level (str): Logging level name (DEBUG, INFO, WARNING, ERROR,
            CRITICAL), case-insensitive. Any value that does not resolve
            to a numeric logging level falls back to INFO.

    Returns:
        logging.Logger: Configured logger instance
    """
    logger = logging.getLogger(_LOGGER_NAME)

    # Avoid adding multiple handlers if logger is already configured
    if logger.handlers:
        return logger

    # A plain getattr() would also match non-level attributes of the logging
    # module (e.g. BASIC_FORMAT is a str), which setLevel() rejects with a
    # ValueError. Only accept attributes that are real numeric levels.
    numeric_level = getattr(logging, str(level).strip().upper(), None)
    if not isinstance(numeric_level, int):
        numeric_level = logging.INFO
    logger.setLevel(numeric_level)

    # Send records to stdout at the same threshold as the logger itself
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(numeric_level)
    console_handler.setFormatter(
        logging.Formatter(
            fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )
    logger.addHandler(console_handler)

    # Prevent propagation to avoid duplicate messages
    logger.propagate = False

    return logger


def get_logger() -> logging.Logger:
    """Get the application logger instance.

    This only looks the logger up by name; it does not attach handlers or
    set a level. Call setup_logging() first to configure output.

    Returns:
        logging.Logger: The logger for the application
    """
    return logging.getLogger(_LOGGER_NAME)

measure_innersource.py

Lines changed: 59 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
from auth import auth_to_github, get_github_app_installation_token
1414
from config import get_env_vars
15+
from constants import GITHUB_ISSUE_BODY_MAX_CHARS
16+
from logging_config import get_logger, setup_logging
1517
from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file
1618
from markdown_writer import write_to_markdown
1719

@@ -38,15 +40,20 @@ def evaluate_markdown_file_size(output_file: str) -> None:
3840
"""
3941
output_file_name = output_file if output_file else "innersource_report.md"
4042
file_name_without_extension = Path(output_file_name).stem
41-
max_char_count = 65535
43+
max_char_count = GITHUB_ISSUE_BODY_MAX_CHARS
44+
logger = get_logger()
45+
4246
if markdown_too_large_for_issue_body(output_file_name, max_char_count):
4347
split_markdown_file(output_file_name, max_char_count)
4448
shutil.move(output_file_name, f"{file_name_without_extension}_full.md")
4549
shutil.move(f"{file_name_without_extension}_0.md", output_file_name)
46-
print(
47-
f"The markdown file is too large for GitHub issue body and has been \
48-
split into multiple files. ie. {output_file_name}, {file_name_without_extension}_1.md, etc. \
49-
The full file is saved as {file_name_without_extension}_full.md\n"
50+
logger.info(
51+
"The markdown file is too large for GitHub issue body and has been "
52+
"split into multiple files. ie. %s, %s_1.md, etc. "
53+
"The full file is saved as %s_full.md\n",
54+
output_file_name,
55+
file_name_without_extension,
56+
file_name_without_extension,
5057
)
5158

5259

@@ -82,7 +89,9 @@ def main(): # pragma: no cover
8289
- Requires org-data.json file to be present in the current directory
8390
"""
8491

85-
print("Starting innersource-measure tool...")
92+
# Initialize logging
93+
logger = setup_logging()
94+
logger.info("Starting innersource-measure tool...")
8695

8796
# Get the environment variables for use in the script
8897
env_vars = get_env_vars()
@@ -117,50 +126,57 @@ def main(): # pragma: no cover
117126
# evaluate_markdown_file_size(output_file)
118127

119128
if github_connection:
120-
print("connection successful")
129+
logger.info("Connection to GitHub successful")
121130

122131
# fetch repository data
123-
print(f"Fetching repository data for {owner}/{repo}...")
132+
logger.info("Fetching repository data for %s/%s...", owner, repo)
124133
repo_data = github_connection.repository(owner, repo)
125134
if not repo_data:
126-
print(f"Unable to fetch repository {owner}/{repo} specified. Exiting.")
135+
logger.error(
136+
"Unable to fetch repository %s/%s specified. Exiting.", owner, repo
137+
)
127138
return
128139

129-
print(f"Repository {repo_data.full_name} found.")
140+
logger.info("Repository %s found.", repo_data.full_name)
130141

131142
# Read in the org data in org-data.json
132143
org_data = None
133144
org_data_path = Path("org-data.json")
134145
if org_data_path.exists():
135-
print("Reading in org data from org-data.json...")
146+
logger.info("Reading in org data from org-data.json...")
136147
with open(org_data_path, "r", encoding="utf-8") as org_file:
137148
org_data = json.load(org_file)
138-
print("Org data read successfully.")
149+
logger.info("Org data read successfully.")
139150
else:
140-
print("No org data found. InnerSource collaboration cannot be measured.")
151+
logger.warning(
152+
"No org data found. InnerSource collaboration cannot be measured."
153+
)
141154

142155
if org_data:
143-
print("Org data found. Measuring InnerSource collaboration...")
156+
logger.info("Org data found. Measuring InnerSource collaboration...")
144157
else:
145-
print("No org data found. InnerSource collaboration cannot be measured.")
158+
logger.error(
159+
"No org data found. InnerSource collaboration cannot be measured."
160+
)
146161
return
147162

148163
# Initialize contributor lists and team members list
149164
all_contributors = []
150165
innersource_contributors = []
151166
team_members_that_own_the_repo = []
152167

153-
print("Analyzing first commit...")
168+
logger.info("Analyzing first commit...")
154169
commits = repo_data.commits()
155170
# Paginate to the last page to get the oldest commit
156171
# commits is a GitHubIterator, so you can use .count to get total, then get the last one
157172
commit_list = list(commits)
158173
first_commit = commit_list[-1] # The last in the list is the oldest
159174
original_commit_author = first_commit.author.login
160175
original_commit_author_manager = org_data[original_commit_author]["manager"]
161-
print(
162-
f"Original commit author: {original_commit_author}, \
163-
with manager: {original_commit_author_manager}"
176+
logger.info(
177+
"Original commit author: %s, with manager: %s",
178+
original_commit_author,
179+
original_commit_author_manager,
164180
)
165181
# Create a dictionary mapping users to their managers for faster lookups
166182
user_to_manager = {}
@@ -195,11 +211,13 @@ def main(): # pragma: no cover
195211

196212
# Remove duplicates from the team members list
197213
team_members_that_own_the_repo = list(set(team_members_that_own_the_repo))
198-
print(f"Team members that own the repo: {team_members_that_own_the_repo}")
214+
logger.debug(
215+
"Team members that own the repo: %s", team_members_that_own_the_repo
216+
)
199217

200218
# For each contributor, check if they are in the team that owns the repo list
201219
# and if not, add them to the innersource contributors list
202-
print("Analyzing all contributors in the repository...")
220+
logger.info("Analyzing all contributors in the repository...")
203221
for contributor in repo_data.contributors():
204222
all_contributors.append(contributor.login)
205223
if (
@@ -208,25 +226,25 @@ def main(): # pragma: no cover
208226
):
209227
innersource_contributors.append(contributor.login)
210228

211-
print(f"All contributors: {all_contributors}")
212-
print(f"Innersource contributors: {innersource_contributors}")
229+
logger.debug("All contributors: %s", all_contributors)
230+
logger.debug("Innersource contributors: %s", innersource_contributors)
213231

214232
# Process data in chunks to avoid memory issues while maintaining performance
215233
chunk_size = env_vars.chunk_size
216-
print(f"Using chunk size of {chunk_size} for data processing")
234+
logger.info("Using chunk size of %s for data processing", chunk_size)
217235

218-
print("Pre-processing contribution data...")
236+
logger.info("Pre-processing contribution data...")
219237

220238
# Create mapping of commit authors to commit counts
221-
print("Processing commits...")
239+
logger.info("Processing commits...")
222240
commit_author_counts = {}
223241
for commit in commit_list:
224242
if hasattr(commit.author, "login"):
225243
author = commit.author.login
226244
commit_author_counts[author] = commit_author_counts.get(author, 0) + 1
227245

228246
# Process pull requests in chunks
229-
print("Processing pull requests in chunks...")
247+
logger.info("Processing pull requests in chunks...")
230248
pr_author_counts = {}
231249
total_prs = 0
232250

@@ -252,12 +270,12 @@ def main(): # pragma: no cover
252270
pr_author_counts[author] = pr_author_counts.get(author, 0) + 1
253271

254272
total_prs += len(chunk)
255-
print(f" Processed {total_prs} pull requests so far...")
273+
logger.debug(" Processed %s pull requests so far...", total_prs)
256274

257-
print(f"Found and processed {total_prs} pull requests")
275+
logger.info("Found and processed %s pull requests", total_prs)
258276

259277
# Process issues in chunks
260-
print("Processing issues in chunks...")
278+
logger.info("Processing issues in chunks...")
261279
issue_author_counts = {}
262280
total_issues = 0
263281

@@ -283,13 +301,13 @@ def main(): # pragma: no cover
283301
issue_author_counts[author] = issue_author_counts.get(author, 0) + 1
284302

285303
total_issues += len(chunk)
286-
print(f" Processed {total_issues} issues so far...")
304+
logger.debug(" Processed %s issues so far...", total_issues)
287305

288-
print(f"Found and processed {total_issues} issues")
306+
logger.info("Found and processed %s issues", total_issues)
289307

290308
# Count contributions for each innersource contributor using precompiled dictionaries
291309
innersource_contribution_counts = {}
292-
print("Counting contributions for each innersource contributor...")
310+
logger.info("Counting contributions for each innersource contributor...")
293311
for contributor in innersource_contributors:
294312
# Initialize counter for this contributor
295313
innersource_contribution_counts[contributor] = 0
@@ -309,13 +327,13 @@ def main(): # pragma: no cover
309327
contributor, 0
310328
)
311329

312-
print("Innersource contribution counts:")
330+
logger.debug("Innersource contribution counts:")
313331
for contributor, count in innersource_contribution_counts.items():
314-
print(f" {contributor}: {count} contributions")
332+
logger.debug(" %s: %s contributions", contributor, count)
315333

316334
# Count contributions for each team member using precompiled dictionaries
317335
team_member_contribution_counts = {}
318-
print("Counting contributions for each team member that owns the repo...")
336+
logger.info("Counting contributions for each team member that owns the repo...")
319337
for member in team_members_that_own_the_repo:
320338
# Initialize counter for this team member
321339
team_member_contribution_counts[member] = 0
@@ -333,10 +351,10 @@ def main(): # pragma: no cover
333351
member, 0
334352
)
335353

336-
print("Team member contribution counts:")
354+
logger.debug("Team member contribution counts:")
337355
for member, count in team_member_contribution_counts.items():
338356
if count > 0:
339-
print(f" {member}: {count} contributions")
357+
logger.debug(" %s: %s contributions", member, count)
340358

341359
# Calculate the ratio of innersource contributions to total contributions
342360
total_contributions = sum(innersource_contribution_counts.values()) + sum(
@@ -349,7 +367,7 @@ def main(): # pragma: no cover
349367
else:
350368
innersource_ratio = 0
351369

352-
print(f"Innersource contribution ratio: {innersource_ratio:.2%}")
370+
logger.info("Innersource contribution ratio: %.2f%%", innersource_ratio * 100)
353371

354372
# Write the results to a markdown file using report_title and output_file
355373
write_to_markdown(
@@ -367,10 +385,10 @@ def main(): # pragma: no cover
367385
)
368386

369387
evaluate_markdown_file_size(output_file)
370-
print(f"InnerSource report written to {output_file}")
388+
logger.info("InnerSource report written to %s", output_file)
371389

372390
else:
373-
print("Failed to connect to GitHub. Exiting.")
391+
logger.error("Failed to connect to GitHub. Exiting.")
374392

375393

376394
if __name__ == "__main__":

test_constants.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""Tests for constants.py"""
2+
3+
import unittest
4+
5+
from constants import DEFAULT_CHUNK_SIZE, GITHUB_ISSUE_BODY_MAX_CHARS, MIN_CHUNK_SIZE
6+
7+
8+
class TestConstants(unittest.TestCase):
    """Test cases for constants"""

    # unittest assertion methods are used instead of bare ``assert`` so the
    # checks still run under ``python -O`` and report both operands on failure.

    def test_github_issue_body_max_chars(self):
        """Test that the GitHub issue body limit constant is correct"""
        self.assertEqual(GITHUB_ISSUE_BODY_MAX_CHARS, 65535)

    def test_default_chunk_size(self):
        """Test that the default chunk size constant is correct"""
        self.assertEqual(DEFAULT_CHUNK_SIZE, 100)

    def test_min_chunk_size(self):
        """Test that the minimum chunk size constant is correct"""
        self.assertEqual(MIN_CHUNK_SIZE, 10)

    def test_constants_are_integers(self):
        """Test that all constants are integers"""
        constants_under_test = {
            "GITHUB_ISSUE_BODY_MAX_CHARS": GITHUB_ISSUE_BODY_MAX_CHARS,
            "DEFAULT_CHUNK_SIZE": DEFAULT_CHUNK_SIZE,
            "MIN_CHUNK_SIZE": MIN_CHUNK_SIZE,
        }
        for name, value in constants_under_test.items():
            # subTest names the offending constant instead of stopping at
            # the first failure.
            with self.subTest(constant=name):
                self.assertIsInstance(value, int)

    def test_chunk_size_relationships(self):
        """Test that chunk size constants have correct relationships"""
        self.assertLessEqual(MIN_CHUNK_SIZE, DEFAULT_CHUNK_SIZE)
        self.assertGreater(MIN_CHUNK_SIZE, 0)
        self.assertGreater(DEFAULT_CHUNK_SIZE, 0)

0 commit comments

Comments
 (0)