From 37f7346266221822a32931ff0eff994e20088177 Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Mon, 29 Sep 2025 17:11:53 -0700
Subject: [PATCH 1/9] fix: remove dead code

Signed-off-by: Zack Koppert <zkoppert@github.com>
---
 measure_innersource.py | 2 --
 1 file changed, 2 deletions(-)
diff --git a/measure_innersource.py b/measure_innersource.py
index 6d0d933..693a48b 100644
--- a/measure_innersource.py
+++ b/measure_innersource.py
@@ -123,8 +123,6 @@ def main():  # pragma: no cover
             ghe, gh_app_id, gh_app_private_key_bytes, gh_app_installation_id
         )
 
-    # evaluate_markdown_file_size(output_file)
-
     if github_connection:
         logger.info("Connection to GitHub successful")
 

From 204b3f882dce270173557ff39b16bf51b62c042b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Sep 2025 00:16:15 +0000
Subject: [PATCH 2/9] Initial plan


From 56128798cd15111b9bdb0350b7adfcdb5718457c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Sep 2025 00:38:04 +0000
Subject: [PATCH 3/9] Add OWNING_TEAM environment variable for custom team
 ownership

Co-authored-by: zkoppert <6935431+zkoppert@users.noreply.github.com>
---
 README.md                  |  47 ++++++++++++++
 config.py                  |  19 +++++-
 markdown_writer.py         |  13 +++-
 measure_innersource.py     | 120 ++++++++++++++++++----------------
 test_config_owning_team.py | 128 +++++++++++++++++++++++++++++++++++++
 5 files changed, 269 insertions(+), 58 deletions(-)
 create mode 100644 test_config_owning_team.py

diff --git a/README.md b/README.md
index 0dd80c0..0f43a43 100644
--- a/README.md
+++ b/README.md
@@ -214,6 +214,17 @@ The tool determines team boundaries using this algorithm:
    - Are managers of anyone in the team
 4. **Classify Contributors**: Any contributor not in the team list is considered an InnerSource contributor
 
+**Overriding Team Determination**: You can override this algorithm by setting the `OWNING_TEAM` environment variable with a comma-separated list of GitHub usernames. When set, these users will be considered the owning team, and the algorithm above will be bypassed. This is useful when:
+- The first commit author doesn't accurately represent the current owning team
+- The org chart doesn't align with actual repository ownership
+- You want to explicitly define team boundaries
+
+Example:
+```yaml
+env:
+  OWNING_TEAM: "alice,bob,charlie"
+```
+
 #### Example Team Boundary Calculation
 
 <details>
@@ -517,6 +528,40 @@ jobs:
           CHUNK_SIZE: "100"
 ```
 
+#### Using Custom Team Ownership
+
+To override the automatic team determination and explicitly specify the owning team:
+
+```yaml
+name: InnerSource with Custom Team
+
+on:
+  schedule:
+    - cron: "0 0 * * 0"
+  workflow_dispatch:
+
+jobs:
+  measure-innersource:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Measure InnerSource
+        uses: github/measure-innersource@v1
+        env:
+          REPOSITORY: "org/repository"
+          GH_TOKEN: ${{ secrets.GH_TOKEN }}
+          OWNING_TEAM: "alice,bob,charlie,david"
+          REPORT_TITLE: "InnerSource Report with Custom Team"
+```
+
+This is useful when:
+- The first commit author doesn't represent the current team
+- The org chart doesn't align with actual ownership
+- You want to explicitly define team boundaries
+
+
 ### Configuration
 
 Below are the allowed configuration options:
@@ -550,6 +595,7 @@ This action can be configured to authenticate with GitHub App Installation or Pe
 | `REPORT_TITLE`      | False    | `"InnerSource Report"`  | Title to have on the report issue.                                                                                                    |
 | `REPOSITORY`        | True     | `""`                    | The name of the repository you are trying to measure. Format `owner/repo` ie. `github/measure-innersource`                            |
 | `CHUNK_SIZE`        | False    | `100`                   | Number of items to process at once when fetching data. Increasing can improve performance but uses more memory. Minimum value is 10.  |
+| `OWNING_TEAM`       | False    | `""`                    | Comma-separated list of GitHub usernames that own the repository. Overrides the built-in team determination algorithm. Example: `alice,bob,charlie` |
 
 ## Understanding the Results
 
@@ -709,6 +755,7 @@ The tool automatically splits large files, but you can:
 - [ ] `OUTPUT_FILE` (default: "innersource_report.md")
 - [ ] `CHUNK_SIZE` (default: 100, minimum: 10)
 - [ ] `RATE_LIMIT_BYPASS` (default: false)
+- [ ] `OWNING_TEAM` (comma-separated usernames to override team determination)
 
 #### File Requirements Checklist
 
diff --git a/config.py b/config.py
index 4e89356..0c818d3 100644
--- a/config.py
+++ b/config.py
@@ -36,6 +36,7 @@ class EnvVars:
         output_file (str): The name of the file to write the report to
         rate_limit_bypass (bool): If set to TRUE, bypass the rate limit for the GitHub API
         chunk_size (int): The number of items to process at once when fetching data (for memory efficiency)
+        owning_team (list[str] | None): Optional list of usernames that comprise the owning team (overrides algorithm)
     """
 
     def __init__(
@@ -52,6 +53,7 @@ def __init__(
         output_file: str,
         rate_limit_bypass: bool = False,
         chunk_size: int = 100,
+        owning_team: list[str] | None = None,
     ):
         self.gh_app_id = gh_app_id
         self.gh_app_installation_id = gh_app_installation_id
@@ -65,6 +67,7 @@ def __init__(
         self.output_file = output_file
         self.rate_limit_bypass = rate_limit_bypass
         self.chunk_size = chunk_size
+        self.owning_team = owning_team
 
     def __repr__(self):
         return (
@@ -80,7 +83,8 @@ def __repr__(self):
             f"{self.repo},"
             f"{self.output_file},"
             f"{self.rate_limit_bypass},"
-            f"{self.chunk_size}"
+            f"{self.chunk_size},"
+            f"{self.owning_team}"
             ")"
         )
 
@@ -183,6 +187,8 @@ def get_env_vars(test: bool = False) -> EnvVars:
         - OUTPUT_FILE: Output filename (default: "innersource_report.md")
         - RATE_LIMIT_BYPASS: Set to "true" to bypass rate limiting
         - CHUNK_SIZE: Number of items to process at once (default: 100, minimum: 10)
+        - OWNING_TEAM: Comma-separated list of GitHub usernames that own the repository
+                      (overrides the built-in team determination algorithm)
 
     Examples:
         >>> os.environ['GH_TOKEN'] = 'ghp_...'
@@ -243,6 +249,16 @@ def get_env_vars(test: bool = False) -> EnvVars:
         # Default to DEFAULT_CHUNK_SIZE if not a valid integer
         chunk_size = DEFAULT_CHUNK_SIZE
 
+    # Get optional owning team override (comma-separated list of usernames)
+    owning_team_str = os.getenv("OWNING_TEAM", "").strip()
+    owning_team = None
+    if owning_team_str:
+        # Parse comma-separated list and strip whitespace from each username
+        owning_team = [username.strip() for username in owning_team_str.split(",") if username.strip()]
+        # If the list is empty after stripping, set to None
+        if not owning_team:
+            owning_team = None
+
     return EnvVars(
         gh_app_id,
         gh_app_installation_id,
@@ -256,4 +272,5 @@ def get_env_vars(test: bool = False) -> EnvVars:
         output_file,
         rate_limit_bypass,
         chunk_size,
+        owning_team,
     )
diff --git a/markdown_writer.py b/markdown_writer.py
index c3cfd88..d48a0ef 100644
--- a/markdown_writer.py
+++ b/markdown_writer.py
@@ -138,9 +138,16 @@ def write_to_markdown(
             return
         report_file.write(f"## Repository: {repo_data.full_name}\n\n")
         report_file.write(f"### InnerSource Ratio: {innersource_ratio:.2%}\n\n")
-        report_file.write(
-            f"### Original Commit Author: {original_commit_author} (Manager: {original_commit_author_manager})\n\n"
-        )
+        if original_commit_author and original_commit_author_manager:
+            report_file.write(
+                f"### Original Commit Author: {original_commit_author} (Manager: {original_commit_author_manager})\n\n"
+            )
+        elif original_commit_author:
+            report_file.write(
+                f"### Original Commit Author: {original_commit_author}\n\n"
+            )
+        else:
+            report_file.write("### Team ownership is explicitly specified\n\n")
         report_file.write("## Team Members that Own the Repo:\n")
         if team_members_that_own_the_repo:
             for member in team_members_that_own_the_repo:
diff --git a/measure_innersource.py b/measure_innersource.py
index 693a48b..8cde7be 100644
--- a/measure_innersource.py
+++ b/measure_innersource.py
@@ -100,6 +100,7 @@ def main():  # pragma: no cover
     repo = env_vars.repo
     report_title = env_vars.report_title
     output_file = env_vars.output_file
+    owning_team = env_vars.owning_team
     # rate_limit_bypass = env_vars.rate_limit_bypass
 
     ghe = env_vars.ghe
@@ -163,68 +164,79 @@ def main():  # pragma: no cover
         innersource_contributors = []
         team_members_that_own_the_repo = []
 
-        logger.info("Analyzing first commit...")
+        # Get all commits for contribution counting (needed regardless of team determination method)
+        logger.info("Fetching commits...")
         commits = repo_data.commits()
-        # Paginate to the last page to get the oldest commit
-        # commits is a GitHubIterator, so you can use .count to get total,
-        # then get the last one
         commit_list = list(commits)
-        first_commit = commit_list[-1]  # The last in the list is the oldest
-        original_commit_author = first_commit.author.login
 
-        # Check if original commit author exists in org chart
-        if original_commit_author not in org_data:
-            logger.warning(
-                "Original commit author '%s' not found in org chart. "
-                "Cannot determine team boundaries for InnerSource "
-                "measurement.",
+        # Check if owning team is explicitly specified
+        if owning_team:
+            logger.info("Using explicitly specified owning team: %s", owning_team)
+            team_members_that_own_the_repo = owning_team
+            # Set variables to None since they're not used when team is specified
+            original_commit_author = None
+            original_commit_author_manager = None
+        else:
+            logger.info("Analyzing first commit...")
+            # Paginate to the last page to get the oldest commit
+            # commits is a GitHubIterator, so you can use .count to get total,
+            # then get the last one
+            first_commit = commit_list[-1]  # The last in the list is the oldest
+            original_commit_author = first_commit.author.login
+
+            # Check if original commit author exists in org chart
+            if original_commit_author not in org_data:
+                logger.warning(
+                    "Original commit author '%s' not found in org chart. "
+                    "Cannot determine team boundaries for InnerSource "
+                    "measurement.",
+                    original_commit_author,
+                )
+                return
+
+            original_commit_author_manager = org_data[original_commit_author]["manager"]
+            logger.info(
+                "Original commit author: %s, with manager: %s",
                 original_commit_author,
+                original_commit_author_manager,
             )
-            return
+            # Create a dictionary mapping users to their managers for faster lookups
+            user_to_manager = {}
+            manager_to_reports = {}
+
+            for user, data in org_data.items():
+                manager = data["manager"]
+                user_to_manager[user] = manager
+
+                # Also create reverse mapping of manager to direct reports
+                if manager not in manager_to_reports:
+                    manager_to_reports[manager] = []
+                manager_to_reports[manager].append(user)
+
+            # Find all users that report up to the same manager as the original commit author
+            team_members_that_own_the_repo.append(original_commit_author)
+            team_members_that_own_the_repo.append(original_commit_author_manager)
+
+            # Add all users reporting to the same manager
+            if original_commit_author_manager in manager_to_reports:
+                team_members_that_own_the_repo.extend(
+                    manager_to_reports[original_commit_author_manager]
+                )
 
-        original_commit_author_manager = org_data[original_commit_author]["manager"]
-        logger.info(
-            "Original commit author: %s, with manager: %s",
-            original_commit_author,
-            original_commit_author_manager,
-        )
-        # Create a dictionary mapping users to their managers for faster lookups
-        user_to_manager = {}
-        manager_to_reports = {}
-
-        for user, data in org_data.items():
-            manager = data["manager"]
-            user_to_manager[user] = manager
-
-            # Also create reverse mapping of manager to direct reports
-            if manager not in manager_to_reports:
-                manager_to_reports[manager] = []
-            manager_to_reports[manager].append(user)
-
-        # Find all users that report up to the same manager as the original commit author
-        team_members_that_own_the_repo.append(original_commit_author)
-        team_members_that_own_the_repo.append(original_commit_author_manager)
-
-        # Add all users reporting to the same manager
-        if original_commit_author_manager in manager_to_reports:
-            team_members_that_own_the_repo.extend(
-                manager_to_reports[original_commit_author_manager]
+            # Add everyone that has one of the team members listed as their manager
+            for user, manager in user_to_manager.items():
+                if (
+                    manager in team_members_that_own_the_repo
+                    and user not in team_members_that_own_the_repo
+                ):
+                    team_members_that_own_the_repo.append(user)
+
+            # Remove duplicates from the team members list
+            team_members_that_own_the_repo = list(set(team_members_that_own_the_repo))
+            logger.debug(
+                "Team members that own the repo: %s", team_members_that_own_the_repo
             )
 
-        # Add everyone that has one of the team members listed as their manager
-        for user, manager in user_to_manager.items():
-            if (
-                manager in team_members_that_own_the_repo
-                and user not in team_members_that_own_the_repo
-            ):
-                team_members_that_own_the_repo.append(user)
-
-        # Remove duplicates from the team members list
-        team_members_that_own_the_repo = list(set(team_members_that_own_the_repo))
-        logger.debug(
-            "Team members that own the repo: %s", team_members_that_own_the_repo
-        )
-
         # For each contributor, check if they are in the team that owns the repo list
         # and if not, add them to the innersource contributors list
         logger.info("Analyzing all contributors in the repository...")
diff --git a/test_config_owning_team.py b/test_config_owning_team.py
new file mode 100644
index 0000000..2717ece
--- /dev/null
+++ b/test_config_owning_team.py
@@ -0,0 +1,128 @@
+"""Unit tests for OWNING_TEAM environment variable in config module."""
+
+import os
+import unittest
+from unittest.mock import patch
+
+from config import get_env_vars
+
+
+class TestOwningTeamEnvVar(unittest.TestCase):
+    """Test suite for the OWNING_TEAM environment variable."""
+
+    def setUp(self):
+        """Clean up environment before each test."""
+        env_keys = [
+            "GH_APP_ID",
+            "GH_APP_INSTALLATION_ID",
+            "GH_APP_PRIVATE_KEY",
+            "GH_TOKEN",
+            "GH_ENTERPRISE_URL",
+            "OUTPUT_FILE",
+            "REPORT_TITLE",
+            "RATE_LIMIT_BYPASS",
+            "REPOSITORY",
+            "OWNING_TEAM",
+        ]
+        for key in env_keys:
+            if key in os.environ:
+                del os.environ[key]
+
+    @patch.dict(
+        os.environ,
+        {
+            "GH_TOKEN": "test_token",
+            "REPOSITORY": "owner/repo",
+            "OWNING_TEAM": "alice,bob,charlie",
+        },
+    )
+    def test_owning_team_parsed_correctly(self):
+        """Test that OWNING_TEAM is parsed correctly as a list."""
+        env_vars = get_env_vars(test=True)
+        self.assertIsNotNone(env_vars.owning_team)
+        self.assertEqual(env_vars.owning_team, ["alice", "bob", "charlie"])
+
+    @patch.dict(
+        os.environ,
+        {
+            "GH_TOKEN": "test_token",
+            "REPOSITORY": "owner/repo",
+            "OWNING_TEAM": "alice, bob, charlie",
+        },
+    )
+    def test_owning_team_with_spaces(self):
+        """Test that OWNING_TEAM handles spaces correctly."""
+        env_vars = get_env_vars(test=True)
+        self.assertIsNotNone(env_vars.owning_team)
+        self.assertEqual(env_vars.owning_team, ["alice", "bob", "charlie"])
+
+    @patch.dict(
+        os.environ,
+        {
+            "GH_TOKEN": "test_token",
+            "REPOSITORY": "owner/repo",
+            "OWNING_TEAM": "single-user",
+        },
+    )
+    def test_owning_team_single_user(self):
+        """Test that OWNING_TEAM works with a single user."""
+        env_vars = get_env_vars(test=True)
+        self.assertIsNotNone(env_vars.owning_team)
+        self.assertEqual(env_vars.owning_team, ["single-user"])
+
+    @patch.dict(
+        os.environ,
+        {
+            "GH_TOKEN": "test_token",
+            "REPOSITORY": "owner/repo",
+            "OWNING_TEAM": "",
+        },
+    )
+    def test_owning_team_empty_string(self):
+        """Test that empty OWNING_TEAM is treated as None."""
+        env_vars = get_env_vars(test=True)
+        self.assertIsNone(env_vars.owning_team)
+
+    @patch.dict(
+        os.environ,
+        {
+            "GH_TOKEN": "test_token",
+            "REPOSITORY": "owner/repo",
+        },
+    )
+    def test_owning_team_not_set(self):
+        """Test that missing OWNING_TEAM is None."""
+        env_vars = get_env_vars(test=True)
+        self.assertIsNone(env_vars.owning_team)
+
+    @patch.dict(
+        os.environ,
+        {
+            "GH_TOKEN": "test_token",
+            "REPOSITORY": "owner/repo",
+            "OWNING_TEAM": " , , ",
+        },
+    )
+    def test_owning_team_only_spaces_and_commas(self):
+        """Test that OWNING_TEAM with only spaces and commas is treated as None."""
+        env_vars = get_env_vars(test=True)
+        self.assertIsNone(env_vars.owning_team)
+
+    @patch.dict(
+        os.environ,
+        {
+            "GH_TOKEN": "test_token",
+            "REPOSITORY": "owner/repo",
+            "OWNING_TEAM": "alice,,bob,,,charlie",
+        },
+    )
+    def test_owning_team_with_empty_entries(self):
+        """Test that OWNING_TEAM handles empty entries correctly."""
+        env_vars = get_env_vars(test=True)
+        self.assertIsNotNone(env_vars.owning_team)
+        # Empty entries should be filtered out
+        self.assertEqual(env_vars.owning_team, ["alice", "bob", "charlie"])
+
+
+if __name__ == "__main__":
+    unittest.main()

From a033b83980f40c8ab265f05625565f0dcb08e7dc Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Sep 2025 00:39:31 +0000
Subject: [PATCH 4/9] Add comprehensive tests for OWNING_TEAM feature and
 update .env-example

Co-authored-by: zkoppert <6935431+zkoppert@users.noreply.github.com>
---
 .env-example                        |  1 +
 test_markdown_writer_owning_team.py | 87 +++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 test_markdown_writer_owning_team.py

diff --git a/.env-example b/.env-example
index c05a496..84e3169 100644
--- a/.env-example
+++ b/.env-example
@@ -6,3 +6,4 @@ GH_ENTERPRISE_URL = ""
 GH_TOKEN = ""
 OUTPUT_FILE = ""
 REPORT_TITLE = ""
+OWNING_TEAM = ""
diff --git a/test_markdown_writer_owning_team.py b/test_markdown_writer_owning_team.py
new file mode 100644
index 0000000..16c3bf0
--- /dev/null
+++ b/test_markdown_writer_owning_team.py
@@ -0,0 +1,87 @@
+"""Unit tests for markdown_writer with owning_team override."""
+
+from unittest.mock import MagicMock
+
+from markdown_writer import write_to_markdown
+
+
+def test_write_to_markdown_with_owning_team_override(tmp_path):
+    """Test markdown output when owning team is explicitly specified."""
+    output_file = tmp_path / "test_owning_team.md"
+
+    # Mock repository object
+    mock_repo = MagicMock()
+    mock_repo.full_name = "org/repo"
+
+    # Call with None for original_commit_author and manager (simulating owning_team override)
+    write_to_markdown(
+        report_title="Test Report with Owning Team",
+        output_file=str(output_file),
+        innersource_ratio=0.35,
+        repo_data=mock_repo,
+        original_commit_author=None,
+        original_commit_author_manager=None,
+        team_members_that_own_the_repo=["alice", "bob", "charlie"],
+        all_contributors=["alice", "bob", "charlie", "dave", "eve"],
+        innersource_contributors=["dave", "eve"],
+        innersource_contribution_counts={"dave": 15, "eve": 8},
+        team_member_contribution_counts={"alice": 25, "bob": 12, "charlie": 5},
+    )
+
+    # Verify the file was created
+    assert output_file.exists()
+
+    # Read the content
+    content = output_file.read_text(encoding="utf-8")
+
+    # Verify the content includes the expected sections
+    assert "# Test Report with Owning Team" in content
+    assert "## Repository: org/repo" in content
+    assert "### InnerSource Ratio: 35.00%" in content
+    assert "### Team ownership is explicitly specified" in content
+    # Should NOT contain original commit author and manager
+    assert "Original Commit Author:" not in content
+    assert "Manager:" not in content
+    # Should contain team members
+    assert "## Team Members that Own the Repo:" in content
+    assert "- alice" in content
+    assert "- bob" in content
+    assert "- charlie" in content
+
+
+def test_write_to_markdown_with_original_author(tmp_path):
+    """Test markdown output when team is determined by algorithm."""
+    output_file = tmp_path / "test_algorithm.md"
+
+    # Mock repository object
+    mock_repo = MagicMock()
+    mock_repo.full_name = "org/repo"
+
+    # Call with original_commit_author and manager (normal algorithm mode)
+    write_to_markdown(
+        report_title="Test Report with Algorithm",
+        output_file=str(output_file),
+        innersource_ratio=0.35,
+        repo_data=mock_repo,
+        original_commit_author="alice",
+        original_commit_author_manager="manager1",
+        team_members_that_own_the_repo=["alice", "bob", "charlie"],
+        all_contributors=["alice", "bob", "charlie", "dave", "eve"],
+        innersource_contributors=["dave", "eve"],
+        innersource_contribution_counts={"dave": 15, "eve": 8},
+        team_member_contribution_counts={"alice": 25, "bob": 12, "charlie": 5},
+    )
+
+    # Verify the file was created
+    assert output_file.exists()
+
+    # Read the content
+    content = output_file.read_text(encoding="utf-8")
+
+    # Verify the content includes the expected sections
+    assert "# Test Report with Algorithm" in content
+    assert "## Repository: org/repo" in content
+    assert "### InnerSource Ratio: 35.00%" in content
+    assert "### Original Commit Author: alice (Manager: manager1)" in content
+    # Should NOT contain the override message
+    assert "Team ownership is explicitly specified" not in content

From 2b7be5d10fce550dd797878c9bb736b5d45967ba Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Tue, 30 Sep 2025 11:26:20 -0700
Subject: [PATCH 5/9] fix: linting fix for readability

Signed-off-by: Zack Koppert <zkoppert@github.com>
---
 config.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/config.py b/config.py
index 0c818d3..e0b0a8e 100644
--- a/config.py
+++ b/config.py
@@ -254,7 +254,11 @@ def get_env_vars(test: bool = False) -> EnvVars:
     owning_team = None
     if owning_team_str:
         # Parse comma-separated list and strip whitespace from each username
-        owning_team = [username.strip() for username in owning_team_str.split(",") if username.strip()]
+        owning_team = [
+            username.strip()
+            for username in owning_team_str.split(",")
+            if username.strip()
+        ]
         # If the list is empty after stripping, set to None
         if not owning_team:
             owning_team = None

From 3a45a2dfaa6a2f4ffe738adf2e8022f164d7cfce Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Tue, 30 Sep 2025 14:42:30 -0700
Subject: [PATCH 6/9] ix: resolve infinite loop in
 test_main_missing_user_in_org_chart

This commit fixes an issue in the test where the mock commits setup was causing
an infinite loop. The changes include:

- Converting mock commits to a proper iterator to match real GitHub API behavior
- Fixing environment variable field names to align with actual main function usage
- Adding chunk_size to avoid issues in processing loops
- Adding additional assertions to verify early function exit

Tests that were previously hanging now complete successfully.

Signed-off-by: Zack Koppert <zkoppert@github.com>
---
 test_measure_innersource.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/test_measure_innersource.py b/test_measure_innersource.py
index adc22e1..92c9854 100644
--- a/test_measure_innersource.py
+++ b/test_measure_innersource.py
@@ -99,23 +99,26 @@ def test_main_missing_user_in_org_chart(tmp_path, monkeypatch):
     mock_repo.full_name = "test/repo"
 
     # Mock commits to return our mock commit as the first/oldest commit
-    mock_repo.commits.return_value = [mock_commit]
+    # Create a proper iterator that will convert to a list with one item
+    mock_repo.commits.return_value = iter([mock_commit])
 
     mock_github = MagicMock()
     mock_github.repository.return_value = mock_repo
 
     # Mock environment variables
     mock_env_vars = MagicMock()
-    mock_env_vars.github_token = "fake_token"
-    mock_env_vars.github_enterprise_hostname = None
-    mock_env_vars.github_org = "test"
-    mock_env_vars.github_repo = "repo"
+    mock_env_vars.gh_token = "fake_token"  # Change to match field name in main()
+    mock_env_vars.ghe = None  # Change to match field name in main()
+    mock_env_vars.owner = "test"  # Change to match field name in main()
+    mock_env_vars.repo = "repo"  # Change to match field name in main()
     mock_env_vars.gh_app_id = None
     mock_env_vars.gh_app_installation_id = None
     mock_env_vars.gh_app_private_key_bytes = None
     mock_env_vars.gh_app_enterprise_only = False
     mock_env_vars.report_title = "Test Report"
     mock_env_vars.output_file = "test_output.md"
+    mock_env_vars.owning_team = None  # Add owning_team field that's used in main()
+    mock_env_vars.chunk_size = 100  # Add chunk_size to avoid issues in processing loops
 
     # Apply mocks
     with patch("measure_innersource.get_env_vars", return_value=mock_env_vars), patch(
@@ -159,6 +162,13 @@ def test_main_missing_user_in_org_chart(tmp_path, monkeypatch):
                 for msg in info_calls
             )
 
+            # Verify that the function exited without attempting to process pull requests or issues
+            # by checking that "Processing pull requests" message is not in the logs
+            assert not any(
+                isinstance(msg, str) and "Processing pull requests" in msg
+                for msg in info_calls
+            )
+
 
 def test_contributors_missing_from_org_chart_excluded(tmp_path, monkeypatch):
     """Test that contributors missing from org chart are excluded from
@@ -189,7 +199,7 @@ def test_contributors_missing_from_org_chart_excluded(tmp_path, monkeypatch):
 
     mock_repo = MagicMock()
     mock_repo.full_name = "test/repo"
-    mock_repo.commits.return_value = [mock_commit]
+    mock_repo.commits.return_value = iter([mock_commit])
     mock_repo.contributors.return_value = [mock_contributor1]
     # Mock empty pull requests and issues to avoid infinite loops
     mock_repo.pull_requests.return_value = iter([])
@@ -200,16 +210,17 @@ def test_contributors_missing_from_org_chart_excluded(tmp_path, monkeypatch):
 
     # Mock environment variables
     mock_env_vars = MagicMock()
-    mock_env_vars.github_token = "fake_token"
-    mock_env_vars.github_enterprise_hostname = None
-    mock_env_vars.github_org = "test"
-    mock_env_vars.github_repo = "repo"
+    mock_env_vars.gh_token = "fake_token"  # Change to match field name in main()
+    mock_env_vars.ghe = None  # Change to match field name in main()
+    mock_env_vars.owner = "test"  # Change to match field name in main()
+    mock_env_vars.repo = "repo"  # Change to match field name in main()
     mock_env_vars.gh_app_id = None
     mock_env_vars.gh_app_installation_id = None
     mock_env_vars.gh_app_private_key_bytes = None
     mock_env_vars.gh_app_enterprise_only = False
     mock_env_vars.report_title = "Test Report"
     mock_env_vars.output_file = "test_output.md"
+    mock_env_vars.owning_team = None  # Add owning_team field that's used in main()
     mock_env_vars.chunk_size = 100
 
     # Apply mocks

From ec1c43488780e9136973bc48eb78bdc7c1ea5920 Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Tue, 30 Sep 2025 14:47:58 -0700
Subject: [PATCH 7/9] test: add coverage for original commit author without
 manager case

Added test_markdown_writer_original_author_only.py to cover the specific case
where an original commit author is provided without a manager, ensuring that
the appropriate heading is written to the report markdown file.

Signed-off-by: Zack Koppert <zkoppert@github.com>
---
 test_markdown_writer_original_author_only.py | 59 ++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 test_markdown_writer_original_author_only.py

diff --git a/test_markdown_writer_original_author_only.py b/test_markdown_writer_original_author_only.py
new file mode 100644
index 0000000..8782993
--- /dev/null
+++ b/test_markdown_writer_original_author_only.py
@@ -0,0 +1,59 @@
+"""
+Tests for markdown_writer.py specifically for when only original_commit_author is provided
+"""
+
+import os
+import tempfile
+
+from markdown_writer import write_to_markdown
+
+
+def test_write_to_markdown_original_author_only():
+    """
+    Test writing markdown when only original_commit_author is provided without manager
+    """
+    with tempfile.NamedTemporaryFile(suffix=".md", delete=False) as temp_file:
+        temp_file_path = temp_file.name
+
+    try:
+        # Mock repo data object
+        class MockRepo:
+            """
+            Mock repository data class for testing
+            """
+
+            @property
+            def full_name(self):
+                """
+                Returns the full name of the mock repository
+                """
+                return "owner/repo"
+
+        write_to_markdown(
+            report_title="Test Report",
+            output_file=temp_file_path,
+            innersource_ratio=0.5,
+            repo_data=MockRepo(),
+            original_commit_author="author",  # Only provide the author, not the manager
+            original_commit_author_manager="",  # Empty manager
+            team_members_that_own_the_repo=["team_member1"],
+            all_contributors=["contributor1"],
+            innersource_contributors=["contributor1"],
+            innersource_contribution_counts={"contributor1": 5},
+            team_member_contribution_counts={"team_member1": 10},
+        )
+
+        # Read the generated file
+        with open(temp_file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+
+        # Check that the original author is included without manager
+        assert "### Original Commit Author: author\n" in content
+
+        # Make sure the manager version is not included
+        assert "Manager:" not in content
+
+    finally:
+        # Clean up the temporary file
+        if os.path.exists(temp_file_path):
+            os.remove(temp_file_path)

From 2d429f4d619fbd57a968314b7c82506bc0e67609 Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Tue, 30 Sep 2025 14:54:39 -0700
Subject: [PATCH 8/9] refactor: add explicit team_ownership flag for better
 edge case handling

This change improves the clarity and maintainability of the codebase by:

- Adding an explicit `team_ownership_explicitly_specified` parameter to track when
  team ownership is provided explicitly rather than derived from commit history
- Updating logic in markdown_writer.py to prioritize this flag when determining
  report headings
- Adding a fallback message for cases where neither original author nor explicit
  team ownership is specified
- Setting flag value in measure_innersource.py based on owning_team parameter
- Adding comprehensive tests for the new parameter and updated behavior

This change makes the relationship between owning_team and report content more
explicit and handles edge cases more gracefully.

Signed-off-by: Zack Koppert <zkoppert@github.com>
---
 markdown_writer.py                           |  30 ++++-
 measure_innersource.py                       |   3 +
 test_markdown_writer.py                      |   4 +-
 test_markdown_writer_edge_cases.py           |   2 +
 test_markdown_writer_more_edge_cases.py      |   1 +
 test_markdown_writer_original_author_only.py |   1 +
 test_markdown_writer_owning_team.py          |   4 +-
 test_markdown_writer_team_ownership_flag.py  | 112 +++++++++++++++++++
 test_markdown_writer_zero_contributions.py   |   1 +
 9 files changed, 153 insertions(+), 5 deletions(-)
 create mode 100644 test_markdown_writer_team_ownership_flag.py

diff --git a/markdown_writer.py b/markdown_writer.py
index d48a0ef..6143526 100644
--- a/markdown_writer.py
+++ b/markdown_writer.py
@@ -40,6 +40,7 @@ def write_to_markdown(
     innersource_contributors=None,
     innersource_contribution_counts=None,
     team_member_contribution_counts=None,
+    team_ownership_explicitly_specified=False,
 ) -> None:
     """
     Generate a comprehensive InnerSource collaboration report in markdown format.
@@ -84,6 +85,10 @@ def write_to_markdown(
                                                                            mapping team member
                                                                            usernames to their
                                                                            contribution counts.
+        team_ownership_explicitly_specified (bool, optional): Flag indicating whether team
+                                                            ownership is explicitly specified
+                                                            rather than derived from commit history.
+                                                            Defaults to False.
 
     Returns:
         None: This function creates a markdown file as a side effect.
@@ -126,7 +131,22 @@ def write_to_markdown(
         ...     all_contributors=["alice", "bob", "charlie", "dave", "eve"],
         ...     innersource_contributors=["dave", "eve"],
         ...     innersource_contribution_counts={"dave": 15, "eve": 8},
-        ...     team_member_contribution_counts={"alice": 25, "bob": 12, "charlie": 5}
+        ...     team_member_contribution_counts={"alice": 25, "bob": 12, "charlie": 5},
+        ...     team_ownership_explicitly_specified=False
+        ... )
+
+        >>> # Generate a report with explicitly specified team ownership
+        >>> write_to_markdown(
+        ...     report_title="Team Ownership Report",
+        ...     output_file="team_report.md",
+        ...     innersource_ratio=0.38,
+        ...     repo_data=repo_object,
+        ...     team_members_that_own_the_repo=["david", "emily", "frank"],
+        ...     all_contributors=["david", "emily", "frank", "greg", "hannah"],
+        ...     innersource_contributors=["greg", "hannah"],
+        ...     innersource_contribution_counts={"greg": 12, "hannah": 6},
+        ...     team_member_contribution_counts={"david": 18, "emily": 9, "frank": 7},
+        ...     team_ownership_explicitly_specified=True
         ... )
     """
     output_file_name = output_file if output_file else "innersource_report.md"
@@ -138,7 +158,9 @@ def write_to_markdown(
             return
         report_file.write(f"## Repository: {repo_data.full_name}\n\n")
         report_file.write(f"### InnerSource Ratio: {innersource_ratio:.2%}\n\n")
-        if original_commit_author and original_commit_author_manager:
+        if team_ownership_explicitly_specified:
+            report_file.write("### Team ownership is explicitly specified\n\n")
+        elif original_commit_author and original_commit_author_manager:
             report_file.write(
                 f"### Original Commit Author: {original_commit_author} (Manager: {original_commit_author_manager})\n\n"
             )
@@ -147,7 +169,9 @@ def write_to_markdown(
                 f"### Original Commit Author: {original_commit_author}\n\n"
             )
         else:
-            report_file.write("### Team ownership is explicitly specified\n\n")
+            report_file.write(
+                "### Original commit author information not available\n\n"
+            )
         report_file.write("## Team Members that Own the Repo:\n")
         if team_members_that_own_the_repo:
             for member in team_members_that_own_the_repo:
diff --git a/measure_innersource.py b/measure_innersource.py
index 8cde7be..0ff2951 100644
--- a/measure_innersource.py
+++ b/measure_innersource.py
@@ -414,6 +414,9 @@ def main():  # pragma: no cover
             innersource_contributors=innersource_contributors,
             innersource_contribution_counts=innersource_contribution_counts,
             team_member_contribution_counts=team_member_contribution_counts,
+            team_ownership_explicitly_specified=bool(
+                owning_team
+            ),  # True if owning_team is specified
         )
 
         evaluate_markdown_file_size(output_file)
diff --git a/test_markdown_writer.py b/test_markdown_writer.py
index 8bc6acc..8ff20a4 100644
--- a/test_markdown_writer.py
+++ b/test_markdown_writer.py
@@ -29,6 +29,7 @@ def test_write_to_markdown_no_repo_data(self):
         write_to_markdown(
             report_title="InnerSource Report",
             output_file="innersource_report.md",
+            team_ownership_explicitly_specified=False,
         )
 
         # Check that the function writes the correct markdown file
@@ -63,6 +64,7 @@ def test_write_to_markdown_with_data(self):
             innersource_contributors=innersource_contributors,
             innersource_contribution_counts=innersource_counts,
             team_member_contribution_counts=team_member_counts,
+            team_ownership_explicitly_specified=False,
         )
 
         # Check that the function writes the correct markdown file
@@ -96,7 +98,7 @@ def test_write_to_markdown_default_filename(self):
         """Test that write_to_markdown uses the default filename when none is provided."""
         # Call the function with no output_file
         write_to_markdown(
-            report_title="InnerSource Report",
+            report_title="InnerSource Report", team_ownership_explicitly_specified=False
         )
 
         # Check that the function uses the default filename
diff --git a/test_markdown_writer_edge_cases.py b/test_markdown_writer_edge_cases.py
index 76905ba..e12476a 100644
--- a/test_markdown_writer_edge_cases.py
+++ b/test_markdown_writer_edge_cases.py
@@ -46,6 +46,7 @@ def full_name(self):
                 innersource_contributors=["contributor1"],
                 innersource_contribution_counts={"contributor1": 5},
                 team_member_contribution_counts={"team_member1": 10},
+                team_ownership_explicitly_specified=False,
             )
 
             # Read the generated file
@@ -93,6 +94,7 @@ def full_name(self):
                 innersource_contributors=None,  # Empty innersource contributors
                 innersource_contribution_counts={"contributor1": 5},
                 team_member_contribution_counts={"team_member1": 10},
+                team_ownership_explicitly_specified=False,
             )
 
             # Read the generated file
diff --git a/test_markdown_writer_more_edge_cases.py b/test_markdown_writer_more_edge_cases.py
index e2b68b5..c2d930b 100644
--- a/test_markdown_writer_more_edge_cases.py
+++ b/test_markdown_writer_more_edge_cases.py
@@ -46,6 +46,7 @@ def full_name(self):
                 innersource_contributors=["contributor1"],
                 innersource_contribution_counts=None,  # Empty contribution counts
                 team_member_contribution_counts={},  # Empty dict for team member counts
+                team_ownership_explicitly_specified=False,
             )
 
             # Read the generated file
diff --git a/test_markdown_writer_original_author_only.py b/test_markdown_writer_original_author_only.py
index 8782993..e49602e 100644
--- a/test_markdown_writer_original_author_only.py
+++ b/test_markdown_writer_original_author_only.py
@@ -41,6 +41,7 @@ def full_name(self):
             innersource_contributors=["contributor1"],
             innersource_contribution_counts={"contributor1": 5},
             team_member_contribution_counts={"team_member1": 10},
+            team_ownership_explicitly_specified=False,
         )
 
         # Read the generated file
diff --git a/test_markdown_writer_owning_team.py b/test_markdown_writer_owning_team.py
index 16c3bf0..5dfd6b1 100644
--- a/test_markdown_writer_owning_team.py
+++ b/test_markdown_writer_owning_team.py
@@ -13,7 +13,7 @@ def test_write_to_markdown_with_owning_team_override(tmp_path):
     mock_repo = MagicMock()
     mock_repo.full_name = "org/repo"
 
-    # Call with None for original_commit_author and manager (simulating owning_team override)
+    # Call with team_ownership_explicitly_specified=True
     write_to_markdown(
         report_title="Test Report with Owning Team",
         output_file=str(output_file),
@@ -26,6 +26,7 @@ def test_write_to_markdown_with_owning_team_override(tmp_path):
         innersource_contributors=["dave", "eve"],
         innersource_contribution_counts={"dave": 15, "eve": 8},
         team_member_contribution_counts={"alice": 25, "bob": 12, "charlie": 5},
+        team_ownership_explicitly_specified=True,
     )
 
     # Verify the file was created
@@ -70,6 +71,7 @@ def test_write_to_markdown_with_original_author(tmp_path):
         innersource_contributors=["dave", "eve"],
         innersource_contribution_counts={"dave": 15, "eve": 8},
         team_member_contribution_counts={"alice": 25, "bob": 12, "charlie": 5},
+        team_ownership_explicitly_specified=False,
     )
 
     # Verify the file was created
diff --git a/test_markdown_writer_team_ownership_flag.py b/test_markdown_writer_team_ownership_flag.py
new file mode 100644
index 0000000..ac12a25
--- /dev/null
+++ b/test_markdown_writer_team_ownership_flag.py
@@ -0,0 +1,112 @@
+"""
+Tests for markdown_writer.py specifically for the team_ownership_explicitly_specified parameter
+"""
+
+import os
+import tempfile
+
+from markdown_writer import write_to_markdown
+
+
+def test_write_to_markdown_missing_original_author_with_flag():
+    """
+    Test writing markdown with team_ownership_explicitly_specified=True
+    """
+    with tempfile.NamedTemporaryFile(suffix=".md", delete=False) as temp_file:
+        temp_file_path = temp_file.name
+
+    try:
+        # Mock repo data object
+        class MockRepo:
+            """
+            Mock repository data class for testing
+            """
+
+            @property
+            def full_name(self):
+                """
+                Returns the full name of the mock repository
+                """
+                return "owner/repo"
+
+        # Test with missing original author but with explicit team ownership flag
+        write_to_markdown(
+            report_title="Test Report",
+            output_file=temp_file_path,
+            innersource_ratio=0.5,
+            repo_data=MockRepo(),
+            original_commit_author=None,  # No original author
+            original_commit_author_manager=None,  # No manager
+            team_members_that_own_the_repo=["team_member1"],
+            all_contributors=["contributor1"],
+            innersource_contributors=["contributor1"],
+            innersource_contribution_counts={"contributor1": 5},
+            team_member_contribution_counts={"team_member1": 10},
+            team_ownership_explicitly_specified=True,  # Explicit flag
+        )
+
+        # Read the generated file
+        with open(temp_file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+
+        # Check that explicit team ownership message is included
+        assert "### Team ownership is explicitly specified" in content
+        # Make sure the "Original commit author information not available" is not included
+        assert "Original commit author information not available" not in content
+
+    finally:
+        # Clean up the temporary file
+        if os.path.exists(temp_file_path):
+            os.remove(temp_file_path)
+
+
+def test_write_to_markdown_missing_original_author_without_flag():
+    """
+    Test writing markdown with missing original_commit_author but without team_ownership_explicitly_specified
+    """
+    with tempfile.NamedTemporaryFile(suffix=".md", delete=False) as temp_file:
+        temp_file_path = temp_file.name
+
+    try:
+        # Mock repo data object
+        class MockRepo:
+            """
+            Mock repository data class for testing
+            """
+
+            @property
+            def full_name(self):
+                """
+                Returns the full name of the mock repository
+                """
+                return "owner/repo"
+
+        # Test with missing original author and without explicit team ownership flag
+        write_to_markdown(
+            report_title="Test Report",
+            output_file=temp_file_path,
+            innersource_ratio=0.5,
+            repo_data=MockRepo(),
+            original_commit_author=None,  # No original author
+            original_commit_author_manager=None,  # No manager
+            team_members_that_own_the_repo=["team_member1"],
+            all_contributors=["contributor1"],
+            innersource_contributors=["contributor1"],
+            innersource_contribution_counts={"contributor1": 5},
+            team_member_contribution_counts={"team_member1": 10},
+            team_ownership_explicitly_specified=False,  # Flag is False
+        )
+
+        # Read the generated file
+        with open(temp_file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+
+        # Check that "Original commit author information not available" is included
+        assert "### Original commit author information not available" in content
+        # Make sure the team ownership explicitly specified message is not included
+        assert "Team ownership is explicitly specified" not in content
+
+    finally:
+        # Clean up the temporary file
+        if os.path.exists(temp_file_path):
+            os.remove(temp_file_path)
diff --git a/test_markdown_writer_zero_contributions.py b/test_markdown_writer_zero_contributions.py
index d348eb4..6bdf503 100644
--- a/test_markdown_writer_zero_contributions.py
+++ b/test_markdown_writer_zero_contributions.py
@@ -49,6 +49,7 @@ def full_name(self):
                     "team_member1": 0,
                     "team_member2": 0,
                 },  # All zero counts
+                team_ownership_explicitly_specified=False,
             )
 
             # Read the generated file

From 0bd5cc4340b98c6a4f9d162e4e0cedd5e942f6b4 Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Tue, 30 Sep 2025 15:28:31 -0700
Subject: [PATCH 9/9] perf: refactor commit processing to use chunking for
 better memory efficiency

This change improves memory usage and scalability for repositories with large
commit histories by:

- Replacing list(commits) with chunked processing to avoid loading the entire
  commit history into memory at once
- Using iterators directly to find the original commit author instead of
  loading all commits first
- Processing commits in manageable chunks based on the configured chunk_size,
  similar to how pull requests and issues are already handled
- Adding debug logging for commit processing progress

This approach ensures consistent memory usage regardless of repository size
and prevents potential out-of-memory issues when analyzing repositories with
extensive commit histories.

Signed-off-by: Zack Koppert <zkoppert@github.com>
---
 measure_innersource.py      | 68 +++++++++++++++++++++++++++----------
 test_measure_innersource.py | 11 +++---
 2 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/measure_innersource.py b/measure_innersource.py
index 0ff2951..15ea113 100644
--- a/measure_innersource.py
+++ b/measure_innersource.py
@@ -164,11 +164,6 @@ def main():  # pragma: no cover
         innersource_contributors = []
         team_members_that_own_the_repo = []
 
-        # Get all commits for contribution counting (needed regardless of team determination method)
-        logger.info("Fetching commits...")
-        commits = repo_data.commits()
-        commit_list = list(commits)
-
         # Check if owning team is explicitly specified
         if owning_team:
             logger.info("Using explicitly specified owning team: %s", owning_team)
@@ -177,12 +172,26 @@ def main():  # pragma: no cover
             original_commit_author = None
             original_commit_author_manager = None
         else:
-            logger.info("Analyzing first commit...")
-            # Paginate to the last page to get the oldest commit
-            # commits is a GitHubIterator, so you can use .count to get total,
-            # then get the last one
-            first_commit = commit_list[-1]  # The last in the list is the oldest
-            original_commit_author = first_commit.author.login
+            logger.info("Finding original commit author...")
+            # We need to find the oldest commit for team determination
+            # Use GitHub's default chronological ordering (oldest first)
+            commits_iterator = repo_data.commits()
+            original_commit = None
+
+            # Process just enough commits to find the oldest one
+            # Most repos will only need a single API call since GitHub sorts oldest first
+            # For repositories with unusual commit ordering, we'll get the first commit from the first page
+            try:
+                original_commit = next(commits_iterator)
+                original_commit_author = (
+                    original_commit.author.login
+                    if hasattr(original_commit.author, "login")
+                    else None
+                )
+                logger.info("Found original commit by %s", original_commit_author)
+            except StopIteration:
+                logger.warning("No commits found in repository")
+                original_commit_author = None
 
             # Check if original commit author exists in org chart
             if original_commit_author not in org_data:
@@ -267,13 +276,38 @@ def main():  # pragma: no cover
 
         logger.info("Pre-processing contribution data...")
 
-        # Create mapping of commit authors to commit counts
-        logger.info("Processing commits...")
+        # Process commits in chunks
+        logger.info("Processing commits in chunks...")
         commit_author_counts = {}
-        for commit in commit_list:
-            if hasattr(commit.author, "login"):
-                author = commit.author.login
-                commit_author_counts[author] = commit_author_counts.get(author, 0) + 1
+        total_commits = 0
+
+        # GitHub API returns an iterator that internally handles pagination
+        # We'll manually chunk it to avoid loading everything at once
+        commits_iterator = repo_data.commits()
+        while True:
+            # Process a chunk of commits
+            chunk = []
+            for _ in range(chunk_size):
+                try:
+                    chunk.append(next(commits_iterator))
+                except StopIteration:
+                    break
+
+            if not chunk:
+                break
+
+            # Update counts for this chunk
+            for commit in chunk:
+                if hasattr(commit.author, "login"):
+                    author = commit.author.login
+                    commit_author_counts[author] = (
+                        commit_author_counts.get(author, 0) + 1
+                    )
+
+            total_commits += len(chunk)
+            logger.debug("  Processed %s commits so far...", total_commits)
+
+        logger.info("Found and processed %s commits", total_commits)
 
         # Process pull requests in chunks
         logger.info("Processing pull requests in chunks...")
diff --git a/test_measure_innersource.py b/test_measure_innersource.py
index 92c9854..5f4f60c 100644
--- a/test_measure_innersource.py
+++ b/test_measure_innersource.py
@@ -147,13 +147,12 @@ def test_main_missing_user_in_org_chart(tmp_path, monkeypatch):
                 call[0][0] for call in mock_logger.info.call_args_list if call[0]
             ]
 
-            # Should have logged about reading org data and analyzing first
-            # commit, but should NOT have logged about original commit author
-            # with manager
+            # Should have logged about reading org data and finding original commit author
+            # but should NOT have logged about original commit author with manager
             assert "Reading in org data from org-data.json..." in info_calls
-            assert "Analyzing first commit..." in info_calls
-
-            # Should NOT contain the log message about
+            assert (
+                "Finding original commit author..." in info_calls
+            )  # Should NOT contain the log message about
             # "Original commit author: X, with manager: Y"
             assert not any(
                 isinstance(msg, str)