diff --git a/docusaurus.config.js b/docusaurus.config.js index 8b5cb286dab..94b050151be 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -56,18 +56,6 @@ const versions = require('./versions.json'); }, "v0.15":{ banner: 'none', - }, - "v0.14":{ - banner: 'none', - }, - "v0.13":{ - banner: 'none', - }, - "v0.12":{ - banner: 'none', - }, - "v0.11":{ - banner: 'none', } } }, diff --git a/versioned_docs/version-v0.11/DataModels/DataSupport.md b/versioned_docs/version-v0.11/DataModels/DataSupport.md deleted file mode 100644 index 6cb180afeaf..00000000000 --- a/versioned_docs/version-v0.11/DataModels/DataSupport.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: "Data Support" -description: > - Data sources that DevLake supports -sidebar_position: 1 ---- - - -## Data Sources and Data Plugins -DevLake supports the following data sources. The data from each data source is collected with one or more plugins. There are 9 data plugins in total: `ae`, `feishu`, `gitextractor`, `github`, `gitlab`, `jenkins`, `jira`, `refdiff` and `tapd`. - - -| Data Source | Versions | Plugins | -|-------------|--------------------------------------|-------- | -| AE | | `ae` | -| Feishu | Cloud |`feishu` | -| GitHub | Cloud |`github`, `gitextractor`, `refdiff` | -| GitLab | Cloud, Community Edition 13.x+ |`gitlab`, `gitextractor`, `refdiff` | -| Jenkins | 2.263.x+ |`jenkins` | -| Jira | Cloud, Server 8.x+, Data Center 8.x+ |`jira` | -| TAPD | Cloud | `tapd` | - - - -## Data Collection Scope By Each Plugin -This table shows the entities collected by each plugin. Domain layer entities in this table are consistent with the entities [here](./DevLakeDomainLayerSchema.md). - -| Domain Layer Entities | ae | gitextractor | github | gitlab | jenkins | jira | refdiff | tapd | -| --------------------- | -------------- | ------------ | -------------- | ------- | ------- | ------- | ------- | ------- | -| commits | update commits | default | not-by-default | default | | | | | -| commit_parents | | default | | | | | | | -| commit_files | | default | | | | | | | -| pull_requests | | | default | default | | | | | -| pull_request_commits | | | default | default | | | | | -| pull_request_comments | | | default | default | | | | | -| pull_request_labels | | | default | | | | | | -| refs | | default | | | | | | | -| refs_commits_diffs | | | | | | | default | | -| refs_issues_diffs | | | | | | | default | | -| ref_pr_cherry_picks | | | | | | | default | | -| repos | | | default | default | | | | | -| repo_commits | | default | default | | | | | | -| board_repos | | | | | | | | | -| issue_commits | | | | | | | | | -| issue_repo_commits | | | | | | | | | -| pull_request_issues | | | | | | | | | -| refs_issues_diffs | | | | | | | | | -| boards | | | default | | | default | | default | -| board_issues | | | default | | | default | | default | -| issue_changelogs | | | | | | default | | default | -| issues | | | default | | | default | | default | -| issue_comments | | | | | | default | | default | -| issue_labels | | | default | | | | | | -| sprints | | | | | | default | | default | -| issue_worklogs | | | | | | default | | default | -| users o | | | default | | | default | | default | -| builds | | | | | default | | | | -| jobs | | | | | default | | | | - diff --git a/versioned_docs/version-v0.11/DataModels/DevLakeDomainLayerSchema.md b/versioned_docs/version-v0.11/DataModels/DevLakeDomainLayerSchema.md deleted file mode 100644 index 30fc5d6a4e7..00000000000 --- a/versioned_docs/version-v0.11/DataModels/DevLakeDomainLayerSchema.md 
+++ /dev/null @@ -1,532 +0,0 @@ ---- -title: "Domain Layer Schema" -description: > - DevLake Domain Layer Schema -sidebar_position: 2 ---- - -## Summary - -This document describes the entities in DevLake's domain layer schema and their relationships. - -Data in the domain layer is transformed from the data in the tool layer. The tool layer schema is based on the data from specific tools such as Jira, GitHub, Gitlab, Jenkins, etc. The domain layer schema can be regarded as an abstraction of tool-layer schemas. - -Domain layer schema itself includes 2 logical layers: a `DWD` layer and a `DWM` layer. The DWD layer stores the detailed data points, while the DWM is the slight aggregation and operation of DWD to store more organized details or middle-level metrics. - - -## Use Cases -1. Users can make customized Grafana dashboards based on the domain layer schema. -2. Contributors can complete the ETL logic when adding new data source plugins refering to this data model. - - -## Data Model - -This is the up-to-date domain layer schema for DevLake v0.10.x. Tables (entities) are categorized into 5 domains. -1. Issue tracking domain entities: Jira issues, GitHub issues, GitLab issues, etc -2. Source code management domain entities: Git/GitHub/Gitlab commits and refs, etc -3. Code review domain entities: GitHub PRs, Gitlab MRs, etc -4. CI/CD domain entities: Jenkins jobs & builds, etc -5. Cross-domain entities: entities that map entities from different domains to break data isolation - - -### Schema Diagram -![Domain Layer Schema](/img/DomainLayerSchema/schema-diagram-v0.14.png) - -When reading the schema, you'll notice that many tables' primary key is called `id`. Unlike auto-increment id or UUID, `id` is a string composed of several parts to uniquely identify similar entities (e.g. repo) from different platforms (e.g. Github/Gitlab) and allow them to co-exist in a single table. - -Tables that end with WIP are still under development. - - -### Naming Conventions - -1. The name of a table is in plural form. Eg. boards, issues, etc. -2. The name of a table which describe the relation between 2 entities is in the form of [BigEntity in singular form]\_[SmallEntity in plural form]. Eg. board_issues, sprint_issues, pull_request_comments, etc. -3. Value of the field in enum type are in capital letters. Eg. [table.issues.type](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#ZDCw9k) has 3 values, REQUIREMENT, BUG, INCIDENT. Values that are phrases, such as 'IN_PROGRESS' of [table.issues.status](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#ZDCw9k), are separated with underscore '\_'. - -
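To make the `id` convention above concrete, here is a minimal Go sketch of how such a key could be composed. The helper name is hypothetical and not part of DevLake; it only mirrors the documented `< plugin >:< Entity >:< PK0 >[:PK1]...` format and the example value 'github:GithubRepos:384111310' used later in this document.

```go
package main

import (
	"fmt"
	"strings"
)

// composeDomainId is a hypothetical helper that follows the documented
// "<plugin>:<Entity>:<PK0>[:PK1]..." convention for domain-layer primary keys.
func composeDomainId(plugin, entity string, pks ...string) string {
	return strings.Join(append([]string{plugin, entity}, pks...), ":")
}

func main() {
	fmt.Println(composeDomainId("github", "GithubRepos", "384111310"))
	// Output: github:GithubRepos:384111310
}
```

Because the plugin and entity names are part of the key, records collected from different tools can share a single table without id collisions.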
- -## DWD Entities - (Data Warehouse Detail) - -### Domain 1 - Issue Tracking - -#### 1. Issues - -An `issue` is the abstraction of Jira/Github/GitLab/TAPD/... issues. - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | An issue's `id` is composed of < plugin >:< Entity >:< PK0 >[:PK1]..." | PK | -| `number` | varchar | 255 | The number of this issue. For example, the number of this Github [issue](https://github.com/apache/incubator-devlake/issues/1145) is 1145. | | -| `url` | varchar | 255 | The url of the issue. It's a web address in most cases. | | -| `title` | varchar | 255 | The title of an issue | | -| `description` | longtext | | The detailed description/summary of an issue | | -| `type` | varchar | 255 | The standard type of this issue. There're 3 standard types: The 3 standard types are transformed from the original types of an issue. The transformation rule is set in the '.env' file or 'config-ui' before data collection. For issues with an original type that has not mapped to a standard type, the value of `type` will be the issue's original type. | | -| `status` | varchar | 255 | The standard statuses of this issue. There're 3 standard statuses: The 3 standard statuses are transformed from the original statuses of an issue. The transformation rule: | | -| `original_status` | varchar | 255 | The original status of an issue. | | -| `story_point` | int | | The story point of this issue. It's default to an empty string for data sources such as Github issues and Gitlab issues. | | -| `priority` | varchar | 255 | The priority of the issue | | -| `component` | varchar | 255 | The component a bug-issue affects. This field only supports Github plugin for now. The value is transformed from Github issue labels by the rules set according to the user's configuration of .env by end users during DevLake installation. | | -| `severity` | varchar | 255 | The severity level of a bug-issue. This field only supports Github plugin for now. The value is transformed from Github issue labels by the rules set according to the user's configuration of .env by end users during DevLake installation. | | -| `parent_issue_id` | varchar | 255 | The id of its parent issue | | -| `epic_key` | varchar | 255 | The key of the epic this issue belongs to. For tools with no epic-type issues such as Github and Gitlab, this field is default to an empty string | | -| `original_estimate_minutes` | int | | The orginal estimation of the time allocated for this issue | | -| `time_spent_minutes` | int | | The orginal estimation of the time allocated for this issue | | -| `time_remaining_minutes` | int | | The remaining time to resolve the issue | | -| `creator_id` | varchar | 255 | The id of issue creator | | -| `assignee_id` | varchar | 255 | The id of issue assignee. 
| | -| `assignee_name` | varchar | 255 | The name of the assignee | | -| `created_date` | datetime | 3 | The time issue created | | -| `updated_date` | datetime | 3 | The last time issue gets updated | | -| `resolution_date` | datetime | 3 | The time the issue changes to 'DONE'. | | -| `lead_time_minutes` | int | | Describes the cycle time from issue creation to issue resolution. | | - -#### 2. issue_labels - -This table shows the labels of issues. Multiple entries can exist per issue. This table can be used to filter issues by label name. - -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `name` | varchar | 255 | Label name | | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | - - -#### 3. issue_comments(WIP) - -This table shows the comments of issues. Issues with multiple comments are shown as multiple records. This table can be used to calculate _metric - issue response time_. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------- | -| `id` | varchar | 255 | The unique id of a comment | PK | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | -| `user_id` | varchar | 255 | The id of the user who made the comment | FK_users.id | -| `body` | longtext | | The body/detail of the comment | | -| `created_date` | datetime | 3 | The creation date of the comment | | -| `updated_date` | datetime | 3 | The last time comment gets updated | | -| `position` | int | | The position of a comment under an issue. It starts from 1. The position is sorted by comment created_date asc.
Eg. If an issue has 5 comments, the position of the 1st created comment is 1. | | - -#### 4. issue_changelog(WIP) - -This table shows the changelogs of issues. Issues with multiple changelogs are shown as multiple records. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :-------------------------------------------------------------------- | :----------- | -| `id` | varchar | 255 | The unique id of an issue changelog | PK | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | -| `actor_id` | varchar | 255 | The id of the user who made the change | FK_users.id | -| `field` | varchar | 255 | The id of changed field | | -| `from` | varchar | 255 | The original value of the changed field | | -| `to` | varchar | 255 | The new value of the changed field | | -| `created_date` | datetime | 3 | The creation date of the changelog | | - - -#### 5. issue_worklogs - -This table shows the work logged under issues. Usually, an issue has multiple worklogs logged by different developers. - -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :------------------------------------------------------------------------------------------- | :----------- | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | -| `author_id` | varchar | 255 | The id of the user who logged the work | FK_users.id | -| `comment` | varchar | 255 | The comment an user made while logging the work. | | -| `time_spent_minutes` | int | | The time user logged. The unit of value is normalized to minute. Eg. 1d =) 480, 4h30m =) 270 | | -| `logged_date` | datetime | 3 | The time of this logging action | | -| `started_date` | datetime | 3 | Start time of the worklog | | - - -#### 6. boards - -A `board` is an issue list or a collection of issues. It's the abstraction of a Jira board, a Jira project or a [Github issue list](https://github.com/merico-dev/lake/issues). This table can be used to filter issues by the boards they belong to. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | A board's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." | PK | -| `name` | varchar | 255 | The name of the board. Note: the board name of a Github project 'merico-dev/lake' is 'merico-dev/lake', representing the [default issue list](https://github.com/merico-dev/lake/issues). | | -| `description` | varchar | 255 | The description of the board. | | -| `url` | varchar | 255 | The url of the board. Eg. https://Github.com/merico-dev/lake | | -| `created_date` | datetime | 3 | Board creation time | | - -#### 7. board_issues - -This table shows the relation between boards and issues. This table can be used to filter issues by board. - -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | - -#### 8. 
sprints - -A `sprint` is the abstraction of Jira sprints, TAPD iterations and Github milestones. A sprint contains a list of issues. - -| **field** | **type** | **length** | **description** | **key** | -| :------------------ | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :----------- | -| `id` | varchar | 255 | A sprint's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." | PK | -| `name` | varchar | 255 | The name of sprint.
For Github projects, the sprint name is the milestone name. For instance, 'v0.10.0 - Introduce Temporal to DevLake' is the name of this [sprint](https://github.com/apache/incubator-devlake/milestone/5). | | -| `url` | varchar | 255 | The url of sprint. | | -| `status` | varchar | 255 | There're 3 statuses of a sprint: | | -| `started_date` | datetime | 3 | The start time of a sprint | | -| `ended_date` | datetime | 3 | The planned/estimated end time of a sprint. It's usually set when planning a sprint. | | -| `completed_date` | datetime | 3 | The actual time to complete a sprint. | | -| `original_board_id` | datetime | 3 | The id of board where the sprint first created. This field is not null only when this entity is transformed from Jira sprintas.
In Jira, sprint and board entities have 2 types of relation: | FK_boards.id | - -#### 9. sprint_issues - -This table shows the relation between sprints and issues that have been added to sprints. This table can be used to show metrics such as _'ratio of unplanned issues'_, _'completion rate of sprint issues'_, etc - -| **field** | **type** | **length** | **description** | **key** | -| :--------------- | :------- | :--------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------ | -| `sprint_id` | varchar | 255 | Sprint id | FK_sprints.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `is_removed` | bool | | If the issue is removed from this sprint, then TRUE; else FALSE | | -| `added_date` | datetime | 3 | The time this issue added to the sprint. If an issue is added to a sprint multiple times, the latest time will be the value. | | -| `removed_date` | datetime | 3 | The time this issue gets removed from the sprint. If an issue is removed multiple times, the latest time will be the value. | | -| `added_stage` | varchar | 255 | The stage when issue is added to this sprint. There're 3 possible values: | | -| `resolved_stage` | varchar | 255 | The stage when an issue is resolved (issue status turns to 'DONE'). There're 3 possible values: | | - -#### 10. board_sprints - -| **field** | **type** | **length** | **description** | **key** | -| :---------- | :------- | :--------- | :-------------- | :------------ | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `sprint_id` | varchar | 255 | Sprint id | FK_sprints.id | - -
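Before moving on to the source code management domain, here is a small worked example for the `lead_time_minutes` field of the issues table, assuming (as described above) that lead time is simply the span from `created_date` to `resolution_date`:

```go
package main

import (
	"fmt"
	"time"
)

// leadTimeMinutes derives the issue lead time from creation to resolution,
// matching the description of issues.lead_time_minutes above.
func leadTimeMinutes(created, resolved time.Time) int64 {
	return int64(resolved.Sub(created).Minutes())
}

func main() {
	created := time.Date(2021, 9, 1, 9, 0, 0, 0, time.UTC)
	resolved := time.Date(2021, 9, 3, 17, 30, 0, 0, time.UTC)
	fmt.Println(leadTimeMinutes(created, resolved)) // 3390 (2 days and 8.5 hours)
}
```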
- -### Domain 2 - Source Code Management - -#### 11. repos - -Information about Github or Gitlab repositories. A repository is always owned by a user. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------- | -| `id` | varchar | 255 | A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github repo's id is like "< github >:< GithubRepos >< GithubRepoId >". Eg. 'github:GithubRepos:384111310' | PK | -| `name` | varchar | 255 | The name of repo. | | -| `description` | varchar | 255 | The description of repo. | | -| `url` | varchar | 255 | The url of repo. Eg. https://Github.com/merico-dev/lake | | -| `owner_id` | varchar | 255 | The id of the owner of repo | FK_users.id | -| `language` | varchar | 255 | The major language of repo. Eg. The language for merico-dev/lake is 'Go' | | -| `forked_from` | varchar | 255 | Empty unless the repo is a fork in which case it contains the `id` of the repo the repo is forked from. | | -| `deleted` | tinyint | 255 | 0: repo is active 1: repo has been deleted | | -| `created_date` | datetime | 3 | Repo creation date | | -| `updated_date` | datetime | 3 | Last full update was done for this repo | | - -#### 12. repo_languages(WIP) - -Languages that are used in the repository along with byte counts for all files in those languages. This is in line with how Github calculates language percentages in a repository. Multiple entries can exist per repo. - -The table is filled in when the repo has been first inserted on when an update round for all repos is made. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github repo's id is like "< github >:< GithubRepos >< GithubRepoId >". Eg. 'github:GithubRepos:384111310' | PK | -| `language` | varchar | 255 | The language of repo.
These are the [languages](https://api.github.com/repos/merico-dev/lake/languages) for merico-dev/lake | | -| `bytes` | int | | The byte counts for all files in those languages | | -| `created_date` | datetime | 3 | The field is filled in with the latest timestamp the query for a specific `repo_id` was done. | | - -#### 13. repo_commits - -The commits belong to the history of a repository. More than one repos can share the same commits if one is a fork of the other. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :-------------- | :------------- | -| `repo_id` | varchar | 255 | Repo id | FK_repos.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### 14. refs - -A ref is the abstraction of a branch or tag. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------- | -| `id` | varchar | 255 | A ref's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github ref is composed of "github:GithubRepos:< GithubRepoId >:< RefUrl >". Eg. The id of release v5.3.0 of PingCAP/TiDB project is 'github:GithubRepos:384111310:refs/tags/v5.3.0' A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." | PK | -| `ref_name` | varchar | 255 | The name of ref. Eg. '[refs/tags/v0.9.3](https://github.com/merico-dev/lake/tree/v0.9.3)' | | -| `repo_id` | varchar | 255 | The id of repo this ref belongs to | FK_repos.id | -| `commit_sha` | char | 40 | The commit this ref points to at the time of collection | | -| `is_default` | int | | | | -| `merge_base` | char | 40 | The merge base commit of the main ref and the current ref | | -| `ref_type` | varchar | 64 | There're 2 typical types: | | - -#### 15. refs_commits_diffs - -This table shows the commits added in a new ref compared to an old ref. This table can be used to support tag-based analysis, for instance, '_No. of commits of a tag_', '_No. of merged pull request of a tag_', etc. - -The records of this table are computed by [RefDiff](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/refdiff) plugin. The computation should be manually triggered after using [GitRepoExtractor](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/gitextractor) to collect commits and refs. The algorithm behind is similar to [this](https://github.com/merico-dev/lake/compare/v0.8.0%E2%80%A6v0.9.0). - -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :-------------------------------------------------------------- | :------------- | -| `commit_sha` | char | 40 | One of the added commits in the new ref compared to the old ref | FK_commits.sha | -| `new_ref_id` | varchar | 255 | The new ref's id for comparison | FK_refs.id | -| `old_ref_id` | varchar | 255 | The old ref's id for comparison | FK_refs.id | -| `new_ref_commit_sha` | char | 40 | The commit new ref points to at the time of collection | | -| `old_ref_commit_sha` | char | 40 | The commit old ref points to at the time of collection | | -| `sorting_index` | varchar | 255 | An index for debugging, please skip it | | - -#### 16. commits - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------- | -| `sha` | char | 40 | One of the added commits in the new ref compared to the old ref | FK_commits.sha | -| `message` | varchar | 255 | Commit message | | -| `author_name` | varchar | 255 | The value is set with command `git config user.name xxxxx` commit | | -| `author_email` | varchar | 255 | The value is set with command `git config user.email xxxxx` author | | -| `authored_date` | datetime | 3 | The date when this commit was originally made | | -| `author_id` | varchar | 255 | The id of commit author | FK_users.id | -| `committer_name` | varchar | 255 | The name of committer | | -| `committer_email` | varchar | 255 | The email of committer | | -| `committed_date` | datetime | 3 | The last time the commit gets modified.
For example, when rebasing the branch where the commit is in on another branch, the committed_date changes. | | -| `committer_id` | varchar | 255 | The id of committer | FK_users.id | -| `additions` | int | | Added lines of code | | -| `deletions` | int | | Deleted lines of code | | -| `dev_eq` | int | | A metric that quantifies the amount of code contribution. The data can be retrieved from [AE plugin](https://github.com/merico-dev/lake/tree/v0.9.3/plugins/ae). | | - - -#### 17. commit_files - -The files have been changed via commits. Multiple entries can exist per commit. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :------------------------------------- | :------------- | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | -| `file_path` | varchar | 255 | Path of a changed file in a commit | | -| `additions` | int | | The added lines of code in this file | | -| `deletions` | int | | The deleted lines of code in this file | | - -#### 18. commit_comments(WIP) - -Code review comments on commits. These are comments on individual commits. If a commit is associated with a pull request, then its comments are in the [pull_request_comments](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#xt2lv4) table. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :---------------------------------- | :------------- | -| `id` | varchar | 255 | Unique comment id | | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | -| `user_id` | varchar | 255 | Id of the user who made the comment | | -| `created_date` | datetime | 3 | Comment creation time | | -| `body` | longtext | | Comment body/detail | | -| `line` | int | | | | -| `position` | int | | | | - -#### 19. commit_parents - -The parent commit(s) for each commit, as specified by Git. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :---------------- | :------------- | -| `commit_sha` | char | 40 | commit sha | FK_commits.sha | -| `parent` | char | 40 | Parent commit sha | FK_commits.sha | - -
- -### Domain 3 - Code Review - -#### 20. pull_requests - -A pull request is the abstraction of Github pull request and Gitlab merge request. - -| **field** | **type** | **length** | **description** | **key** | -| :----------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------- | -| `id` | char | 40 | A pull request's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." Eg. For 'github:GithubPullRequests:1347' | FK_commits.sha | -| `title` | varchar | 255 | The title of pull request | | -| `description` | longtext | | The body/description of pull request | | -| `status` | varchar | 255 | the status of pull requests. For a Github pull request, the status can either be 'open' or 'closed'. | | -| `number` | varchar | 255 | The number of PR. Eg, 1536 is the number of this [PR](https://github.com/apache/incubator-devlake/pull/1563) | | -| `base_repo_id` | varchar | 255 | The repo that will be updated. | | -| `head_reop_id` | varchar | 255 | The repo containing the changes that will be added to the base. If the head repository is NULL, this means that the corresponding project had been deleted when DevLake processed the pull request. | | -| `base_ref` | varchar | 255 | The branch name in the base repo that will be updated | | -| `head_ref` | varchar | 255 | The branch name in the head repo that contains the changes that will be added to the base | | -| `author_name` | varchar | 255 | The creator's name of the pull request | | -| `author_id` | varchar | 255 | The creator's id of the pull request | | -| `url` | varchar | 255 | the web link of the pull request | | -| `type` | varchar | 255 | The work-type of a pull request. For example: feature-development, bug-fix, docs, etc.
The value is transformed from Github pull request labels by configuring `GITHUB_PR_TYPE` in `.env` file during installation. | | -| `component` | varchar | 255 | The component this PR affects.
The value is transformed from Github/Gitlab pull request labels by configuring `GITHUB_PR_COMPONENT` in `.env` file during installation. | | -| `created_date` | datetime | 3 | The time PR created. | | -| `merged_date` | datetime | 3 | The time PR gets merged. Null when the PR is not merged. | | -| `closed_date` | datetime | 3 | The time PR closed. Null when the PR is not closed. | | -| `merge_commit_sha` | char | 40 | the merge commit of this PR. By the definition of [Github](https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-branches-in-your-repository/changing-the-default-branch), when you click the default Merge pull request option on a pull request on Github, all commits from the feature branch are added to the base branch in a merge commit. | | - -#### 21. pull_request_labels - -This table shows the labels of pull request. Multiple entries can exist per pull request. This table can be used to filter pull requests by label name. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :-------------- | :------------------ | -| `name` | varchar | 255 | Label name | | -| `pull_request_id` | varchar | 255 | Pull request ID | FK_pull_requests.id | - -#### 22. pull_request_commits - -A commit associated with a pull request - -The list is additive. This means if a rebase with commit squashing takes place after the commits of a pull request have been processed, the old commits will not be deleted. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :-------------- | :------------------ | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### 23. pull_request_comments(WIP) - -A code review comment on a commit associated with a pull request - -The list is additive. If commits are squashed on the head repo, the comments remain intact. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------ | -| `id` | varchar | 255 | Comment id | PK | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `user_id` | varchar | 255 | Id of user who made the comment | FK_users.id | -| `created_date` | datetime | 3 | Comment creation time | | -| `body` | longtext | | The body of the comment | | -| `position` | int | | The position of a comment under a pull request. It starts from 1. The position is sorted by comment created_date asc.
Eg. If a PR has 5 comments, the position of the 1st created comment is 1. | | - -#### 24. pull_request_events(WIP) - -Events of pull requests. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------ | -| `id` | varchar | 255 | Event id | PK | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `action` | varchar | 255 | The action to be taken, some values: | | -| `actor_id` | varchar | 255 | The user id of the event performer | FK_users.id | -| `created_date` | datetime | 3 | Event creation time | | - -
- -### Domain 4 - CI/CD(WIP) - -#### 25. jobs - -The CI/CD schedule, not a specific task. - -| **field** | **type** | **length** | **description** | **key** | -| :-------- | :------- | :--------- | :-------------- | :------ | -| `id` | varchar | 255 | Job id | PK | -| `name` | varchar | 255 | Name of job | | - -#### 26. builds - -A build is an execution of a job. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :--------------------------------------------------------------- | :--------- | -| `id` | varchar | 255 | Build id | PK | -| `job_id` | varchar | 255 | Id of the job this build belongs to | FK_jobs.id | -| `name` | varchar | 255 | Name of build | | -| `duration_sec` | bigint | | The duration of build in seconds | | -| `started_date` | datetime | 3 | Started time of the build | | -| `status` | varchar | 255 | The result of build. The values may be 'success', 'failed', etc. | | -| `commit_sha` | char | 40 | The specific commit being built on. Nullable. | | - - -### Cross-Domain Entities - -These entities are used to map entities between different domains. They are the key players to break data isolation. - -There're low-level entities such as issue_commits, users, and higher-level cross domain entities such as board_repos - -#### 27. issue_commits - -A low-level mapping between "issue tracking" and "source code management" domain by mapping `issues` and `commits`. Issue(n): Commit(n). - -The original connection between these two entities lies in either issue tracking tools like Jira or source code management tools like GitLab. You have to use tools to accomplish this. - -For example, a common method to connect Jira issue and GitLab commit is a GitLab plugin [Jira Integration](https://docs.gitlab.com/ee/integration/jira/). With this plugin, the Jira issue key in the commit message written by the committers will be parsed. Then, the plugin will add the commit urls under this jira issue. Hence, DevLake's [Jira plugin](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/jira) can get the related commits (including repo, commit_id, url) of an issue. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :-------------- | :------------- | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### 28. pull_request_issues - -This table shows the issues closed by pull requests. It's a medium-level mapping between "issue tracking" and "source code management" domain by mapping issues and commits. Issue(n): Commit(n). - -The data is extracted from the body of pull requests conforming to certain regular expression. The regular expression can be defined in GITHUB_PR_BODY_CLOSE_PATTERN in the .env file - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------- | :------- | :--------- | :------------------ | :------------------ | -| `pull_request_id` | char | 40 | Pull request id | FK_pull_requests.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `pull_request_number` | varchar | 255 | Pull request number | | -| `issue_number` | varchar | 255 | Issue number | | - -#### 29. board_repo(WIP) - -A rough way to link "issue tracking" and "source code management" domain by mapping `boards` and `repos`. Board(n): Repo(n). - -The mapping logic is under development. 
- -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `repo_id` | varchar | 255 | Repo id | FK_repos.id | - -#### 30. users(WIP) - -This is the table to unify user identities across tools. This table can be used to do all user-based metrics, such as _'No. of Issue closed by contributor', 'No. of commits by contributor',_ - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :------ | -| `id` | varchar | 255 | A user's `id` is composed of "< Plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github user's id is composed of "< github >:< GithubUsers >< GithubUserId)". Eg. 'github:GithubUsers:14050754' | PK | -| user_name | varchar | 255 | username/Github login of a user | | -| `fullname` | varchar | 255 | User's full name | | -| `email` | varchar | 255 | Email | | -| `avatar_url` | varchar | 255 | | | -| `organization` | varchar | 255 | User's organization or comany name | | -| `created_date` | datetime | 3 | User creation time | | -| `deleted` | tinyint | | 0: default. The user is active 1: the user is no longer active | | - -
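To round off the cross-domain entities, here is a rough Go illustration of the `pull_request_issues` extraction described above. The pattern below is a simplified, hypothetical stand-in for whatever is actually configured in `GITHUB_PR_BODY_CLOSE_PATTERN`; it only demonstrates the mechanism of pulling issue numbers out of a pull request body.

```go
package main

import (
	"fmt"
	"regexp"
)

// closePattern is a simplified example pattern, not DevLake's default.
var closePattern = regexp.MustCompile(`(?i)(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s+#(\d+)`)

func main() {
	body := "Refactor the collector.\n\nCloses #1145 and fixes #1563"
	for _, m := range closePattern.FindAllStringSubmatch(body, -1) {
		fmt.Println("linked issue number:", m[1])
	}
	// Output:
	// linked issue number: 1145
	// linked issue number: 1563
}
```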
- -## DWM Entities - (Data Warehouse Middle) - -DWM entities are the slight aggregation and operation of DWD to store more organized details or middle-level metrics. - -#### 31. issue_status_history - -This table shows the history of 'status change' of issues. This table can be used to break down _'issue lead time'_ to _'issue staying time in each status'_ to identify the bottleneck of the delivery workflow. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :------------------------------ | :-------------- | -| `issue_id` | varchar | 255 | Issue id | PK, FK_issue.id | -| `original_status` | varchar | 255 | The original status of an issue | | -| `start_date` | datetime | 3 | The start time of the status | | -| `end_date` | datetime | 3 | The end time of the status | | - -#### 32. Issue_assignee_history - -This table shows the 'assignee change history' of issues. This table can be used to identify _'the actual developer of an issue',_ or _'contributor involved in an issue'_ for contribution analysis. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :------------------------------------------------- | :-------------- | -| `issue_id` | varchar | 255 | Issue id | PK, FK_issue.id | -| `assignee` | varchar | 255 | The name of assignee of an issue | | -| `start_date` | datetime | 3 | The time when the issue is assigned to an assignee | | -| `end_date` | datetime | 3 | The time when the assignee changes | | - -#### 33. issue_sprints_history - -This table shows the 'scope change history' of sprints. This table can be used to analyze the _'how much and how frequently does a team change plans'_. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :------------------------------------------------- | :-------------- | -| `issue_id` | varchar | 255 | Issue id | PK, FK_issue.id | -| `sprint_id` | varchar | 255 | Sprint id | FK_sprints.id | -| `start_date` | datetime | 3 | The time when the issue added to a sprint | | -| `end_date` | datetime | 3 | The time when the issue gets removed from a sprint | | - -#### 34. refs_issues_diffs - -This table shows the issues fixed by commits added in a new ref compared to an old one. The data is computed from [table.ref_commits_diff](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#yJOyqa), [table.pull_requests](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#Uc849c), [table.pull_request_commits](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#G9cPfj), and [table.pull_request_issues](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#we6Uac). - -This table can support tag-based analysis, for instance, '_No. of bugs closed in a tag_'. 
- -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :----------------------------------------------------- | :----------- | -| `new_ref_id` | varchar | 255 | The new ref's id for comparison | FK_refs.id | -| `old_ref_id` | varchar | 255 | The old ref's id for comparison | FK_refs.id | -| `new_ref_commit_sha` | char | 40 | The commit new ref points to at the time of collection | | -| `old_ref_commit_sha` | char | 40 | The commit old ref points to at the time of collection | | -| `issue_number` | varchar | 255 | Issue number | | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | diff --git a/versioned_docs/version-v0.11/DataModels/_category_.json b/versioned_docs/version-v0.11/DataModels/_category_.json deleted file mode 100644 index e678e712e30..00000000000 --- a/versioned_docs/version-v0.11/DataModels/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Data Models", - "position": 5 -} diff --git a/versioned_docs/version-v0.11/DeveloperManuals/DBMigration.md b/versioned_docs/version-v0.11/DeveloperManuals/DBMigration.md deleted file mode 100644 index 95302379a39..00000000000 --- a/versioned_docs/version-v0.11/DeveloperManuals/DBMigration.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: "DB Migration" -description: > - DB Migration -sidebar_position: 3 ---- - -## Summary -Starting in v0.10.0, DevLake provides a lightweight migration tool for executing migration scripts. -Both framework itself and plugins define their migration scripts in their own migration folder. -The migration scripts are written with gorm in Golang to support different SQL dialects. - - -## Migration Script -Migration script describes how to do database migration. -They implement the `Script` interface. -When DevLake starts, scripts register themselves to the framework by invoking the `Register` function - -```go -type Script interface { - Up(ctx context.Context, db *gorm.DB) error - Version() uint64 - Name() string -} -``` - -## Table `migration_history` - -The table tracks migration scripts execution and schemas changes. -From which, DevLake could figure out the current state of database schemas. - - -## How It Works -1. Check `migration_history` table, calculate all the migration scripts need to be executed. -2. Sort scripts by Version in ascending order. -3. Execute scripts. -4. Save results in the `migration_history` table. diff --git a/versioned_docs/version-v0.11/DeveloperManuals/Dal.md b/versioned_docs/version-v0.11/DeveloperManuals/Dal.md deleted file mode 100644 index 9b085425ae2..00000000000 --- a/versioned_docs/version-v0.11/DeveloperManuals/Dal.md +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: "Dal" -sidebar_position: 5 -description: > - The Dal (Data Access Layer) is designed to decouple the hard dependency on `gorm` in v0.12 ---- - -## Summary - -The Dal (Data Access Layer) is designed to decouple the hard dependency on `gorm` in v0.12. The advantages of introducing this isolation are: - - - Unit Test: Mocking an Interface is easier and more reliable than Patching a Pointer. - - Clean Code: DBS operations are more consistence than using `gorm ` directly. - - Replaceable: It would be easier to replace `gorm` in the future if needed. 
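Before looking at the Dal interface itself, here is a minimal sketch of a migration script satisfying the `Script` interface described on the DB Migration page above. The struct, the column it adds, and the version number are hypothetical and only illustrate the shape of such a script; real scripts also register themselves via the `Register` function when DevLake starts and are executed in ascending `Version()` order.

```go
package migrationscripts

import (
	"context"

	"gorm.io/gorm"
)

// user20220601 declares only the column this hypothetical script adds.
type user20220601 struct {
	Organization string `gorm:"type:varchar(255)"`
}

func (user20220601) TableName() string { return "users" }

// addUserOrganization implements the Script interface (Up/Version/Name).
type addUserOrganization struct{}

func (*addUserOrganization) Up(ctx context.Context, db *gorm.DB) error {
	// gorm's AutoMigrate adds the missing column without touching existing rows
	return db.AutoMigrate(&user20220601{})
}

func (*addUserOrganization) Version() uint64 { return 20220601000001 }

func (*addUserOrganization) Name() string { return "add organization column to users" }
```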
- -## The Dal Interface - -```go -type Dal interface { - AutoMigrate(entity interface{}, clauses ...Clause) error - Exec(query string, params ...interface{}) error - RawCursor(query string, params ...interface{}) (*sql.Rows, error) - Cursor(clauses ...Clause) (*sql.Rows, error) - Fetch(cursor *sql.Rows, dst interface{}) error - All(dst interface{}, clauses ...Clause) error - First(dst interface{}, clauses ...Clause) error - Count(clauses ...Clause) (int64, error) - Pluck(column string, dest interface{}, clauses ...Clause) error - Create(entity interface{}, clauses ...Clause) error - Update(entity interface{}, clauses ...Clause) error - CreateOrUpdate(entity interface{}, clauses ...Clause) error - CreateIfNotExist(entity interface{}, clauses ...Clause) error - Delete(entity interface{}, clauses ...Clause) error - AllTables() ([]string, error) -} -``` - - -## How to use - -### Query -```go -// Get a database cursor -user := &models.User{} -cursor, err := db.Cursor( - dal.From(user), - dal.Where("department = ?", "R&D"), - dal.Orderby("id DESC"), -) -if err != nil { - return err -} -for cursor.Next() { - err = dal.Fetch(cursor, user) // fetch one record at a time - ... -} - -// Get a database cursor by raw sql query -cursor, err := db.Raw("SELECT * FROM users") - -// USE WITH CAUTIOUS: loading a big table at once is slow and dangerous -// Load all records from database at once. -users := make([]models.Users, 0) -err := db.All(&users, dal.Where("department = ?", "R&D")) - -// Load a column as Scalar or Slice -var email string -err := db.Pluck("email", &username, dal.Where("id = ?", 1)) -var emails []string -err := db.Pluck("email", &emails) - -// Execute query -err := db.Exec("UPDATE users SET department = ? WHERE department = ?", "Research & Development", "R&D") -``` - -### Insert -```go -err := db.Create(&models.User{ - Email: "hello@example.com", // assumming this the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Update -```go -err := db.Create(&models.User{ - Email: "hello@example.com", // assumming this the Primarykey - Name: "hello", - Department: "R&D", -}) -``` -### Insert or Update -```go -err := db.CreateOrUpdate(&models.User{ - Email: "hello@example.com", // assuming this is the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Insert if record(by PrimaryKey) didn't exist -```go -err := db.CreateIfNotExist(&models.User{ - Email: "hello@example.com", // assuming this is the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Delete -```go -err := db.CreateIfNotExist(&models.User{ - Email: "hello@example.com", // assuming this is the Primary key -}) -``` - -### DDL and others -```go -// Returns all table names -allTables, err := db.AllTables() - -// Automigrate: create/add missing table/columns -// Note: it won't delete any existing columns, nor does it update the column definition -err := db.AutoMigrate(&models.User{}) -``` - -## How to do Unit Test -First, run the command `make mock` to generate the Mocking Stubs, the generated source files should appear in `mocks` folder. 
-``` -mocks -├── ApiResourceHandler.go -├── AsyncResponseHandler.go -├── BasicRes.go -├── CloseablePluginTask.go -├── ConfigGetter.go -├── Dal.go -├── DataConvertHandler.go -├── ExecContext.go -├── InjectConfigGetter.go -├── InjectLogger.go -├── Iterator.go -├── Logger.go -├── Migratable.go -├── PluginApi.go -├── PluginBlueprintV100.go -├── PluginInit.go -├── PluginMeta.go -├── PluginTask.go -├── RateLimitedApiClient.go -├── SubTaskContext.go -├── SubTaskEntryPoint.go -├── SubTask.go -└── TaskContext.go -``` -With these Mocking stubs, you may start writing your TestCases using the `mocks.Dal`. -```go -import "github.com/apache/incubator-devlake/mocks" - -func TestCreateUser(t *testing.T) { - mockDal := new(mocks.Dal) - mockDal.On("Create", mock.Anything, mock.Anything).Return(nil).Once() - userService := &services.UserService{ - Dal: mockDal, - } - userService.Post(map[string]interface{}{ - "email": "helle@example.com", - "name": "hello", - "department": "R&D", - }) - mockDal.AssertExpectations(t) -``` - diff --git a/versioned_docs/version-v0.11/DeveloperManuals/DeveloperSetup.md b/versioned_docs/version-v0.11/DeveloperManuals/DeveloperSetup.md deleted file mode 100644 index 2a462de1f43..00000000000 --- a/versioned_docs/version-v0.11/DeveloperManuals/DeveloperSetup.md +++ /dev/null @@ -1,131 +0,0 @@ ---- -title: "Developer Setup" -description: > - The steps to install DevLake in develper mode. -sidebar_position: 1 ---- - - -## Requirements - -- Docker v19.03.10+ -- Golang v1.17+ -- Make - - Mac (Already installed) - - Windows: [Download](http://gnuwin32.sourceforge.net/packages/make.htm) - - Ubuntu: `sudo apt-get install build-essential libssl-dev` - -## How to setup dev environment -1. Navigate to where you would like to install this project and clone the repository: - - ```sh - git clone https://github.com/apache/incubator-devlake - cd incubator-devlake - ``` - -2. Install dependencies for plugins: - - - [RefDiff](../Plugins/refdiff.md#development) - -3. Install Go packages - - ```sh - go get - ``` - -4. Copy the sample config file to new local file: - - ```sh - cp .env.example .env - ``` - -5. Update the following variables in the file `.env`: - - * `DB_URL`: Replace `mysql:3306` with `127.0.0.1:3306` - -6. Start the MySQL and Grafana containers: - - > Make sure the Docker daemon is running before this step. - - ```sh - docker-compose up -d mysql grafana - ``` - -7. Run lake and config UI in dev mode in two separate terminals: - - ```sh - # install mockery - go install github.com/vektra/mockery/v2@latest - # generate mocking stubs - make mock - # run lake - make dev - # run config UI - make configure-dev - ``` - - Q: I got an error saying: `libgit2.so.1.3: cannot open share object file: No such file or directory` - - A: Make sure your program can find `libgit2.so.1.3`. `LD_LIBRARY_PATH` can be assigned like this if your `libgit2.so.1.3` is located at `/usr/local/lib`: - - ```sh - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib - ``` - -8. Visit config UI at `localhost:4000` to configure data connections. - - Navigate to desired plugins pages on the Integrations page - - Enter the required information for the plugins you intend to use. 
- - Refer to the following for more details on how to configure each one: - - [Jira](../Plugins/jira.md) - - [GitLab](../Plugins/gitlab.md) - - [Jenkins](../Plugins/jenkins.md) - - [GitHub](../Plugins/github.md): For users who'd like to collect GitHub data, we recommend reading our [GitHub data collection guide](../UserManuals/GitHubUserGuide.md) which covers the following steps in detail. - - Submit the form to update the values by clicking on the **Save Connection** button on each form page - -9. Visit `localhost:4000/pipelines/create` to RUN a Pipeline and trigger data collection. - - - Pipelines Runs can be initiated by the new "Create Run" Interface. Simply enable the **Data Connection Providers** you wish to run collection for, and specify the data you want to collect, for instance, **Project ID** for Gitlab and **Repository Name** for GitHub. - - Once a valid pipeline configuration has been created, press **Create Run** to start/run the pipeline. - After the pipeline starts, you will be automatically redirected to the **Pipeline Activity** screen to monitor collection activity. - - **Pipelines** is accessible from the main menu of the config-ui for easy access. - - - Manage All Pipelines: `http://localhost:4000/pipelines` - - Create Pipeline RUN: `http://localhost:4000/pipelines/create` - - Track Pipeline Activity: `http://localhost:4000/pipelines/activity/[RUN_ID]` - - For advanced use cases and complex pipelines, please use the Raw JSON API to manually initiate a run using **cURL** or graphical API tool such as **Postman**. `POST` the following request to the DevLake API Endpoint. - - ```json - [ - [ - { - "plugin": "github", - "options": { - "repo": "lake", - "owner": "merico-dev" - } - } - ] - ] - ``` - - Please refer to [Pipeline Advanced Mode](../UserManuals/AdvancedMode.md) for in-depth explanation. - - -10. Click *View Dashboards* button in the top left when done, or visit `localhost:3002` (username: `admin`, password: `admin`). - - We use Grafana as a visualization tool to build charts for the data stored in our database. Using SQL queries, we can add panels to build, save, and edit customized dashboards. - - All the details on provisioning and customizing a dashboard can be found in the [Grafana Doc](../UserManuals/GrafanaUserGuide.md). - -11. (Optional) To run the tests: - - ```sh - make test - ``` - -12. For DB migrations, please refer to [Migration Doc](../DeveloperManuals/DBMigration.md). - diff --git a/versioned_docs/version-v0.11/DeveloperManuals/Notifications.md b/versioned_docs/version-v0.11/DeveloperManuals/Notifications.md deleted file mode 100644 index 23456b4f1e7..00000000000 --- a/versioned_docs/version-v0.11/DeveloperManuals/Notifications.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Notifications" -description: > - Notifications -sidebar_position: 4 ---- - -## Request -Example request -``` -POST /lake/notify?nouce=3-FDXxIootApWxEVtz&sign=424c2f6159bd9e9828924a53f9911059433dc14328a031e91f9802f062b495d5 - -{"TaskID":39,"PluginName":"jenkins","CreatedAt":"2021-09-30T15:28:00.389+08:00","UpdatedAt":"2021-09-30T15:28:00.785+08:00"} -``` - -## Configuration -If you want to use the notification feature, you should add two configuration key to `.env` file. -```shell -# .env -# notification request url, e.g.: http://example.com/lake/notify -NOTIFICATION_ENDPOINT= -# secret is used to calculate signature -NOTIFICATION_SECRET= -``` - -## Signature -You should check the signature before accepting the notification request. 
We use sha256 algorithm to calculate the checksum. -```go -// calculate checksum -sum := sha256.Sum256([]byte(requestBody + NOTIFICATION_SECRET + nouce)) -return hex.EncodeToString(sum[:]) -``` diff --git a/versioned_docs/version-v0.11/DeveloperManuals/PluginImplementation.md b/versioned_docs/version-v0.11/DeveloperManuals/PluginImplementation.md deleted file mode 100644 index c47ab4e1228..00000000000 --- a/versioned_docs/version-v0.11/DeveloperManuals/PluginImplementation.md +++ /dev/null @@ -1,292 +0,0 @@ ---- -title: "Plugin Implementation" -sidebar_position: 2 -description: > - Plugin Implementation ---- - -## How to Implement a DevLake plugin? - -If your favorite DevOps tool is not yet supported by DevLake, don't worry. It's not difficult to implement a DevLake plugin. In this post, we'll go through the basics of DevLake plugins and build an example plugin from scratch together. - -## What is a plugin? - -A DevLake plugin is a shared library built with Go's `plugin` package that hooks up to DevLake core at run-time. - -A plugin may extend DevLake's capability in three ways: - -1. Integrating with new data sources -2. Transforming/enriching existing data -3. Exporting DevLake data to other data systems - - -## How do plugins work? - -A plugin mainly consists of a collection of subtasks that can be executed by DevLake core. For data source plugins, a subtask may be collecting a single entity from the data source (e.g., issues from Jira). Besides the subtasks, there're hooks that a plugin can implement to customize its initialization, migration, and more. See below for a list of the most important interfaces: - -1. [PluginMeta](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_meta.go) contains the minimal interface that a plugin should implement, with only two functions - - Description() returns the description of a plugin - - RootPkgPath() returns the root package path of a plugin -2. [PluginInit](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_init.go) allows a plugin to customize its initialization -3. [PluginTask](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_task.go) enables a plugin to prepare data prior to subtask execution -4. [PluginApi](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_api.go) lets a plugin exposes some self-defined APIs -5. [PluginMigration](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_migration.go) is where a plugin manages its database migrations - -The diagram below shows the control flow of executing a plugin: - -```mermaid -flowchart TD; - subgraph S4[Step4 sub-task extractor running process]; - direction LR; - D4[DevLake]; - D4 -- "Step4.1 create a new\n ApiExtractor\n and execute it" --> E["ExtractXXXMeta.\nEntryPoint"]; - E <-- "Step4.2 read from\n raw table" --> E2["RawDataSubTaskArgs\n.Table"]; - E -- "Step4.3 call with RawData" --> ApiExtractor.Extract; - ApiExtractor.Extract -- "decode and return gorm models" --> E - end - subgraph S3[Step3 sub-task collector running process] - direction LR - D3[DevLake] - D3 -- "Step3.1 create a new\n ApiCollector\n and execute it" --> C["CollectXXXMeta.\nEntryPoint"]; - C <-- "Step3.2 create\n raw table" --> C2["RawDataSubTaskArgs\n.RAW_BBB_TABLE"]; - C <-- "Step3.3 build query\n before sending requests" --> ApiCollectorArgs.\nQuery/UrlTemplate; - C <-. 
"Step3.4 send requests by ApiClient \n and return HTTP response" .-> A1["HTTP APIs"]; - C <-- "Step3.5 call and \nreturn decoded data \nfrom HTTP response" --> ResponseParser; - end - subgraph S2[Step2 DevLake register custom plugin] - direction LR - D2[DevLake] - D2 <-- "Step2.1 function \`Init\` \nneed to do init jobs" --> plugin.Init; - D2 <-- "Step2.2 (Optional) call \nand return migration scripts" --> plugin.MigrationScripts; - D2 <-- "Step2.3 (Optional) call \nand return taskCtx" --> plugin.PrepareTaskData; - D2 <-- "Step2.4 call and \nreturn subTasks for execting" --> plugin.SubTaskContext; - end - subgraph S1[Step1 Run DevLake] - direction LR - main -- "Transfer of control \nby \`runner.DirectRun\`" --> D1[DevLake]; - end - S1-->S2-->S3-->S4 -``` -There's a lot of information in the diagram but we don't expect you to digest it right away, simply use it as a reference when you go through the example below. - -## A step-by-step guide towards your first plugin - -In this guide, we'll walk through how to create a data source plugin from scratch. - -The example in this tutorial comes from DevLake's own needs of managing [CLAs](https://en.wikipedia.org/wiki/Contributor_License_Agreement). Whenever DevLake receives a new PR on GitHub, we need to check if the author has signed a CLA by referencing `https://people.apache.org/public/icla-info.json`. This guide will demonstrate how to collect the ICLA info from Apache API, cache the raw response, and extract the raw data into a relational table ready to be queried. - -### Step 1: Bootstrap the new plugin - -**Note:** Please make sure you have DevLake up and running before proceeding. - -> More info about plugin: -> Generally, we need these folders in plugin folders: `api`, `models` and `tasks` -> `api` interacts with `config-ui` for test/get/save connection of data source -> - connection [example](https://github.com/apache/incubator-devlake/blob/main/plugins/gitlab/api/connection.go) -> - connection model [example](https://github.com/apache/incubator-devlake/blob/main/plugins/gitlab/models/connection.go) -> `models` stores all `data entities` and `data migration scripts`. -> - entity -> - data migrations [template](https://github.com/apache/incubator-devlake/tree/main/generator/template/migrationscripts) -> `tasks` contains all of our `sub tasks` for a plugin -> - task data [template](https://github.com/apache/incubator-devlake/blob/main/generator/template/plugin/tasks/task_data.go-template) -> - api client [template](https://github.com/apache/incubator-devlake/blob/main/generator/template/plugin/tasks/task_data_with_api_client.go-template) - -Don't worry if you cannot figure out what these concepts mean immediately. We'll explain them one by one later. - -DevLake provides a generator to create a plugin conveniently. Let's scaffold our new plugin by running `go run generator/main.go create-plugin icla`, which would ask for `with_api_client` and `Endpoint`. - -* `with_api_client` is used for choosing if we need to request HTTP APIs by api_client. -* `Endpoint` use in which site we will request, in our case, it should be `https://people.apache.org/`. - -![create plugin](https://i.imgur.com/itzlFg7.png) - -Now we have three files in our plugin. `api_client.go` and `task_data.go` are in subfolder `tasks/`. -![plugin files](https://i.imgur.com/zon5waf.png) - -Have a try to run this plugin by function `main` in `plugin_main.go`. 
When you see result like this: -``` -$go run plugins/icla/plugin_main.go -[2022-06-02 18:07:30] INFO failed to create dir logs: mkdir logs: file exists -press `c` to send cancel signal -[2022-06-02 18:07:30] INFO [icla] start plugin -invalid ICLA_TOKEN, but ignore this error now -[2022-06-02 18:07:30] INFO [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s -[2022-06-02 18:07:30] INFO [icla] total step: 0 -``` -How exciting. It works! The plugin defined and initiated in `plugin_main.go` use some options in `task_data.go`. They are made up as the most straightforward plugin in Apache DevLake, and `api_client.go` will be used in the next step to request HTTP APIs. - -### Step 2: Create a sub-task for data collection -Before we start, it is helpful to know how collection task is executed: -1. First, Apache DevLake would call `plugin_main.PrepareTaskData()` to prepare needed data before any sub-tasks. We need to create an API client here. -2. Then Apache DevLake will call the sub-tasks returned by `plugin_main.SubTaskMetas()`. Sub-task is an independent task to do some job, like requesting API, processing data, etc. - -> Each sub-task must be defined as a SubTaskMeta, and implement SubTaskEntryPoint of SubTaskMeta. SubTaskEntryPoint is defined as -> ```go -> type SubTaskEntryPoint func(c SubTaskContext) error -> ``` -> More info at: https://devlake.apache.org/blog/how-DevLake-is-up-and-running/ - -#### Step 2.1 Create a sub-task(Collector) for data collection - -Let's run `go run generator/main.go create-collector icla committer` and confirm it. This sub-task is activated by registering in `plugin_main.go/SubTaskMetas` automatically. - -![](https://i.imgur.com/tkDuofi.png) - -> - Collector will collect data from HTTP or other data sources, and save the data into the raw layer. -> - Inside the func `SubTaskEntryPoint` of `Collector`, we use `helper.NewApiCollector` to create an object of [ApiCollector](https://github.com/apache/incubator-devlake/blob/main/backend/generator/template/plugin/tasks/api_collector.go-template), then call `execute()` to do the job. - -Now you can notice `data.ApiClient` is inited in `plugin_main.go/PrepareTaskData.ApiClient`. `PrepareTaskData` create a new `ApiClient`, and it's a tool Apache DevLake suggests to request data from HTTP Apis. This tool support some valuable features for HttpApi, like rateLimit, proxy and retry. Of course, if you like, you may use the lib `http` instead, but it will be more tedious. - -Let's move forward to use it. - -1. To collect data from `https://people.apache.org/public/icla-info.json`, - we have filled `https://people.apache.org/` into `tasks/api_client.go/ENDPOINT` in Step 1. - -![](https://i.imgur.com/q8Zltnl.png) - -2. And fill `public/icla-info.json` into `UrlTemplate`, delete unnecessary iterator and add `println("receive data:", res)` in `ResponseParser` to see if collection was successful. - -![](https://i.imgur.com/ToLMclH.png) - -Ok, now the collector sub-task has been added to the plugin, and we can kick it off by running `main` again. 
If everything goes smoothly, the output should look like this:
```bash
[2022-06-06 12:24:52] INFO  [icla] start plugin
invalid ICLA_TOKEN, but ignore this error now
[2022-06-06 12:24:52] INFO  [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s
[2022-06-06 12:24:52] INFO  [icla] total step: 1
[2022-06-06 12:24:52] INFO  [icla] executing subtask CollectCommitter
[2022-06-06 12:24:52] INFO  [icla] [CollectCommitter] start api collection
receive data: 0x140005763f0
[2022-06-06 12:24:55] INFO  [icla] [CollectCommitter] finished records: 1
[2022-06-06 12:24:55] INFO  [icla] [CollectCommitter] end api collection
[2022-06-06 12:24:55] INFO  [icla] finished step: 1 / 1
```

Great! Now we can see data being pulled from the server without any problem. The last step is to decode the response body in `ResponseParser` and return it to the framework, so it can be stored in the database.
```go
ResponseParser: func(res *http.Response) ([]json.RawMessage, error) {
	body := &struct {
		LastUpdated string          `json:"last_updated"`
		Committers  json.RawMessage `json:"committers"`
	}{}
	err := helper.UnmarshalResponse(res, body)
	if err != nil {
		return nil, err
	}
	println("receive data:", len(body.Committers))
	return []json.RawMessage{body.Committers}, nil
},
```
Run the function `main` once again, and this time we should be able to see some records show up in the table `_raw_icla_committer`:
```bash
……
receive data: 272956 /* <- the number means 272956 models received */
[2022-06-06 13:46:57] INFO  [icla] [CollectCommitter] finished records: 1
[2022-06-06 13:46:57] INFO  [icla] [CollectCommitter] end api collection
[2022-06-06 13:46:57] INFO  [icla] finished step: 1 / 1
```

![](https://i.imgur.com/aVYNMRr.png)

#### Step 2.2 Create a sub-task (Extractor) to extract data from the raw layer

> - The Extractor extracts data from the raw layer and saves it into tool-layer DB tables.
> - Apart from some pre-processing, the main flow is similar to the collector's.

We have already collected data from the HTTP API and saved it into the DB table `_raw_XXXX`. In this step, we will extract the names of committers from the raw data. As you may infer from the name, raw tables are temporary and not easy to use directly.

Apache DevLake recommends saving data with [gorm](https://gorm.io/docs/index.html), so we will create a gorm model and add it to `plugin_main.go/AutoSchemas.Up()`.

plugins/icla/models/committer.go
```go
package models

import (
	"github.com/apache/incubator-devlake/models/common"
)

type IclaCommitter struct {
	UserName string `gorm:"primaryKey;type:varchar(255)"`
	Name     string `gorm:"primaryKey;type:varchar(255)"`
	common.NoPKModel
}

func (IclaCommitter) TableName() string {
	return "_tool_icla_committer"
}
```

plugins/icla/plugin_main.go
![](https://i.imgur.com/4f0zJty.png)


Run the plugin again, and the table `_tool_icla_committer` will be created automatically, just like the snapshot below:
![](https://i.imgur.com/7Z324IX.png)

Next, let's run `go run generator/main.go create-extractor icla committer` and type in what the command prompt asks for.

![](https://i.imgur.com/UyDP9Um.png)

Let's look at the function `extract` in the newly created `committer_extractor.go`; some code still needs to be written here. `resData.Data` holds the raw data, so we can decode it from JSON and create new `IclaCommitter` records to save:
```go
Extract: func(resData *helper.RawData) ([]interface{}, error) {
	names := &map[string]string{}
	err := json.Unmarshal(resData.Data, names)
	if err != nil {
		return nil, err
	}
	extractedModels := make([]interface{}, 0)
	for userName, name := range *names {
		extractedModels = append(extractedModels, &models.IclaCommitter{
			UserName: userName,
			Name:     name,
		})
	}
	return extractedModels, nil
},
```

Run it again, and we get:
```
[2022-06-06 15:39:40] INFO  [icla] start plugin
invalid ICLA_TOKEN, but ignore this error now
[2022-06-06 15:39:40] INFO  [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s
[2022-06-06 15:39:40] INFO  [icla] total step: 2
[2022-06-06 15:39:40] INFO  [icla] executing subtask CollectCommitter
[2022-06-06 15:39:40] INFO  [icla] [CollectCommitter] start api collection
receive data: 272956
[2022-06-06 15:39:44] INFO  [icla] [CollectCommitter] finished records: 1
[2022-06-06 15:39:44] INFO  [icla] [CollectCommitter] end api collection
[2022-06-06 15:39:44] INFO  [icla] finished step: 1 / 2
[2022-06-06 15:39:44] INFO  [icla] executing subtask ExtractCommitter
[2022-06-06 15:39:46] INFO  [icla] [ExtractCommitter] finished records: 1
[2022-06-06 15:39:46] INFO  [icla] finished step: 2 / 2
```
The committer data has now been saved in `_tool_icla_committer`.
![](https://i.imgur.com/6svX0N2.png)

#### Step 2.3 Convertor

Note: whether you need this step depends on whether you plan to contribute the plugin or only use it yourself. A convertor is not strictly required, but we encourage writing one, because convertors and the domain layer significantly help with building dashboards. More info about the domain layer at: https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema/

> - The Convertor converts data from the tool layer and saves it into the domain layer.
> - We use `helper.NewDataConverter` to create an object of `DataConvertor`, then call `execute()` to run it.

#### Step 2.4 Let's try it
Sometimes an open API is protected by a token or another auth mechanism, and we need to log in to obtain a token before we can access it. For example, only after logging in with `private@apache.com` could we gather the data about contributors signing the ICLA. Here we briefly introduce how to authorize DevLake to collect such data.

Let's look at `api_client.go`. `NewIclaApiClient` loads the `ICLA_TOKEN` config from `.env`, so we can add `ICLA_TOKEN=XXXXXX` to `.env` and use it in `apiClient.SetHeaders()` to mock the login status. The code looks like this:
![](https://i.imgur.com/dPxooAx.png)

Of course, we could also log in with a username/password first to obtain the token. Adjust this to your actual situation.

Look for more related details at https://github.com/apache/incubator-devlake

#### Final step: Submit the code as open source code
We encourage contributions! Learn about migration scripts and domain layers to write standardized, platform-neutral code. More info at https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema, or contact us if you need any help.


## Done!

Congratulations! The first plugin has been created!
🎖
diff --git a/versioned_docs/version-v0.11/DeveloperManuals/_category_.json b/versioned_docs/version-v0.11/DeveloperManuals/_category_.json
deleted file mode 100644
index fe67a680c11..00000000000
--- a/versioned_docs/version-v0.11/DeveloperManuals/_category_.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-  "label": "Developer Manuals",
-  "position": 4
-}
diff --git a/versioned_docs/version-v0.11/EngineeringMetrics.md b/versioned_docs/version-v0.11/EngineeringMetrics.md
deleted file mode 100644
index e73c04828a1..00000000000
--- a/versioned_docs/version-v0.11/EngineeringMetrics.md
+++ /dev/null
@@ -1,195 +0,0 @@
---
sidebar_position: 06
title: "Engineering Metrics"
linkTitle: "Engineering Metrics"
tags: []
description: >
  The definition, values and data required for the 20+ engineering metrics supported by DevLake.
---

| Category | Metric Name | Definition | Data Required | Use Scenarios and Recommended Practices | Value |
| -------- | ----------- | ---------- | ------------- | --------------------------------------- | ----- |
| Delivery Velocity | Requirement Count | Number of issues in type "Requirement" | Issue/Task Management entities: Jira issues, GitHub issues, etc | 1. Analyze the number of requirements and delivery rate of different time cycles to find the stability and trend of the development process.<br/>2. Analyze and compare the number of requirements delivered and delivery rate of each project/team, and compare the scale of requirements of different projects.<br/>3. Based on historical data, establish a baseline of the delivery capacity of a single iteration (optimistic, probable and pessimistic values) to provide a reference for iteration estimation.<br/>4. Drill down to analyze the number and percentage of requirements in different phases of SDLC. Analyze rationality and identify the requirements stuck in the backlog. | 1. Based on historical data, establish a baseline of the delivery capacity of a single iteration to improve the organization and planning of R&D resources.<br/>2. Evaluate whether the delivery capacity matches the business phase and demand scale. Identify key bottlenecks and reasonably allocate resources. |
| | Requirement Delivery Rate | Ratio of delivered requirements to all requirements | Issue/Task Management entities: Jira issues, GitHub issues, etc | Shared with Requirement Count | Shared with Requirement Count |
| | Requirement Lead Time | Lead time of issues with type "Requirement" | Issue/Task Management entities: Jira issues, GitHub issues, etc | 1. Analyze the trend of requirement lead time to observe if it has improved over time.<br/>2. Analyze and compare the requirement lead time of each project/team to identify key projects with abnormal lead time.<br/>3. Drill down to analyze a requirement's staying time in different phases of SDLC. Analyze the bottleneck of delivery velocity and improve the workflow. | 1. Analyze key projects and critical points, identify good/to-be-improved practices that affect requirement lead time, and reduce the risk of delays.<br/>2. Focus on the end-to-end velocity of the value delivery process; coordinate different parts of R&D to avoid efficiency shafts; make targeted improvements to bottlenecks. |
| | Requirement Granularity | Number of story points associated with an issue | Issue/Task Management entities: Jira issues, GitHub issues, etc | 1. Analyze the story points/requirement lead time of requirements to evaluate whether the ticket size, i.e. requirement complexity, is optimal.<br/>2. Compare the estimated requirement granularity with the actual situation and evaluate whether the difference is reasonable by combining more microscopic workload metrics (e.g. lines of code/code equivalents). | 1. Promote product teams to split requirements carefully, improve requirements quality, help developers understand requirements clearly, deliver efficiently and with high quality, and improve the project management capability of the team.<br/>2. Establish a data-supported workload estimation model to help R&D teams calibrate their estimation methods and more accurately assess the granularity of requirements, which is useful to achieve better issue planning in project management. |
| | Commit Count | Number of Commits | Source Code Management entities: Git/GitHub/GitLab commits | 1. Identify the main reasons for the unusual number of commits and the possible impact on the number of commits through comparison.<br/>2. Evaluate whether the number of commits is reasonable in conjunction with more microscopic workload metrics (e.g. lines of code/code equivalents). | 1. Identify potential bottlenecks that may affect output.<br/>2. Encourage R&D practices of small step submissions and develop excellent coding habits. |
| | Added Lines of Code | Accumulated number of added lines of code | Source Code Management entities: Git/GitHub/GitLab commits | 1. From the project/team dimension, observe the accumulated change in added lines to assess the team activity and code growth rate.<br/>2. From the version cycle dimension, observe the active time distribution of code changes, and evaluate the effectiveness of the project development model.<br/>3. From the member dimension, observe the trend and stability of code output of each member, and identify the key points that affect code output by comparison. | 1. Identify potential bottlenecks that may affect the output.<br/>2. Encourage the team to implement a development model that matches the business requirements; develop excellent coding habits. |
| | Deleted Lines of Code | Accumulated number of deleted lines of code | Source Code Management entities: Git/GitHub/GitLab commits | Shared with Added Lines of Code | Shared with Added Lines of Code |
| | Pull Request Review Time | Time from Pull/Merge created time until merged | Source Code Management entities: GitHub PRs, GitLab MRs, etc | 1. Observe the mean and distribution of code review time from the project/team/individual dimension to assess the rationality of the review time. | 1. Take inventory of project/team code review resources to avoid lack of resources and backlog of review sessions, resulting in long waiting time.<br/>2. Encourage teams to implement an efficient and responsive code review mechanism. |
| | Bug Age | Lead time of issues in type "Bug" | Issue/Task Management entities: Jira issues, GitHub issues, etc | 1. Observe the trend of bug age and locate the key reasons.<br/>2. According to the severity level, type (business, functional classification), affected module, source of bugs, count and observe the length of bug and incident age. | 1. Help the team to establish an effective hierarchical response mechanism for bugs and incidents. Focus on the resolution of important problems in the backlog.<br/>2. Improve the team's and individual's bug/incident fixing efficiency. Identify good/to-be-improved practices that affect bug age or incident age. |
| | Incident Age | Lead time of issues in type "Incident" | Issue/Task Management entities: Jira issues, GitHub issues, etc | Shared with Bug Age | Shared with Bug Age |
| Delivery Quality | Pull Request Count | Number of Pull/Merge Requests | Source Code Management entities: GitHub PRs, GitLab MRs, etc | 1. From the developer dimension, we evaluate the code quality of developers by combining the task complexity with the metrics related to the number of review passes and review rounds.<br/>2. From the reviewer dimension, we observe the reviewer's review style by taking into account the task complexity, the number of passes and the number of review rounds.<br/>3. From the project/team dimension, we combine the project phase and team task complexity to aggregate the metrics related to the number of review passes and review rounds, and identify the modules with abnormal code review process and possible quality risks. | 1. Code review metrics are process indicators to provide quick feedback on developers' code quality.<br/>2. Promote the team to establish a unified coding specification and standardize the code review criteria.<br/>3. Identify modules with low-quality risks in advance, optimize practices, and precipitate into reusable knowledge and tools to avoid technical debt accumulation. |
| | Pull Request Pass Rate | Ratio of Pull/Merge Review requests to merged | Source Code Management entities: GitHub PRs, GitLab MRs, etc | Shared with Pull Request Count | Shared with Pull Request Count |
| | Pull Request Review Rounds | Number of cycles of commits followed by comments/final merge | Source Code Management entities: GitHub PRs, GitLab MRs, etc | Shared with Pull Request Count | Shared with Pull Request Count |
| | Pull Request Review Count | Number of Pull/Merge Reviewers | Source Code Management entities: GitHub PRs, GitLab MRs, etc | 1. As a secondary indicator, assess the cost of labor invested in the code review process. | 1. Take inventory of project/team code review resources to avoid long waits for review sessions due to insufficient resource input. |
| | Bug Count | Number of bugs found during testing | Issue/Task Management entities: Jira issues, GitHub issues, etc | 1. From the project or team dimension, observe the statistics on the total number of defects, the distribution of the number of defects in each severity level/type/owner, the cumulative trend of defects, and the change trend of the defect rate in thousands of lines, etc.<br/>2. From the version cycle dimension, observe the statistics on the cumulative trend of the number of defects/defect rate, which can be used to determine whether the growth rate of defects is slowing down, showing a flat convergence trend, and is an important reference for judging the stability of software version quality.<br/>3. From the time dimension, analyze the trend of the number of test defects and the defect rate to locate the key items/key points.<br/>4. Evaluate whether the software quality and test plan are reasonable by referring to CMMI standard values. | 1. Defect drill-down analysis to inform the development of design and code review strategies and to improve the internal QA process.<br/>2. Assist teams to locate projects/modules with higher defect severity and density, and clean up technical debts.<br/>3. Analyze critical points, identify good/to-be-improved practices that affect defect count or defect rate, to reduce the amount of future defects. |
| | Incident Count | Number of Incidents found after shipping | Source Code Management entities: GitHub PRs, GitLab MRs, etc | Shared with Bug Count | Shared with Bug Count |
| | Bugs Count per 1k Lines of Code | Amount of bugs per 1,000 lines of code | Source Code Management entities: GitHub PRs, GitLab MRs, etc | Shared with Bug Count | Shared with Bug Count |
| | Incidents Count per 1k Lines of Code | Amount of incidents per 1,000 lines of code | Source Code Management entities: GitHub PRs, GitLab MRs, etc | Shared with Bug Count | Shared with Bug Count |
| Delivery Cost | Commit Author Count | Number of Contributors who have committed code | Source Code Management entities: Git/GitHub/GitLab commits | 1. As a secondary indicator, this helps assess the labor cost of participating in coding. | 1. Take inventory of project/team R&D resource inputs, assess input-output ratio, and rationalize resource deployment. |
| Delivery Capability | Build Count | The number of builds started | CI/CD entities: Jenkins PRs, GitLabCI MRs, etc | 1. From the project dimension, compare the number of builds and success rate by combining the project phase and the complexity of tasks.<br/>2. From the time dimension, analyze the trend of the number of builds and success rate to see if it has improved over time. | 1. As a process indicator, it reflects the value flow efficiency of upstream production and research links.<br/>2. Identify excellent/to-be-improved practices that impact the build, and drive the team to precipitate reusable tools and mechanisms to build infrastructure for fast and high-frequency delivery. |
| | Build Duration | The duration of successful builds | CI/CD entities: Jenkins PRs, GitLabCI MRs, etc | Shared with Build Count | Shared with Build Count |
| | Build Success Rate | The percentage of successful builds | CI/CD entities: Jenkins PRs, GitLabCI MRs, etc | Shared with Build Count | Shared with Build Count |
diff --git a/versioned_docs/version-v0.11/Overview/Architecture.md b/versioned_docs/version-v0.11/Overview/Architecture.md deleted file mode 100755 index 2d780a504d6..00000000000 --- a/versioned_docs/version-v0.11/Overview/Architecture.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Architecture" -description: > - Understand the architecture of Apache DevLake -sidebar_position: 2 ---- - -## Architecture Overview - -

*(Figure: DevLake Components)*
- -A DevLake installation typically consists of the following components: - -- Config UI: A handy user interface to create, trigger, and debug data pipelines. -- API Server: The main programmatic interface of DevLake. -- Runner: The runner does all the heavy-lifting for executing tasks. In the default DevLake installation, it runs within the API Server, but DevLake provides a temporal-based runner (beta) for production environments. -- Database: The database stores both DevLake's metadata and user data collected by data pipelines. DevLake supports MySQL and PostgreSQL as of v0.11. -- Plugins: Plugins enable DevLake to collect and analyze dev data from any DevOps tools with an accessible API. DevLake community is actively adding plugins for popular DevOps tools, but if your preferred tool is not covered yet, feel free to open a GitHub issue to let us know or check out our doc on how to build a new plugin by yourself. -- Dashboards: Dashboards deliver data and insights to DevLake users. A dashboard is simply a collection of SQL queries along with corresponding visualization configurations. DevLake's official dashboard tool is Grafana and pre-built dashboards are shipped in Grafana's JSON format. Users are welcome to swap for their own choice of dashboard/BI tool if desired. - -## Dataflow - -

*(Figure: DevLake Dataflow)*
- -A typical plugin's dataflow is illustrated below: - -1. The Raw layer stores the API responses from data sources (DevOps tools) in JSON. This saves developers' time if the raw data is to be transformed differently later on. Please note that communicating with data sources' APIs is usually the most time-consuming step. -2. The Tool layer extracts raw data from JSONs into a relational schema that's easier to consume by analytical tasks. Each DevOps tool would have a schema that's tailored to their data structure, hence the name, the Tool layer. -3. The Domain layer attempts to build a layer of abstraction on top of the Tool layer so that analytics logics can be re-used across different tools. For example, GitHub's Pull Request (PR) and GitLab's Merge Request (MR) are similar entities. They each have their own table name and schema in the Tool layer, but they're consolidated into a single entity in the Domain layer, so that developers only need to implement metrics like Cycle Time and Code Review Rounds once against the domain layer schema. - -## Principles - -1. Extensible: DevLake's plugin system allows users to integrate with any DevOps tool. DevLake also provides a dbt plugin that enables users to define their own data transformation and analysis workflows. -2. Portable: DevLake has a modular design and provides multiple options for each module. Users of different setups can freely choose the right configuration for themselves. -3. Robust: DevLake provides an SDK to help plugins efficiently and reliably collect data from data sources while respecting their API rate limits and constraints. - -
diff --git a/versioned_docs/version-v0.11/Overview/Introduction.md b/versioned_docs/version-v0.11/Overview/Introduction.md deleted file mode 100755 index c8aacd90d7f..00000000000 --- a/versioned_docs/version-v0.11/Overview/Introduction.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: "Introduction" -description: General introduction of Apache DevLake -sidebar_position: 1 ---- - -## What is Apache DevLake? -Apache DevLake is an open-source dev data platform that ingests, analyzes, and visualizes the fragmented data from DevOps tools to distill insights for engineering productivity. - -Apache DevLake is designed for developer teams looking to make better sense of their development process and to bring a more data-driven approach to their own practices. You can ask Apache DevLake many questions regarding your development process. Just connect and query. - -## What can be accomplished with DevLake? -1. Collect DevOps data across the entire Software Development Life Cycle (SDLC) and connect the siloed data with a standard [data model](../DataModels/DevLakeDomainLayerSchema.md). -2. Visualize out-of-the-box engineering [metrics](../EngineeringMetrics.md) in a series of use-case driven dashboards -3. Easily extend DevLake to support your data sources, metrics, and dashboards with a flexible [framework](Architecture.md) for data collection and ETL. - diff --git a/versioned_docs/version-v0.11/Overview/Roadmap.md b/versioned_docs/version-v0.11/Overview/Roadmap.md deleted file mode 100644 index 9dcf0b3dc0f..00000000000 --- a/versioned_docs/version-v0.11/Overview/Roadmap.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "Roadmap" -description: > - The goals and roadmap for DevLake in 2022 -sidebar_position: 3 ---- - - -## Goals -DevLake has joined the Apache Incubator and is aiming to become a top-level project. To achieve this goal, the Apache DevLake (Incubating) community will continue to make efforts in helping development teams to analyze and improve their engineering productivity. In the 2022 Roadmap, we have summarized three major goals followed by the feature breakdown to invite the broader community to join us and grow together. - -1. As a dev data analysis application, discover and implement 3 (or even more!) usage scenarios: - - A collection of metrics to track the contribution, quality and growth of open-source projects - - DORA metrics for DevOps engineers - - To be decided ([let us know](https://join.slack.com/t/devlake-io/shared_invite/zt-17b6vuvps-x98pqseoUagM7EAmKC82xQ) if you have any suggestions!) -2. As dev data infrastructure, provide robust data collection modules, customizable data models, and data extensibility. -3. Design better user experience for end-users and contributors. - -## Feature Breakdown -Apache DevLake is currently under rapid development. You are more than welcome to use the following table to explore your intereted features and make contributions. We deeply appreciate the collective effort of our community to make this project possible! - -| Category | Features| -| --- | --- | -| More data sources across different [DevOps domains](../DataModels/DevLakeDomainLayerSchema.md) (Goal No.1 & 2)| Features in **bold** are of higher priority

Issue/Task Management, Source Code Management, Code Review, CI/CD, Quality, QA, Calendar, OSS Community Metrics |
| Improved data collection, [data models](../DataModels/DevLakeDomainLayerSchema.md) and data extensibility (Goal No.2) | Data Collection, Data Models, Data Extensibility |
| Better user experience (Goal No.3) | For new users, for returning users, for contributors |


## How to Influence the Roadmap
A roadmap is only useful when it captures real user needs. We are glad to hear from you if you have specific use cases, feedback, or ideas. You can submit an issue to let us know!
Also, if you plan to work (or are already working) on a new or existing feature, tell us, so that we can update the roadmap accordingly. We are happy to share knowledge and context to help your feature land successfully.


- diff --git a/versioned_docs/version-v0.11/Overview/_category_.json b/versioned_docs/version-v0.11/Overview/_category_.json deleted file mode 100644 index e224ed81cd3..00000000000 --- a/versioned_docs/version-v0.11/Overview/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Overview", - "position": 1 -} diff --git a/versioned_docs/version-v0.11/Plugins/_category_.json b/versioned_docs/version-v0.11/Plugins/_category_.json deleted file mode 100644 index 534bad899e8..00000000000 --- a/versioned_docs/version-v0.11/Plugins/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Plugins", - "position": 7 -} diff --git a/versioned_docs/version-v0.11/Plugins/dbt.md b/versioned_docs/version-v0.11/Plugins/dbt.md deleted file mode 100644 index 059bf12c61d..00000000000 --- a/versioned_docs/version-v0.11/Plugins/dbt.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "DBT" -description: > - DBT Plugin ---- - - -## Summary - -dbt (data build tool) enables analytics engineers to transform data in their warehouses by simply writing select statements. dbt handles turning these select statements into tables and views. -dbt does the T in ELT (Extract, Load, Transform) processes – it doesn’t extract or load data, but it’s extremely good at transforming data that’s already loaded into your warehouse. - -## User setup -- If you plan to use this product, you need to install some environments first. - -#### Required Packages to Install -- [python3.7+](https://www.python.org/downloads/) -- [dbt-mysql](https://pypi.org/project/dbt-mysql/#configuring-your-profile) - -#### Commands to run or create in your terminal and the dbt project -1. pip install dbt-mysql -2. dbt init demoapp (demoapp is project name) -3. create your SQL transformations and data models - -## Convert Data By DBT - -Use the Raw JSON API to manually initiate a run using **cURL** or graphical API tool such as **Postman**. `POST` the following request to the DevLake API Endpoint. - -```json -[ - [ - { - "plugin": "dbt", - "options": { - "projectPath": "/Users/abeizn/demoapp", - "projectName": "demoapp", - "projectTarget": "dev", - "selectedModels": ["my_first_dbt_model","my_second_dbt_model"], - "projectVars": { - "demokey1": "demovalue1", - "demokey2": "demovalue2" - } - } - } - ] -] -``` - -- `projectPath`: the absolute path of the dbt project. (required) -- `projectName`: the name of the dbt project. (required) -- `projectTarget`: this is the default target your dbt project will use. (optional) -- `selectedModels`: a model is a select statement. Models are defined in .sql files, and typically in your models directory. (required) -And selectedModels accepts one or more arguments. Each argument can be one of: -1. a package name, runs all models in your project, example: example -2. a model name, runs a specific model, example: my_fisrt_dbt_model -3. a fully-qualified path to a directory of models. - -- `projectVars`: variables to parametrize dbt models. (optional) -example: -`select * from events where event_type = '{{ var("event_type") }}'` -To execute this SQL query in your model, you need set a value for `event_type`. - -### Resources: -- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) -- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers - -
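To make the `projectVars` example above concrete: a run that sets `event_type` for a model containing that query could be configured roughly like this. This is only a sketch; the project path, project name and model name are the placeholder values from the sample request above, and `"page_view"` is an arbitrary example value, not a default.

```json
[
    [
        {
            "plugin": "dbt",
            "options": {
                "projectPath": "/Users/abeizn/demoapp",
                "projectName": "demoapp",
                "projectTarget": "dev",
                "selectedModels": ["my_first_dbt_model"],
                "projectVars": {
                    "event_type": "page_view"
                }
            }
        }
    ]
]
```

With this payload, `{{ var("event_type") }}` in the model resolves to `page_view` when dbt runs.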


diff --git a/versioned_docs/version-v0.11/Plugins/feishu.md b/versioned_docs/version-v0.11/Plugins/feishu.md deleted file mode 100644 index c3e0eb646e7..00000000000 --- a/versioned_docs/version-v0.11/Plugins/feishu.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: "Feishu" -description: > - Feishu Plugin ---- - -## Summary - -This plugin collects Feishu meeting data through [Feishu Openapi](https://open.feishu.cn/document/home/user-identity-introduction/introduction). - -## Configuration - -In order to fully use this plugin, you will need to get app_id and app_secret from a Feishu administrator (for help on App info, please see [official Feishu Docs](https://open.feishu.cn/document/ukTMukTMukTM/ukDNz4SO0MjL5QzM/auth-v3/auth/tenant_access_token_internal)), -then set these two parameters via Dev Lake's `.env`. - -### By `.env` - -The connection aspect of the configuration screen requires the following key fields to connect to the Feishu API. As Feishu is a single-source data provider at the moment, the connection name is read-only as there is only one instance to manage. As we continue our development roadmap we may enable multi-source connections for Feishu in the future. - -``` -FEISHU_APPID=app_id -FEISHU_APPSCRECT=app_secret -``` - -## Collect data from Feishu - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - - -```json -[ - [ - { - "plugin": "feishu", - "options": { - "numOfDaysToCollect" : 80, - "rateLimitPerSecond" : 5 - } - } - ] -] -``` - -> `numOfDaysToCollect`: The number of days you want to collect - -> `rateLimitPerSecond`: The number of requests to send(Maximum is 8) - -You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "feishu 20211126", - "tasks": [[{ - "plugin": "feishu", - "options": { - "numOfDaysToCollect" : 80, - "rateLimitPerSecond" : 5 - } - }]] -} -' -``` \ No newline at end of file diff --git a/versioned_docs/version-v0.11/Plugins/gitee.md b/versioned_docs/version-v0.11/Plugins/gitee.md deleted file mode 100644 index 6066fd2e725..00000000000 --- a/versioned_docs/version-v0.11/Plugins/gitee.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: "Gitee(WIP)" -description: > - Gitee Plugin ---- - -## Summary - -## Configuration - -### Provider (Datasource) Connection -The connection aspect of the configuration screen requires the following key fields to connect to the **Gitee API**. As gitee is a _single-source data provider_ at the moment, the connection name is read-only as there is only one instance to manage. As we continue our development roadmap we may enable _multi-source_ connections for gitee in the future. - -- **Connection Name** [`READONLY`] - - ⚠️ Defaults to "**Gitee**" and may not be changed. -- **Endpoint URL** (REST URL, starts with `https://` or `http://`) - - This should be a valid REST API Endpoint eg. `https://gitee.com/api/v5/` - - ⚠️ URL should end with`/` -- **Auth Token(s)** (Personal Access Token) - - For help on **Creating a personal access token** - - Provide at least one token for Authentication with the . This field accepts a comma-separated list of values for multiple tokens. The data collection will take longer for gitee since they have a **rate limit of 2k requests per hour**. You can accelerate the process by configuring _multiple_ personal access tokens. 
- -"For API requests using `Basic Authentication` or `OAuth` - - -If you have a need for more api rate limits, you can set many tokens in the config file and we will use all of your tokens. - -For an overview of the **gitee REST API**, please see official [gitee Docs on REST](https://gitee.com/api/v5/swagger) - -Click **Save Connection** to update connection settings. - - -### Provider (Datasource) Settings -Manage additional settings and options for the gitee Datasource Provider. Currently there is only one **optional** setting, *Proxy URL*. If you are behind a corporate firewall or VPN you may need to utilize a proxy server. - -**gitee Proxy URL [ `Optional`]** -Enter a valid proxy server address on your Network, e.g. `http://your-proxy-server.com:1080` - -Click **Save Settings** to update additional settings. - -### Regular Expression Configuration -Define regex pattern in .env -- GITEE_PR_BODY_CLOSE_PATTERN: Define key word to associate issue in pr body, please check the example in .env.example - -## Sample Request -In order to collect data, you have to compose a JSON looks like following one, and send it by selecting `Advanced Mode` on `Create Pipeline Run` page: -1. Configure-UI Mode -```json -[ - [ - { - "plugin": "gitee", - "options": { - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` -and if you want to perform certain subtasks. -```json -[ - [ - { - "plugin": "gitee", - "subtasks": ["collectXXX", "extractXXX", "convertXXX"], - "options": { - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` - -2. Curl Mode: - You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee 20211126", - "tasks": [[{ - "plugin": "gitee", - "options": { - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` -and if you want to perform certain subtasks. -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee 20211126", - "tasks": [[{ - "plugin": "gitee", - "subtasks": ["collectXXX", "extractXXX", "convertXXX"], - "options": { - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` diff --git a/versioned_docs/version-v0.11/Plugins/gitextractor.md b/versioned_docs/version-v0.11/Plugins/gitextractor.md deleted file mode 100644 index ae3fecb616e..00000000000 --- a/versioned_docs/version-v0.11/Plugins/gitextractor.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: "GitExtractor" -description: > - GitExtractor Plugin ---- - -## Summary -This plugin extracts commits and references from a remote or local git repository. It then saves the data into the database or csv files. - -## Steps to make this plugin work - -1. Use the Git repo extractor to retrieve data about commits and branches from your repository. -2. Use the GitHub plugin to retrieve data about Github issues and PRs from your repository. -NOTE: you can run only one issue collection stage as described in the Github Plugin README. -3. Use the [RefDiff](./refdiff.md) plugin to calculate version diff, which will be stored in `refs_commits_diffs` table. 
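The steps above are usually chained as stages of a single pipeline. As a rough sketch, a run that collects GitHub data first and then extracts the Git repository might look like the following; the `repo`/`owner` values come from the GitHub doc's sample request, the `url`/`repoId` values from the sample request below, and a RefDiff task would be appended as a further stage (see the RefDiff doc for its options). The key casing mirrors each plugin's own sample.

```json
[
    [
        {
            "plugin": "github",
            "options": {
                "repo": "lake",
                "owner": "merico-dev"
            }
        }
    ],
    [
        {
            "Plugin": "gitextractor",
            "Options": {
                "url": "https://github.com/merico-dev/lake.git",
                "repoId": "github:GithubRepo:384111310"
            }
        }
    ]
]
```

Each outer array is a stage, and stages are expected to run one after another, so GitExtractor only starts once the GitHub collection has finished.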
- -## Sample Request - -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "git repo extractor", - "tasks": [ - [ - { - "Plugin": "gitextractor", - "Options": { - "url": "https://github.com/merico-dev/lake.git", - "repoId": "github:GithubRepo:384111310" - } - } - ] - ] -} -' -``` -- `url`: the location of the git repository. It should start with `http`/`https` for a remote git repository and with `/` for a local one. -- `repoId`: column `id` of `repos`. -- `proxy`: optional, http proxy, e.g. `http://your-proxy-server.com:1080`. -- `user`: optional, for cloning private repository using HTTP/HTTPS -- `password`: optional, for cloning private repository using HTTP/HTTPS -- `privateKey`: optional, for SSH cloning, base64 encoded `PEM` file -- `passphrase`: optional, passphrase for the private key - - -## Standalone Mode - -You call also run this plugin in a standalone mode without any DevLake service running using the following command: - -``` -go run plugins/gitextractor/main.go -url https://github.com/merico-dev/lake.git -id github:GithubRepo:384111310 -db "merico:merico@tcp(127.0.0.1:3306)/lake?charset=utf8mb4&parseTime=True" -``` - -For more options (e.g., saving to a csv file instead of a db), please read `plugins/gitextractor/main.go`. - -## Development - -This plugin depends on `libgit2`, you need to install version 1.3.0 in order to run and debug this plugin on your local -machine. [Click here](./refdiff.md#Development) for a brief guide. - -


diff --git a/versioned_docs/version-v0.11/Plugins/github-connection-in-config-ui.png b/versioned_docs/version-v0.11/Plugins/github-connection-in-config-ui.png deleted file mode 100644 index 5359fb1551b..00000000000 Binary files a/versioned_docs/version-v0.11/Plugins/github-connection-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.11/Plugins/github.md b/versioned_docs/version-v0.11/Plugins/github.md deleted file mode 100644 index cca87b74364..00000000000 --- a/versioned_docs/version-v0.11/Plugins/github.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: "GitHub" -description: > - GitHub Plugin ---- - - - -## Summary - -This plugin gathers data from `GitHub` to display information to the user in `Grafana`. We can help tech leaders answer such questions as: - -- Is this month more productive than last? -- How fast do we respond to customer requirements? -- Was our quality improved or not? - -## Metrics - -Here are some examples metrics using `GitHub` data: -- Avg Requirement Lead Time By Assignee -- Bug Count per 1k Lines of Code -- Commit Count over Time - -## Screenshot - -![image](/img/Plugins/github-demo.png) - - -## Configuration - -### Provider (Datasource) Connection -The connection section of the configuration screen requires the following key fields to connect to the **GitHub API**. - -![connection-in-config-ui](github-connection-in-config-ui.png) - -- **Connection Name** [`READONLY`] - - ⚠️ Defaults to "**Github**" and may not be changed. As GitHub is a _single-source data provider_ at the moment, the connection name is read-only as there is only one instance to manage. As we advance on our development roadmap we may enable _multi-source_ connections for GitHub in the future. -- **Endpoint URL** (REST URL, starts with `https://` or `http://`) - - This should be a valid REST API Endpoint eg. `https://api.github.com/` - - ⚠️ URL should end with`/` -- **Auth Token(s)** (Personal Access Token) - - For help on **Creating a personal access token**, please see official [GitHub Docs on Personal Tokens](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) - - Provide at least one token for Authentication. - - This field accepts a comma-separated list of values for multiple tokens. The data collection will take longer for GitHub since they have a **rate limit of [5,000 requests](https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting) per hour** (15,000 requests/hour if you pay for `GitHub` enterprise). You can accelerate the process by configuring _multiple_ personal access tokens. - -Click **Save Connection** to update connection settings. - - -### Provider (Datasource) Settings -Manage additional settings and options for the GitHub Datasource Provider. Currently there is only one **optional** setting, *Proxy URL*. If you are behind a corporate firewall or VPN you may need to utilize a proxy server. - -- **GitHub Proxy URL [`Optional`]** -Enter a valid proxy server address on your Network, e.g. `http://your-proxy-server.com:1080` - -Click **Save Settings** to update additional settings. 
- -### Regular Expression Configuration -Define regex pattern in .env -- GITHUB_PR_BODY_CLOSE_PATTERN: Define key word to associate issue in PR body, please check the example in .env.example - -## Sample Request -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -```json -[ - [ - { - "plugin": "github", - "options": { - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` - -You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "github 20211126", - "tasks": [[{ - "plugin": "github", - "options": { - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` -
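If you only need to re-run part of the GitHub plugin, the pipeline payload can also carry a `subtasks` list, in the same way the Gitee example earlier in this document does. This is a sketch only: `collectXXX`, `extractXXX` and `convertXXX` are placeholders for the plugin's actual subtask names, not real values.

```json
[
    [
        {
            "plugin": "github",
            "subtasks": ["collectXXX", "extractXXX", "convertXXX"],
            "options": {
                "repo": "lake",
                "owner": "merico-dev"
            }
        }
    ]
]
```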


diff --git a/versioned_docs/version-v0.11/Plugins/gitlab-connection-in-config-ui.png b/versioned_docs/version-v0.11/Plugins/gitlab-connection-in-config-ui.png deleted file mode 100644 index 7aacee8d828..00000000000 Binary files a/versioned_docs/version-v0.11/Plugins/gitlab-connection-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.11/Plugins/gitlab.md b/versioned_docs/version-v0.11/Plugins/gitlab.md deleted file mode 100644 index 21a86d7f7fb..00000000000 --- a/versioned_docs/version-v0.11/Plugins/gitlab.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: "GitLab" -description: > - GitLab Plugin ---- - - -## Metrics - -| Metric Name | Description | -|:----------------------------|:-------------------------------------------------------------| -| Pull Request Count | Number of Pull/Merge Requests | -| Pull Request Pass Rate | Ratio of Pull/Merge Review requests to merged | -| Pull Request Reviewer Count | Number of Pull/Merge Reviewers | -| Pull Request Review Time | Time from Pull/Merge created time until merged | -| Commit Author Count | Number of Contributors | -| Commit Count | Number of Commits | -| Added Lines | Accumulated Number of New Lines | -| Deleted Lines | Accumulated Number of Removed Lines | -| Pull Request Review Rounds | Number of cycles of commits followed by comments/final merge | - -## Configuration - -### Provider (Datasource) Connection -The connection section of the configuration screen requires the following key fields to connect to the **GitLab API**. - -![connection-in-config-ui](gitlab-connection-in-config-ui.png) - -- **Connection Name** [`READONLY`] - - ⚠️ Defaults to "**GitLab**" and may not be changed. As GitLab is a _single-source data provider_ at the moment, the connection name is read-only as there is only one instance to manage. As we advance on our development roadmap we may enable _multi-source_ connections for GitLab in the future. -- **Endpoint URL** (REST URL, starts with `https://` or `http://`) - - This should be a valid REST API Endpoint eg. `https://gitlab.example.com/api/v4/` - - ⚠️ URL should end with`/` -- **Personal Access Token** (HTTP Basic Auth) - - Login to your GitLab Account and create a **Personal Access Token** to authenticate with the API using HTTP Basic Authentication. The token must be 20 characters long. Save the personal access token somewhere safe. After you leave the page, you no longer have access to the token. - - 1. In the top-right corner, select your **avatar**. - 2. Click on **Edit profile**. - 3. On the left sidebar, select **Access Tokens**. - 4. Enter a **name** and optional **expiry date** for the token. - 5. Select the desired **scopes**. - 6. Click on **Create personal access token**. - - For help on **Creating a personal access token**, please see official [GitLab Docs on Personal Tokens](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html). - For an overview of the **GitLab REST API**, please see official [GitLab Docs on REST](https://docs.gitlab.com/ee/development/documentation/restful_api_styleguide.html#restful-api) - -Click **Save Connection** to update connection settings. - -### Provider (Datasource) Settings -There are no additional settings for the GitLab Datasource Provider at this time. - -> NOTE: `GitLab Project ID` Mappings feature has been deprecated. 
- -## Gathering Data with GitLab - -To collect data, you can make a POST request to `/pipelines` - -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitlab 20211126", - "tasks": [[{ - "plugin": "gitlab", - "options": { - "projectId": - } - }]] -} -' -``` - -## Finding Project Id - -To get the project id for a specific `GitLab` repository: -- Visit the repository page on GitLab -- Find the project id just below the title - - ![Screen Shot 2021-08-06 at 4 32 53 PM](https://user-images.githubusercontent.com/3789273/128568416-a47b2763-51d8-4a6a-8a8b-396512bffb03.png) - -> Use this project id in your requests, to collect data from this project - -## ⚠️ (WIP) Create a GitLab API Token - -1. When logged into `GitLab` visit `https://gitlab.com/-/profile/personal_access_tokens` -2. Give the token any name, no expiration date and all scopes (excluding write access) - - ![Screen Shot 2021-08-06 at 4 44 01 PM](https://user-images.githubusercontent.com/3789273/128569148-96f50d4e-5b3b-4110-af69-a68f8d64350a.png) - -3. Click the **Create Personal Access Token** button -4. Save the API token into `.env` file via `cofnig-ui` or edit the file directly. - -
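As with the other plugins, the same request should also work as a JSON config pasted into `Advanced Mode` on the `Create Pipeline Run` page. The sketch below is equivalent to the cURL request above; `12345678` is a placeholder for your own project id.

```json
[
    [
        {
            "plugin": "gitlab",
            "options": {
                "projectId": 12345678
            }
        }
    ]
]
```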


diff --git a/versioned_docs/version-v0.11/Plugins/jenkins.md b/versioned_docs/version-v0.11/Plugins/jenkins.md deleted file mode 100644 index 792165dd94e..00000000000 --- a/versioned_docs/version-v0.11/Plugins/jenkins.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: "Jenkins" -description: > - Jenkins Plugin ---- - -## Summary - -This plugin collects Jenkins data through [Remote Access API](https://www.jenkins.io/doc/book/using/remote-access-api/). It then computes and visualizes various DevOps metrics from the Jenkins data. - -![image](https://user-images.githubusercontent.com/61080/141943122-dcb08c35-cb68-4967-9a7c-87b63c2d6988.png) - -## Metrics - -| Metric Name | Description | -|:-------------------|:------------------------------------| -| Build Count | The number of builds created | -| Build Success Rate | The percentage of successful builds | - -## Configuration - -In order to fully use this plugin, you will need to set various configurations via Dev Lake's `config-ui`. - -### By `config-ui` - -The connection section of the configuration screen requires the following key fields to connect to the Jenkins API. - -- Connection Name [READONLY] - - ⚠️ Defaults to "Jenkins" and may not be changed. As Jenkins is a _single-source data provider_ at the moment, the connection name is read-only as there is only one instance to manage. As we advance on our development roadmap we may enable multi-source connections for Jenkins in the future. -- Endpoint URL (REST URL, starts with `https://` or `http://`i, ends with `/`) - - This should be a valid REST API Endpoint eg. `https://ci.jenkins.io/` -- Username (E-mail) - - Your User ID for the Jenkins Instance. -- Password (Secret Phrase or API Access Token) - - Secret password for common credentials. - - For help on Username and Password, please see official Jenkins Docs on Using Credentials - - Or you can use **API Access Token** for this field, which can be generated at `User` -> `Configure` -> `API Token` section on Jenkins. - -Click Save Connection to update connection settings. - -## Collect Data From Jenkins - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -```json -[ - [ - { - "plugin": "jenkins", - "options": {} - } - ] -] -``` - -## Relationship between job and build - -Build is kind of a snapshot of job. Running job each time creates a build. -


diff --git a/versioned_docs/version-v0.11/Plugins/jira-connection-config-ui.png b/versioned_docs/version-v0.11/Plugins/jira-connection-config-ui.png deleted file mode 100644 index df2e8e39875..00000000000 Binary files a/versioned_docs/version-v0.11/Plugins/jira-connection-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.11/Plugins/jira-more-setting-in-config-ui.png b/versioned_docs/version-v0.11/Plugins/jira-more-setting-in-config-ui.png deleted file mode 100644 index dffb0c994d2..00000000000 Binary files a/versioned_docs/version-v0.11/Plugins/jira-more-setting-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.11/Plugins/jira.md b/versioned_docs/version-v0.11/Plugins/jira.md deleted file mode 100644 index 8ac28d62377..00000000000 --- a/versioned_docs/version-v0.11/Plugins/jira.md +++ /dev/null @@ -1,253 +0,0 @@ ---- -title: "Jira" -description: > - Jira Plugin ---- - - -## Summary - -This plugin collects Jira data through Jira Cloud REST API. It then computes and visualizes various engineering metrics from the Jira data. - -jira metric display - -## Project Metrics This Covers - -| Metric Name | Description | -|:------------------------------------|:--------------------------------------------------------------------------------------------------| -| Requirement Count | Number of issues with type "Requirement" | -| Requirement Lead Time | Lead time of issues with type "Requirement" | -| Requirement Delivery Rate | Ratio of delivered requirements to all requirements | -| Requirement Granularity | Number of story points associated with an issue | -| Bug Count | Number of issues with type "Bug"
bugs are found during testing |
| Bug Age                             | Lead time of issues with type "Bug"<br/>both new and deleted lines count |
| Bugs Count per 1k Lines of Code     | Amount of bugs per 1000 lines of code |
| Incident Count                      | Number of issues with type "Incident"
incidents are found when running in production | -| Incident Age | Lead time of issues with type "Incident" | -| Incident Count per 1k Lines of Code | Amount of incidents per 1000 lines of code | - -## Configuration - -In order to fully use this plugin, you will need to set various configurations via Dev Lake's `config-ui` service. Open `config-ui` on browser, by default the URL is http://localhost:4000, then go to **Data Integrations / JIRA** page. JIRA plugin currently supports multiple data connections, Here you can **add** new connection to your JIRA connection or **update** the settings if needed. - -For each connection, you will need to set up following items first: - -![connection at config ui](jira-connection-config-ui.png) - -- Connection Name: This allow you to distinguish different connections. -- Endpoint URL: The JIRA instance API endpoint, for JIRA Cloud Service: `https://.atlassian.net/rest`. DevLake officially supports JIRA Cloud Service on atlassian.net, but may or may not work for JIRA Server Instance. -- Basic Auth Token: First, generate a **JIRA API TOKEN** for your JIRA account on the JIRA console (see [Generating API token](#generating-api-token)), then, in `config-ui` click the KEY icon on the right side of the input to generate a full `HTTP BASIC AUTH` token for you. -- Proxy Url: Just use when you want collect through VPN. - -### More custom configuration -If you want to add more custom config, you can click "settings" to change these config -![More config in config ui](jira-more-setting-in-config-ui.png) -- Issue Type Mapping: JIRA is highly customizable, each JIRA instance may have a different set of issue types than others. In order to compute and visualize metrics for different instances, you need to map your issue types to standard ones. See [Issue Type Mapping](#issue-type-mapping) for detail. -- Epic Key: unfortunately, epic relationship implementation in JIRA is based on `custom field`, which is vary from instance to instance. Please see [Find Out Custom Fields](#find-out-custom-fields). -- Story Point Field: same as Epic Key. -- Remotelink Commit SHA:A regular expression that matches commit links to determine whether an external link is a link to a commit. Taking gitlab as an example, to match all commits similar to https://gitlab.com/merico-dev/ce/example-repository/-/commit/8ab8fb319930dbd8615830276444b8545fd0ad24, you can directly use the regular expression **/commit/([0-9a-f]{40})$** - - -### Generating API token -1. Once logged into Jira, visit the url `https://id.atlassian.com/manage-profile/security/api-tokens` -2. Click the **Create API Token** button, and give it any label name -![image](https://user-images.githubusercontent.com/27032263/129363611-af5077c9-7a27-474a-a685-4ad52366608b.png) - - -### Issue Type Mapping - -Devlake supports 3 standard types, all metrics are computed based on these types: - - - `Bug`: Problems found during the `test` phase, before they can reach the production environment. - - `Incident`: Problems that went through the `test` phase, got deployed into production environment. - - `Requirement`: Normally, it would be `Story` on your instance if you adopted SCRUM. - -You can map arbitrary **YOUR OWN ISSUE TYPE** to a single **STANDARD ISSUE TYPE**. Normally, one would map `Story` to `Requirement`, but you could map both `Story` and `Task` to `Requirement` if that was your case. Unspecified types are copied directly for your convenience, so you don't need to map your `Bug` to standard `Bug`. 
- -Type mapping is critical for some metrics, like **Requirement Count**, make sure to map your custom type correctly. - -### Find Out Custom Field - -Please follow this guide: [How to find the custom field ID in Jira?](https://github.com/apache/incubator-devlake/wiki/How-to-find-the-custom-field-ID-in-Jira) - - -## Collect Data From JIRA - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -> Warning: Data collection only supports single-task execution, and the results of concurrent multi-task execution may not meet expectations. - -``` -[ - [ - { - "plugin": "jira", - "options": { - "connectionId": 1, - "boardId": 8, - "since": "2006-01-02T15:04:05Z" - } - } - ] -] -``` - -- `connectionId`: The `ID` field from **JIRA Integration** page. -- `boardId`: JIRA board id, see "Find Board Id" for details. -- `since`: optional, download data since a specified date only. - - -### Find Board Id - -1. Navigate to the Jira board in the browser -2. in the URL bar, get the board id from the parameter `?rapidView=` - -**Example:** - -`https://{your_jira_endpoint}/secure/RapidBoard.jspa?rapidView=51` - -![Screenshot](https://user-images.githubusercontent.com/27032263/129363083-df0afa18-e147-4612-baf9-d284a8bb7a59.png) - -Your board id is used in all REST requests to Apache DevLake. You do not need to configure this at the data connection level. - - - -## API - -### Data Connections - -1. Get all data connection - -```GET /plugins/jira/connections -[ - { - "ID": 14, - "CreatedAt": "2021-10-11T11:49:19.029Z", - "UpdatedAt": "2021-10-11T11:49:19.029Z", - "name": "test-jira-connection", - "endpoint": "https://merico.atlassian.net/rest", - "basicAuthEncoded": "basicAuth", - "epicKeyField": "epicKeyField", - "storyPointField": "storyPointField" - } -] -``` - -2. Create a new data connection - -```POST /plugins/jira/connections -{ - "name": "jira data connection name", - "endpoint": "jira api endpoint, i.e. https://merico.atlassian.net/rest", - "basicAuthEncoded": "generated by `echo -n {jira login email}:{jira token} | base64`", - "epicKeyField": "name of customfield of epic key", - "storyPointField": "name of customfield of story point", - "typeMappings": { // optional, send empty object to delete all typeMappings of the data connection - "userType": { - "standardType": "devlake standard type" - } - } -} -``` - - -3. Update data connection - -```PUT /plugins/jira/connections/:connectionId -{ - "name": "jira data connection name", - "endpoint": "jira api endpoint, i.e. https://merico.atlassian.net/rest", - "basicAuthEncoded": "generated by `echo -n {jira login email}:{jira token} | base64`", - "epicKeyField": "name of customfield of epic key", - "storyPointField": "name of customfield of story point", - "typeMappings": { // optional, send empty object to delete all typeMappings of the data connection - "userType": { - "standardType": "devlake standard type", - } - } -} -``` - -4. Get data connection detail -```GET /plugins/jira/connections/:connectionId -{ - "name": "jira data connection name", - "endpoint": "jira api endpoint, i.e. 
https://merico.atlassian.net/rest", - "basicAuthEncoded": "generated by `echo -n {jira login email}:{jira token} | base64`", - "epicKeyField": "name of customfield of epic key", - "storyPointField": "name of customfield of story point", - "typeMappings": { // optional, send empty object to delete all typeMappings of the data connection - "userType": { - "standardType": "devlake standard type", - } - } -} -``` - -5. Delete data connection - -```DELETE /plugins/jira/connections/:connectionId -``` - - -### Type mappings - -1. Get all type mappings -```GET /plugins/jira/connections/:connectionId/type-mappings -[ - { - "jiraConnectionId": 16, - "userType": "userType", - "standardType": "standardType" - } -] -``` - -2. Create a new type mapping - -```POST /plugins/jira/connections/:connectionId/type-mappings -{ - "userType": "userType", - "standardType": "standardType" -} -``` - -3. Update type mapping - -```PUT /plugins/jira/connections/:connectionId/type-mapping/:userType -{ - "standardType": "standardTypeUpdated" -} -``` - - -4. Delete type mapping - -```DELETE /plugins/jira/connections/:connectionId/type-mapping/:userType -``` - -5. API forwarding -For example: -Requests to `http://your_devlake_host/plugins/jira/connections/1/proxy/rest/agile/1.0/board/8/sprint` -would be forwarded to `https://your_jira_host/rest/agile/1.0/board/8/sprint` - -```GET /plugins/jira/connections/:connectionId/proxy/rest/*path -{ - "maxResults": 1, - "startAt": 0, - "isLast": false, - "values": [ - { - "id": 7, - "self": "https://merico.atlassian.net/rest/agile/1.0/sprint/7", - "state": "closed", - "name": "EE Sprint 7", - "startDate": "2020-06-12T00:38:51.882Z", - "endDate": "2020-06-26T00:38:00.000Z", - "completeDate": "2020-06-22T05:59:58.980Z", - "originBoardId": 8, - "goal": "" - } - ] -} -``` diff --git a/versioned_docs/version-v0.11/Plugins/refdiff.md b/versioned_docs/version-v0.11/Plugins/refdiff.md deleted file mode 100644 index 12950f4f0b5..00000000000 --- a/versioned_docs/version-v0.11/Plugins/refdiff.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -title: "RefDiff" -description: > - RefDiff Plugin ---- - - -## Summary - -For development workload analysis, we often need to know how many commits have been created between 2 releases. This plugin calculates which commits differ between 2 Ref (branch/tag), and the result will be stored back into database for further analysis. - -## Important Note - -You need to run gitextractor before the refdiff plugin. The gitextractor plugin should create records in the `refs` table in your DB before this plugin can be run. - -## Configuration - -This is a enrichment plugin based on Domain Layer data, no configuration needed - -## How to use - -In order to trigger the enrichment, you need to insert a new task into your pipeline. - -1. Make sure `commits` and `refs` are collected into your database, `refs` table should contain records like following: -``` -id ref_type -github:GithubRepo:384111310:refs/tags/0.3.5 TAG -github:GithubRepo:384111310:refs/tags/0.3.6 TAG -github:GithubRepo:384111310:refs/tags/0.5.0 TAG -github:GithubRepo:384111310:refs/tags/v0.0.1 TAG -github:GithubRepo:384111310:refs/tags/v0.2.0 TAG -github:GithubRepo:384111310:refs/tags/v0.3.0 TAG -github:GithubRepo:384111310:refs/tags/v0.4.0 TAG -github:GithubRepo:384111310:refs/tags/v0.6.0 TAG -github:GithubRepo:384111310:refs/tags/v0.6.1 TAG -``` -2. 
If you want to run calculateIssuesDiff, please configure GITHUB_PR_BODY_CLOSE_PATTERN in .env; you can check the example in .env.example (we have a default value; please make sure your pattern is enclosed in single quotes '') -3. If you want to run calculatePrCherryPick, please configure GITHUB_PR_TITLE_PATTERN in .env; you can check the example in .env.example (we have a default value; please make sure your pattern is enclosed in single quotes '') -4. Then trigger a pipeline like the following. You can also define sub-tasks: calculateRefDiff will calculate the commits between two refs, and creatRefBugStats will create a table to show the bug list between two refs: -``` -curl -v -XPOST http://localhost:8080/pipelines --data @- <<'JSON' -{ - "name": "test-refdiff", - "tasks": [ - [ - { - "plugin": "refdiff", - "options": { - "repoId": "github:GithubRepo:384111310", - "pairs": [ - { "newRef": "refs/tags/v0.6.0", "oldRef": "refs/tags/0.5.0" }, - { "newRef": "refs/tags/0.5.0", "oldRef": "refs/tags/0.4.0" } - ], - "tasks": [ - "calculateCommitsDiff", - "calculateIssuesDiff", - "calculatePrCherryPick" - ] - } - } - ] - ] -} -JSON -``` - -## Development - -This plugin depends on `libgit2`; you need to install version 1.3.0 in order to run and debug this plugin on your local machine. - -### Ubuntu - -``` -apt install cmake -git clone https://github.com/libgit2/libgit2.git -cd libgit2 -git checkout v1.3.0 -mkdir build -cd build -cmake .. -make -make install -``` - -### MacOS -1. [MacPorts](https://guide.macports.org/#introduction) install -``` -port install libgit2@1.3.0 -``` -2. Source install -``` -brew install cmake -git clone https://github.com/libgit2/libgit2.git -cd libgit2 -git checkout v1.3.0 -mkdir build -cd build -cmake .. -make -make install -``` - -#### Troubleshooting (MacOS) - -> Q: I got an error saying: `pkg-config: exec: "pkg-config": executable file not found in $PATH` - -> A: -> 1. Make sure you have pkg-config installed: -> -> `brew install pkg-config` -> -> 2. Make sure your pkg-config path covers the installation: -> `export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/lib:/usr/local/lib/pkgconfig` - -
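As a quick sanity check after installing libgit2 (a suggestion, not part of the original guide), you can ask `pkg-config` which version it resolves; if the output is not `1.3.0`, the plugin build will likely link against the wrong library.

```sh
# Verify that pkg-config can find libgit2 and that it resolves to the expected version.
pkg-config --modversion libgit2   # expected output: 1.3.0

# Show the compiler/linker flags the build would pick up for libgit2.
pkg-config --cflags --libs libgit2
```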


diff --git a/versioned_docs/version-v0.11/Plugins/tapd.md b/versioned_docs/version-v0.11/Plugins/tapd.md deleted file mode 100644 index b8db89fca87..00000000000 --- a/versioned_docs/version-v0.11/Plugins/tapd.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: "TAPD" -description: > - TAPD Plugin ---- - -## Summary - -This plugin collects TAPD data. - -This plugin is still in development, so you can't modify its settings in config-ui. - -## Configuration - -In order to fully use this plugin, you will need to get the endpoint/basic_auth_encoded/rate_limit values and insert them into the table `_tool_tapd_connections`. - diff --git a/versioned_docs/version-v0.11/QuickStart/KubernetesSetup.md b/versioned_docs/version-v0.11/QuickStart/KubernetesSetup.md deleted file mode 100644 index bb7d0908d32..00000000000 --- a/versioned_docs/version-v0.11/QuickStart/KubernetesSetup.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "Kubernetes Setup" -description: > - The steps to install Apache DevLake in Kubernetes -sidebar_position: 2 ---- - - -We provide a sample [k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) for users interested in deploying Apache DevLake on a k8s cluster. - -[k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) will create a namespace `devlake` on your k8s cluster, and use `nodePort 30004` for `config-ui` and `nodePort 30002` for `grafana` dashboards. If you would like to use a specific version of Apache DevLake, please update the image tags of the `grafana`, `devlake` and `config-ui` services to specify versions like `v0.10.1`. - -## Step-by-step guide - -1. Download [k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) to your local machine -2. Some key points: - - `config-ui` deployment: - * `GRAFANA_ENDPOINT`: FQDN of the grafana service which can be reached from the user's browser - * `DEVLAKE_ENDPOINT`: FQDN of the devlake service which can be reached within the k8s cluster; normally you don't need to change it unless the namespace was changed - * `ADMIN_USER`/`ADMIN_PASS`: Not required, but highly recommended - - `devlake-config` config map: - * `MYSQL_USER`: shared between the `mysql` and `grafana` services - * `MYSQL_PASSWORD`: shared between the `mysql` and `grafana` services - * `MYSQL_DATABASE`: shared between the `mysql` and `grafana` services - * `MYSQL_ROOT_PASSWORD`: sets the root password for the `mysql` service - - `devlake` deployment: - * `DB_URL`: update this value if `MYSQL_USER`, `MYSQL_PASSWORD` or `MYSQL_DATABASE` were changed -3. The `devlake` deployment stores its configuration in `/app/.env`. In our sample yaml, we use a `hostPath` volume, so please make sure the directory `/var/lib/devlake` exists on your k8s workers, or employ other techniques to persist the `/app/.env` file. Please do NOT mount the entire `/app` directory, because plugins are located in the `/app/bin` folder. -4. Finally, execute the following command, and Apache DevLake should be up and running: - ```sh - kubectl apply -f k8s-deploy.yaml - ``` -
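After applying the manifest, a quick verification step (a sketch, assuming the default `devlake` namespace and the nodePorts from the sample yaml) is to check the pods and services with standard kubectl commands:

```sh
# Wait for all DevLake pods in the devlake namespace to become Ready.
kubectl -n devlake get pods

# List the services to confirm the exposed nodePorts.
kubectl -n devlake get svc

# config-ui is exposed on nodePort 30004 and grafana on nodePort 30002,
# so they should be reachable at http://<node-ip>:30004 and http://<node-ip>:30002.
```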


diff --git a/versioned_docs/version-v0.11/QuickStart/LocalSetup.md b/versioned_docs/version-v0.11/QuickStart/LocalSetup.md deleted file mode 100644 index 5ae0e0ef9f5..00000000000 --- a/versioned_docs/version-v0.11/QuickStart/LocalSetup.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -title: "Local Setup" -description: > - The steps to install DevLake locally -sidebar_position: 1 ---- - - -## Prerequisites - -- [Docker v19.03.10+](https://docs.docker.com/get-docker) -- [docker-compose v2.2.3+](https://docs.docker.com/compose/install/) - -## Launch DevLake - -- Commands written `like this` are to be run in your terminal. - -1. Download `docker-compose.yml` and `env.example` from [latest release page](https://github.com/apache/incubator-devlake/releases/latest) into a folder. -2. Rename `env.example` to `.env`. For Mac/Linux users, please run `mv env.example .env` in the terminal. -3. Run `docker-compose up -d` to launch DevLake. - -## Configure data connections and collect data - -1. Visit `config-ui` at `http://localhost:4000` in your browser to configure data connections. - - Navigate to desired plugins on the Integrations page - - Please reference the following for more details on how to configure each one:
- - [Jira](../Plugins/jira.md) - - [GitHub](../Plugins/github.md): For users who'd like to collect GitHub data, we recommend reading our [GitHub data collection guide](../UserManuals/GitHubUserGuide.md), which covers the following steps in detail. - - [GitLab](../Plugins/gitlab.md) - - [Jenkins](../Plugins/jenkins.md) - - Submit the form to update the values by clicking on the **Save Connection** button on each form page - - `devlake` takes a while to fully boot up. If `config-ui` complains about the API being unreachable, please wait a few seconds and try refreshing the page. -2. Create pipelines to trigger data collection in `config-ui` -3. Click the *View Dashboards* button in the top left when done, or visit `localhost:3002` (username: `admin`, password: `admin`). - - We use [Grafana](https://grafana.com/) as a visualization tool to build charts for the [data](../DataModels/DataSupport.md) stored in our database. - - Using SQL queries, we can add panels to build, save, and edit customized dashboards. - - All the details on provisioning and customizing a dashboard can be found in the [Grafana Doc](../UserManuals/GrafanaUserGuide.md). -4. To synchronize data periodically, users can set up recurring pipelines; see DevLake's [pipeline blueprint](../UserManuals/RecurringPipelines.md) guide for details. - -## Upgrade to a newer version - -Support for database schema migration was introduced to DevLake in v0.10.0. From v0.10.0 onwards, users can upgrade their instance smoothly to a newer version. However, versions prior to v0.10.0 do not support upgrading to a newer version with a different database schema; in that case, we recommend deploying a new instance. - -
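A minimal upgrade sketch for a docker-compose deployment on v0.10.0+, assuming you have downloaded the newer release's `docker-compose.yml` into the same folder and kept your existing `.env`:

```sh
# Stop the running containers; named volumes (and therefore the database) are kept unless you pass -v.
docker-compose down

# Replace docker-compose.yml with the file from the newer release, keep .env as-is,
# then pull the newer images and start DevLake again.
docker-compose pull
docker-compose up -d
```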
diff --git a/versioned_docs/version-v0.11/QuickStart/_category_.json b/versioned_docs/version-v0.11/QuickStart/_category_.json deleted file mode 100644 index 133c30f6449..00000000000 --- a/versioned_docs/version-v0.11/QuickStart/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Quick Start", - "position": 2 -} diff --git a/versioned_docs/version-v0.11/UserManuals/AdvancedMode.md b/versioned_docs/version-v0.11/UserManuals/AdvancedMode.md deleted file mode 100644 index d463e889e47..00000000000 --- a/versioned_docs/version-v0.11/UserManuals/AdvancedMode.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: "Advanced Mode" -sidebar_position: 2 -description: > - Advanced Mode ---- - - -## Why advanced mode? - -Advanced mode allows users to create any pipeline by writing JSON. This is useful for users who want to: - -1. Collect multiple GitHub/GitLab repos or Jira projects within a single pipeline -2. Have fine-grained control over what entities to collect or what subtasks to run for each plugin -3. Orchestrate a complex pipeline that consists of multiple stages of plugins. - -Advanced mode gives the most flexibility to users by exposing the JSON API. - -## How to use advanced mode to create pipelines? - -1. Visit the "Create Pipeline Run" page on `config-ui` - -![image](https://user-images.githubusercontent.com/2908155/164569669-698da2f2-47c1-457b-b7da-39dfa7963e09.png) - -2. Scroll to the bottom and toggle on the "Advanced Mode" button - -![image](https://user-images.githubusercontent.com/2908155/164570039-befb86e2-c400-48fe-8867-da44654194bd.png) - -3. The pipeline editor expects a 2D array of plugins. The first dimension represents different stages of the pipeline and the second dimension describes the plugins in each stage. Stages run in sequential order and plugins within the same stage runs in parallel. We provide some templates for users to get started. Please also see the next section for some examples. - -![image](https://user-images.githubusercontent.com/2908155/164576122-fc015fea-ca4a-48f2-b2f5-6f1fae1ab73c.png) - -## Examples - -1. Collect multiple GitLab repos sequentially. - ->When there're multiple collection tasks against a single data source, we recommend running these tasks sequentially since the collection speed is mostly limited by the API rate limit of the data source. ->Running multiple tasks against the same data source is unlikely to speed up the process and may overwhelm the data source. - - -Below is an example for collecting 2 GitLab repos sequentially. It has 2 stages, each contains a GitLab task. - - -``` -[ - [ - { - "Plugin": "gitlab", - "Options": { - "projectId": 15238074 - } - } - ], - [ - { - "Plugin": "gitlab", - "Options": { - "projectId": 11624398 - } - } - ] -] -``` - - -2. Collect a GitHub repo and a Jira board in parallel - -Below is an example for collecting a GitHub repo and a Jira board in parallel. It has a single stage with a GitHub task and a Jira task. Since users can configure multiple Jira connection, it's required to pass in a `connectionId` for Jira task to specify which connection to use. 
- -``` -[ - [ - { - "Plugin": "github", - "Options": { - "repo": "lake", - "owner": "merico-dev" - } - }, - { - "Plugin": "jira", - "Options": { - "connectionId": 1, - "boardId": 76 - } - } - ] -] - -``` \ No newline at end of file diff --git a/versioned_docs/version-v0.11/UserManuals/GitHubUserGuide.md b/versioned_docs/version-v0.11/UserManuals/GitHubUserGuide.md deleted file mode 100644 index fa6745610a0..00000000000 --- a/versioned_docs/version-v0.11/UserManuals/GitHubUserGuide.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: "GitHub User Guide" -sidebar_position: 4 -description: > - GitHub User Guide ---- - -## Summary - -GitHub has a rate limit of 5,000 API calls per hour for their REST API. -As a result, it may take hours to collect commits data from GitHub API for a repo that has 10,000+ commits. -To accelerate the process, DevLake introduces GitExtractor, a new plugin that collects git data by cloning the git repo instead of by calling GitHub APIs. - -Starting from v0.10.0, DevLake will collect GitHub data in 2 separate plugins: - -- GitHub plugin (via GitHub API): collect repos, issues, pull requests -- GitExtractor (via cloning repos): collect commits, refs - -Note that GitLab plugin still collects commits via API by default since GitLab has a much higher API rate limit. - -This doc details the process of collecting GitHub data in v0.10.0. We're working on simplifying this process in the next releases. - -Before start, please make sure all services are started. - -## GitHub Data Collection Procedure - -There're 3 steps. - -1. Configure GitHub connection -2. Create a pipeline to run GitHub plugin -3. Create a pipeline to run GitExtractor plugin -4. [Optional] Set up a recurring pipeline to keep data fresh - -### Step 1 - Configure GitHub connection - -1. Visit `config-ui` at `http://localhost:4000` and click the GitHub icon - -2. Click the default connection 'Github' in the list - ![image](https://user-images.githubusercontent.com/14050754/163591959-11d83216-057b-429f-bb35-a9d845b3de5a.png) - -3. Configure connection by providing your GitHub API endpoint URL and your personal access token(s). - ![image](https://user-images.githubusercontent.com/14050754/163592015-b3294437-ce39-45d6-adf6-293e620d3942.png) - -- Endpoint URL: Leave this unchanged if you're using github.com. Otherwise replace it with your own GitHub instance's REST API endpoint URL. This URL should end with '/'. -- Auth Token(s): Fill in your personal access tokens(s). For how to generate personal access tokens, please see GitHub's [official documentation](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token). -You can provide multiple tokens to speed up the data collection process, simply concatenating tokens with commas. -- GitHub Proxy URL: This is optional. Enter a valid proxy server address on your Network, e.g. http://your-proxy-server.com:1080 - -4. Click 'Test Connection' and see it's working, then click 'Save Connection'. - -5. [Optional] Help DevLake understand your GitHub data by customizing data enrichment rules shown below. - ![image](https://user-images.githubusercontent.com/14050754/163592506-1873bdd1-53cb-413b-a528-7bda440d07c5.png) - - 1. Pull Request Enrichment Options - - 1. `Type`: PRs with label that matches given Regular Expression, their properties `type` will be set to the value of first sub match. 
For example, with Type being set to `type/(.*)$`, a PR with label `type/bug`, its `type` would be set to `bug`, with label `type/doc`, it would be `doc`. - 2. `Component`: Same as above, but for `component` property. - - 2. Issue Enrichment Options - - 1. `Severity`: Same as above, but for `issue.severity` of course. - - 2. `Component`: Same as above. - - 3. `Priority`: Same as above. - - 4. **Requirement** : Issues with label that matches given Regular Expression, their properties `type` will be set to `REQUIREMENT`. Unlike `PR.type`, submatch does nothing, because for Issue Management Analysis, people tend to focus on 3 kinds of types (Requirement/Bug/Incident), however, the concrete naming varies from repo to repo, time to time, so we decided to standardize them to help analysts make general purpose metrics. - - 5. **Bug**: Same as above, with `type` setting to `BUG` - - 6. **Incident**: Same as above, with `type` setting to `INCIDENT` - -6. Click 'Save Settings' - -### Step 2 - Create a pipeline to collect GitHub data - -1. Select 'Pipelines > Create Pipeline Run' from `config-ui` - -![image](https://user-images.githubusercontent.com/14050754/163592542-8b9d86ae-4f16-492c-8f90-12f1e90c5772.png) - -2. Toggle on GitHub plugin, enter the repo you'd like to collect data from. - -![image](https://user-images.githubusercontent.com/14050754/163592606-92141c7e-e820-4644-b2c9-49aa44f10871.png) - -3. Click 'Run Pipeline' - -You'll be redirected to newly created pipeline: - -![image](https://user-images.githubusercontent.com/14050754/163592677-268e6b77-db3f-4eec-8a0e-ced282f5a361.png) - - -See the pipeline finishes (progress 100%): - -![image](https://user-images.githubusercontent.com/14050754/163592709-cce0d502-92e9-4c19-8504-6eb521b76169.png) - -### Step 3 - Create a pipeline to run GitExtractor plugin - -1. Enable the `GitExtractor` plugin, and enter your `Git URL` and, select the `Repository ID` from dropdown menu. - -![image](https://user-images.githubusercontent.com/2908155/164125950-37822d7f-6ee3-425d-8523-6f6b6213cb89.png) - -2. Click 'Run Pipeline' and wait until it's finished. - -3. Click `View Dashboards` on the top left corner of `config-ui`, the default username and password of Grafana are `admin`. - -![image](https://user-images.githubusercontent.com/61080/163666814-e48ac68d-a0cc-4413-bed7-ba123dd291c8.png) - -4. See dashboards populated with GitHub data. - -### Step 4 - [Optional] Set up a recurring pipeline to keep data fresh - -Please see [How to create recurring pipelines](./RecurringPipelines.md) for details. - - - - - - diff --git a/versioned_docs/version-v0.11/UserManuals/GrafanaUserGuide.md b/versioned_docs/version-v0.11/UserManuals/GrafanaUserGuide.md deleted file mode 100644 index e4757022472..00000000000 --- a/versioned_docs/version-v0.11/UserManuals/GrafanaUserGuide.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -title: "Grafana User Guide" -sidebar_position: 1 -description: > - Grafana User Guide ---- - - -# Grafana - - - -When first visiting Grafana, you will be provided with a sample dashboard with some basic charts setup from the database. 
- -## Contents - -Section | Link -:------------ | :------------- -Logging In | [View Section](#logging-in) -Viewing All Dashboards | [View Section](#viewing-all-dashboards) -Customizing a Dashboard | [View Section](#customizing-a-dashboard) -Dashboard Settings | [View Section](#dashboard-settings) -Provisioning a Dashboard | [View Section](#provisioning-a-dashboard) -Troubleshooting DB Connection | [View Section](#troubleshooting-db-connection) - -## Logging In - -Once the app is up and running, visit `http://localhost:3002` to view the Grafana dashboard. - -Default login credentials are: - -- Username: `admin` -- Password: `admin` - -## Viewing All Dashboards - -To see all dashboards created in Grafana visit `/dashboards` - -Or, use the sidebar and click on **Manage**: - -![Screen Shot 2021-08-06 at 11 27 08 AM](https://user-images.githubusercontent.com/3789273/128534617-1992c080-9385-49d5-b30f-be5c96d5142a.png) - - -## Customizing a Dashboard - -When viewing a dashboard, click the top bar of a panel, and go to **edit** - -![Screen Shot 2021-08-06 at 11 35 36 AM](https://user-images.githubusercontent.com/3789273/128535505-a56162e0-72ad-46ac-8a94-70f1c7a910ed.png) - -**Edit Dashboard Panel Page:** - -![grafana-sections](https://user-images.githubusercontent.com/3789273/128540136-ba36ee2f-a544-4558-8282-84a7cb9df27a.png) - -### 1. Preview Area -- **Top Left** is the variable select area (custom dashboard variables, used for switching projects, or grouping data) -- **Top Right** we have a toolbar with some buttons related to the display of the data: - - View data results in a table - - Time range selector - - Refresh data button -- **The Main Area** will display the chart and should update in real time - -> Note: Data should refresh automatically, but may require a refresh using the button in some cases - -### 2. Query Builder -Here we form the SQL query to pull data into our chart, from our database -- Ensure the **Data Source** is the correct database - - ![Screen Shot 2021-08-06 at 10 14 22 AM](https://user-images.githubusercontent.com/3789273/128545278-be4846e0-852d-4bc8-8994-e99b79831d8c.png) - -- Select **Format as Table**, and **Edit SQL** buttons to write/edit queries as SQL - - ![Screen Shot 2021-08-06 at 10 17 52 AM](https://user-images.githubusercontent.com/3789273/128545197-a9ff9cb3-f12d-4331-bf6a-39035043667a.png) - -- The **Main Area** is where the queries are written, and in the top right is the **Query Inspector** button (to inspect returned data) - - ![Screen Shot 2021-08-06 at 10 18 23 AM](https://user-images.githubusercontent.com/3789273/128545557-ead5312a-e835-4c59-b9ca-dd5c08f2a38b.png) - -### 3. Main Panel Toolbar -In the top right of the window are buttons for: -- Dashboard settings (regarding entire dashboard) -- Save/apply changes (to specific panel) - -### 4. Grafana Parameter Sidebar -- Change chart style (bar/line/pie chart etc) -- Edit legends, chart parameters -- Modify chart styling -- Other Grafana specific settings - -## Dashboard Settings - -When viewing a dashboard click on the settings icon to view dashboard settings. 
Here are 2 important sections to use: - -![Screen Shot 2021-08-06 at 1 51 14 PM](https://user-images.githubusercontent.com/3789273/128555763-4d0370c2-bd4d-4462-ae7e-4b140c4e8c34.png) - -- Variables - - Create variables to use throughout the dashboard panels, that are also built on SQL queries - - ![Screen Shot 2021-08-06 at 2 02 40 PM](https://user-images.githubusercontent.com/3789273/128553157-a8e33042-faba-4db4-97db-02a29036e27c.png) - -- JSON Model - - Copy `json` code here and save it to a new file in `/grafana/dashboards/` with a unique name in the `lake` repo. This will allow us to persist dashboards when we load the app - - ![Screen Shot 2021-08-06 at 2 02 52 PM](https://user-images.githubusercontent.com/3789273/128553176-65a5ae43-742f-4abf-9c60-04722033339e.png) - -## Provisioning a Dashboard - -To save a dashboard in the `lake` repo and load it: - -1. Create a dashboard in browser (visit `/dashboard/new`, or use sidebar) -2. Save dashboard (in top right of screen) -3. Go to dashboard settings (in top right of screen) -4. Click on _JSON Model_ in sidebar -5. Copy code into a new `.json` file in `/grafana/dashboards` - -## Troubleshooting DB Connection - -To ensure we have properly connected our database to the data source in Grafana, check database settings in `./grafana/datasources/datasource.yml`, specifically: -- `database` -- `user` -- `secureJsonData/password` diff --git a/versioned_docs/version-v0.11/UserManuals/RecurringPipelines.md b/versioned_docs/version-v0.11/UserManuals/RecurringPipelines.md deleted file mode 100644 index ce82b1eb00d..00000000000 --- a/versioned_docs/version-v0.11/UserManuals/RecurringPipelines.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: "Recurring Pipelines" -sidebar_position: 3 -description: > - Recurring Pipelines ---- - -## How to create recurring pipelines? - -Once you've verified that a pipeline works, most likely you'll want to run that pipeline periodically to keep data fresh, and DevLake's pipeline blueprint feature have got you covered. - - -1. Click 'Create Pipeline Run' and - - Toggle the plugins you'd like to run, here we use GitHub and GitExtractor plugin as an example - - Toggle on Automate Pipeline - ![image](https://user-images.githubusercontent.com/14050754/163596590-484e4300-b17e-4119-9818-52463c10b889.png) - - -2. Click 'Add Blueprint'. Fill in the form and 'Save Blueprint'. - - - **NOTE**: The schedule syntax is standard unix cron syntax, [Crontab.guru](https://crontab.guru/) is an useful reference - - **IMPORANT**: The scheduler is running using the `UTC` timezone. If you want data collection to happen at 3 AM New York time (UTC-04:00) every day, use **Custom Shedule** and set it to `0 7 * * *` - - ![image](https://user-images.githubusercontent.com/14050754/163596655-db59e154-405f-4739-89f2-7dceab7341fe.png) - -3. Click 'Save Blueprint'. - -4. Click 'Pipeline Blueprints', you can view and edit the new blueprint in the blueprint list. - - ![image](https://user-images.githubusercontent.com/14050754/163596773-4fb4237e-e3f2-4aef-993f-8a1499ca30e2.png) \ No newline at end of file diff --git a/versioned_docs/version-v0.11/UserManuals/TemporalSetup.md b/versioned_docs/version-v0.11/UserManuals/TemporalSetup.md deleted file mode 100644 index f893a830dfd..00000000000 --- a/versioned_docs/version-v0.11/UserManuals/TemporalSetup.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "Temporal Setup" -sidebar_position: 5 -description: > - The steps to install DevLake in Temporal mode. 
---- - - -Normally, DevLake would execute pipelines on a local machine (we call it `local mode`), it is sufficient most of the time. However, when you have too many pipelines that need to be executed in parallel, it can be problematic, as the horsepower and throughput of a single machine is limited. - -`temporal mode` was added to support distributed pipeline execution, you can fire up arbitrary workers on multiple machines to carry out those pipelines in parallel to overcome the limitations of a single machine. - -But, be careful, many API services like JIRA/GITHUB have a request rate limit mechanism. Collecting data in parallel against the same API service with the same identity would most likely hit such limit. - -## How it works - -1. DevLake Server and Workers connect to the same temporal server by setting up `TEMPORAL_URL` -2. DevLake Server sends a `pipeline` to the temporal server, and one of the Workers pick it up and execute it - - -**IMPORTANT: This feature is in early stage of development. Please use with caution** - - -## Temporal Demo - -### Requirements - -- [Docker](https://docs.docker.com/get-docker) -- [docker-compose](https://docs.docker.com/compose/install/) -- [temporalio](https://temporal.io/) - -### How to setup - -1. Clone and fire up [temporalio](https://temporal.io/) services -2. Clone this repo, and fire up DevLake with command `docker-compose -f docker-compose-temporal.yml up -d` \ No newline at end of file diff --git a/versioned_docs/version-v0.11/UserManuals/_category_.json b/versioned_docs/version-v0.11/UserManuals/_category_.json deleted file mode 100644 index b47bdfd7d09..00000000000 --- a/versioned_docs/version-v0.11/UserManuals/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "User Manuals", - "position": 3 -} diff --git a/versioned_docs/version-v0.12/DataModels/DataSupport.md b/versioned_docs/version-v0.12/DataModels/DataSupport.md deleted file mode 100644 index 4cb4b619131..00000000000 --- a/versioned_docs/version-v0.12/DataModels/DataSupport.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: "Data Support" -description: > - Data sources that DevLake supports -sidebar_position: 1 ---- - - -## Data Sources and Data Plugins -DevLake supports the following data sources. The data from each data source is collected with one or more plugins. There are 9 data plugins in total: `ae`, `feishu`, `gitextractor`, `github`, `gitlab`, `jenkins`, `jira`, `refdiff` and `tapd`. - - -| Data Source | Versions | Plugins | -|-------------|--------------------------------------|-------- | -| AE | | `ae` | -| Feishu | Cloud |`feishu` | -| GitHub | Cloud |`github`, `gitextractor`, `refdiff` | -| Gitlab | Cloud, Community Edition 13.x+ |`gitlab`, `gitextractor`, `refdiff` | -| Jenkins | 2.263.x+ |`jenkins` | -| Jira | Cloud, Server 8.x+, Data Center 8.x+ |`jira` | -| TAPD | Cloud | `tapd` | - - - -## Data Collection Scope By Each Plugin -This table shows the entities collected by each plugin. Domain layer entities in this table are consistent with the entities [here](./DevLakeDomainLayerSchema.md). 
- -| Domain Layer Entities | ae | gitextractor | github | gitlab | jenkins | jira | refdiff | tapd | -| --------------------- | -------------- | ------------ | -------------- | ------- | ------- | ------- | ------- | ------- | -| commits | update commits | default | not-by-default | default | | | | | -| commit_parents | | default | | | | | | | -| commit_files | | default | | | | | | | -| pull_requests | | | default | default | | | | | -| pull_request_commits | | | default | default | | | | | -| pull_request_comments | | | default | default | | | | | -| pull_request_labels | | | default | | | | | | -| refs | | default | | | | | | | -| refs_commits_diffs | | | | | | | default | | -| refs_issues_diffs | | | | | | | default | | -| ref_pr_cherry_picks | | | | | | | default | | -| repos | | | default | default | | | | | -| repo_commits | | default | default | | | | | | -| board_repos | | | | | | | | | -| issue_commits | | | | | | | | | -| issue_repo_commits | | | | | | | | | -| pull_request_issues | | | | | | | | | -| refs_issues_diffs | | | | | | | | | -| boards | | | default | | | default | | default | -| board_issues | | | default | | | default | | default | -| issue_changelogs | | | | | | default | | default | -| issues | | | default | | | default | | default | -| issue_comments | | | | | | default | | default | -| issue_labels | | | default | | | | | | -| sprints | | | | | | default | | default | -| issue_worklogs | | | | | | default | | default | -| users o | | | default | | | default | | default | -| builds | | | | | default | | | | -| jobs | | | | | default | | | | - diff --git a/versioned_docs/version-v0.12/DataModels/DevLakeDomainLayerSchema.md b/versioned_docs/version-v0.12/DataModels/DevLakeDomainLayerSchema.md deleted file mode 100644 index 10c80d907a1..00000000000 --- a/versioned_docs/version-v0.12/DataModels/DevLakeDomainLayerSchema.md +++ /dev/null @@ -1,544 +0,0 @@ ---- -title: "Domain Layer Schema" -description: > - DevLake Domain Layer Schema -sidebar_position: 2 ---- - -## Summary - -This document describes the entities in DevLake's domain layer schema and their relationships. - -Data in the domain layer is transformed from the data in the tool layer. The tool layer schema is based on the data from specific tools such as Jira, GitHub, Gitlab, Jenkins, etc. The domain layer schema can be regarded as an abstraction of tool-layer schemas. - -Domain layer schema itself includes 2 logical layers: a `DWD` layer and a `DWM` layer. The DWD layer stores the detailed data points, while the DWM is the slight aggregation and operation of DWD to store more organized details or middle-level metrics. - - -## Use Cases -1. Users can make customized Grafana dashboards based on the domain layer schema. -2. Contributors can complete the ETL logic when adding new data source plugins refering to this data model. - - -## Data Models - -This is the up-to-date domain layer schema for DevLake v0.10.x. Tables (entities) are categorized into 5 domains. -1. Issue tracking domain entities: Jira issues, GitHub issues, GitLab issues, etc. -2. Source code management domain entities: Git/GitHub/Gitlab commits and refs(tags and branches), etc. -3. Code review domain entities: GitHub PRs, Gitlab MRs, etc. -4. CI/CD domain entities: Jenkins jobs & builds, etc. -5. Cross-domain entities: entities that map entities from different domains to break data isolation. 
- - -### Schema Diagram -![Domain Layer Schema](/img/DomainLayerSchema/schema-diagram-v0.14.png) - -When reading the schema, you'll notice that many tables' primary key is called `id`. Unlike auto-increment id or UUID, `id` is a string composed of several parts to uniquely identify similar entities (e.g. repo) from different platforms (e.g. Github/Gitlab) and allow them to co-exist in a single table. - -Tables that end with WIP are still under development. - - -### Naming Conventions - -1. The name of a table is in plural form. Eg. boards, issues, etc. -2. The name of a table which describe the relation between 2 entities is in the form of [BigEntity in singular form]\_[SmallEntity in plural form]. Eg. board_issues, sprint_issues, pull_request_comments, etc. -3. Value of the field in enum type are in capital letters. Eg. [table.issues.type](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#ZDCw9k) has 3 values, REQUIREMENT, BUG, INCIDENT. Values that are phrases, such as 'IN_PROGRESS' of [table.issues.status](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#ZDCw9k), are separated with underscore '\_'. - -
- -## DWD Entities - (Data Warehouse Detail) - -### Domain 1 - Issue Tracking - -#### issues - -An `issue` is the abstraction of Jira/Github/GitLab/TAPD/... issues. - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | An issue's `id` is composed of < plugin >:< Entity >:< PK0 >[:PK1]..." | PK | -| `issue_key` | varchar | 255 | The key of this issue. For example, the key of this Github [issue](https://github.com/apache/incubator-devlake/issues/1145) is 1145. | | -| `url` | varchar | 255 | The url of the issue. It's a web address in most cases. | | -| `title` | varchar | 255 | The title of an issue | | -| `description` | longtext | | The detailed description/summary of an issue | | -| `type` | varchar | 255 | The standard type of this issue. There're 3 standard types: The 3 standard types are transformed from the original types of an issue. The transformation rule is set in the '.env' file or 'config-ui' before data collection. For issues with an original type that has not mapped to a standard type, the value of `type` will be the issue's original type. | | -| `status` | varchar | 255 | The standard statuses of this issue. There're 3 standard statuses: The 3 standard statuses are transformed from the original statuses of an issue. The transformation rule: | | -| `original_status` | varchar | 255 | The original status of an issue. | | -| `story_point` | int | | The story point of this issue. It's default to an empty string for data sources such as Github issues and Gitlab issues. | | -| `priority` | varchar | 255 | The priority of the issue | | -| `component` | varchar | 255 | The component a bug-issue affects. This field only supports Github plugin for now. The value is transformed from Github issue labels by the rules set according to the user's configuration of .env by end users during DevLake installation. | | -| `severity` | varchar | 255 | The severity level of a bug-issue. This field only supports Github plugin for now. The value is transformed from Github issue labels by the rules set according to the user's configuration of .env by end users during DevLake installation. | | -| `parent_issue_id` | varchar | 255 | The id of its parent issue | | -| `epic_key` | varchar | 255 | The key of the epic this issue belongs to. For tools with no epic-type issues such as Github and Gitlab, this field is default to an empty string | | -| `original_estimate_minutes` | int | | The orginal estimation of the time allocated for this issue | | -| `time_spent_minutes` | int | | The orginal estimation of the time allocated for this issue | | -| `time_remaining_minutes` | int | | The remaining time to resolve the issue | | -| `creator_id` | varchar | 255 | The id of issue creator | | -| `creator_name` | varchar | 255 | The name of the creator | | -| `assignee_id` | varchar | 255 | The id of issue assignee. 
| | -| `assignee_name` | varchar | 255 | The name of the assignee | | -| `created_date` | datetime | 3 | The time issue created | | -| `updated_date` | datetime | 3 | The last time issue gets updated | | -| `resolution_date` | datetime | 3 | The time the issue changes to 'DONE'. | | -| `lead_time_minutes` | int | | Describes the cycle time from issue creation to issue resolution. | | - -#### issue_labels - -This table shows the labels of issues. Multiple entries can exist per issue. This table can be used to filter issues by label name. - -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `name` | varchar | 255 | Label name | | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | - - -#### issue_comments(WIP) - -This table shows the comments of issues. Issues with multiple comments are shown as multiple records. This table can be used to calculate _metric - issue response time_. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------- | -| `id` | varchar | 255 | The unique id of a comment | PK | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | -| `account_id` | varchar | 255 | The id of the account who made the comment | FK_accounts.id | -| `body` | longtext | | The body/detail of the comment | | -| `created_date` | datetime | 3 | The creation date of the comment | | -| `updated_date` | datetime | 3 | The last time comment gets updated | | - -#### issue_changelogs - -This table shows the changelogs of issues. Issues with multiple changelogs are shown as multiple records. This is transformed from Jira or TAPD changelogs. - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------- | :------- | :--------- | :--------------------------------------------------------------- | :------------- | -| `id` | varchar | 255 | The unique id of an issue changelog | PK | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | -| `author_id` | varchar | 255 | The id of the user who made the change | FK_accounts.id | -| `author_name` | varchar | 255 | The id of the user who made the change | FK_accounts.id | -| `field_id` | varchar | 255 | The id of changed field | | -| `field_name` | varchar | 255 | The id of changed field | | -| `original_from_value` | varchar | 255 | The original value of the changed field | | -| `original_to_value` | varchar | 255 | The new value of the changed field | | -| `from_value` | varchar | 255 | The transformed/standardized original value of the changed field | | -| `to_value` | varchar | 255 | The transformed/standardized new value of the changed field | | -| `created_date` | datetime | 3 | The creation date of the changelog | | - - -#### issue_worklogs - -This table shows the work logged under issues. Usually, an issue has multiple worklogs logged by different developers. 
- -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :------------------------------------------------------------------------------------------- | :--------------- | -| `id` | varchar | 255 | The id of the worklog | PK | -| `author_id` | varchar | 255 | The id of the author who logged the work | FK_acccounts.id | -| `comment` | longtext | 255 | The comment made while logging the work. | | -| `time_spent_minutes` | int | | The time logged. The unit of value is normalized to minute. Eg. 1d =) 480, 4h30m =) 270 | | -| `logged_date` | datetime | 3 | The time of this logging action | | -| `started_date` | datetime | 3 | Start time of the worklog | | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | - - -#### boards - -A `board` is an issue list or a collection of issues. It's the abstraction of a Jira board, a Jira project, a [Github issue list](https://github.com/merico-dev/lake/issues) or a GitLab issue list. This table can be used to filter issues by the boards they belong to. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | A board's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." | PK | -| `name` | varchar | 255 | The name of the board. Note: the board name of a Github project 'merico-dev/lake' is 'merico-dev/lake', representing the [default issue list](https://github.com/merico-dev/lake/issues). | | -| `description` | varchar | 255 | The description of the board. | | -| `url` | varchar | 255 | The url of the board. Eg. https://Github.com/merico-dev/lake | | -| `created_date` | datetime | 3 | Board creation time | | - -#### board_issues - -This table shows the relation between boards and issues. This table can be used to filter issues by board. - -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | - -#### sprints - -A `sprint` is the abstraction of Jira sprints, TAPD iterations and Github milestones. A sprint contains a list of issues. - -| **field** | **type** | **length** | **description** | **key** | -| :------------------ | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :----------- | -| `id` | varchar | 255 | A sprint's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." | PK | -| `name` | varchar | 255 | The name of sprint.
For Github projects, the sprint name is the milestone name. For instance, 'v0.10.0 - Introduce Temporal to DevLake' is the name of this [sprint](https://github.com/apache/incubator-devlake/milestone/5). | | -| `url` | varchar | 255 | The url of sprint. | | -| `status` | varchar | 255 | There're 3 statuses of a sprint: | | -| `started_date` | datetime | 3 | The start time of a sprint | | -| `ended_date` | datetime | 3 | The planned/estimated end time of a sprint. It's usually set when planning a sprint. | | -| `completed_date` | datetime | 3 | The actual time to complete a sprint. | | -| `original_board_id` | datetime | 3 | The id of board where the sprint first created. This field is not null only when this entity is transformed from Jira sprintas.
In Jira, sprint and board entities have 2 types of relation: | FK_boards.id | - -#### sprint_issues - -This table shows the relation between sprints and issues that have been added to sprints. This table can be used to show metrics such as _'ratio of unplanned issues'_, _'completion rate of sprint issues'_, etc - -| **field** | **type** | **length** | **description** | **key** | -| :--------------- | :------- | :--------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------ | -| `sprint_id` | varchar | 255 | Sprint id | FK_sprints.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `is_removed` | bool | | If the issue is removed from this sprint, then TRUE; else FALSE | | -| `added_date` | datetime | 3 | The time this issue added to the sprint. If an issue is added to a sprint multiple times, the latest time will be the value. | | -| `removed_date` | datetime | 3 | The time this issue gets removed from the sprint. If an issue is removed multiple times, the latest time will be the value. | | -| `added_stage` | varchar | 255 | The stage when issue is added to this sprint. There're 3 possible values: | | -| `resolved_stage` | varchar | 255 | The stage when an issue is resolved (issue status turns to 'DONE'). There're 3 possible values: | | - -#### board_sprints - -| **field** | **type** | **length** | **description** | **key** | -| :---------- | :------- | :--------- | :-------------- | :------------ | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `sprint_id` | varchar | 255 | Sprint id | FK_sprints.id | - -
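To illustrate how the issue tracking tables above join together, here is a hypothetical Grafana-style query (not part of the original schema doc; it assumes the MySQL settings are available as the `MYSQL_*` environment variables used elsewhere in these docs) that counts `REQUIREMENT`-type issues per board using only `issues` and `board_issues`:

```sh
# Assumes MYSQL_USER, MYSQL_PASSWORD and MYSQL_DATABASE are exported in the current shell.
mysql -h 127.0.0.1 -u "$MYSQL_USER" -p"$MYSQL_PASSWORD" "$MYSQL_DATABASE" <<'SQL'
-- Count requirement-type issues per board.
SELECT bi.board_id, COUNT(*) AS requirement_count
FROM issues i
JOIN board_issues bi ON bi.issue_id = i.id
WHERE i.type = 'REQUIREMENT'
GROUP BY bi.board_id;
SQL
```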
- -### Domain 2 - Source Code Management - -#### repos - -Information about Github or Gitlab repositories. A repository is always owned by a user. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------- | -| `id` | varchar | 255 | A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github repo's id is like "< github >:< GithubRepos >< GithubRepoId >". Eg. 'github:GithubRepos:384111310' | PK | -| `name` | varchar | 255 | The name of repo. | | -| `description` | varchar | 255 | The description of repo. | | -| `url` | varchar | 255 | The url of repo. Eg. https://Github.com/merico-dev/lake | | -| `owner_id` | varchar | 255 | The id of the owner of repo | FK_accounts.id | -| `language` | varchar | 255 | The major language of repo. Eg. The language for merico-dev/lake is 'Go' | | -| `forked_from` | varchar | 255 | Empty unless the repo is a fork in which case it contains the `id` of the repo the repo is forked from. | | -| `deleted` | tinyint | 255 | 0: repo is active 1: repo has been deleted | | -| `created_date` | datetime | 3 | Repo creation date | | -| `updated_date` | datetime | 3 | Last full update was done for this repo | | - -#### repo_languages(WIP) - -Languages that are used in the repository along with byte counts for all files in those languages. This is in line with how Github calculates language percentages in a repository. Multiple entries can exist per repo. - -The table is filled in when the repo has been first inserted on when an update round for all repos is made. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github repo's id is like "< github >:< GithubRepos >< GithubRepoId >". Eg. 'github:GithubRepos:384111310' | PK | -| `language` | varchar | 255 | The language of repo.
These are the [languages](https://api.github.com/repos/merico-dev/lake/languages) for merico-dev/lake | | -| `bytes` | int | | The byte counts for all files in those languages | | -| `created_date` | datetime | 3 | The field is filled in with the latest timestamp the query for a specific `repo_id` was done. | | - -#### repo_commits - -The commits belong to the history of a repository. More than one repos can share the same commits if one is a fork of the other. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :-------------- | :------------- | -| `repo_id` | varchar | 255 | Repo id | FK_repos.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### refs - -A ref is the abstraction of a branch or tag. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------- | -| `id` | varchar | 255 | A ref's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github ref is composed of "github:GithubRepos:< GithubRepoId >:< RefUrl >". Eg. The id of release v5.3.0 of PingCAP/TiDB project is 'github:GithubRepos:384111310:refs/tags/v5.3.0' A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." | PK | -| `ref_name` | varchar | 255 | The name of ref. Eg. '[refs/tags/v0.9.3](https://github.com/merico-dev/lake/tree/v0.9.3)' | | -| `repo_id` | varchar | 255 | The id of repo this ref belongs to | FK_repos.id | -| `commit_sha` | char | 40 | The commit this ref points to at the time of collection | | -| `is_default` | int | | | | -| `merge_base` | char | 40 | The merge base commit of the main ref and the current ref | | -| `ref_type` | varchar | 64 | There're 2 typical types: | | - -#### refs_commits_diffs - -This table shows the commits added in a new ref compared to an old ref. This table can be used to support tag-based analysis, for instance, '_No. of commits of a tag_', '_No. of merged pull request of a tag_', etc. - -The records of this table are computed by [RefDiff](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/refdiff) plugin. The computation should be manually triggered after using [GitRepoExtractor](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/gitextractor) to collect commits and refs. The algorithm behind is similar to [this](https://github.com/merico-dev/lake/compare/v0.8.0%E2%80%A6v0.9.0). - -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :-------------------------------------------------------------- | :------------- | -| `commit_sha` | char | 40 | One of the added commits in the new ref compared to the old ref | FK_commits.sha | -| `new_ref_id` | varchar | 255 | The new ref's id for comparison | FK_refs.id | -| `old_ref_id` | varchar | 255 | The old ref's id for comparison | FK_refs.id | -| `new_ref_commit_sha` | char | 40 | The commit new ref points to at the time of collection | | -| `old_ref_commit_sha` | char | 40 | The commit old ref points to at the time of collection | | -| `sorting_index` | varchar | 255 | An index for debugging, please skip it | | - -#### commits - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------- | -| `sha` | char | 40 | One of the added commits in the new ref compared to the old ref | FK_commits.sha | -| `message` | varchar | 255 | Commit message | | -| `author_name` | varchar | 255 | The value is set with command `git config user.name xxxxx` commit | | -| `author_email` | varchar | 255 | The value is set with command `git config user.email xxxxx` author | | -| `authored_date` | datetime | 3 | The date when this commit was originally made | | -| `author_id` | varchar | 255 | The id of commit author | FK_accounts.id | -| `committer_name` | varchar | 255 | The name of committer | | -| `committer_email` | varchar | 255 | The email of committer | | -| `committed_date` | datetime | 3 | The last time the commit gets modified.
For example, when rebasing the branch where the commit is in on another branch, the committed_date changes. | | -| `committer_id` | varchar | 255 | The id of committer | FK_accounts.id | -| `additions` | int | | Added lines of code | | -| `deletions` | int | | Deleted lines of code | | -| `dev_eq` | int | | A metric that quantifies the amount of code contribution. The data can be retrieved from [AE plugin](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/ae). | | - -#### commit_files - -The files have been changed via commits. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :----------------------------------------------------- | :------------- | -| `id` | varchar | 255 | The `id` is composed of "< Commit_sha >:< file_path >" | FK_commits.sha | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | -| `file_path` | varchar | 255 | Path of a changed file in a commit | | -| `additions` | int | | The added lines of code in this file by the commit | | -| `deletions` | int | | The deleted lines of code in this file by the commit | | - -#### components - -The components of files extracted from the file paths. This can be used to analyze Git metrics by component. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :----------------------------------------------------- | :---------- | -| `repo_id` | varchar | 255 | The repo id | FK_repos.id | -| `name` | varchar | 255 | The name of component | | -| `path_regex` | varchar | 255 | The regex to extract components from this repo's paths | | - -#### commit_file_components - -The relationship between commit_file and component_name. - -| **field** | **type** | **length** | **description** | **key** | -| :--------------- | :------- | :--------- | :--------------------------- | :----------------- | -| `commit_file_id` | varchar | 255 | The id of commit file | FK_commit_files.id | -| `component_name` | varchar | 255 | The component name of a file | | - -#### commit_parents - -The parent commit(s) for each commit, as specified by Git. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :---------------- | :------------- | -| `commit_sha` | char | 40 | commit sha | FK_commits.sha | -| `parent` | char | 40 | Parent commit sha | FK_commits.sha | - -
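-
-To make the `components` and `commit_file_components` tables above more concrete, here is a minimal, self-contained sketch of how a `path_regex` could be matched against a commit file's path to derive a `component_name`. The regex values and the file path are made up for illustration; they are not part of DevLake itself.
-
-```go
-package main
-
-import (
-    "fmt"
-    "regexp"
-)
-
-// component mirrors the idea of the `components` table: a name plus a path regex.
-type component struct {
-    Name      string
-    PathRegex string
-}
-
-func main() {
-    // Hypothetical components configured for a repo.
-    components := []component{
-        {Name: "backend", PathRegex: `^plugins/.*\.go$`},
-        {Name: "config-ui", PathRegex: `^config-ui/`},
-    }
-    // A file path as it might appear in commit_files.file_path.
-    filePath := "plugins/jira/tasks/issue_extractor.go"
-
-    // The first matching regex decides the component_name for this commit file.
-    for _, c := range components {
-        if regexp.MustCompile(c.PathRegex).MatchString(filePath) {
-            fmt.Printf("file %q maps to component %q\n", filePath, c.Name)
-            break
-        }
-    }
-}
-```
-
-In the domain layer, a result like this would correspond to one row in `commit_file_components`, linking a `commit_file_id` to a `component_name`.
-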
- -### Domain 3 - Code Review - -#### pull_requests - -A pull request is the abstraction of Github pull request and Gitlab merge request. - -| **field** | **type** | **length** | **description** | **key** | -| :----------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------- | -| `id` | char | 40 | A pull request's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." Eg. For 'github:GithubPullRequests:1347' | FK_commits.sha | -| `title` | varchar | 255 | The title of pull request | | -| `description` | longtext | | The body/description of pull request | | -| `status` | varchar | 255 | the status of pull requests. For a Github pull request, the status can either be 'open' or 'closed'. | | -| `parent_pr_id` | varchar | 255 | The id of the parent PR | | -| `pull_request_key` | varchar | 255 | The key of PR. Eg, 1536 is the key of this [PR](https://github.com/apache/incubator-devlake/pull/1563) | | -| `base_repo_id` | varchar | 255 | The repo that will be updated. | | -| `head_reop_id` | varchar | 255 | The repo containing the changes that will be added to the base. If the head repository is NULL, this means that the corresponding project had been deleted when DevLake processed the pull request. | | -| `base_ref` | varchar | 255 | The branch name in the base repo that will be updated | | -| `head_ref` | varchar | 255 | The branch name in the head repo that contains the changes that will be added to the base | | -| `author_name` | varchar | 255 | The author's name of the pull request | | -| `author_id` | varchar | 255 | The author's id of the pull request | | -| `url` | varchar | 255 | the web link of the pull request | | -| `type` | varchar | 255 | The work-type of a pull request. For example: feature-development, bug-fix, docs, etc.
The value is transformed from Github pull request labels by configuring `GITHUB_PR_TYPE` in `.env` file during installation. | | -| `component` | varchar | 255 | The component this PR affects.
The value is transformed from Github/Gitlab pull request labels by configuring `GITHUB_PR_COMPONENT` in `.env` file during installation. | | -| `created_date` | datetime | 3 | The time PR created. | | -| `merged_date` | datetime | 3 | The time PR gets merged. Null when the PR is not merged. | | -| `closed_date` | datetime | 3 | The time PR closed. Null when the PR is not closed. | | -| `merge_commit_sha` | char | 40 | the merge commit of this PR. By the definition of [Github](https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-branches-in-your-repository/changing-the-default-branch), when you click the default Merge pull request option on a pull request on Github, all commits from the feature branch are added to the base branch in a merge commit. | | -| `base_commit_sha` | char | 40 | The base commit of this PR. | | -| `head_commit_sha` | char | 40 | The head commit of this PR. | | - - -#### pull_request_labels - -This table shows the labels of pull request. Multiple entries can exist per pull request. This table can be used to filter pull requests by label name. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :-------------- | :------------------ | -| `name` | varchar | 255 | Label name | | -| `pull_request_id` | varchar | 255 | Pull request ID | FK_pull_requests.id | - -#### pull_request_commits - -A commit associated with a pull request - -The list is additive. This means if a rebase with commit squashing takes place after the commits of a pull request have been processed, the old commits will not be deleted. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :-------------- | :------------------ | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### pull_request_comments - -Normal comments, review bodies, reviews' inline comments of GitHub's pull requests or GitLab's merge requests. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :--------------------------------------------------------- | :------------------ | -| `id` | varchar | 255 | Comment id | PK | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `body` | longtext | | The body of the comments | | -| `account_id` | varchar | 255 | The account who made the comment | FK_accounts.id | -| `created_date` | datetime | 3 | Comment creation time | | -| `position` | int | | Deprecated | | -| `type` | varchar | 255 | - For normal comments: NORMAL
- For review comments, i.e. diff/inline comments: DIFF
- For reviews' body (exist in GitHub but not GitLab): REVIEW | | -| `review_id` | varchar | 255 | Review_id of the comment if the type is `REVIEW` or `DIFF` | | -| `status` | varchar | 255 | Status of the comment | | - - -#### pull_request_events(WIP) - -Events of pull requests. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------ | -| `id` | varchar | 255 | Event id | PK | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `action` | varchar | 255 | The action to be taken, some values: | | -| `actor_id` | varchar | 255 | The account id of the event performer | FK_accounts.id | -| `created_date` | datetime | 3 | Event creation time | | - -
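-
-As a rough illustration of how these fields feed metrics, the sketch below derives a review duration from a pull request's `created_date` and `merged_date` (field names follow the `pull_requests` table above). The struct and sample dates are simplified stand-ins, not the actual DevLake model.
-
-```go
-package main
-
-import (
-    "fmt"
-    "time"
-)
-
-// pullRequest is a simplified stand-in for a row of the pull_requests table.
-type pullRequest struct {
-    ID          string
-    CreatedDate time.Time
-    MergedDate  *time.Time // nil when the PR has not been merged
-}
-
-// reviewTime returns the time from creation to merge, and false for unmerged PRs.
-func reviewTime(pr pullRequest) (time.Duration, bool) {
-    if pr.MergedDate == nil {
-        return 0, false
-    }
-    return pr.MergedDate.Sub(pr.CreatedDate), true
-}
-
-func main() {
-    merged := time.Date(2022, 6, 3, 10, 0, 0, 0, time.UTC)
-    pr := pullRequest{
-        ID:          "github:GithubPullRequests:1347", // id format shown in the table above
-        CreatedDate: time.Date(2022, 6, 1, 9, 0, 0, 0, time.UTC),
-        MergedDate:  &merged,
-    }
-    if d, ok := reviewTime(pr); ok {
-        fmt.Printf("PR %s was merged after %s of review\n", pr.ID, d)
-    }
-}
-```
-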
- -### Domain 4 - CI/CD(WIP) - -#### jobs - -The CI/CD schedule, not a specific task. - -| **field** | **type** | **length** | **description** | **key** | -| :-------- | :------- | :--------- | :-------------- | :------ | -| `id` | varchar | 255 | Job id | PK | -| `name` | varchar | 255 | Name of job | | - -#### builds - -A build is an execution of a job. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :--------------------------------------------------------------- | :--------- | -| `id` | varchar | 255 | Build id | PK | -| `job_id` | varchar | 255 | Id of the job this build belongs to | FK_jobs.id | -| `name` | varchar | 255 | Name of build | | -| `duration_sec` | bigint | | The duration of build in seconds | | -| `started_date` | datetime | 3 | Started time of the build | | -| `status` | varchar | 255 | The result of build. The values may be 'success', 'failed', etc. | | -| `commit_sha` | char | 40 | The specific commit being built on. Nullable. | | - - -### Cross-Domain Entities - -These entities are used to map entities between different domains. They are the key players to break data isolation. - -There're low-level entities such as issue_commits, users, and higher-level cross domain entities such as board_repos - -#### issue_commits - -A low-level mapping between "issue tracking" and "source code management" domain by mapping `issues` and `commits`. Issue(n): Commit(n). - -The original connection between these two entities lies in either issue tracking tools like Jira or source code management tools like GitLab. You have to use tools to accomplish this. - -For example, a common method to connect Jira issue and GitLab commit is a GitLab plugin [Jira Integration](https://docs.gitlab.com/ee/integration/jira/). With this plugin, the Jira issue key in the commit message written by the committers will be parsed. Then, the plugin will add the commit urls under this jira issue. Hence, DevLake's [Jira plugin](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/jira) can get the related commits (including repo, commit_id, url) of an issue. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :-------------- | :------------- | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### pull_request_issues - -This table shows the issues closed by pull requests. It's a medium-level mapping between "issue tracking" and "source code management" domain by mapping issues and commits. Issue(n): Commit(n). - -The data is extracted from the body of pull requests conforming to certain regular expression. The regular expression can be defined in GITHUB_PR_BODY_CLOSE_PATTERN in the .env file - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------- | :------- | :--------- | :------------------ | :------------------ | -| `pull_request_id` | char | 40 | Pull request id | FK_pull_requests.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `pull_request_number` | varchar | 255 | Pull request key | | -| `issue_number` | varchar | 255 | Issue key | | - -#### board_repos (Deprecated) - -A way to link "issue tracking" and "source code management" domain by mapping `boards` and `repos`. Board(n): Repo(n). 
- 
-
-| **field** | **type** | **length** | **description** | **key** |
-| :--------- | :------- | :--------- | :-------------- | :----------- |
-| `board_id` | varchar | 255 | Board id | FK_boards.id |
-| `repo_id` | varchar | 255 | Repo id | FK_repos.id |
-
-#### accounts
-
-This table stores the user accounts across different tools such as GitHub, Jira, GitLab, etc. It can be joined to get the metadata of all accounts for contributor-level metrics, such as _'No. of issues closed by contributor'_ and _'No. of commits by contributor'_.
-
-| **field** | **type** | **length** | **description** | **key** |
-| :------------- | :------- | :--------- | :-------------- | :------ |
-| `id` | varchar | 255 | An account's `id` is the identifier of the account of a specific tool. It is composed of "< Plugin >:< Entity >:< PK0 >[:PK1]...". For example, a Github account's id is composed of "github:GithubAccounts:< GithubUserId >". Eg. 'github:GithubUsers:14050754' | PK |
-| `email` | varchar | 255 | Email of the account | |
-| `full_name` | varchar | 255 | Full name | |
-| `user_name` | varchar | 255 | Username, nickname or Github login of an account | |
-| `avatar_url` | varchar | 255 | | |
-| `organization` | varchar | 255 | User's organization(s) | |
-| `created_date` | datetime | 3 | User creation time | |
-| `status` | int | | 0: default, the user is active. 1: the user is not active | |
-
-#### users
-| **field** | **type** | **length** | **description** | **key** |
-| --------- | -------- | ---------- | ----------------------------- | ------- |
-| `id` | varchar | 255 | id of a person | PK |
-| `email` | varchar | 255 | the primary email of a person | |
-| `name` | varchar | 255 | name of a person | |
-
-#### user_accounts
-| **field** | **type** | **length** | **description** | **key** |
-| ------------ | -------- | ---------- | --------------- | ---------------- |
-| `user_id` | varchar | 255 | users.id | Composite PK, FK |
-| `account_id` | varchar | 255 | accounts.id | Composite PK, FK |
-
-#### teams
-| **field** | **type** | **length** | **description** | **key** |
-| --------------- | -------- | ---------- | -------------------------------------------------- | ------- |
-| `id` | varchar | 255 | id from the data sources, decided by DevLake users | PK |
-| `name` | varchar | 255 | name of the team. Eg. team A, team B, etc. | |
-| `alias` | varchar | 255 | alias or abbreviation of a team | |
-| `parent_id` | varchar | 255 | teams.id, default to null | FK |
-| `sorting_index` | int | 255 | the field used to sort teams | |
-
-#### team_users
-| **field** | **type** | **length** | **description** | **key** |
-| --------- | -------- | ---------- | --------------- | ---------------- |
-| `team_id` | varchar | 255 | teams.id | Composite PK, FK |
-| `user_id` | varchar | 255 | users.id | Composite PK, FK |
-
-
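-
-To show how `user_accounts` ties tool-specific accounts back to a person, here is a minimal sketch that groups accounts under their owning user. The types and the Jira account id are simplified, illustrative stand-ins for the tables above; the GitHub id follows the format shown in the `accounts` table.
-
-```go
-package main
-
-import "fmt"
-
-// Simplified stand-ins for the users, accounts and user_accounts tables.
-type user struct{ ID, Name string }
-type account struct{ ID, UserName string }
-type userAccount struct{ UserID, AccountID string }
-
-func main() {
-    users := []user{{ID: "u1", Name: "Alice"}}
-    accounts := map[string]account{
-        "github:GithubUsers:14050754": {ID: "github:GithubUsers:14050754", UserName: "alice-gh"}, // id format from the accounts table
-        "jira:JiraUsers:abc123":       {ID: "jira:JiraUsers:abc123", UserName: "alice.j"},        // illustrative id
-    }
-    links := []userAccount{
-        {UserID: "u1", AccountID: "github:GithubUsers:14050754"},
-        {UserID: "u1", AccountID: "jira:JiraUsers:abc123"},
-    }
-
-    // Group every tool account under the person it belongs to,
-    // which is what contributor-level metrics rely on.
-    byUser := map[string][]string{}
-    for _, l := range links {
-        byUser[l.UserID] = append(byUser[l.UserID], accounts[l.AccountID].UserName)
-    }
-    for _, u := range users {
-        fmt.Printf("%s owns tool accounts %v\n", u.Name, byUser[u.ID])
-    }
-}
-```
-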
- -## DWM Entities - (Data Warehouse Middle) - -DWM entities are the slight aggregation and operation of DWD to store more organized details or middle-level metrics. - - -#### refs_issues_diffs - -This table shows the issues fixed by commits added in a new ref compared to an old one. The data is computed from [table.ref_commits_diff](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#yJOyqa), [table.pull_requests](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#Uc849c), [table.pull_request_commits](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#G9cPfj), and [table.pull_request_issues](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#we6Uac). - -This table can support tag-based analysis, for instance, '_No. of bugs closed in a tag_'. - -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :----------------------------------------------------- | :----------- | -| `new_ref_id` | varchar | 255 | The new ref's id for comparison | FK_refs.id | -| `old_ref_id` | varchar | 255 | The old ref's id for comparison | FK_refs.id | -| `new_ref_commit_sha` | char | 40 | The commit new ref points to at the time of collection | | -| `old_ref_commit_sha` | char | 40 | The commit old ref points to at the time of collection | | -| `issue_number` | varchar | 255 | Issue number | | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | diff --git a/versioned_docs/version-v0.12/DataModels/_category_.json b/versioned_docs/version-v0.12/DataModels/_category_.json deleted file mode 100644 index 7420a65e2de..00000000000 --- a/versioned_docs/version-v0.12/DataModels/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Data Models", - "position": 7 -} diff --git a/versioned_docs/version-v0.12/DeveloperManuals/DBMigration.md b/versioned_docs/version-v0.12/DeveloperManuals/DBMigration.md deleted file mode 100644 index 95302379a39..00000000000 --- a/versioned_docs/version-v0.12/DeveloperManuals/DBMigration.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: "DB Migration" -description: > - DB Migration -sidebar_position: 3 ---- - -## Summary -Starting in v0.10.0, DevLake provides a lightweight migration tool for executing migration scripts. -Both framework itself and plugins define their migration scripts in their own migration folder. -The migration scripts are written with gorm in Golang to support different SQL dialects. - - -## Migration Script -Migration script describes how to do database migration. -They implement the `Script` interface. -When DevLake starts, scripts register themselves to the framework by invoking the `Register` function - -```go -type Script interface { - Up(ctx context.Context, db *gorm.DB) error - Version() uint64 - Name() string -} -``` - -## Table `migration_history` - -The table tracks migration scripts execution and schemas changes. -From which, DevLake could figure out the current state of database schemas. - - -## How It Works -1. Check `migration_history` table, calculate all the migration scripts need to be executed. -2. Sort scripts by Version in ascending order. -3. Execute scripts. -4. Save results in the `migration_history` table. 
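-
-For reference, a migration script implementing the `Script` interface above might look like the following minimal sketch. The struct, table and version number are made up for illustration; a real script also needs to register itself via the `Register` function mentioned earlier.
-
-```go
-package migrationscripts
-
-import (
-    "context"
-
-    "gorm.io/gorm"
-)
-
-// user is a hypothetical table that this example migration creates.
-type user struct {
-    ID    string `gorm:"primaryKey;type:varchar(255)"`
-    Email string `gorm:"type:varchar(255)"`
-}
-
-func (user) TableName() string { return "users" }
-
-// addUserTable implements the Script interface shown above.
-type addUserTable struct{}
-
-func (*addUserTable) Up(ctx context.Context, db *gorm.DB) error {
-    // gorm generates dialect-specific DDL, so the same script works across SQL dialects.
-    return db.AutoMigrate(&user{})
-}
-
-// Version is typically a sortable, timestamp-like number so scripts run in order.
-func (*addUserTable) Version() uint64 {
-    return 20220601000001
-}
-
-func (*addUserTable) Name() string {
-    return "add users table"
-}
-```
-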
diff --git a/versioned_docs/version-v0.12/DeveloperManuals/Dal.md b/versioned_docs/version-v0.12/DeveloperManuals/Dal.md deleted file mode 100644 index 9b085425ae2..00000000000 --- a/versioned_docs/version-v0.12/DeveloperManuals/Dal.md +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: "Dal" -sidebar_position: 5 -description: > - The Dal (Data Access Layer) is designed to decouple the hard dependency on `gorm` in v0.12 ---- - -## Summary - -The Dal (Data Access Layer) is designed to decouple the hard dependency on `gorm` in v0.12. The advantages of introducing this isolation are: - - - Unit Test: Mocking an Interface is easier and more reliable than Patching a Pointer. - - Clean Code: DBS operations are more consistence than using `gorm ` directly. - - Replaceable: It would be easier to replace `gorm` in the future if needed. - -## The Dal Interface - -```go -type Dal interface { - AutoMigrate(entity interface{}, clauses ...Clause) error - Exec(query string, params ...interface{}) error - RawCursor(query string, params ...interface{}) (*sql.Rows, error) - Cursor(clauses ...Clause) (*sql.Rows, error) - Fetch(cursor *sql.Rows, dst interface{}) error - All(dst interface{}, clauses ...Clause) error - First(dst interface{}, clauses ...Clause) error - Count(clauses ...Clause) (int64, error) - Pluck(column string, dest interface{}, clauses ...Clause) error - Create(entity interface{}, clauses ...Clause) error - Update(entity interface{}, clauses ...Clause) error - CreateOrUpdate(entity interface{}, clauses ...Clause) error - CreateIfNotExist(entity interface{}, clauses ...Clause) error - Delete(entity interface{}, clauses ...Clause) error - AllTables() ([]string, error) -} -``` - - -## How to use - -### Query -```go -// Get a database cursor -user := &models.User{} -cursor, err := db.Cursor( - dal.From(user), - dal.Where("department = ?", "R&D"), - dal.Orderby("id DESC"), -) -if err != nil { - return err -} -for cursor.Next() { - err = dal.Fetch(cursor, user) // fetch one record at a time - ... -} - -// Get a database cursor by raw sql query -cursor, err := db.Raw("SELECT * FROM users") - -// USE WITH CAUTIOUS: loading a big table at once is slow and dangerous -// Load all records from database at once. -users := make([]models.Users, 0) -err := db.All(&users, dal.Where("department = ?", "R&D")) - -// Load a column as Scalar or Slice -var email string -err := db.Pluck("email", &username, dal.Where("id = ?", 1)) -var emails []string -err := db.Pluck("email", &emails) - -// Execute query -err := db.Exec("UPDATE users SET department = ? 
WHERE department = ?", "Research & Development", "R&D") -``` - -### Insert -```go -err := db.Create(&models.User{ - Email: "hello@example.com", // assumming this the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Update -```go -err := db.Create(&models.User{ - Email: "hello@example.com", // assumming this the Primarykey - Name: "hello", - Department: "R&D", -}) -``` -### Insert or Update -```go -err := db.CreateOrUpdate(&models.User{ - Email: "hello@example.com", // assuming this is the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Insert if record(by PrimaryKey) didn't exist -```go -err := db.CreateIfNotExist(&models.User{ - Email: "hello@example.com", // assuming this is the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Delete -```go -err := db.CreateIfNotExist(&models.User{ - Email: "hello@example.com", // assuming this is the Primary key -}) -``` - -### DDL and others -```go -// Returns all table names -allTables, err := db.AllTables() - -// Automigrate: create/add missing table/columns -// Note: it won't delete any existing columns, nor does it update the column definition -err := db.AutoMigrate(&models.User{}) -``` - -## How to do Unit Test -First, run the command `make mock` to generate the Mocking Stubs, the generated source files should appear in `mocks` folder. -``` -mocks -├── ApiResourceHandler.go -├── AsyncResponseHandler.go -├── BasicRes.go -├── CloseablePluginTask.go -├── ConfigGetter.go -├── Dal.go -├── DataConvertHandler.go -├── ExecContext.go -├── InjectConfigGetter.go -├── InjectLogger.go -├── Iterator.go -├── Logger.go -├── Migratable.go -├── PluginApi.go -├── PluginBlueprintV100.go -├── PluginInit.go -├── PluginMeta.go -├── PluginTask.go -├── RateLimitedApiClient.go -├── SubTaskContext.go -├── SubTaskEntryPoint.go -├── SubTask.go -└── TaskContext.go -``` -With these Mocking stubs, you may start writing your TestCases using the `mocks.Dal`. -```go -import "github.com/apache/incubator-devlake/mocks" - -func TestCreateUser(t *testing.T) { - mockDal := new(mocks.Dal) - mockDal.On("Create", mock.Anything, mock.Anything).Return(nil).Once() - userService := &services.UserService{ - Dal: mockDal, - } - userService.Post(map[string]interface{}{ - "email": "helle@example.com", - "name": "hello", - "department": "R&D", - }) - mockDal.AssertExpectations(t) -``` - diff --git a/versioned_docs/version-v0.12/DeveloperManuals/DeveloperSetup.md b/versioned_docs/version-v0.12/DeveloperManuals/DeveloperSetup.md deleted file mode 100644 index a3f56c57a65..00000000000 --- a/versioned_docs/version-v0.12/DeveloperManuals/DeveloperSetup.md +++ /dev/null @@ -1,125 +0,0 @@ ---- -title: "Developer Setup" -description: > - The steps to install DevLake in develper mode. -sidebar_position: 1 ---- - - -## Requirements - -- Docker v19.03.10+ -- Golang v1.17+ -- Make - - Mac (Already installed) - - Windows: [Download](http://gnuwin32.sourceforge.net/packages/make.htm) - - Ubuntu: `sudo apt-get install build-essential libssl-dev` - -## How to setup dev environment -1. Navigate to where you would like to install this project and clone the repository: - - ```sh - git clone https://github.com/apache/incubator-devlake - cd incubator-devlake - ``` - -2. Install dependencies for plugins: - - - [RefDiff](../Plugins/refdiff.md#development) - -3. Install Go packages - - ```sh - go get - ``` - -4. Copy the sample config file to new local file: - - ```sh - cp .env.example .env - ``` - -5. 
Update the following variables in the file `.env`: - - * `DB_URL`: Replace `mysql:3306` with `127.0.0.1:3306` - -6. Start the MySQL and Grafana containers: - - > Make sure the Docker daemon is running before this step. - - ```sh - docker-compose up -d mysql grafana - ``` - -7. Run lake and config UI in dev mode in two separate terminals: - - ```sh - # install mockery - go install github.com/vektra/mockery/v2@latest - # generate mocking stubs - make mock - # run lake - make dev - # run config UI - make configure-dev - ``` - - Q: I got an error saying: `libgit2.so.1.3: cannot open share object file: No such file or directory` - - A: Make sure your program can find `libgit2.so.1.3`. `LD_LIBRARY_PATH` can be assigned like this if your `libgit2.so.1.3` is located at `/usr/local/lib`: - - ```sh - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib - ``` - -8. Visit config UI at `localhost:4000` to configure data connections. - - Please follow the [tutorial](UserManuals/ConfigUI/Tutorial.md) - - Submit the form to update the values by clicking on the **Save Connection** button on each form page - -9. Visit `localhost:4000/pipelines/create` to RUN a Pipeline and trigger data collection. - - - Pipelines Runs can be initiated by the new "Create Run" Interface. Simply enable the **Data Connection Providers** you wish to run collection for, and specify the data you want to collect, for instance, **Project ID** for Gitlab and **Repository Name** for GitHub. - - Once a valid pipeline configuration has been created, press **Create Run** to start/run the pipeline. - After the pipeline starts, you will be automatically redirected to the **Pipeline Activity** screen to monitor collection activity. - - **Pipelines** is accessible from the main menu of the config-ui for easy access. - - - Manage All Pipelines: `http://localhost:4000/pipelines` - - Create Pipeline RUN: `http://localhost:4000/pipelines/create` - - Track Pipeline Activity: `http://localhost:4000/pipelines/activity/[RUN_ID]` - - For advanced use cases and complex pipelines, please use the Raw JSON API to manually initiate a run using **cURL** or graphical API tool such as **Postman**. `POST` the following request to the DevLake API Endpoint. - - ```json - [ - [ - { - "plugin": "github", - "options": { - "repo": "lake", - "owner": "merico-dev" - } - } - ] - ] - ``` - - Please refer to [Pipeline Advanced Mode](../UserManuals/ConfigUI/AdvancedMode.md) for in-depth explanation. - - -10. Click *View Dashboards* button in the top left when done, or visit `localhost:3002` (username: `admin`, password: `admin`). - - We use Grafana as a visualization tool to build charts for the data stored in our database. Using SQL queries, we can add panels to build, save, and edit customized dashboards. - - All the details on provisioning and customizing a dashboard can be found in the [Grafana Doc](../UserManuals/Dashboards/GrafanaUserGuide.md). - -11. (Optional) To run the tests: - - ```sh - make test - ``` - -12. For DB migrations, please refer to [Migration Doc](../DeveloperManuals/DBMigration.md). 
- diff --git a/versioned_docs/version-v0.12/DeveloperManuals/Notifications.md b/versioned_docs/version-v0.12/DeveloperManuals/Notifications.md deleted file mode 100644 index 23456b4f1e7..00000000000 --- a/versioned_docs/version-v0.12/DeveloperManuals/Notifications.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Notifications" -description: > - Notifications -sidebar_position: 4 ---- - -## Request -Example request -``` -POST /lake/notify?nouce=3-FDXxIootApWxEVtz&sign=424c2f6159bd9e9828924a53f9911059433dc14328a031e91f9802f062b495d5 - -{"TaskID":39,"PluginName":"jenkins","CreatedAt":"2021-09-30T15:28:00.389+08:00","UpdatedAt":"2021-09-30T15:28:00.785+08:00"} -``` - -## Configuration -If you want to use the notification feature, you should add two configuration key to `.env` file. -```shell -# .env -# notification request url, e.g.: http://example.com/lake/notify -NOTIFICATION_ENDPOINT= -# secret is used to calculate signature -NOTIFICATION_SECRET= -``` - -## Signature -You should check the signature before accepting the notification request. We use sha256 algorithm to calculate the checksum. -```go -// calculate checksum -sum := sha256.Sum256([]byte(requestBody + NOTIFICATION_SECRET + nouce)) -return hex.EncodeToString(sum[:]) -``` diff --git a/versioned_docs/version-v0.12/DeveloperManuals/PluginImplementation.md b/versioned_docs/version-v0.12/DeveloperManuals/PluginImplementation.md deleted file mode 100644 index fe7816b4447..00000000000 --- a/versioned_docs/version-v0.12/DeveloperManuals/PluginImplementation.md +++ /dev/null @@ -1,292 +0,0 @@ ---- -title: "Plugin Implementation" -sidebar_position: 2 -description: > - Plugin Implementation ---- - -## How to Implement a DevLake plugin? - -If your favorite DevOps tool is not yet supported by DevLake, don't worry. It's not difficult to implement a DevLake plugin. In this post, we'll go through the basics of DevLake plugins and build an example plugin from scratch together. - -## What is a plugin? - -A DevLake plugin is a shared library built with Go's `plugin` package that hooks up to DevLake core at run-time. - -A plugin may extend DevLake's capability in three ways: - -1. Integrating with new data sources -2. Transforming/enriching existing data -3. Exporting DevLake data to other data systems - - -## How do plugins work? - -A plugin mainly consists of a collection of subtasks that can be executed by DevLake core. For data source plugins, a subtask may be collecting a single entity from the data source (e.g., issues from Jira). Besides the subtasks, there're hooks that a plugin can implement to customize its initialization, migration, and more. See below for a list of the most important interfaces: - -1. [PluginMeta](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_meta.go) contains the minimal interface that a plugin should implement, with only two functions - - Description() returns the description of a plugin - - RootPkgPath() returns the root package path of a plugin -2. [PluginInit](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_init.go) allows a plugin to customize its initialization -3. [PluginTask](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_task.go) enables a plugin to prepare data prior to subtask execution -4. [PluginApi](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_api.go) lets a plugin exposes some self-defined APIs -5. 
[PluginMigration](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_migration.go) is where a plugin manages its database migrations - -The diagram below shows the control flow of executing a plugin: - -```mermaid -flowchart TD; - subgraph S4[Step4 sub-task extractor running process]; - direction LR; - D4[DevLake]; - D4 -- "Step4.1 create a new\n ApiExtractor\n and execute it" --> E["ExtractXXXMeta.\nEntryPoint"]; - E <-- "Step4.2 read from\n raw table" --> E2["RawDataSubTaskArgs\n.Table"]; - E -- "Step4.3 call with RawData" --> ApiExtractor.Extract; - ApiExtractor.Extract -- "decode and return gorm models" --> E - end - subgraph S3[Step3 sub-task collector running process] - direction LR - D3[DevLake] - D3 -- "Step3.1 create a new\n ApiCollector\n and execute it" --> C["CollectXXXMeta.\nEntryPoint"]; - C <-- "Step3.2 create\n raw table" --> C2["RawDataSubTaskArgs\n.RAW_BBB_TABLE"]; - C <-- "Step3.3 build query\n before sending requests" --> ApiCollectorArgs.\nQuery/UrlTemplate; - C <-. "Step3.4 send requests by ApiClient \n and return HTTP response" .-> A1["HTTP APIs"]; - C <-- "Step3.5 call and \nreturn decoded data \nfrom HTTP response" --> ResponseParser; - end - subgraph S2[Step2 DevLake register custom plugin] - direction LR - D2[DevLake] - D2 <-- "Step2.1 function \`Init\` \nneed to do init jobs" --> plugin.Init; - D2 <-- "Step2.2 (Optional) call \nand return migration scripts" --> plugin.MigrationScripts; - D2 <-- "Step2.3 (Optional) call \nand return taskCtx" --> plugin.PrepareTaskData; - D2 <-- "Step2.4 call and \nreturn subTasks for execting" --> plugin.SubTaskContext; - end - subgraph S1[Step1 Run DevLake] - direction LR - main -- "Transfer of control \nby \`runner.DirectRun\`" --> D1[DevLake]; - end - S1-->S2-->S3-->S4 -``` -There's a lot of information in the diagram but we don't expect you to digest it right away, simply use it as a reference when you go through the example below. - -## A step-by-step guide towards your first plugin - -In this guide, we'll walk through how to create a data source plugin from scratch. - -The example in this tutorial comes from DevLake's own needs of managing [CLAs](https://en.wikipedia.org/wiki/Contributor_License_Agreement). Whenever DevLake receives a new PR on GitHub, we need to check if the author has signed a CLA by referencing `https://people.apache.org/public/icla-info.json`. This guide will demonstrate how to collect the ICLA info from Apache API, cache the raw response, and extract the raw data into a relational table ready to be queried. - -### Step 1: Bootstrap the new plugin - -**Note:** Please make sure you have DevLake up and running before proceeding. - -> More info about plugin: -> Generally, we need these folders in plugin folders: `api`, `models` and `tasks` -> `api` interacts with `config-ui` for test/get/save connection of data source -> - connection [example](https://github.com/apache/incubator-devlake/blob/main/plugins/gitlab/api/connection.go) -> - connection model [example](https://github.com/apache/incubator-devlake/blob/main/plugins/gitlab/models/connection.go) -> `models` stores all `data entities` and `data migration scripts`. 
-> - entity -> - data migrations [template](https://github.com/apache/incubator-devlake/tree/main/generator/template/migrationscripts) -> `tasks` contains all of our `sub tasks` for a plugin -> - task data [template](https://github.com/apache/incubator-devlake/blob/main/generator/template/plugin/tasks/task_data.go-template) -> - api client [template](https://github.com/apache/incubator-devlake/blob/main/generator/template/plugin/tasks/task_data_with_api_client.go-template) - -Don't worry if you cannot figure out what these concepts mean immediately. We'll explain them one by one later. - -DevLake provides a generator to create a plugin conveniently. Let's scaffold our new plugin by running `go run generator/main.go create-plugin icla`, which would ask for `with_api_client` and `Endpoint`. - -* `with_api_client` is used for choosing if we need to request HTTP APIs by api_client. -* `Endpoint` use in which site we will request, in our case, it should be `https://people.apache.org/`. - -![create plugin](https://i.imgur.com/itzlFg7.png) - -Now we have three files in our plugin. `api_client.go` and `task_data.go` are in subfolder `tasks/`. -![plugin files](https://i.imgur.com/zon5waf.png) - -Have a try to run this plugin by function `main` in `plugin_main.go`. When you see result like this: -``` -$go run plugins/icla/plugin_main.go -[2022-06-02 18:07:30] INFO failed to create dir logs: mkdir logs: file exists -press `c` to send cancel signal -[2022-06-02 18:07:30] INFO [icla] start plugin -invalid ICLA_TOKEN, but ignore this error now -[2022-06-02 18:07:30] INFO [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s -[2022-06-02 18:07:30] INFO [icla] total step: 0 -``` -How exciting. It works! The plugin defined and initiated in `plugin_main.go` use some options in `task_data.go`. They are made up as the most straightforward plugin in Apache DevLake, and `api_client.go` will be used in the next step to request HTTP APIs. - -### Step 2: Create a sub-task for data collection -Before we start, it is helpful to know how collection task is executed: -1. First, Apache DevLake would call `plugin_main.PrepareTaskData()` to prepare needed data before any sub-tasks. We need to create an API client here. -2. Then Apache DevLake will call the sub-tasks returned by `plugin_main.SubTaskMetas()`. Sub-task is an independent task to do some job, like requesting API, processing data, etc. - -> Each sub-task must be defined as a SubTaskMeta, and implement SubTaskEntryPoint of SubTaskMeta. SubTaskEntryPoint is defined as -> ```go -> type SubTaskEntryPoint func(c SubTaskContext) error -> ``` -> More info at: https://devlake.apache.org/blog/how-DevLake-is-up-and-running/ - -#### Step 2.1 Create a sub-task(Collector) for data collection - -Let's run `go run generator/main.go create-collector icla committer` and confirm it. This sub-task is activated by registering in `plugin_main.go/SubTaskMetas` automatically. - -![](https://i.imgur.com/tkDuofi.png) - -> - Collector will collect data from HTTP or other data sources, and save the data into the raw layer. -> - Inside the func `SubTaskEntryPoint` of `Collector`, we use `helper.NewApiCollector` to create an object of [ApiCollector](https://github.com/apache/incubator-devlake/blob/main/backend/generator/template/plugin/tasks/api_collector.go-template), then call `execute()` to do the job. - -Now you can notice `data.ApiClient` is inited in `plugin_main.go/PrepareTaskData.ApiClient`. 
`PrepareTaskData` create a new `ApiClient`, and it's a tool Apache DevLake suggests to request data from HTTP Apis. This tool support some valuable features for HttpApi, like rateLimit, proxy and retry. Of course, if you like, you may use the lib `http` instead, but it will be more tedious. - -Let's move forward to use it. - -1. To collect data from `https://people.apache.org/public/icla-info.json`, -we have filled `https://people.apache.org/` into `tasks/api_client.go/ENDPOINT` in Step 1. - -![](https://i.imgur.com/q8Zltnl.png) - -2. And fill `public/icla-info.json` into `UrlTemplate`, delete unnecessary iterator and add `println("receive data:", res)` in `ResponseParser` to see if collection was successful. - -![](https://i.imgur.com/ToLMclH.png) - -Ok, now the collector sub-task has been added to the plugin, and we can kick it off by running `main` again. If everything goes smoothly, the output should look like this: -```bash -[2022-06-06 12:24:52] INFO [icla] start plugin -invalid ICLA_TOKEN, but ignore this error now -[2022-06-06 12:24:52] INFO [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s -[2022-06-06 12:24:52] INFO [icla] total step: 1 -[2022-06-06 12:24:52] INFO [icla] executing subtask CollectCommitter -[2022-06-06 12:24:52] INFO [icla] [CollectCommitter] start api collection -receive data: 0x140005763f0 -[2022-06-06 12:24:55] INFO [icla] [CollectCommitter] finished records: 1 -[2022-06-06 12:24:55] INFO [icla] [CollectCommitter] end api collection -[2022-06-06 12:24:55] INFO [icla] finished step: 1 / 1 -``` - -Great! Now we can see data pulled from the server without any problem. The last step is to decode the response body in `ResponseParser` and return it to the framework, so it can be stored in the database. -```go -ResponseParser: func(res *http.Response) ([]json.RawMessage, error) { - body := &struct { - LastUpdated string `json:"last_updated"` - Committers json.RawMessage `json:"committers"` - }{} - err := helper.UnmarshalResponse(res, body) - if err != nil { - return nil, err - } - println("receive data:", len(body.Committers)) - return []json.RawMessage{body.Committers}, nil -}, - -``` -Ok, run the function `main` once again, then it turned out like this, and we should be able see some records show up in the table `_raw_icla_committer`. -```bash -…… -receive data: 272956 /* <- the number means 272956 models received */ -[2022-06-06 13:46:57] INFO [icla] [CollectCommitter] finished records: 1 -[2022-06-06 13:46:57] INFO [icla] [CollectCommitter] end api collection -[2022-06-06 13:46:57] INFO [icla] finished step: 1 / 1 -``` - -![](https://i.imgur.com/aVYNMRr.png) - -#### Step 2.2 Create a sub-task(Extractor) to extract data from the raw layer - -> - Extractor will extract data from raw layer and save it into tool db table. -> - Except for some pre-processing, the main flow is similar to the collector. - -We have already collected data from HTTP API and saved them into the DB table `_raw_XXXX`. In this step, we will extract the names of committers from the raw data. As you may infer from the name, raw tables are temporary and not easy to use directly. - -Now Apache DevLake suggests to save data by [gorm](https://gorm.io/docs/index.html), so we will create a model by gorm and add it into `plugin_main.go/AutoSchemas.Up()`. 
- -plugins/icla/models/committer.go -```go -package models - -import ( - "github.com/apache/incubator-devlake/models/common" -) - -type IclaCommitter struct { - UserName string `gorm:"primaryKey;type:varchar(255)"` - Name string `gorm:"primaryKey;type:varchar(255)"` - common.NoPKModel -} - -func (IclaCommitter) TableName() string { - return "_tool_icla_committer" -} -``` - -plugins/icla/plugin_main.go -![](https://i.imgur.com/4f0zJty.png) - - -Ok, run the plugin, and table `_tool_icla_committer` will be created automatically just like the snapshot below: -![](https://i.imgur.com/7Z324IX.png) - -Next, let's run `go run generator/main.go create-extractor icla committer` and type in what the command prompt asks for. - -![](https://i.imgur.com/UyDP9Um.png) - -Let's look at the function `extract` in `committer_extractor.go` created just now, and some codes need to be written here. It's obviously `resData.data` is raw data, so we could decode them by json and add new `IclaCommitter` to save them. -```go -Extract: func(resData *helper.RawData) ([]interface{}, error) { - names := &map[string]string{} - err := json.Unmarshal(resData.Data, names) - if err != nil { - return nil, err - } - extractedModels := make([]interface{}, 0) - for userName, name := range *names { - extractedModels = append(extractedModels, &models.IclaCommitter{ - UserName: userName, - Name: name, - })fco - } - return extractedModels, nil -}, -``` - -Ok, run it then we get: -``` -[2022-06-06 15:39:40] INFO [icla] start plugin -invalid ICLA_TOKEN, but ignore this error now -[2022-06-06 15:39:40] INFO [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s -[2022-06-06 15:39:40] INFO [icla] total step: 2 -[2022-06-06 15:39:40] INFO [icla] executing subtask CollectCommitter -[2022-06-06 15:39:40] INFO [icla] [CollectCommitter] start api collection -receive data: 272956 -[2022-06-06 15:39:44] INFO [icla] [CollectCommitter] finished records: 1 -[2022-06-06 15:39:44] INFO [icla] [CollectCommitter] end api collection -[2022-06-06 15:39:44] INFO [icla] finished step: 1 / 2 -[2022-06-06 15:39:44] INFO [icla] executing subtask ExtractCommitter -[2022-06-06 15:39:46] INFO [icla] [ExtractCommitter] finished records: 1 -[2022-06-06 15:39:46] INFO [icla] finished step: 2 / 2 -``` -Now committer data have been saved in _tool_icla_committer. -![](https://i.imgur.com/6svX0N2.png) - -#### Step 2.3 Convertor - -Notes: There are two ways here (open source or using it yourself). It is unnecessary, but we encourage it because convertors and the domain layer will significantly help build dashboards. More info about the domain layer at: https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema/ - -> - Convertor will convert data from the tool layer and save it into the domain layer. -> - We use `helper.NewDataConverter` to create an object of [DataConvertor], then call `execute()`. - -#### Step 2.4 Let's try it -Sometimes OpenApi will be protected by token or other auth types, and we need to log in to gain a token to visit it. For example, only after logging in `private@apahce.com` could we gather the data about contributors signing ICLA. Here we briefly introduce how to authorize DevLake to collect data. - -Let's look at `api_client.go`. `NewIclaApiClient` load config `ICLA_TOKEN` by `.env`, so we can add `ICLA_TOKEN=XXXXXX` in `.env` and use it in `apiClient.SetHeaders()` to mock the login status. 
Code as below: -![](https://i.imgur.com/dPxooAx.png) - -Of course, we can use `username/password` to get a token after login mockery. Just try and adjust according to the actual situation. - -Look for more related details at https://github.com/apache/incubator-devlake - -#### Final step: Submit the code as open source code -Good ideas and we encourage contributions~ Let's learn about migration scripts and domain layers to write normative and platform-neutral codes. More info at https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema or contact us for ebullient help. - - -## Done! - -Congratulations! The first plugin has been created! 🎖 diff --git a/versioned_docs/version-v0.12/DeveloperManuals/TagNamingConventions.md b/versioned_docs/version-v0.12/DeveloperManuals/TagNamingConventions.md deleted file mode 100644 index 7195070f6cb..00000000000 --- a/versioned_docs/version-v0.12/DeveloperManuals/TagNamingConventions.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: "Tag Naming Conventions" -description: > - Tag Naming Conventions -sidebar_position: 6 ---- - -Please refer to the rules when creating a new tag for Apache DevLake -- alpha: internal testing/preview, i.e. v0.12.0-alpha1 -- beta: communtity/customer testing/preview, i.e. v0.12.0-beta1 -- rc: asf release candidate, i.e. v0.12.0-rc1 - - diff --git a/versioned_docs/version-v0.12/DeveloperManuals/_category_.json b/versioned_docs/version-v0.12/DeveloperManuals/_category_.json deleted file mode 100644 index 4c4363329e9..00000000000 --- a/versioned_docs/version-v0.12/DeveloperManuals/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Developer Manuals", - "position": 6 -} diff --git a/versioned_docs/version-v0.12/EngineeringMetrics.md b/versioned_docs/version-v0.12/EngineeringMetrics.md deleted file mode 100644 index 35805b293fe..00000000000 --- a/versioned_docs/version-v0.12/EngineeringMetrics.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -sidebar_position: 8 -title: "Engineering Metrics" -linkTitle: "Engineering Metrics" -tags: [] -description: > - The definition, values and data required for the 20+ engineering metrics supported by DevLake. ---- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CategoryMetric NameDefinitionData RequiredUse Scenarios and Recommended PracticesValue            
Delivery VelocityRequirement CountNumber of issues in type "Requirement"Issue/Task Management entities: Jira issues, GitHub issues, etc -1. Analyze the number of requirements and delivery rate of different time cycles to find the stability and trend of the development process. -
2. Analyze and compare the number of requirements delivered and delivery rate of each project/team, and compare the scale of requirements of different projects. -
3. Based on historical data, establish a baseline of the delivery capacity of a single iteration (optimistic, probable and pessimistic values) to provide a reference for iteration estimation. -
4. Drill down to analyze the number and percentage of requirements in different phases of SDLC. Analyze rationality and identify the requirements stuck in the backlog.
1. Based on historical data, establish a baseline of the delivery capacity of a single iteration to improve the organization and planning of R&D resources. -
2. Evaluate whether the delivery capacity matches the business phase and demand scale. Identify key bottlenecks and reasonably allocate resources.
Requirement Delivery RateRatio of delivered requirements to all requirementsIssue/Task Management entities: Jira issues, GitHub issues, etc
Requirement Lead TimeLead time of issues with type "Requirement"Issue/Task Management entities: Jira issues, GitHub issues, etc -1. Analyze the trend of requirement lead time to observe if it has improved over time. -
2. Analyze and compare the requirement lead time of each project/team to identify key projects with abnormal lead time. -
3. Drill down to analyze a requirement's staying time in different phases of SDLC. Analyze the bottleneck of delivery velocity and improve the workflow.
1. Analyze key projects and critical points, identify good/to-be-improved practices that affect requirement lead time, and reduce the risk of delays -
2. Focus on the end-to-end velocity of value delivery process; coordinate different parts of R&D to avoid efficiency shafts; make targeted improvements to bottlenecks.
Requirement GranularityNumber of story points associated with an issueIssue/Task Management entities: Jira issues, GitHub issues, etc -1. Analyze the story points/requirement lead time of requirements to evaluate whether the ticket size, ie. requirement complexity is optimal. -
2. Compare the estimated requirement granularity with the actual situation and evaluate whether the difference is reasonable by combining more microscopic workload metrics (e.g. lines of code/code equivalents)
1. Promote product teams to split requirements carefully, improve requirements quality, help developers understand requirements clearly, deliver efficiently and with high quality, and improve the project management capability of the team. -
2. Establish a data-supported workload estimation model to help R&D teams calibrate their estimation methods and more accurately assess the granularity of requirements, which is useful to achieve better issue planning in project management.
Commit CountNumber of CommitsSource Code Management entities: Git/GitHub/GitLab commits -1. Identify the main reasons for the unusual number of commits and the possible impact on the number of commits through comparison -
2. Evaluate whether the number of commits is reasonable in conjunction with more microscopic workload metrics (e.g. lines of code/code equivalents)
1. Identify potential bottlenecks that may affect output -
2. Encourage R&D practices of small step submissions and develop excellent coding habits
Added Lines of CodeAccumulated number of added lines of codeSource Code Management entities: Git/GitHub/GitLab commits -1. From the project/team dimension, observe the accumulated change in Added lines to assess the team activity and code growth rate -
2. From version cycle dimension, observe the active time distribution of code changes, and evaluate the effectiveness of project development model. -
3. From the member dimension, observe the trend and stability of code output of each member, and identify the key points that affect code output by comparison.
1. identify potential bottlenecks that may affect the output -
2. Encourage the team to implement a development model that matches the business requirements; develop excellent coding habits
Deleted Lines of CodeAccumulated number of deleted lines of codeSource Code Management entities: Git/GitHub/GitLab commits
Pull Request Review TimeTime from Pull/Merge created time until mergedSource Code Management entities: GitHub PRs, GitLab MRs, etc -1. Observe the mean and distribution of code review time from the project/team/individual dimension to assess the rationality of the review time1. Take inventory of project/team code review resources to avoid lack of resources and backlog of review sessions, resulting in long waiting time -
2. Encourage teams to implement an efficient and responsive code review mechanism
Bug AgeLead time of issues in type "Bug"Issue/Task Management entities: Jira issues, GitHub issues, etc -1. Observe the trend of bug age and locate the key reasons.
-2. According to the severity level, type (business, functional classification), affected module, source of bugs, count and observe the length of bug and incident age.
1. Help the team to establish an effective hierarchical response mechanism for bugs and incidents. Focus on the resolution of important problems in the backlog.
-2. Improve team's and individual's bug/incident fixing efficiency. Identify good/to-be-improved practices that affect bug age or incident age
Incident AgeLead time of issues in type "Incident"Issue/Task Management entities: Jira issues, GitHub issues, etc
Delivery QualityPull Request CountNumber of Pull/Merge RequestsSource Code Management entities: GitHub PRs, GitLab MRs, etc -1. From the developer dimension, we evaluate the code quality of developers by combining the task complexity with the metrics related to the number of review passes and review rounds.
-2. From the reviewer dimension, we observe the reviewer's review style by taking into account the task complexity, the number of passes and the number of review rounds.
-3. From the project/team dimension, we combine the project phase and team task complexity to aggregate the metrics related to the number of review passes and review rounds, and identify the modules with abnormal code review process and possible quality risks.
-1. Code review metrics are process indicators to provide quick feedback on developers' code quality
-2. Promote the team to establish a unified coding specification and standardize the code review criteria
-3. Identify modules with low-quality risks in advance, optimize practices, and precipitate into reusable knowledge and tools to avoid technical debt accumulation
-| | Pull Request Pass Rate | Ratio of Pull/Merge Review requests to merged | Source Code Management entities: GitHub PRs, GitLab MRs, etc | | |
-| | Pull Request Review Rounds | Number of cycles of commits followed by comments/final merge | Source Code Management entities: GitHub PRs, GitLab MRs, etc | | |
-| | Pull Request Review Count | Number of Pull/Merge Reviewers | Source Code Management entities: GitHub PRs, GitLab MRs, etc | 1. As a secondary indicator, assess the cost of labor invested in the code review process | 1. Take inventory of project/team code review resources to avoid long waits for review sessions due to insufficient resource input |
-| | Bug Count | Number of bugs found during testing | Issue/Task Management entities: Jira issues, GitHub issues, etc | 1. From the project or team dimension, observe the statistics on the total number of defects, the distribution of the number of defects in each severity level/type/owner, the cumulative trend of defects, and the change trend of the defect rate in thousands of lines, etc.<br/>2. From the version cycle dimension, observe the statistics on the cumulative trend of the number of defects/defect rate, which can be used to determine whether the growth rate of defects is slowing down, showing a flat convergence trend, and is an important reference for judging the stability of software version quality<br/>3. From the time dimension, analyze the trend of the number of test defects and the defect rate to locate the key items/key points<br/>4. Evaluate whether the software quality and test plan are reasonable by referring to CMMI standard values | 1. Defect drill-down analysis to inform the development of design and code review strategies and to improve the internal QA process<br/>2. Assist teams to locate projects/modules with higher defect severity and density, and clean up technical debts<br/>3. Analyze critical points, identify good/to-be-improved practices that affect defect count or defect rate, to reduce the amount of future defects |
-| | Incident Count | Number of Incidents found after shipping | Source Code Management entities: GitHub PRs, GitLab MRs, etc | | |
-| | Bugs Count per 1k Lines of Code | Amount of bugs per 1,000 lines of code | Source Code Management entities: GitHub PRs, GitLab MRs, etc | | |
-| | Incidents Count per 1k Lines of Code | Amount of incidents per 1,000 lines of code | Source Code Management entities: GitHub PRs, GitLab MRs, etc | | |
-| Delivery Cost | Commit Author Count | Number of Contributors who have committed code | Source Code Management entities: Git/GitHub/GitLab commits | 1. As a secondary indicator, this helps assess the labor cost of participating in coding | 1. Take inventory of project/team R&D resource inputs, assess input-output ratio, and rationalize resource deployment |
-| Delivery Capability | Build Count | The number of builds started | CI/CD entities: Jenkins builds, GitLab CI pipelines, etc | 1. From the project dimension, compare the number of builds and success rate by combining the project phase and the complexity of tasks<br/>2. From the time dimension, analyze the trend of the number of builds and success rate to see if it has improved over time | 1. As a process indicator, it reflects the value flow efficiency of upstream production and research links<br/>2. Identify excellent/to-be-improved practices that impact the build, and drive the team to precipitate reusable tools and mechanisms to build infrastructure for fast and high-frequency delivery |
-| | Build Duration | The duration of successful builds | CI/CD entities: Jenkins builds, GitLab CI pipelines, etc | | |
-| | Build Success Rate | The percentage of successful builds | CI/CD entities: Jenkins builds, GitLab CI pipelines, etc | | |
-


diff --git a/versioned_docs/version-v0.12/Glossary.md b/versioned_docs/version-v0.12/Glossary.md deleted file mode 100644 index 3e91e880394..00000000000 --- a/versioned_docs/version-v0.12/Glossary.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -sidebar_position: 10 -title: "Glossary" -linkTitle: "Glossary" -tags: [] -categories: [] -weight: 6 -description: > - DevLake Glossary ---- - -*Last updated: May 16 2022* - - -## In Configuration UI (Regular Mode) - -The following terms are arranged in the order of their appearance in the actual user workflow. - -### Blueprints -**A blueprint is the plan that covers all the work to get your raw data ready for query and metric computation in the dashboards.** Creating a blueprint consists of four steps: -1. **Adding [Data Connections](Glossary.md#data-connections)**: For each [data source](Glossary.md#data-sources), one or more data connections can be added to a single blueprint, depending on the data you want to sync to DevLake. -2. **Setting the [Data Scope](Glossary.md#data-scope)**: For each data connection, you need to configure the scope of data, such as GitHub projects, Jira boards, and their corresponding [data entities](Glossary.md#data-entities). -3. **Adding [Transformation Rules](Glossary.md#transformation-rules) (optional)**: You can optionally apply transformation for the data scope you have just selected, in order to view more advanced metrics. -3. **Setting the Sync Frequency**: You can specify the sync frequency for your blueprint to achieve recurring data syncs and transformation. Alternatively, you can set the frequency to manual if you wish to run the tasks in the blueprint manually. - -The relationship among Blueprint, Data Connections, Data Scope and Transformation Rules is explained as follows: - -![Blueprint ERD](/img/Glossary/blueprint-erd.svg) -- Each blueprint can have multiple data connections. -- Each data connection can have multiple sets of data scope. -- Each set of data scope only consists of one GitHub/GitLab project or Jira board, along with their corresponding data entities. -- Each set of data scope can only have one set of transformation rules. - -### Data Sources -**A data source is a specific DevOps tool from which you wish to sync your data, such as GitHub, GitLab, Jira and Jenkins.** - -DevLake normally uses one [data plugin](Glossary.md#data-plugins) to pull data for a single data source. However, in some cases, DevLake uses multiple data plugins for one data source for the purpose of improved sync speed, among many other advantages. For instance, when you pull data from GitHub or GitLab, aside from the GitHub or GitLab plugin, Git Extractor is also used to pull data from the repositories. In this case, DevLake still refers GitHub or GitLab as a single data source. - -### Data Connections -**A data connection is a specific instance of a data source that stores information such as `endpoint` and `auth`.** A single data source can have one or more data connections (e.g. two Jira instances). Currently, DevLake supports one data connection for GitHub, GitLab and Jenkins, and multiple connections for Jira. - -You can set up a new data connection either during the first step of creating a blueprint, or in the Connections page that can be accessed from the navigation bar. Because one single data connection can be reused in multiple blueprints, you can update the information of a particular data connection in Connections, to ensure all its associated blueprints will run properly. 
For example, you may want to update your GitHub token in a data connection if it goes expired. - -### Data Scope -**In a blueprint, each data connection can have multiple sets of data scope configurations, including GitHub or GitLab projects, Jira boards and their corresponding[data entities](Glossary.md#data-entities).** The fields for data scope configuration vary according to different data sources. - -Each set of data scope refers to one GitHub or GitLab project, or one Jira board and the data entities you would like to sync for them, for the convenience of applying transformation in the next step. For instance, if you wish to sync 5 GitHub projects, you will have 5 sets of data scope for GitHub. - -To learn more about the default data scope of all data sources and data plugins, please refer to [Data Support](./DataModels/DataSupport.md). - -### Data Entities -**Data entities refer to the data fields from one of the five data domains: Issue Tracking, Source Code Management, Code Review, CI/CD and Cross-Domain.** - -For instance, if you wish to pull Source Code Management data from GitHub and Issue Tracking data from Jira, you can check the corresponding data entities during setting the data scope of these two data connections. - -To learn more details, please refer to [Domain Layer Schema](./DataModels/DevLakeDomainLayerSchema.md). - -### Transformation Rules -**Transformation rules are a collection of methods that allow you to customize how DevLake normalizes raw data for query and metric computation.** Each set of data scope is strictly accompanied with one set of transformation rules. However, for your convenience, transformation rules can also be duplicated across different sets of data scope. - -DevLake uses these normalized values in the transformation to design more advanced dashboards, such as the Weekly Bug Retro dashboard. Although configuring transformation rules is not mandatory, if you leave the rules blank or have not configured correctly, only the basic dashboards (e.g. GitHub Basic Metrics) will be displayed as expected, while the advanced dashboards will not. - -### Historical Runs -**A historical run of a blueprint is an actual execution of the data collection and transformation [tasks](Glossary.md#tasks) defined in the blueprint at its creation.** A list of historical runs of a blueprint is the entire running history of that blueprint, whether executed automatically or manually. Historical runs can be triggered in three ways: -- By the blueprint automatically according to its schedule in the Regular Mode of the Configuration UI -- By running the JSON in the Advanced Mode of the Configuration UI -- By calling the API `/pipelines` endpoint manually - -However, the name Historical Runs is only used in the Configuration UI. In DevLake API, they are called [pipelines](Glossary.md#pipelines). - -## In Configuration UI (Advanced Mode) and API - -The following terms have not appeared in the Regular Mode of Configuration UI for simplification, but can be very useful if you want to learn about the underlying framework of DevLake or use Advanced Mode and the DevLake API. - -### Data Plugins -**A data plugin is a specific module that syncs or transforms data.** There are two types of data plugins: Data Collection Plugins and Data Transformation Plugins. - -Data Collection Plugins pull data from one or more data sources. DevLake supports 8 data plugins in this category: `ae`, `feishu`, `gitextractor`, `github`, `gitlab`, `jenkins`, `jira` and `tapd`. 
- -Data Transformation Plugins transform the data pulled by other Data Collection Plugins. `refdiff` is currently the only plugin in this category. - -Although the names of the data plugins are not displayed in the regular mode of DevLake Configuration UI, they can be used directly in JSON in the Advanced Mode. - -For detailed information about the relationship between data sources and data plugins, please refer to [Data Support](./DataModels/DataSupport.md). - - -### Pipelines -**A pipeline is an orchestration of [tasks](Glossary.md#tasks) of data `collection`, `extraction`, `conversion` and `enrichment`, defined in the DevLake API.** A pipeline is composed of one or multiple [stages](Glossary.md#stages) that are executed in a sequential order. Any error occurring during the execution of any stage, task or subtask will cause the immediate fail of the pipeline. - -The composition of a pipeline is explained as follows: -![Blueprint ERD](/img/Glossary/pipeline-erd.svg) -Notice: **You can manually orchestrate the pipeline in Configuration UI Advanced Mode and the DevLake API; whereas in Configuration UI regular mode, an optimized pipeline orchestration will be automatically generated for you.** - - -### Stages -**A stages is a collection of tasks performed by data plugins.** Stages are executed in a sequential order in a pipeline. - -### Tasks -**A task is a collection of [subtasks](Glossary.md#subtasks) that perform any of the `collection`, `extraction`, `conversion` and `enrichment` jobs of a particular data plugin.** Tasks are executed in a parallel order in any stages. - -### Subtasks -**A subtask is the minimal work unit in a pipeline that performs in any of the four roles: `Collectors`, `Extractors`, `Converters` and `Enrichers`.** Subtasks are executed in sequential orders. -- `Collectors`: Collect raw data from data sources, normally via DevLake API and stored into `raw data table` -- `Extractors`: Extract data from `raw data table` to `domain layer tables` -- `Converters`: Convert data from `tool layer tables` into `domain layer tables` -- `Enrichers`: Enrich data from one domain to other domains. For instance, the Fourier Transformation can examine `issue_changelog` to show time distribution of an issue on every assignee. diff --git a/versioned_docs/version-v0.12/Overview/Architecture.md b/versioned_docs/version-v0.12/Overview/Architecture.md deleted file mode 100755 index d4c6a9c5340..00000000000 --- a/versioned_docs/version-v0.12/Overview/Architecture.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Architecture" -description: > - Understand the architecture of Apache DevLake -sidebar_position: 2 ---- - -## Architecture Overview - -

-*(figure: DevLake Components)*

- -A DevLake installation typically consists of the following components: - -- Config UI: A handy user interface to create, trigger, and debug Blueprints. A Blueprint specifies the where (data connection), what (data scope), how (transformation rule), and when (sync frequency) of a data pipeline. -- API Server: The main programmatic interface of DevLake. -- Runner: The runner does all the heavy-lifting for executing tasks. In the default DevLake installation, it runs within the API Server, but DevLake provides a temporal-based runner (beta) for production environments. -- Database: The database stores both DevLake's metadata and user data collected by data pipelines. DevLake supports MySQL and PostgreSQL as of v0.11. -- Plugins: Plugins enable DevLake to collect and analyze dev data from any DevOps tools with an accessible API. DevLake community is actively adding plugins for popular DevOps tools, but if your preferred tool is not covered yet, feel free to open a GitHub issue to let us know or check out our doc on how to build a new plugin by yourself. -- Dashboards: Dashboards deliver data and insights to DevLake users. A dashboard is simply a collection of SQL queries along with corresponding visualization configurations. DevLake's official dashboard tool is Grafana and pre-built dashboards are shipped in Grafana's JSON format. Users are welcome to swap for their own choice of dashboard/BI tool if desired. - -## Dataflow - -

-*(figure: DevLake Dataflow)*

- -A typical plugin's dataflow is illustrated below: - -1. The Raw layer stores the API responses from data sources (DevOps tools) in JSON. This saves developers' time if the raw data is to be transformed differently later on. Please note that communicating with data sources' APIs is usually the most time-consuming step. -2. The Tool layer extracts raw data from JSONs into a relational schema that's easier to consume by analytical tasks. Each DevOps tool would have a schema that's tailored to their data structure, hence the name, the Tool layer. -3. The Domain layer attempts to build a layer of abstraction on top of the Tool layer so that analytics logics can be re-used across different tools. For example, GitHub's Pull Request (PR) and GitLab's Merge Request (MR) are similar entities. They each have their own table name and schema in the Tool layer, but they're consolidated into a single entity in the Domain layer, so that developers only need to implement metrics like Cycle Time and Code Review Rounds once against the domain layer schema. - -## Principles - -1. Extensible: DevLake's plugin system allows users to integrate with any DevOps tool. DevLake also provides a dbt plugin that enables users to define their own data transformation and analysis workflows. -2. Portable: DevLake has a modular design and provides multiple options for each module. Users of different setups can freely choose the right configuration for themselves. -3. Robust: DevLake provides an SDK to help plugins efficiently and reliably collect data from data sources while respecting their API rate limits and constraints. - -
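To make the Domain layer point concrete: GitHub PRs and GitLab MRs both land in the single domain-layer table `pull_requests`, so one query serves both tools. The sketch below inspects that table directly with the `mysql` client; it assumes the sample credentials used elsewhere in these docs (user `merico`, password `merico`, database `lake` on `127.0.0.1:3306`), which may differ from your deployment.

```
# A minimal sketch: GitHub PRs and GitLab MRs are consolidated into the
# domain-layer `pull_requests` table, so one query covers both tools.
# Credentials/database below are the sample defaults (merico/merico, db `lake`);
# adjust them to match your own setup.
mysql -h 127.0.0.1 -P 3306 -u merico -pmerico lake \
  -e "SHOW TABLES LIKE 'pull_requests'; SELECT COUNT(*) AS pr_count FROM pull_requests;"
```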
diff --git a/versioned_docs/version-v0.12/Overview/Introduction.md b/versioned_docs/version-v0.12/Overview/Introduction.md deleted file mode 100755 index 94c58865e27..00000000000 --- a/versioned_docs/version-v0.12/Overview/Introduction.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Introduction" -description: General introduction of Apache DevLake -sidebar_position: 1 ---- - -## What is Apache DevLake? -Apache DevLake is an open-source dev data platform that ingests, analyzes, and visualizes the fragmented data from DevOps tools to distill insights for engineering productivity. - -Apache DevLake is designed for developer teams looking to make better sense of their development process and to bring a more data-driven approach to their own practices. You can ask Apache DevLake many questions regarding your development process. Just connect and query. - -## What can be accomplished with DevLake? -1. Collect DevOps data across the entire Software Development Life Cycle (SDLC) and connect the siloed data with a standard [data model](../DataModels/DevLakeDomainLayerSchema.md). -2. Visualize out-of-the-box engineering [metrics](../EngineeringMetrics.md) in a series of use-case driven dashboards -3. Easily extend DevLake to support your data sources, metrics, and dashboards with a flexible [framework](Architecture.md) for data collection and ETL (Extract, Transform, Load). - -## How do I use DevLake? -### 1. Set up DevLake -You can easily set up Apache DevLake by following our step-by step instructions for [Docker Compose setup](../QuickStart/DockerComposeSetup.md) or [Kubernetes setup](../QuickStart/KubernetesSetup.md). - -### 2. Create a Blueprint -The DevLake Configuration UI will guide you through the process (a Blueprint) to define the data connections, data scope, transformation and sync frequency of the data you wish to collect. - -![img](/img/Introduction/userflow1.svg) - -### 3. Track the Blueprint's progress -You can track the progress of the Blueprint you have just set up. - -![img](/img/Introduction/userflow2.svg) - -### 4. View the pre-built dashboards -Once the first run of the Blueprint is completed, you can view the corresponding dashboards. - -![img](/img/Introduction/userflow3.png) - -### 5. Customize the dahsboards with SQL -If the pre-built dashboards are limited for your use cases, you can always customize or create your own metrics or dashboards with SQL. - -![img](/img/Introduction/userflow4.png) diff --git a/versioned_docs/version-v0.12/Overview/Roadmap.md b/versioned_docs/version-v0.12/Overview/Roadmap.md deleted file mode 100644 index 9dcf0b3dc0f..00000000000 --- a/versioned_docs/version-v0.12/Overview/Roadmap.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "Roadmap" -description: > - The goals and roadmap for DevLake in 2022 -sidebar_position: 3 ---- - - -## Goals -DevLake has joined the Apache Incubator and is aiming to become a top-level project. To achieve this goal, the Apache DevLake (Incubating) community will continue to make efforts in helping development teams to analyze and improve their engineering productivity. In the 2022 Roadmap, we have summarized three major goals followed by the feature breakdown to invite the broader community to join us and grow together. - -1. As a dev data analysis application, discover and implement 3 (or even more!) 
usage scenarios:
-   - A collection of metrics to track the contribution, quality and growth of open-source projects
-   - DORA metrics for DevOps engineers
-   - To be decided ([let us know](https://join.slack.com/t/devlake-io/shared_invite/zt-17b6vuvps-x98pqseoUagM7EAmKC82xQ) if you have any suggestions!)
-2. As dev data infrastructure, provide robust data collection modules, customizable data models, and data extensibility.
-3. Design better user experience for end-users and contributors.
-
-## Feature Breakdown
-Apache DevLake is currently under rapid development. You are more than welcome to use the following table to explore your interested features and make contributions. We deeply appreciate the collective effort of our community to make this project possible!
-
-| Category | Features|
-| --- | --- |
-| More data sources across different [DevOps domains](../DataModels/DevLakeDomainLayerSchema.md) (Goal No.1 & 2)| Features in **bold** are of higher priority

Issue/Task Management: Source Code Management: Code Review: CI/CD: Quality: QA: Calendar: OSS Community Metrics: | -| Improved data collection, [data models](../DataModels/DevLakeDomainLayerSchema.md) and data extensibility (Goal No.2)| Data Collection:
Data Models: Data Extensibility: | -| Better user experience (Goal No.3) | For new users: For returning users: For contributors: | - - -## How to Influence the Roadmap -A roadmap is only useful when it captures real user needs. We are glad to hear from you if you have specific use cases, feedback, or ideas. You can submit an issue to let us know! -Also, if you plan to work (or are already working) on a new or existing feature, tell us, so that we can update the roadmap accordingly. We are happy to share knowledge and context to help your feature land successfully. -


- diff --git a/versioned_docs/version-v0.12/Overview/_category_.json b/versioned_docs/version-v0.12/Overview/_category_.json deleted file mode 100644 index e224ed81cd3..00000000000 --- a/versioned_docs/version-v0.12/Overview/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Overview", - "position": 1 -} diff --git a/versioned_docs/version-v0.12/Plugins/_category_.json b/versioned_docs/version-v0.12/Plugins/_category_.json deleted file mode 100644 index 72c1aa5fa39..00000000000 --- a/versioned_docs/version-v0.12/Plugins/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Plugins", - "position": 9 -} diff --git a/versioned_docs/version-v0.12/Plugins/dbt.md b/versioned_docs/version-v0.12/Plugins/dbt.md deleted file mode 100644 index 059bf12c61d..00000000000 --- a/versioned_docs/version-v0.12/Plugins/dbt.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "DBT" -description: > - DBT Plugin ---- - - -## Summary - -dbt (data build tool) enables analytics engineers to transform data in their warehouses by simply writing select statements. dbt handles turning these select statements into tables and views. -dbt does the T in ELT (Extract, Load, Transform) processes – it doesn’t extract or load data, but it’s extremely good at transforming data that’s already loaded into your warehouse. - -## User setup -- If you plan to use this product, you need to install some environments first. - -#### Required Packages to Install -- [python3.7+](https://www.python.org/downloads/) -- [dbt-mysql](https://pypi.org/project/dbt-mysql/#configuring-your-profile) - -#### Commands to run or create in your terminal and the dbt project -1. pip install dbt-mysql -2. dbt init demoapp (demoapp is project name) -3. create your SQL transformations and data models - -## Convert Data By DBT - -Use the Raw JSON API to manually initiate a run using **cURL** or graphical API tool such as **Postman**. `POST` the following request to the DevLake API Endpoint. - -```json -[ - [ - { - "plugin": "dbt", - "options": { - "projectPath": "/Users/abeizn/demoapp", - "projectName": "demoapp", - "projectTarget": "dev", - "selectedModels": ["my_first_dbt_model","my_second_dbt_model"], - "projectVars": { - "demokey1": "demovalue1", - "demokey2": "demovalue2" - } - } - } - ] -] -``` - -- `projectPath`: the absolute path of the dbt project. (required) -- `projectName`: the name of the dbt project. (required) -- `projectTarget`: this is the default target your dbt project will use. (optional) -- `selectedModels`: a model is a select statement. Models are defined in .sql files, and typically in your models directory. (required) -And selectedModels accepts one or more arguments. Each argument can be one of: -1. a package name, runs all models in your project, example: example -2. a model name, runs a specific model, example: my_fisrt_dbt_model -3. a fully-qualified path to a directory of models. - -- `projectVars`: variables to parametrize dbt models. (optional) -example: -`select * from events where event_type = '{{ var("event_type") }}'` -To execute this SQL query in your model, you need set a value for `event_type`. - -### Resources: -- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) -- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers - -
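For readers who prefer cURL over a graphical API tool, the dbt plan above can be sent the same way as the other plugins in these docs — a sketch, assuming the default API endpoint `http://localhost:8080` and wrapping the plan in a named pipeline; the project path/name and model names are the illustrative values from the example above.

```
# A hedged sketch: POST the dbt plan to /pipelines, mirroring the other plugin docs.
curl 'http://localhost:8080/pipelines' \
--header 'Content-Type: application/json' \
--data-raw '
{
    "name": "dbt demoapp",
    "plan": [[{
        "plugin": "dbt",
        "options": {
            "projectPath": "/Users/abeizn/demoapp",
            "projectName": "demoapp",
            "projectTarget": "dev",
            "selectedModels": ["my_first_dbt_model", "my_second_dbt_model"],
            "projectVars": {
                "demokey1": "demovalue1",
                "demokey2": "demovalue2"
            }
        }
    }]]
}
'
```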


diff --git a/versioned_docs/version-v0.12/Plugins/feishu.md b/versioned_docs/version-v0.12/Plugins/feishu.md deleted file mode 100644 index 306f3bd9893..00000000000 --- a/versioned_docs/version-v0.12/Plugins/feishu.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: "Feishu" -description: > - Feishu Plugin ---- - -## Summary - -This plugin collects Feishu meeting data through [Feishu Openapi](https://open.feishu.cn/document/home/user-identity-introduction/introduction). - -## Configuration - -In order to fully use this plugin, you will need to get `app_id` and `app_secret` from a Feishu administrator (for help on App info, please see [official Feishu Docs](https://open.feishu.cn/document/ukTMukTMukTM/ukDNz4SO0MjL5QzM/auth-v3/auth/tenant_access_token_internal)), - -A connection should be created before you can collection any data. Currently, this plugin supports creating connection by requesting `connections` API: - -``` -curl 'http://localhost:8080/plugins/feishu/connections' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "feishu", - "endpoint": "https://open.feishu.cn/open-apis/vc/v1/", - "proxy": "http://localhost:1080", - "rateLimitPerHour": 20000, - "appId": "", - "appSecret": "" -} -' -``` - -## Collect data from Feishu - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - - -```json -[ - [ - { - "plugin": "feishu", - "options": { - "connectionId": 1, - "numOfDaysToCollect" : 80 - } - } - ] -] -``` - -> `numOfDaysToCollect`: The number of days you want to collect - -> `rateLimitPerSecond`: The number of requests to send(Maximum is 8) - -You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "feishu 20211126", - "plan": [[{ - "plugin": "feishu", - "options": { - "connectionId": 1, - "numOfDaysToCollect" : 80 - } - }]] -} -' -``` diff --git a/versioned_docs/version-v0.12/Plugins/gitee.md b/versioned_docs/version-v0.12/Plugins/gitee.md deleted file mode 100644 index 79c3c907dac..00000000000 --- a/versioned_docs/version-v0.12/Plugins/gitee.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: "Gitee(WIP)" -description: > - Gitee Plugin ---- - -## Summary - -This plugin collects `Gitee` data through [Gitee Openapi](https://gitee.com/api/v5/swagger). - -## Configuration - -In order to fully use this plugin, you will need to get `token` on the Gitee website. - -A connection should be created before you can collection any data. Currently, this plugin supports creating connection by requesting `connections` API: - -``` -curl 'http://localhost:8080/plugins/gitee/connections' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee", - "endpoint": "https://gitee.com/api/v5/", - "proxy": "http://localhost:1080", - "rateLimitPerHour": 20000, - "token": "" -} -' -``` - - - -## Collect data from Gitee - -In order to collect data, you have to compose a JSON looks like following one, and send it by selecting `Advanced Mode` on `Create Pipeline Run` page: - -1. Configure-UI Mode -```json -[ - [ - { - "plugin": "gitee", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` -and if you want to perform certain subtasks. 
-```json -[ - [ - { - "plugin": "gitee", - "subtasks": ["collectXXX", "extractXXX", "convertXXX"], - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` - -2. Curl Mode: - You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee 20211126", - "plan": [[{ - "plugin": "gitee", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` -and if you want to perform certain subtasks. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee 20211126", - "plan": [[{ - "plugin": "gitee", - "subtasks": ["collectXXX", "extractXXX", "convertXXX"], - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` diff --git a/versioned_docs/version-v0.12/Plugins/gitextractor.md b/versioned_docs/version-v0.12/Plugins/gitextractor.md deleted file mode 100644 index 33393298434..00000000000 --- a/versioned_docs/version-v0.12/Plugins/gitextractor.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: "GitExtractor" -description: > - GitExtractor Plugin ---- - -## Summary -This plugin extracts commits and references from a remote or local git repository. It then saves the data into the database or csv files. - -## Steps to make this plugin work - -1. Use the Git repo extractor to retrieve data about commits and branches from your repository. -2. Use the GitHub plugin to retrieve data about Github issues and PRs from your repository. -NOTE: you can run only one issue collection stage as described in the Github Plugin README. -3. Use the [RefDiff](./refdiff.md) plugin to calculate version diff, which will be stored in `refs_commits_diffs` table. - -## Sample Request - -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "git repo extractor", - "plan": [ - [ - { - "Plugin": "gitextractor", - "Options": { - "url": "https://github.com/merico-dev/lake.git", - "repoId": "github:GithubRepo:384111310" - } - } - ] - ] -} -' -``` -- `url`: the location of the git repository. It should start with `http`/`https` for a remote git repository and with `/` for a local one. -- `repoId`: column `id` of `repos`. -- `proxy`: optional, http proxy, e.g. `http://your-proxy-server.com:1080`. -- `user`: optional, for cloning private repository using HTTP/HTTPS -- `password`: optional, for cloning private repository using HTTP/HTTPS -- `privateKey`: optional, for SSH cloning, base64 encoded `PEM` file -- `passphrase`: optional, passphrase for the private key - - -## Standalone Mode - -You call also run this plugin in a standalone mode without any DevLake service running using the following command: - -``` -go run plugins/gitextractor/main.go -url https://github.com/merico-dev/lake.git -id github:GithubRepo:384111310 -db "merico:merico@tcp(127.0.0.1:3306)/lake?charset=utf8mb4&parseTime=True" -``` - -For more options (e.g., saving to a csv file instead of a db), please read `plugins/gitextractor/main.go`. - -## Development - -This plugin depends on `libgit2`, you need to install version 1.3.0 in order to run and debug this plugin on your local -machine. [Click here](./refdiff.md#Development) for a brief guide. - -
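The optional `user`/`password` fields listed for GitExtractor are what you would use to clone a private repository over HTTP/HTTPS. A hedged sketch of such a request follows; the URL, credentials and `repoId` are placeholders, not real values.

```
# A sketch of collecting a private repository over HTTPS. All values below are
# placeholders: substitute your own repo URL, credentials and repoId.
curl --location --request POST 'localhost:8080/pipelines' \
--header 'Content-Type: application/json' \
--data-raw '
{
    "name": "private repo extractor",
    "plan": [[{
        "Plugin": "gitextractor",
        "Options": {
            "url": "https://example.com/your-org/your-private-repo.git",
            "repoId": "github:GithubRepo:123456789",
            "user": "your-username",
            "password": "your-token-or-password"
        }
    }]]
}
'
```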


diff --git a/versioned_docs/version-v0.12/Plugins/github-connection-in-config-ui.png b/versioned_docs/version-v0.12/Plugins/github-connection-in-config-ui.png deleted file mode 100644 index 5359fb1551b..00000000000 Binary files a/versioned_docs/version-v0.12/Plugins/github-connection-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.12/Plugins/github.md b/versioned_docs/version-v0.12/Plugins/github.md deleted file mode 100644 index fd804a14569..00000000000 --- a/versioned_docs/version-v0.12/Plugins/github.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "GitHub" -description: > - GitHub Plugin ---- - - - -## Summary - -This plugin gathers data from `GitHub` to display information to the user in `Grafana`. We can help tech leaders answer such questions as: - -- Is this month more productive than last? -- How fast do we respond to customer requirements? -- Was our quality improved or not? - -## Metrics - -Here are some examples metrics using `GitHub` data: -- Avg Requirement Lead Time By Assignee -- Bug Count per 1k Lines of Code -- Commit Count over Time - -## Screenshot - -![image](/img/Plugins/github-demo.png) - - -## Configuration -- Configuring GitHub via [config-ui](/UserManuals/ConfigUI/GitHub.md). - -## Sample Request -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -```json -[ - [ - { - "plugin": "github", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` - -You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "github 20211126", - "plan": [[{ - "plugin": "github", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` diff --git a/versioned_docs/version-v0.12/Plugins/gitlab-connection-in-config-ui.png b/versioned_docs/version-v0.12/Plugins/gitlab-connection-in-config-ui.png deleted file mode 100644 index 7aacee8d828..00000000000 Binary files a/versioned_docs/version-v0.12/Plugins/gitlab-connection-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.12/Plugins/gitlab.md b/versioned_docs/version-v0.12/Plugins/gitlab.md deleted file mode 100644 index d2270f6604c..00000000000 --- a/versioned_docs/version-v0.12/Plugins/gitlab.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: "GitLab" -description: > - GitLab Plugin ---- - - -## Metrics - -| Metric Name | Description | -|:----------------------------|:-------------------------------------------------------------| -| Pull Request Count | Number of Pull/Merge Requests | -| Pull Request Pass Rate | Ratio of Pull/Merge Review requests to merged | -| Pull Request Reviewer Count | Number of Pull/Merge Reviewers | -| Pull Request Review Time | Time from Pull/Merge created time until merged | -| Commit Author Count | Number of Contributors | -| Commit Count | Number of Commits | -| Added Lines | Accumulated Number of New Lines | -| Deleted Lines | Accumulated Number of Removed Lines | -| Pull Request Review Rounds | Number of cycles of commits followed by comments/final merge | - -## Configuration -Configuring GitLab via [config-ui](/UserManuals/ConfigUI/GitLab.md). 
- -## Gathering Data with GitLab - -To collect data, you can either utilize the `config-ui` or make a POST request to `/pipelines` - -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitlab 20211126", - "plan": [[{ - "plugin": "gitlab", - "options": { - "connectionId": 1, - "projectId": - } - }]] -} -' -``` - -
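Note that the `projectId` value is missing from the GitLab snippet above (it appears to have been lost in formatting). A complete request might look like the following, where `12345678` stands in as a hypothetical numeric GitLab project ID.

```
# Same request as above with a concrete (hypothetical) GitLab project ID filled in.
curl 'http://localhost:8080/pipelines' \
--header 'Content-Type: application/json' \
--data-raw '
{
    "name": "gitlab 20211126",
    "plan": [[{
        "plugin": "gitlab",
        "options": {
            "connectionId": 1,
            "projectId": 12345678
        }
    }]]
}
'
```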


diff --git a/versioned_docs/version-v0.12/Plugins/jenkins.md b/versioned_docs/version-v0.12/Plugins/jenkins.md deleted file mode 100644 index 9bb0177d0b6..00000000000 --- a/versioned_docs/version-v0.12/Plugins/jenkins.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: "Jenkins" -description: > - Jenkins Plugin ---- - -## Summary - -This plugin collects Jenkins data through [Remote Access API](https://www.jenkins.io/doc/book/using/remote-access-api/). It then computes and visualizes various DevOps metrics from the Jenkins data. - -![image](https://user-images.githubusercontent.com/61080/141943122-dcb08c35-cb68-4967-9a7c-87b63c2d6988.png) - -## Metrics - -| Metric Name | Description | -|:-------------------|:------------------------------------| -| Build Count | The number of builds created | -| Build Success Rate | The percentage of successful builds | - -## Configuration - -In order to fully use this plugin, you will need to set various configurations via Dev Lake's `config-ui`. - -### By `config-ui` - -The connection section of the configuration screen requires the following key fields to connect to the Jenkins API. - -## Collect Data From Jenkins - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -```json -[ - [ - { - "plugin": "jenkins", - "options": { - "connectionId": 1 - } - } - ] -] -``` - -## Relationship between job and build - -Build is kind of a snapshot of job. Running job each time creates a build. diff --git a/versioned_docs/version-v0.12/Plugins/jira-connection-config-ui.png b/versioned_docs/version-v0.12/Plugins/jira-connection-config-ui.png deleted file mode 100644 index df2e8e39875..00000000000 Binary files a/versioned_docs/version-v0.12/Plugins/jira-connection-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.12/Plugins/jira-more-setting-in-config-ui.png b/versioned_docs/version-v0.12/Plugins/jira-more-setting-in-config-ui.png deleted file mode 100644 index dffb0c994d2..00000000000 Binary files a/versioned_docs/version-v0.12/Plugins/jira-more-setting-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.12/Plugins/jira.md b/versioned_docs/version-v0.12/Plugins/jira.md deleted file mode 100644 index 77b065d3de8..00000000000 --- a/versioned_docs/version-v0.12/Plugins/jira.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: "Jira" -description: > - Jira Plugin ---- - - -## Summary - -This plugin collects Jira data through Jira Cloud REST API. It then computes and visualizes various engineering metrics from the Jira data. - -jira metric display - -## Project Metrics This Covers - -| Metric Name | Description | -|:------------------------------------|:--------------------------------------------------------------------------------------------------| -| Requirement Count | Number of issues with type "Requirement" | -| Requirement Lead Time | Lead time of issues with type "Requirement" | -| Requirement Delivery Rate | Ratio of delivered requirements to all requirements | -| Requirement Granularity | Number of story points associated with an issue | -| Bug Count | Number of issues with type "Bug"
bugs are found during testing | -| Bug Age | Lead time of issues with type "Bug"
both new and deleted lines count | -| Bugs Count per 1k Lines of Code | Amount of bugs per 1000 lines of code | -| Incident Count | Number of issues with type "Incident"
incidents are found when running in production | -| Incident Age | Lead time of issues with type "Incident" | -| Incident Count per 1k Lines of Code | Amount of incidents per 1000 lines of code | - -## Configuration -Configuring Jira via [config-ui](/UserManuals/ConfigUI/Jira.md). - -## Collect Data From JIRA - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -> Warning: Data collection only supports single-task execution, and the results of concurrent multi-task execution may not meet expectations. - -``` -[ - [ - { - "plugin": "jira", - "options": { - "connectionId": 1, - "boardId": 8, - "since": "2006-01-02T15:04:05Z" - } - } - ] -] -``` - -- `connectionId`: The `ID` field from **JIRA Integration** page. -- `boardId`: JIRA board id, see "Find Board Id" for details. -- `since`: optional, download data since a specified date only. - - diff --git a/versioned_docs/version-v0.12/Plugins/refdiff.md b/versioned_docs/version-v0.12/Plugins/refdiff.md deleted file mode 100644 index a177281eca9..00000000000 --- a/versioned_docs/version-v0.12/Plugins/refdiff.md +++ /dev/null @@ -1,142 +0,0 @@ ---- -title: "RefDiff" -description: > - RefDiff Plugin ---- - - -## Summary - -For development workload analysis, we often need to know how many commits have been created between 2 releases. This plugin calculates which commits differ between 2 Ref (branch/tag), and the result will be stored back into database for further analysis. - -## Important Note - -You need to run gitextractor before the refdiff plugin. The gitextractor plugin should create records in the `refs` table in your DB before this plugin can be run. - -## Configuration - -This is a enrichment plugin based on Domain Layer data, no configuration needed - -## How to use - -In order to trigger the enrichment, you need to insert a new task into your pipeline. - -1. Make sure `commits` and `refs` are collected into your database, `refs` table should contain records like following: -``` -id ref_type -github:GithubRepo:384111310:refs/tags/0.3.5 TAG -github:GithubRepo:384111310:refs/tags/0.3.6 TAG -github:GithubRepo:384111310:refs/tags/0.5.0 TAG -github:GithubRepo:384111310:refs/tags/v0.0.1 TAG -github:GithubRepo:384111310:refs/tags/v0.2.0 TAG -github:GithubRepo:384111310:refs/tags/v0.3.0 TAG -github:GithubRepo:384111310:refs/tags/v0.4.0 TAG -github:GithubRepo:384111310:refs/tags/v0.6.0 TAG -github:GithubRepo:384111310:refs/tags/v0.6.1 TAG -``` -2. If you want to run calculateIssuesDiff, please configure GITHUB_PR_BODY_CLOSE_PATTERN in .env, you can check the example in .env.example(we have a default value, please make sure your pattern is disclosed by single quotes '') -3. If you want to run calculatePrCherryPick, please configure GITHUB_PR_TITLE_PATTERN in .env, you can check the example in .env.example(we have a default value, please make sure your pattern is disclosed by single quotes '') -4. 
And then, trigger a pipeline like following, you can also define sub tasks, calculateRefDiff will calculate commits between two ref, and creatRefBugStats will create a table to show bug list between two ref: -``` -curl -v -XPOST http://localhost:8080/pipelines --data @- <<'JSON' -{ - "name": "test-refdiff", - "plan": [ - [ - { - "plugin": "refdiff", - "options": { - "repoId": "github:GithubRepo:384111310", - "pairs": [ - { "newRef": "refs/tags/v0.6.0", "oldRef": "refs/tags/0.5.0" }, - { "newRef": "refs/tags/0.5.0", "oldRef": "refs/tags/0.4.0" } - ], - "tasks": [ - "calculateCommitsDiff", - "calculateIssuesDiff", - "calculatePrCherryPick", - ] - } - } - ] - ] -} -JSON -``` -Or if you prefered calculating latest releases -``` -curl -v -XPOST http://localhost:8080/pipelines --data @- <<'JSON' -{ - "name": "test-refdiff", - "plan": [ - [ - { - "plugin": "refdiff", - "options": { - "repoId": "github:GithubRepo:384111310", - "tagsPattern": "v\d+\.\d+.\d+", - "tagsLimit": 10, - "tagsOrder": "reverse semver", - "tasks": [ - "calculateCommitsDiff", - "calculateIssuesDiff", - "calculatePrCherryPick", - ] - } - } - ] - ] -} -JSON -``` - -## Development - -This plugin depends on `libgit2`, you need to install version 1.3.0 in order to run and debug this plugin on your local -machine. - -### Ubuntu - -``` -apt install cmake -git clone https://github.com/libgit2/libgit2.git -cd libgit2 -git checkout v1.3.0 -mkdir build -cd build -cmake .. -make -make install -``` - -### MacOS -1. [MacPorts](https://guide.macports.org/#introduction) install -``` -port install libgit2@1.3.0 -``` -2. Source install -``` -brew install cmake -git clone https://github.com/libgit2/libgit2.git -cd libgit2 -git checkout v1.3.0 -mkdir build -cd build -cmake .. -make -make install -``` - -#### Troubleshooting (MacOS) - -> Q: I got an error saying: `pkg-config: exec: "pkg-config": executable file not found in $PATH` - -> A: -> 1. Make sure you have pkg-config installed: -> -> `brew install pkg-config` -> -> 2. Make sure your pkg config path covers the installation: -> `export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/lib:/usr/local/lib/pkgconfig` - -
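A note on the refdiff payloads above: strict JSON parsers will reject the trailing comma after `"calculatePrCherryPick"` and the unescaped `\d` in `tagsPattern`. A cleaned-up version of the tag-based request might look like this — same repo and options, with the backslashes escaped for valid JSON.

```
# A cleaned-up sketch of the tag-based refdiff request: no trailing comma in
# "tasks", and the regex backslashes escaped so the payload parses as JSON.
curl -v -XPOST http://localhost:8080/pipelines --data @- <<'JSON'
{
    "name": "test-refdiff",
    "plan": [
        [
            {
                "plugin": "refdiff",
                "options": {
                    "repoId": "github:GithubRepo:384111310",
                    "tagsPattern": "v\\d+\\.\\d+\\.\\d+",
                    "tagsLimit": 10,
                    "tagsOrder": "reverse semver",
                    "tasks": [
                        "calculateCommitsDiff",
                        "calculateIssuesDiff",
                        "calculatePrCherryPick"
                    ]
                }
            }
        ]
    ]
}
JSON
```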


diff --git a/versioned_docs/version-v0.12/Plugins/tapd.md b/versioned_docs/version-v0.12/Plugins/tapd.md deleted file mode 100644 index b8db89fca87..00000000000 --- a/versioned_docs/version-v0.12/Plugins/tapd.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: "TAPD" -description: > - TAPD Plugin ---- - -## Summary - -This plugin collects TAPD data. - -This plugin is in development so you can't modify settings in config-ui. - -## Configuration - -In order to fully use this plugin, you will need to get endpoint/basic_auth_encoded/rate_limit and insert it into table `_tool_tapd_connections`. - diff --git a/versioned_docs/version-v0.12/QuickStart/DockerComposeSetup.md b/versioned_docs/version-v0.12/QuickStart/DockerComposeSetup.md deleted file mode 100644 index 465d92e3223..00000000000 --- a/versioned_docs/version-v0.12/QuickStart/DockerComposeSetup.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: "Install via Docker Compose" -description: > - The steps to install DevLake via Docker Compose -sidebar_position: 1 ---- - - -## Prerequisites - -- [Docker v19.03.10+](https://docs.docker.com/get-docker) -- [docker-compose v2.2.3+](https://docs.docker.com/compose/install/) - -## Launch DevLake - -- Commands written `like this` are to be run in your terminal. - -1. Download `docker-compose.yml` and `env.example` from [latest release page](https://github.com/apache/incubator-devlake/releases/latest) into a folder. -2. Rename `env.example` to `.env`. For Mac/Linux users, please run `mv env.example .env` in the terminal. -3. Run `docker-compose up -d` to launch DevLake. - -## Configure and collect data - -1. Visit `config-ui` at `http://localhost:4000` in your browser to configure and collect data. - - Please follow the [turorial](UserManuals/ConfigUI/Tutorial.md) - - `devlake` takes a while to fully boot up. if `config-ui` complaining about api being unreachable, please wait a few seconds and try refreshing the page. -2. Click *View Dashboards* button in the top left when done, or visit `localhost:3002` (username: `admin`, password: `admin`). - - We use [Grafana](https://grafana.com/) as a visualization tool to build charts for the [data](../DataModels/DataSupport.md) stored in our database. - - Using SQL queries, we can add panels to build, save, and edit customized dashboards. - - All the details on provisioning and customizing a dashboard can be found in the [Grafana Doc](../UserManuals/Dashboards/GrafanaUserGuide.md). - - -## Upgrade to a newer version - -Support for database schema migration was introduced to DevLake in v0.10.0. From v0.10.0 onwards, users can upgrade their instance smoothly to a newer version. However, versions prior to v0.10.0 do not support upgrading to a newer version with a different database schema. We recommend users to deploy a new instance if needed. - -
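The upgrade section above does not spell out the commands. For a Docker Compose deployment, the flow is usually along these lines — a sketch, assuming you first point `docker-compose.yml` at the newer image tags (or download the new release's compose file) and keep your existing `.env`.

```
# A rough upgrade sketch for Docker Compose: after updating the image tags in
# docker-compose.yml (keeping your existing .env), pull and recreate containers.
docker-compose pull
docker-compose up -d
```

As noted above, the schema-migration support introduced in v0.10.0 is what makes this kind of in-place upgrade possible; older instances should be redeployed instead.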
diff --git a/versioned_docs/version-v0.12/QuickStart/HelmSetup.md b/versioned_docs/version-v0.12/QuickStart/HelmSetup.md deleted file mode 100644 index 0173ea5d6d9..00000000000 --- a/versioned_docs/version-v0.12/QuickStart/HelmSetup.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: "Install via Helm" -description: > - The steps to install Apache DevLake via Helm for Kubernetes -sidebar_position: 2 ---- - -## Prerequisites - -- Helm >= 3.6.0 -- Kubernetes >= 1.19.0 - - -## Quick Install - -clone the code, and enter the deployment/helm folder. -``` -helm install devlake . --set service.grafanaEndpoint=http://YOUR-NODE-IP:32000 -``` - -And visit your devlake from the node port (32001 by default). - -http://YOUR-NODE-IP:32001 - - -## Some example deployments - -### Deploy with NodePort - -Conditions: - - IP Address of Kubernetes node: 192.168.0.6 - - Want to visit devlake with port 30000, and grafana at port 30001 - -``` -helm install devlake . --set "service.uiPort=30000,service.grafanaPort=30001,service.grafanaEndpoint=http://192.168.0.6:30001" -``` - -After deployed, visit devlake: http://192.168.0.6:30000 - -### Deploy with Ingress - -Conditions: - - I have already configured default ingress for the Kubernetes cluster - - I want to use http://devlake.example.com for visiting devlake - -``` -helm install devlake . --set "ingress.enabled=true,ingress.hostname=devlake.example.com" -``` - -After deployed, visit devlake: http://devlake.example.com, and grafana at http://devlake.example.com/grafana - -### Deploy with Ingress (Https) - -Conditions: - - I have already configured ingress(class: nginx) for the Kubernetes cluster, and the https using 8443 port. - - I want to use https://devlake-0.example.com:8443 for visiting devlake. - - The https certificates are generated by letsencrypt.org, and the certificate and key files: `cert.pem` and `key.pem` - -First, create the secret: -``` -kubectl create secret tls ssl-certificate --cert cert.pem --key secret.pem -``` - -Then, deploy the devlake: -``` -helm install devlake . 
\ - --set "ingress.enabled=true,ingress.enableHttps=true,ingress.hostname=devlake-0.example.com" \ - --set "ingress.className=nginx,ingress.httpsPort=8443" \ - --set "ingress.tlsSecretName=ssl-certificate" -``` - -After deployed, visit devlake: https://devlake-0.example.com:8443, and grafana at https://devlake-0.example.com:8443/grafana - - -## Parameters - -Some useful parameters for the chart, you could also check them in values.yaml - -| Parameter | Description | Default | -|-----------|-------------|---------| -| replicaCount | Replica Count for devlake, currently not used | 1 | -| mysql.useExternal | If use external mysql server, currently not used | false | -| mysql.externalServer | External mysql server address | 127.0.0.1 | -| mysql.externalPort | External mysql server port | 3306 | -| mysql.username | username for mysql | merico | -| mysql.password | password for mysql | merico | -| mysql.database | database for mysql | lake | -| mysql.rootPassword | root password for mysql | admin | -| mysql.storage.class | storage class for mysql's volume | "" | -| mysql.storage.size | volume size for mysql's data | 5Gi | -| mysql.image.repository | repository for mysql's image | mysql | -| mysql.image.tag | image tag for mysql's image | 8.0.26 | -| mysql.image.pullPolicy | pullPolicy for mysql's image | IfNotPresent | -| grafana.image.repository | repository for grafana's image | mericodev/grafana | -| grafana.image.tag | image tag for grafana's image | latest | -| grafana.image.pullPolicy | pullPolicy for grafana's image | Always | -| lake.storage.class | storage class for lake's volume | "" | -| lake.storage.size | volume size for lake's data | 100Mi | -| lake.image.repository | repository for lake's image | mericodev/lake | -| lake.image.tag | image tag for lake's image | latest | -| lake.image.pullPolicy | pullPolicy for lake's image | Always | -| ui.image.repository | repository for ui's image | mericodev/config-ui | -| ui.image.tag | image tag for ui's image | latest | -| ui.image.pullPolicy | pullPolicy for ui's image | Always | -| service.type | Service type for exposed service | NodePort | -| service.grafanaPort | Service port for grafana | 32000 | -| service.uiPort | Service port for config ui | 32001 | -| service.grafanaEndpoint | The external grafana endpoint, used when ingress not configured | http://127.0.0.1:32000 | -| service.ingress.enabled | If enable ingress | false | -| service.ingress.enableHttps | If enable https | false | -| service.ingress.className | The class name for ingressClass. 
If leave empty, the default IngressClass will be used | "" | -| service.ingress.hostname | The hostname/domainname for ingress | localhost | -| service.ingress.prefix | The prefix for endpoints, currently not supported due to devlake's implementation | / | -| service.ingress.tlsSecretName | The secret name for tls's certificate, required when https enabled | "" | -| service.ingress.httpPort | The http port for ingress | 80 | -| service.ingress.httpsPort | The https port for ingress | 443 | -| option.localtime | The hostpath for mount as /etc/localtime | /etc/localtime | diff --git a/versioned_docs/version-v0.12/QuickStart/KubernetesSetup.md b/versioned_docs/version-v0.12/QuickStart/KubernetesSetup.md deleted file mode 100644 index 9efd5bced71..00000000000 --- a/versioned_docs/version-v0.12/QuickStart/KubernetesSetup.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: "Install via Kubernetes" -description: > - The steps to install Apache DevLake via Kubernetes -sidebar_position: 3 ---- - -:::caution - -We highly recommend the [helm approach](./HelmSetup.md), this page is for Advanced Installation only - -::: - -We provide a sample [k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) to help deploy DevLake to Kubernetes - -[k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) will create a namespace `devlake` on your k8s cluster, and use `nodePort 30004` for `config-ui`, `nodePort 30002` for `grafana` dashboards. If you would like to use a specific version of Apache DevLake, please update the image tag of `grafana`, `devlake` and `config-ui` deployments. - -## Step-by-step guide - -1. Download [k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) -2. Customize the settings (`devlake-config` config map): - - Settings shared between `grafana` and `mysql` - * `MYSQL_ROOT_PASSWORD`: set root password for `mysql` - * `MYSQL_USER`: shared between `mysql` and `grafana` - * `MYSQL_PASSWORD`: shared between `mysql` and `grafana` - * `MYSQL_DATABASE`: shared between `mysql` and `grafana` - - Settings used by `grafana` - * `MYSQL_URL`: set MySQL URL for `grafana` in `$HOST:$PORT` format - * `GF_SERVER_ROOT_URL`: Public URL to the `grafana` - - Settings used by `config-ui`: - * `GRAFANA_ENDPOINT`: FQDN of grafana which can be reached within k8s cluster, normally you don't need to change it unless namespace was changed - * `DEVLAKE_ENDPOINT`: FQDN of devlake which can be reached within k8s cluster, normally you don't need to change it unless namespace was changed - * `ADMIN_USER`/`ADMIN_PASS`: Not required, but highly recommended - - Settings used by `devlake`: - * `DB_URL`: update this value if `MYSQL_USER`, `MYSQL_PASSWORD` or `MYSQL_DATABASE` were changed -3. The `devlake` deployment store its configuration in `/app/.env`. In our sample yaml, we use `hostPath` volume, so please make sure directory `/var/lib/devlake` exists on your k8s workers, or employ other techniques to persist `/app/.env` file. Please do NOT mount the entire `/app` directory, because plugins are located in `/app/bin` folder. -4. Finally, execute the following command and DevLake should be up and running: - ```sh - kubectl apply -f k8s-deploy.yaml - ``` - - -## FAQ - -1. Can I use a managed Cloud database service instead of running database in k8s? - Yes, it only takes a few changes in the sample yaml file to make it happen. 
Below we'll use MySQL on AWS RDS as an example. - 1. (Optional) Create a MySQL instance on AWS RDS following this [doc](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_GettingStarted.CreatingConnecting.MySQL.html), skip this step if you'd like to use an existing instance - 2. Remove the `mysql` deployment and service sections from `k8s-deploy.yaml` - 3. Update `devlake-config` configmap according to your RDS instance setup: - * `MYSQL_ROOT_PASSWORD`: remove this line - * `MYSQL_USER`: use your RDS instance's master username - * `MYSQL_PASSWORD`: use your RDS instance's password - * `MYSQL_DATABASE`: use your RDS instance's DB name, you may need to create a database first with `CREATE DATABASE ;` - * `MYSQL_URL`: set this for `grafana` in `$HOST:$PORT` format, where $HOST and $PORT should be your RDS instance's endpoint and port respectively - * `DB_URL`: update the connection string with your RDS instance's info for `devlake` diff --git a/versioned_docs/version-v0.12/QuickStart/TemporalSetup.md b/versioned_docs/version-v0.12/QuickStart/TemporalSetup.md deleted file mode 100644 index c5b91c61e71..00000000000 --- a/versioned_docs/version-v0.12/QuickStart/TemporalSetup.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "Install via Temporal" -sidebar_position: 6 -description: > - The steps to install DevLake in Temporal mode. ---- - - -Normally, DevLake would execute pipelines on a local machine (we call it `local mode`), it is sufficient most of the time. However, when you have too many pipelines that need to be executed in parallel, it can be problematic, as the horsepower and throughput of a single machine is limited. - -`temporal mode` was added to support distributed pipeline execution, you can fire up arbitrary workers on multiple machines to carry out those pipelines in parallel to overcome the limitations of a single machine. - -But, be careful, many API services like JIRA/GITHUB have a request rate limit mechanism. Collecting data in parallel against the same API service with the same identity would most likely hit such limit. - -## How it works - -1. DevLake Server and Workers connect to the same temporal server by setting up `TEMPORAL_URL` -2. DevLake Server sends a `pipeline` to the temporal server, and one of the Workers pick it up and execute it - - -**IMPORTANT: This feature is in early stage of development. Please use with caution** - - -## Temporal Demo - -### Requirements - -- [Docker](https://docs.docker.com/get-docker) -- [docker-compose](https://docs.docker.com/compose/install/) -- [temporalio](https://temporal.io/) - -### How to setup - -1. Clone and fire up [temporalio](https://temporal.io/) services -2. 
Clone this repo, and fire up DevLake with command `docker-compose -f docker-compose-temporal.yml up -d` \ No newline at end of file diff --git a/versioned_docs/version-v0.12/QuickStart/_category_.json b/versioned_docs/version-v0.12/QuickStart/_category_.json deleted file mode 100644 index 877a378f708..00000000000 --- a/versioned_docs/version-v0.12/QuickStart/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Getting Started", - "position": 2 -} diff --git a/versioned_docs/version-v0.12/UserManuals/ConfigUI/AdvancedMode.md b/versioned_docs/version-v0.12/UserManuals/ConfigUI/AdvancedMode.md deleted file mode 100644 index e22a0f65cb5..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/ConfigUI/AdvancedMode.md +++ /dev/null @@ -1,91 +0,0 @@ ---- -title: "Using Advanced Mode" -sidebar_position: 6 -description: > - Using the advanced mode of Config-UI ---- - - -## Why advanced mode? - -Advanced mode allows users to create any pipeline by writing JSON. This is useful for users who want to: - -1. Collect multiple GitHub/GitLab repos or Jira projects within a single pipeline -2. Have fine-grained control over what entities to collect or what subtasks to run for each plugin -3. Orchestrate a complex pipeline that consists of multiple stages of plugins. - -Advanced mode gives the most flexibility to users by exposing the JSON API. - -## How to use advanced mode to create pipelines? - -1. Click on "+ New Blueprint" on the Blueprint page. - -![image](/img/AdvancedMode/AdvancedMode1.png) - -2. In step 1, click on the "Advanced Mode" link. - -![image](/img/AdvancedMode/AdvancedMode2.png) - -3. The pipeline editor expects a 2D array of plugins. The first dimension represents different stages of the pipeline and the second dimension describes the plugins in each stage. Stages run in sequential order and plugins within the same stage runs in parallel. We provide some templates for users to get started. Please also see the next section for some examples. - -![image](/img/AdvancedMode/AdvancedMode3.png) - -4. You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. After setting up the Blueprint, you will be prompted to the Blueprint's activity detail page, where you can track the progress of the current run and wait for it to finish before the dashboards become available. You can also view all historical runs of previously created Blueprints from the list on the Blueprint page. - -## Examples - -1. Collect multiple GitLab repos sequentially. - ->When there're multiple collection tasks against a single data source, we recommend running these tasks sequentially since the collection speed is mostly limited by the API rate limit of the data source. ->Running multiple tasks against the same data source is unlikely to speed up the process and may overwhelm the data source. - - -Below is an example for collecting 2 GitLab repos sequentially. It has 2 stages, each contains a GitLab task. - - -``` -[ - [ - { - "Plugin": "gitlab", - "Options": { - "projectId": 15238074 - } - } - ], - [ - { - "Plugin": "gitlab", - "Options": { - "projectId": 11624398 - } - } - ] -] -``` - - -2. Collect a GitHub repo and a Jira board in parallel - -Below is an example for collecting a GitHub repo and a Jira board in parallel. It has a single stage with a GitHub task and a Jira task. 
Since users can configure multiple Jira connection, it's required to pass in a `connectionId` for Jira task to specify which connection to use. - -``` -[ - [ - { - "Plugin": "github", - "Options": { - "repo": "lake", - "owner": "merico-dev" - } - }, - { - "Plugin": "jira", - "Options": { - "connectionId": 1, - "boardId": 76 - } - } - ] -] -``` diff --git a/versioned_docs/version-v0.12/UserManuals/ConfigUI/GitHub.md b/versioned_docs/version-v0.12/UserManuals/ConfigUI/GitHub.md deleted file mode 100644 index 15b5eb7addf..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/ConfigUI/GitHub.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: "Configuring GitHub" -sidebar_position: 2 -description: Config UI instruction for GitHub ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![github-add-data-connections](/img/ConfigUI/github-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint, eg. `https://api.github.com/`. The url should end with `/`. - -#### Auth Token(s) -GitHub personal access tokens are required to add a connection. -- Learn about [how to create a GitHub personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) -- The data collection speed is relatively slow for GitHub since they have a **rate limit of [5,000 requests](https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting) per hour** (15,000 requests/hour if you pay for GitHub enterprise). You can accelerate the process by configuring _multiple_ personal access tokens. Please note that multiple tokens should be created by different GitHub accounts. Tokens belonging to the same GitHub account share the rate limit. - -#### Proxy URL (Optional) -If you are behind a corporate firewall or VPN you may need to utilize a proxy server. Enter a valid proxy server address on your network, e.g. `http://your-proxy-server.com:1080` - -#### Test and Save Connection -Click `Test Connection`, if the connection is successful, click `Save Connection` to add the connection. - - -### Step 2 - Setting Data Scope -![github-set-data-scope](/img/ConfigUI/github-set-data-scope.png) - -#### Projects -Enter the GitHub repos to collect. If you want to collect more than 1 repo, please separate repos with comma. For example, "apache/incubator-devlake,apache/incubator-devlake-website". - -#### Data Entities -Usually, you don't have to modify this part. However, if you don't want to collect certain GitHub entities, you can unselect some entities to accerlerate the collection speed. -- Issue Tracking: GitHub issues, issue comments, issue labels, etc. -- Source Code Management: GitHub repos, refs, commits, etc. -- Code Review: GitHub PRs, PR comments and reviews, etc. -- Cross Domain: GitHub accounts, etc. - -### Step 3 - Adding Transformation Rules (Optional) -![github-add-transformation-rules-list](/img/ConfigUI/github-add-transformation-rules-list.png) -![github-add-transformation-rules](/img/ConfigUI/github-add-transformation-rules.png) - -Without adding transformation rules, you can still view the "[GitHub Metrics](/livedemo/DataSources/GitHub)" dashboard. However, if you want to view "[Weekly Bug Retro](/livedemo/QAEngineers/WeeklyBugRetro)", "[Weekly Community Retro](/livedemo/OSSMaintainers/WeeklyCommunityRetro)" or other pre-built dashboards, the following transformation rules, especially "Type/Bug", should be added.
- -Each GitHub repo has at most ONE set of transformation rules. - -#### Issue Tracking - -- Severity: Parse the value of `severity` from issue labels. - - when your issue labels for severity level are like 'severity/p0', 'severity/p1', 'severity/p2', then input 'severity/(.*)$' - - when your issue labels for severity level are like 'p0', 'p1', 'p2', then input '(p0|p1|p2)$' - -- Component: Same as "Severity". - -- Priority: Same as "Severity". - -- Type/Requirement: The `type` of issues with labels that match given regular expression will be set to "REQUIREMENT". Unlike "PR.type", submatch does nothing, because for issue management analysis, users tend to focus on 3 kinds of types (Requirement/Bug/Incident), however, the concrete naming varies from repo to repo, time to time, so we decided to standardize them to help analysts metrics. - -- Type/Bug: Same as "Type/Requirement", with `type` setting to "BUG". - -- Type/Incident: Same as "Type/Requirement", with `type` setting to "INCIDENT". - -#### Code Review - -- Type: The `type` of pull requests will be parsed from PR labels by given regular expression. For example: - - when your labels for PR types are like 'type/feature-development', 'type/bug-fixing' and 'type/docs', please input 'type/(.*)$' - - when your labels for PR types are like 'feature-development', 'bug-fixing' and 'docs', please input '(feature-development|bug-fixing|docs)$' - -- Component: The `component` of pull requests will be parsed from PR labels by given regular expression. - -#### Additional Settings (Optional) - -- Tags Limit: It'll compare the last N pairs of tags to get the "commit diff', "issue diff" between tags. N defaults to 10. - - commit diff: new commits for a tag relative to the previous one - - issue diff: issues solved by the new commits for a tag relative to the previous one - -- Tags Pattern: Only tags that meet given regular expression will be counted. - -- Tags Order: Only "reverse semver" order is supported for now. - -Please click `Save` to save the transformation rules for the repo. In the data scope list, click `Next Step` to continue configuring. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. diff --git a/versioned_docs/version-v0.12/UserManuals/ConfigUI/GitLab.md b/versioned_docs/version-v0.12/UserManuals/ConfigUI/GitLab.md deleted file mode 100644 index 74c9e41f107..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/ConfigUI/GitLab.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: "Configuring GitLab" -sidebar_position: 3 -description: Config UI instruction for GitLab ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![gitlab-add-data-connections](/img/ConfigUI/gitlab-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint. - - If you are using gitlab.com, the endpoint will be `https://gitlab.com/api/v4/` - - If you are self-hosting GitLab, the endpoint will look like `https://gitlab.example.com/api/v4/` -The endpoint url should end with `/`. - -#### Auth Token(s) -GitLab personal access tokens are required to add a connection. Learn about [how to create a GitLab personal access token](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html). - - -#### Proxy URL (Optional) -If you are behind a corporate firewall or VPN you may need to utilize a proxy server. 
Enter a valid proxy server address on your network, e.g. `http://your-proxy-server.com:1080` - -#### Test and Save Connection -Click `Test Connection`; if the connection is successful, click `Save Connection` to add the connection. - - -### Step 2 - Setting Data Scope - -#### Projects -Enter the GitLab repos to collect. How to get `GitLab` repos? -- Visit the repository page on GitLab -- Find the project id below the title - -![Get GitLab projects](https://user-images.githubusercontent.com/3789273/128568416-a47b2763-51d8-4a6a-8a8b-396512bffb03.png) - -If you want to collect more than one repo, please separate the project ids with commas, e.g. "15238074,11624398". - -#### Data Entities -Usually, you don't have to modify this part. However, if you don't want to collect certain GitLab entities, you can unselect some entities to accelerate the collection speed. -- Issue Tracking: GitLab issues, issue comments, issue labels, etc. -- Source Code Management: GitLab repos, refs, commits, etc. -- Code Review: GitLab MRs, MR comments and reviews, etc. -- Cross Domain: GitLab accounts, etc. - -### Step 3 - Adding Transformation Rules (Optional) -There are no transformation rules for GitLab repos. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or entering a cron code to specify your preferred schedule. diff --git a/versioned_docs/version-v0.12/UserManuals/ConfigUI/Jenkins.md b/versioned_docs/version-v0.12/UserManuals/ConfigUI/Jenkins.md deleted file mode 100644 index 07d1ed2952a..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/ConfigUI/Jenkins.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "Configuring Jenkins" -sidebar_position: 5 -description: Config UI instruction for Jenkins ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![jenkins-add-data-connections](/img/ConfigUI/jenkins-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint. Eg. `https://ci.jenkins.io/`. The endpoint url should end with `/`. - -#### Username (E-mail) -Your User ID for the Jenkins instance. - -#### Password -For help on Username and Password, please see Jenkins docs on [using credentials](https://www.jenkins.io/doc/book/using/using-credentials/). You can also use an "API Access Token" for this field, which can be generated in the `User` -> `Configure` -> `API Token` section on Jenkins. - -#### Test and Save Connection -Click `Test Connection`; if the connection is successful, click `Save Connection` to add the connection. - -### Step 2 - Setting Data Scope -There is no data scope setting for Jenkins. - -### Step 3 - Adding Transformation Rules (Optional) -There are no transformation rules for Jenkins. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or entering a cron code to specify your preferred schedule. diff --git a/versioned_docs/version-v0.12/UserManuals/ConfigUI/Jira.md b/versioned_docs/version-v0.12/UserManuals/ConfigUI/Jira.md deleted file mode 100644 index 952ecddea6a..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/ConfigUI/Jira.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "Configuring Jira" -sidebar_position: 4 -description: Config UI instruction for Jira ---- - -Visit config-ui: `http://localhost:4000`. 
-### Step 1 - Add Data Connections -![jira-add-data-connections](/img/ConfigUI/jira-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint. - - If you are using Jira Cloud, the endpoint will be `https://.atlassian.net/rest/` - - If you are self-hosting Jira v8+, the endpoint will look like `https://jira..com/rest/` -The endpoint url should end with `/`. - -#### Username / Email -Input the username or email of your Jira account. - - -#### Password -- If you are using Jira Cloud, please input the [Jira personal access token](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html). -- If you are using Jira Server v8+, please input the password of your Jira account. - -#### Proxy URL (Optional) -If you are behind a corporate firewall or VPN you may need to utilize a proxy server. Enter a valid proxy server address on your network, e.g. `http://your-proxy-server.com:1080` - -#### Test and Save Connection -Click `Test Connection`; if the connection is successful, click `Save Connection` to add the connection. - - -### Step 2 - Setting Data Scope -![jira-set-data-scope](/img/ConfigUI/jira-set-data-scope.png) - -#### Projects -Choose the Jira boards to collect. - -#### Data Entities -Usually, you don't have to modify this part. However, if you don't want to collect certain Jira entities, you can unselect some entities to accelerate the collection speed. -- Issue Tracking: Jira issues, issue comments, issue labels, etc. -- Cross Domain: Jira accounts, etc. - -### Step 3 - Adding Transformation Rules (Optional) -![jira-add-transformation-rules-list](/img/ConfigUI/jira-add-transformation-rules-list.png) - -Without adding transformation rules, you cannot view all charts in "Jira" or "Engineering Throughput and Cycle Time" dashboards.
- -Each Jira board has at most ONE set of transformation rules. - -![jira-add-transformation-rules](/img/ConfigUI/jira-add-transformation-rules.png) - -#### Issue Tracking - -- Requirement: choose the issue types to be transformed to "REQUIREMENT". -- Bug: choose the issue types to be transformed to "BUG". -- Incident: choose the issue types to be transformed to "INCIDENT". -- Epic Key: choose the custom field that represents Epic key. In most cases, it is "Epic Link". -- Story Point: choose the custom field that represents story points. In most cases, it is "Story Points". - -#### Additional Settings -- Remotelink Commit SHA: parse the commits from an issue's remote links by the given regular expression so that the relationship between `issues` and `commits` can be created. You can directly use the regular expression `/commit/([0-9a-f]{40})$`. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or entering a cron code to specify your preferred schedule. diff --git a/versioned_docs/version-v0.12/UserManuals/ConfigUI/Tutorial.md b/versioned_docs/version-v0.12/UserManuals/ConfigUI/Tutorial.md deleted file mode 100644 index f31698c9502..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/ConfigUI/Tutorial.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: "Tutorial" -sidebar_position: 1 -description: Config UI instruction ---- - -## Overview -The Apache DevLake Config UI allows you to configure the data you wish to collect through a graphical user interface. Visit config-ui at `http://localhost:4000`. - -## Creating a Blueprint - -### Introduction -A Blueprint is the plan that covers all the work to get your raw data ready for query and metric computation in the dashboards. We have designed the Blueprint to help you with data collection within a single workflow. Creating a Blueprint consists of four steps: - -1. Adding Data Connections: Add new or select from existing data connections for the data you wish to collect -2. Setting Data Scope: Select the scope of data (e.g. GitHub projects or Jira boards) for your data connections -3. Adding Transformation (Optional): Add transformation rules for the data scope you have selected in order to view corresponding metrics -4. Setting Sync Frequency: Set up a schedule for how often you wish your data to be synced - -### Step 1 - Adding Data Connections -There are two ways to add data connections to your Blueprint: adding them during the creation of a Blueprint and adding them separately on the Data Integrations page. There is no difference between these two ways. - -When adding data connections from the Blueprint, you can either create a new data connection or select from existing ones. - -### Step 2 - Setting Data Scope -After adding data connections, click on "Next Step" and you will be prompted to select the data scope of each data connection. For instance, for a GitHub connection, you will need to enter the projects you wish to sync, and for Jira, you will need to select the boards. - -### Step 3 - Adding Transformation (Optional) -This step is only required for viewing certain metrics in the pre-built dashboards that require data transformation. Without adding transformation rules, you can still view the basic metrics. - -Currently, DevLake only supports transformation for GitHub and Jira connections. 
- - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. - -After setting up the Blueprint, you will be prompted to the Blueprint's activity detail page, where you can track the progress of the current run and wait for it to finish before the dashboards become available. You can also view all historical runs of previously created Blueprints from the list on the Blueprint page. - -## Editing a Blueprint (Coming in v0.13) - -## Creating and Managing Data Connections -The Data Connections page allows you to view, create and manage all your data connections at one place. diff --git a/versioned_docs/version-v0.12/UserManuals/ConfigUI/_category_.json b/versioned_docs/version-v0.12/UserManuals/ConfigUI/_category_.json deleted file mode 100644 index 62f99d484f6..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/ConfigUI/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Config UI", - "position": 4 -} diff --git a/versioned_docs/version-v0.12/UserManuals/Dashboards/GrafanaUserGuide.md b/versioned_docs/version-v0.12/UserManuals/Dashboards/GrafanaUserGuide.md deleted file mode 100644 index 41a8e37f78f..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/Dashboards/GrafanaUserGuide.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -title: "Grafana User Guide" -sidebar_position: 2 -description: > - Grafana User Guide ---- - - -# Grafana - - - -When first visiting Grafana, you will be provided with a sample dashboard with some basic charts setup from the database. - -## Contents - -Section | Link -:------------ | :------------- -Logging In | [View Section](#logging-in) -Viewing All Dashboards | [View Section](#viewing-all-dashboards) -Customizing a Dashboard | [View Section](#customizing-a-dashboard) -Dashboard Settings | [View Section](#dashboard-settings) -Provisioning a Dashboard | [View Section](#provisioning-a-dashboard) -Troubleshooting DB Connection | [View Section](#troubleshooting-db-connection) - -## Logging In - -Once the app is up and running, visit `http://localhost:3002` to view the Grafana dashboard. - -Default login credentials are: - -- Username: `admin` -- Password: `admin` - -## Viewing All Dashboards - -To see all dashboards created in Grafana visit `/dashboards` - -Or, use the sidebar and click on **Manage**: - -![Screen Shot 2021-08-06 at 11 27 08 AM](https://user-images.githubusercontent.com/3789273/128534617-1992c080-9385-49d5-b30f-be5c96d5142a.png) - - -## Customizing a Dashboard - -When viewing a dashboard, click the top bar of a panel, and go to **edit** - -![Screen Shot 2021-08-06 at 11 35 36 AM](https://user-images.githubusercontent.com/3789273/128535505-a56162e0-72ad-46ac-8a94-70f1c7a910ed.png) - -**Edit Dashboard Panel Page:** - -![grafana-sections](https://user-images.githubusercontent.com/3789273/128540136-ba36ee2f-a544-4558-8282-84a7cb9df27a.png) - -### 1. Preview Area -- **Top Left** is the variable select area (custom dashboard variables, used for switching projects, or grouping data) -- **Top Right** we have a toolbar with some buttons related to the display of the data: - - View data results in a table - - Time range selector - - Refresh data button -- **The Main Area** will display the chart and should update in real time - -> Note: Data should refresh automatically, but may require a refresh using the button in some cases - -### 2. 
Query Builder -Here we form the SQL query to pull data into our chart, from our database -- Ensure the **Data Source** is the correct database - - ![Screen Shot 2021-08-06 at 10 14 22 AM](https://user-images.githubusercontent.com/3789273/128545278-be4846e0-852d-4bc8-8994-e99b79831d8c.png) - -- Select **Format as Table**, and **Edit SQL** buttons to write/edit queries as SQL - - ![Screen Shot 2021-08-06 at 10 17 52 AM](https://user-images.githubusercontent.com/3789273/128545197-a9ff9cb3-f12d-4331-bf6a-39035043667a.png) - -- The **Main Area** is where the queries are written, and in the top right is the **Query Inspector** button (to inspect returned data) - - ![Screen Shot 2021-08-06 at 10 18 23 AM](https://user-images.githubusercontent.com/3789273/128545557-ead5312a-e835-4c59-b9ca-dd5c08f2a38b.png) - -### 3. Main Panel Toolbar -In the top right of the window are buttons for: -- Dashboard settings (regarding entire dashboard) -- Save/apply changes (to specific panel) - -### 4. Grafana Parameter Sidebar -- Change chart style (bar/line/pie chart etc) -- Edit legends, chart parameters -- Modify chart styling -- Other Grafana specific settings - -## Dashboard Settings - -When viewing a dashboard click on the settings icon to view dashboard settings. Here are 2 important sections to use: - -![Screen Shot 2021-08-06 at 1 51 14 PM](https://user-images.githubusercontent.com/3789273/128555763-4d0370c2-bd4d-4462-ae7e-4b140c4e8c34.png) - -- Variables - - Create variables to use throughout the dashboard panels, that are also built on SQL queries - - ![Screen Shot 2021-08-06 at 2 02 40 PM](https://user-images.githubusercontent.com/3789273/128553157-a8e33042-faba-4db4-97db-02a29036e27c.png) - -- JSON Model - - Copy `json` code here and save it to a new file in `/grafana/dashboards/` with a unique name in the `lake` repo. This will allow us to persist dashboards when we load the app - - ![Screen Shot 2021-08-06 at 2 02 52 PM](https://user-images.githubusercontent.com/3789273/128553176-65a5ae43-742f-4abf-9c60-04722033339e.png) - -## Provisioning a Dashboard - -To save a dashboard in the `lake` repo and load it: - -1. Create a dashboard in browser (visit `/dashboard/new`, or use sidebar) -2. Save dashboard (in top right of screen) -3. Go to dashboard settings (in top right of screen) -4. Click on _JSON Model_ in sidebar -5. Copy code into a new `.json` file in `/grafana/dashboards` - -## Troubleshooting DB Connection - -To ensure we have properly connected our database to the data source in Grafana, check database settings in `./grafana/datasources/datasource.yml`, specifically: -- `database` -- `user` -- `secureJsonData/password` diff --git a/versioned_docs/version-v0.12/UserManuals/Dashboards/_category_.json b/versioned_docs/version-v0.12/UserManuals/Dashboards/_category_.json deleted file mode 100644 index 0db83c6e9b8..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/Dashboards/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Dashboards", - "position": 5 -} diff --git a/versioned_docs/version-v0.12/UserManuals/TeamConfiguration.md b/versioned_docs/version-v0.12/UserManuals/TeamConfiguration.md deleted file mode 100644 index b81df20001b..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/TeamConfiguration.md +++ /dev/null @@ -1,188 +0,0 @@ ---- -title: "Team Configuration" -sidebar_position: 7 -description: > - Team Configuration ---- -## What is 'Team Configuration' and how it works? 
- -To organize and display metrics by `team`, Apache DevLake needs to know about the team configuration in an organization, specifically: - -1. What are the teams? -2. Who are the users(unified identities)? -3. Which users belong to a team? -4. Which accounts(identities in specific tools) belong to the same user? - -Each of the questions above corresponds to a table in DevLake's schema, illustrated below: - -![image](/img/Team/teamflow0.png) - -1. `teams` table stores all the teams in the organization. -2. `users` table stores the organization's roster. An entry in the `users` table corresponds to a person in the org. -3. `team_users` table stores which users belong to a team. -4. `user_accounts` table stores which accounts belong to a user. An `account` refers to an identiy in a DevOps tool and is automatically created when importing data from that tool. For example, a `user` may have a GitHub `account` as well as a Jira `account`. - -Apache DevLake uses a simple heuristic algorithm based on emails and names to automatically map accounts to users and populate the `user_accounts` table. -When Apache DevLake cannot confidently map an `account` to a `user` due to insufficient information, it allows DevLake users to manually configure the mapping to ensure accuracy and integrity. - -## A step-by-step guide - -In the following sections, we'll walk through how to configure teams and create the five aforementioned tables (`teams`, `users`, `team_users`, `accounts`, and `user_accounts`). -The overall workflow is: - -1. Create the `teams` table -2. Create the `users` and `team_users` table -3. Populate the `accounts` table via data collection -4. Run a heuristic algorithm to populate `user_accounts` table -5. Manually update `user_accounts` when the algorithm can't catch everything - -Note: - -1. Please replace `/path/to/*.csv` with the absolute path of the CSV file you'd like to upload. -2. Please replace `127.0.0.1:8080` with your actual Apache DevLake service IP and port number. - -## Step 1 - Create the `teams` table - -You can create the `teams` table by sending a PUT request to `/plugins/org/teams.csv` with a `teams.csv` file. To jumpstart the process, you can download a template `teams.csv` from `/plugins/org/teams.csv?fake_data=true`. Below are the detailed instructions: - -a. Download the template `teams.csv` file - - i. GET http://127.0.0.1:8080/plugins/org/teams.csv?fake_data=true (pasting the URL into your browser will download the template) - - ii. If you prefer using curl: - curl --location --request GET 'http://127.0.0.1:8080/plugins/org/teams.csv?fake_data=true' - - -b. Fill out `teams.csv` file and upload it to DevLake - - i. Fill out `teams.csv` with your org data. Please don't modify the column headers or the file suffix. - - ii. Upload `teams.csv` to DevLake with the following curl command: - curl --location --request PUT 'http://127.0.0.1:8080/plugins/org/teams.csv' --form 'file=@"/path/to/teams.csv"' - - iii. The PUT request would populate the `teams` table with data from `teams.csv` file. - You can connect to the database and verify the data in the `teams` table. - See Appendix for how to connect to the database. - -![image](/img/Team/teamflow3.png) - - -## Step 2 - Create the `users` and `team_users` table - -You can create the `users` and `team_users` table by sending a single PUT request to `/plugins/org/users.csv` with a `users.csv` file. To jumpstart the process, you can download a template `users.csv` from `/plugins/org/users.csv?fake_data=true`. 
Below are the detailed instructions: - -a. Download the template `users.csv` file - - i. GET http://127.0.0.1:8080/plugins/org/users.csv?fake_data=true (pasting the URL into your browser will download the template) - - ii. If you prefer using curl: - curl --location --request GET 'http://127.0.0.1:8080/plugins/org/users.csv?fake_data=true' - - -b. Fill out `users.csv` and upload to DevLake - - i. Fill out `users.csv` with your org data. Please don't modify the column headers or the file suffix - - ii. Upload `users.csv` to DevLake with the following curl command: - curl --location --request PUT 'http://127.0.0.1:8080/plugins/org/users.csv' --form 'file=@"/path/to/users.csv"' - - iii. The PUT request would populate the `users` table along with the `team_users` table with data from `users.csv` file. - You can connect to the database and verify these two tables. - -![image](/img/Team/teamflow1.png) - -![image](/img/Team/teamflow2.png) - -c. If you ever want to update `team_users` or `users` table, simply upload the updated `users.csv` to DevLake again following step b. - -## Step 3 - Populate the `accounts` table via data collection - -The `accounts` table is automatically populated when you collect data from data sources like GitHub and Jira through DevLake. - -For example, the GitHub plugin would create one entry in the `accounts` table for each GitHub user involved in your repository. -For demo purposes, we'll insert some mock data into the `accounts` table using SQL: - -``` -INSERT INTO `accounts` (`id`, `created_at`, `updated_at`, `_raw_data_params`, `_raw_data_table`, `_raw_data_id`, `_raw_data_remark`, `email`, `full_name`, `user_name`, `avatar_url`, `organization`, `created_date`, `status`) -VALUES - ('github:GithubAccount:1:1234', '2022-07-12 10:54:09.632', '2022-07-12 10:54:09.632', '{\"ConnectionId\":1,\"Owner\":\"apache\",\"Repo\":\"incubator-devlake\"}', '_raw_github_api_pull_request_reviews', 28, '', 'TyroneKCummings@teleworm.us', '', 'Tyrone K. Cummings', 'https://avatars.githubusercontent.com/u/101256042?u=a6e460fbaffce7514cbd65ac739a985f5158dabc&v=4', '', NULL, 0), - ('jira:JiraAccount:1:629cdf', '2022-07-12 10:54:09.632', '2022-07-12 10:54:09.632', '{\"ConnectionId\":1,\"BoardId\":\"76\"}', '_raw_jira_api_users', 5, '', 'DorothyRUpdegraff@dayrep.com', '', 'Dorothy R. Updegraff', 'https://avatars.jiraxxxx158dabc&v=4', '', NULL, 0); - -``` - -![image](/img/Team/teamflow4.png) - -## Step 4 - Run a heuristic algorithm to populate `user_accounts` table - -Now that we have data in both the `users` and `accounts` table, we can tell DevLake to infer the mappings between `users` and `accounts` with a simple heuristic algorithm based on names and emails. - -a. Send an API request to DevLake to run the mapping algorithm - -``` -curl --location --request POST '127.0.0.1:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "name": "test", - "plan":[ - [ - { - "plugin": "org", - "subtasks":["connectUserAccountsExact"], - "options":{ - "connectionId":1 - } - } - ] - ] -}' -``` - -b. After successful execution, you can verify the data in `user_accounts` in the database. - -![image](/img/Team/teamflow5.png) - -## Step 5 - Manually update `user_accounts` when the algorithm can't catch everything - -It is recommended to examine the generated `user_accounts` table after running the algorithm. -We'll demonstrate how to manually update `user_accounts` when the mapping is inaccurate/incomplete in this section. 
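-
-Before editing anything, it can help to see which accounts the algorithm failed to map. Below is a minimal SQL sketch (illustrative only, assuming the MySQL database used elsewhere in this guide) that lists accounts with no entry in `user_accounts`, using the tables described above:
-
-```sql
--- Accounts that the heuristic algorithm left unmapped
-SELECT a.id AS account_id, a.email, a.user_name
-FROM accounts a
-LEFT JOIN user_accounts ua ON a.id = ua.account_id
-WHERE ua.account_id IS NULL;
-```
-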
-To make manual verification easier, DevLake provides an API for users to download `user_accounts` as a CSV file. -Alternatively, you can verify and modify `user_accounts` all by SQL, see Appendix for more info. - -a. GET http://127.0.0.1:8080/plugins/org/user_account_mapping.csv(pasting the URL into your browser will download the file). If you prefer using curl: -``` -curl --location --request GET 'http://127.0.0.1:8080/plugins/org/user_account_mapping.csv' -``` - -![image](/img/Team/teamflow6.png) - -b. If you find the mapping inaccurate or incomplete, you can modify the `user_account_mapping.csv` file and then upload it to DevLake. -For example, here we change the `UserId` of row 'Id=github:GithubAccount:1:1234' in the `user_account_mapping.csv` file to 2. -Then we upload the updated `user_account_mapping.csv` file with the following curl command: - -``` -curl --location --request PUT 'http://127.0.0.1:8080/plugins/org/user_account_mapping.csv' --form 'file=@"/path/to/user_account_mapping.csv"' -``` - -c. You can verify the data in the `user_accounts` table has been updated. - -![image](/img/Team/teamflow7.png) - -## Appendix A: how to connect to the database - -Here we use MySQL as an example. You can install database management tools like Sequel Ace, DataGrip, MySQLWorkbench, etc. - - -Or through the command line: - -``` -mysql -h -u -p -P -``` - -## Appendix B: how to examine `user_accounts` via SQL - -``` -SELECT a.id as account_id, a.email, a.user_name as account_user_name, u.id as user_id, u.name as real_name -FROM accounts a - join user_accounts ua on a.id = ua.account_id - join users u on ua.user_id = u.id -``` diff --git a/versioned_docs/version-v0.12/UserManuals/_category_.json b/versioned_docs/version-v0.12/UserManuals/_category_.json deleted file mode 100644 index b47bdfd7d09..00000000000 --- a/versioned_docs/version-v0.12/UserManuals/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "User Manuals", - "position": 3 -} diff --git a/versioned_docs/version-v0.13/DataModels/DevLakeDomainLayerSchema.md b/versioned_docs/version-v0.13/DataModels/DevLakeDomainLayerSchema.md deleted file mode 100644 index eef943b83a7..00000000000 --- a/versioned_docs/version-v0.13/DataModels/DevLakeDomainLayerSchema.md +++ /dev/null @@ -1,612 +0,0 @@ ---- -title: "Domain Layer Schema" -description: > - DevLake Domain Layer Schema -sidebar_position: 2 ---- - -## Summary - -This document describes Apache DevLake's domain layer schema. - -Referring to DevLake's [architecture](../Overview/Architecture.md), the data in the domain layer is transformed from the data in the tool layer. The tool layer schema is based on the data from specific tools such as Jira, GitHub, Gitlab, Jenkins, etc. The domain layer schema can be regarded as an abstraction of tool-layer schemas. - -Domain layer schema itself includes 2 logical layers: a `DWD` layer and a `DWM` layer. The DWD layer stores the detailed data points, while the DWM is the slight aggregation and operation of DWD to store more organized details or middle-level metrics. - - -## Use Cases -1. [All metrics](../Metrics) from pre-built dashboards are based on this data schema. -2. As a user, you can create your own customized dashboards based on this data schema. -3. As a contributor, you can refer to this data schema while working on the ETL logic when adding/updating data source plugins. - - -## Data Models - -This is the up-to-date domain layer schema for DevLake v0.10.x. Tables (entities) are categorized into 5 domains. -1. 
Issue tracking domain entities: Jira issues, GitHub issues, GitLab issues, etc. -2. Source code management domain entities: Git/GitHub/Gitlab commits and refs(tags and branches), etc. -3. Code review domain entities: GitHub PRs, Gitlab MRs, etc. -4. CI/CD domain entities: Jenkins jobs & builds, etc. -5. Cross-domain entities: entities that map entities from different domains to break data isolation. - - -### Schema Diagram -![Domain Layer Schema](/img/DomainLayerSchema/schema-diagram-v0.14.png) - -When reading the schema, you'll notice that many tables' primary key is called `id`. Unlike auto-increment id or UUID, `id` is a string composed of several parts to uniquely identify similar entities (e.g. repo) from different platforms (e.g. Github/Gitlab) and allow them to co-exist in a single table. - -Tables that end with WIP are still under development. - - -### Naming Conventions - -1. The name of a table is in plural form. Eg. boards, issues, etc. -2. The name of a table which describe the relation between 2 entities is in the form of [BigEntity in singular form]\_[SmallEntity in plural form]. Eg. board_issues, sprint_issues, pull_request_comments, etc. -3. Value of the field in enum type are in capital letters. Eg. [table.issues.type](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#ZDCw9k) has 3 values, REQUIREMENT, BUG, INCIDENT. Values that are phrases, such as 'IN_PROGRESS' of [table.issues.status](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#ZDCw9k), are separated with underscore '\_'. - -
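-
-As an illustration of the conventions above (an example sketch, not an official dashboard query), the composite string `id` and the [BigEntity]\_[SmallEntity] relation tables can be queried directly, assuming a MySQL-compatible database:
-
-```sql
--- Count issues per board via the board_issues relation table;
--- board ids look like 'github:GithubRepo:384111310'
-SELECT b.id AS board_id, COUNT(bi.issue_id) AS issue_count
-FROM boards b
-LEFT JOIN board_issues bi ON bi.board_id = b.id
-GROUP BY b.id;
-```
-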
- -## Get all domain layer model info. - -All domain layer models can be accessed by the following method - -```golang -import "github.com/apache/incubator-devlake/models/domainlayer/domaininfo" - -domaininfo := domaininfo.GetDomainTablesInfo() -for _, table := range domaininfo { - // do something -} -``` - -If you want to learn more about plugin models,please visit [PluginImplementation](https://devlake.apache.org/docs/DeveloperManuals/PluginImplementation) - -## DWD Entities - (Data Warehouse Detail) - -### Domain 1 - Issue Tracking - -#### issues - -An `issue` is the abstraction of Jira/Github/GitLab/TAPD/... issues. - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | An issue's `id` is composed of < plugin >:< Entity >:< PK0 >[:PK1]..."
  • For Github issues, a Github issue's id is like "github:GithubIssues:< GithubIssueId >". Eg. 'github:GithubIssues:1049355647'
  • For Jira issues, a Jira issue's id is like "jira:JiraIssues:< JiraSourceId >:< JiraIssueId >". Eg. 'jira:JiraIssues:1:10063'. < JiraSourceId > is used to identify which Jira source the issue came from, since DevLake users can import data from several different Jira instances at the same time.&#10;
| PK | -| `issue_key` | varchar | 255 | The key of this issue. For example, the key of this Github [issue](https://github.com/apache/incubator-devlake/issues/1145) is 1145. | | -| `url` | varchar | 255 | The url of the issue. It's a web address in most cases. | | -| `title` | varchar | 255 | The title of an issue | | -| `description` | longtext | | The detailed description/summary of an issue | | -| `type` | varchar | 255 | The standard type of this issue. There're 3 standard types:
  • REQUIREMENT: this issue is a feature
  • BUG: this issue is a bug found during test
  • INCIDENT: this issue is a bug found after release
The 3 standard types are transformed from the original types of an issue. The transformation rule is set in the '.env' file or 'config-ui' before data collection. For issues with an original type that has not mapped to a standard type, the value of `type` will be the issue's original type. | | -| `status` | varchar | 255 | The standard statuses of this issue. There're 3 standard statuses:
  • TODO: this issue is in backlog or to-do list
  • IN_PROGRESS: this issue is in progress
  • DONE: this issue is resolved or closed
The 3 standard statuses are transformed from the original statuses of an issue. The transformation rule:
  • For Jira issue status: transformed from the Jira issue's `statusCategory`. Jira issue has 3 default status categories: 'To Do', 'In Progress', 'Done'.
  • For Github issue status:
    • open -> TODO
    • closed -> DONE
| | -| `original_status` | varchar | 255 | The original status of an issue. | | -| `story_point` | int | | The story point of this issue. It defaults to an empty string for data sources such as Github issues and Gitlab issues. | | -| `priority` | varchar | 255 | The priority of the issue | | -| `component` | varchar | 255 | The component a bug-issue affects. This field only supports the Github plugin for now. The value is transformed from Github issue labels according to the rules end users set in .env during DevLake installation. | | -| `severity` | varchar | 255 | The severity level of a bug-issue. This field only supports the Github plugin for now. The value is transformed from Github issue labels according to the rules end users set in .env during DevLake installation. | | -| `parent_issue_id` | varchar | 255 | The id of its parent issue | | -| `epic_key` | varchar | 255 | The key of the epic this issue belongs to. For tools with no epic-type issues such as Github and Gitlab, this field defaults to an empty string | | -| `original_estimate_minutes` | int | | The original estimation of the time allocated for this issue | | -| `time_spent_minutes` | int | | The time already spent on this issue | | -| `time_remaining_minutes` | int | | The remaining time to resolve the issue | | -| `creator_id` | varchar | 255 | The id of issue creator | | -| `creator_name` | varchar | 255 | The name of the creator | | -| `assignee_id` | varchar | 255 | The id of issue assignee.&#10;
  • For Github issues: this is the last assignee of an issue if the issue has multiple assignees
  • For Jira issues: this is the assignee of the issue at the time of collection
| | -| `assignee_name` | varchar | 255 | The name of the assignee | | -| `created_date` | datetime | 3 | The time issue created | | -| `updated_date` | datetime | 3 | The last time issue gets updated | | -| `resolution_date` | datetime | 3 | The time the issue changes to 'DONE'. | | -| `lead_time_minutes` | int | | Describes the cycle time from issue creation to issue resolution.
  • For issues whose type = 'REQUIREMENT' and status = 'DONE', lead_time_minutes = resolution_date - created_date. The unit is minute.
  • For issues whose type != 'REQUIREMENT' or status != 'DONE', lead_time_minutes is null
| | - -#### issue_labels - -This table shows the labels of issues. Multiple entries can exist per issue. This table can be used to filter issues by label name. - -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `name` | varchar | 255 | Label name | | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | - - -#### issue_comments(WIP) - -This table shows the comments of issues. Issues with multiple comments are shown as multiple records. This table can be used to calculate _metric - issue response time_. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------- | -| `id` | varchar | 255 | The unique id of a comment | PK | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | -| `account_id` | varchar | 255 | The id of the account who made the comment | FK_accounts.id | -| `body` | longtext | | The body/detail of the comment | | -| `created_date` | datetime | 3 | The creation date of the comment | | -| `updated_date` | datetime | 3 | The last time comment gets updated | | - -#### issue_changelogs - -This table shows the changelogs of issues. Issues with multiple changelogs are shown as multiple records. This is transformed from Jira or TAPD changelogs. - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------- | :------- | :--------- | :--------------------------------------------------------------- | :------------- | -| `id` | varchar | 255 | The unique id of an issue changelog | PK | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | -| `author_id` | varchar | 255 | The id of the user who made the change | FK_accounts.id | -| `author_name` | varchar | 255 | The id of the user who made the change | FK_accounts.id | -| `field_id` | varchar | 255 | The id of changed field | | -| `field_name` | varchar | 255 | The id of changed field | | -| `original_from_value` | varchar | 255 | The original value of the changed field | | -| `original_to_value` | varchar | 255 | The new value of the changed field | | -| `from_value` | varchar | 255 | The transformed/standardized original value of the changed field | | -| `to_value` | varchar | 255 | The transformed/standardized new value of the changed field | | -| `created_date` | datetime | 3 | The creation date of the changelog | | - - -#### issue_worklogs - -This table shows the work logged under issues. Usually, an issue has multiple worklogs logged by different developers. - -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :------------------------------------------------------------------------------------------- | :--------------- | -| `id` | varchar | 255 | The id of the worklog | PK | -| `author_id` | varchar | 255 | The id of the author who logged the work | FK_acccounts.id | -| `comment` | longtext | 255 | The comment made while logging the work. | | -| `time_spent_minutes` | int | | The time logged. The unit of value is normalized to minute. Eg. 
1d =) 480, 4h30m =) 270 | | -| `logged_date` | datetime | 3 | The time of this logging action | | -| `started_date` | datetime | 3 | Start time of the worklog | | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | - - -#### boards - -A `board` is an issue list or a collection of issues. It's the abstraction of a Jira board, a Jira project, a [Github issue list](https://github.com/merico-dev/lake/issues) or a GitLab issue list. This table can be used to filter issues by the boards they belong to. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | A board's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
  • For a Github repo's issue list, the board id is like "< github >:< GithubRepos >:< GithubRepoId >". Eg. "github:GithubRepo:384111310"
  • For a Jira Board, the board id is like "< jira >:< JiraSourceId >< JiraBoards >:< JiraBoardsId >". Eg. "jira:1:JiraBoards:12"&#10;
| PK | -| `name` | varchar | 255 | The name of the board. Note: the board name of a Github project 'merico-dev/lake' is 'merico-dev/lake', representing the [default issue list](https://github.com/merico-dev/lake/issues). | | -| `description` | varchar | 255 | The description of the board. | | -| `url` | varchar | 255 | The url of the board. Eg. https://Github.com/merico-dev/lake | | -| `created_date` | datetime | 3 | Board creation time | | - -#### board_issues - -This table shows the relation between boards and issues. This table can be used to filter issues by board. - -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | - -#### sprints - -A `sprint` is the abstraction of Jira sprints, TAPD iterations and Github milestones. A sprint contains a list of issues. - -| **field** | **type** | **length** | **description** | **key** | -| :------------------ | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :----------- | -| `id` | varchar | 255 | A sprint's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
  • A sprint in a Github repo is a milestone, the sprint id is like "< github >:< GithubRepos >:< GithubRepoId >:< milestoneNumber >".
    Eg. The id for this [sprint](https://github.com/apache/incubator-devlake/milestone/5) is "github:GithubRepo:384111310:5"
  • For a Jira Board, the id is like "< jira >:< JiraSourceId >< JiraBoards >:< JiraBoardsId >".
    Eg. "jira:1:JiraBoards:12"
| PK | -| `name` | varchar | 255 | The name of sprint.
For Github projects, the sprint name is the milestone name. For instance, 'v0.10.0 - Introduce Temporal to DevLake' is the name of this [sprint](https://github.com/apache/incubator-devlake/milestone/5). | | -| `url` | varchar | 255 | The url of sprint. | | -| `status` | varchar | 255 | There're 3 statuses of a sprint:
  • CLOSED: a completed sprint
  • ACTIVE: a sprint started but not completed
  • FUTURE: a sprint that has not started
| | -| `started_date` | datetime | 3 | The start time of a sprint | | -| `ended_date` | datetime | 3 | The planned/estimated end time of a sprint. It's usually set when planning a sprint. | | -| `completed_date` | datetime | 3 | The actual time to complete a sprint. | | -| `original_board_id` | datetime | 3 | The id of the board where the sprint was first created. This field is not null only when this entity is transformed from Jira sprints.&#10;
In Jira, sprint and board entities have 2 types of relation:
  • A sprint is created based on a specific board. In this case, board(1):(n)sprint. The `original_board_id` is used to show the relation.
  • A sprint can be mapped to multiple boards, a board can also show multiple sprints. In this case, board(n):(n)sprint. This relation is shown in [table.board_sprints](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#xfm617)
| FK_boards.id | - -#### sprint_issues - -This table shows the relation between sprints and issues that have been added to sprints. This table can be used to show metrics such as _'ratio of unplanned issues'_, _'completion rate of sprint issues'_, etc - -| **field** | **type** | **length** | **description** | **key** | -| :--------------- | :------- | :--------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------ | -| `sprint_id` | varchar | 255 | Sprint id | FK_sprints.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `is_removed` | bool | | If the issue is removed from this sprint, then TRUE; else FALSE | | -| `added_date` | datetime | 3 | The time this issue added to the sprint. If an issue is added to a sprint multiple times, the latest time will be the value. | | -| `removed_date` | datetime | 3 | The time this issue gets removed from the sprint. If an issue is removed multiple times, the latest time will be the value. | | -| `added_stage` | varchar | 255 | The stage when issue is added to this sprint. There're 3 possible values:
  • BEFORE_SPRINT
    Planning before sprint starts.
    Condition: sprint_issues.added_date <= sprints.start_date
  • DURING_SPRINT Planning during a sprint.
    Condition: sprints.start_date < sprint_issues.added_date <= sprints.end_date
  • AFTER_SPRINT
    Planning after a sprint. This is caused by improper operation - adding issues to a completed sprint.&#10;
    Condition: sprint_issues.added_date > sprints.end_date&#10;
| | -| `resolved_stage` | varchar | 255 | The stage when an issue is resolved (issue status turns to 'DONE'). There're 3 possible values:
  • BEFORE_SPRINT
    Condition: issues.resolution_date <= sprints.start_date
  • DURING_SPRINT
    Condition: sprints.start_date < issues.resolution_date <= sprints.end_date
  • AFTER_SPRINT
    Condition: issues.resolution_date > sprints.end_date&#10;
| | - -#### board_sprints - -| **field** | **type** | **length** | **description** | **key** | -| :---------- | :------- | :--------- | :-------------- | :------------ | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `sprint_id` | varchar | 255 | Sprint id | FK_sprints.id | - -
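-
-To give a feel for how the issue tracking entities above are typically queried, here is a minimal SQL sketch (illustrative only, assuming a MySQL-compatible database) that computes the average lead time of completed requirements per month from the `issues` table:
-
-```sql
--- Average lead time (minutes) of DONE requirements, grouped by resolution month
-SELECT DATE_FORMAT(resolution_date, '%Y-%m') AS month,
-       AVG(lead_time_minutes) AS avg_lead_time_minutes
-FROM issues
-WHERE type = 'REQUIREMENT'
-  AND status = 'DONE'
-GROUP BY month
-ORDER BY month;
-```
-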
- -### Domain 2 - Source Code Management - -#### repos - -Information about Github or Gitlab repositories. A repository is always owned by a user. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------- | -| `id` | varchar | 255 | A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github repo's id is like "< github >:< GithubRepos >< GithubRepoId >". Eg. 'github:GithubRepos:384111310' | PK | -| `name` | varchar | 255 | The name of repo. | | -| `description` | varchar | 255 | The description of repo. | | -| `url` | varchar | 255 | The url of repo. Eg. https://Github.com/merico-dev/lake | | -| `owner_id` | varchar | 255 | The id of the owner of repo | FK_accounts.id | -| `language` | varchar | 255 | The major language of repo. Eg. The language for merico-dev/lake is 'Go' | | -| `forked_from` | varchar | 255 | Empty unless the repo is a fork in which case it contains the `id` of the repo the repo is forked from. | | -| `deleted` | tinyint | 255 | 0: repo is active 1: repo has been deleted | | -| `created_date` | datetime | 3 | Repo creation date | | -| `updated_date` | datetime | 3 | Last full update was done for this repo | | - -#### repo_languages(WIP) - -Languages that are used in the repository along with byte counts for all files in those languages. This is in line with how Github calculates language percentages in a repository. Multiple entries can exist per repo. - -The table is filled in when the repo has been first inserted on when an update round for all repos is made. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github repo's id is like "< github >:< GithubRepos >< GithubRepoId >". Eg. 'github:GithubRepos:384111310' | PK | -| `language` | varchar | 255 | The language of repo.
These are the [languages](https://api.github.com/repos/merico-dev/lake/languages) for merico-dev/lake | | -| `bytes` | int | | The byte counts for all files in those languages | | -| `created_date` | datetime | 3 | The field is filled in with the latest timestamp the query for a specific `repo_id` was done. | | - -#### repo_commits - -The commits belong to the history of a repository. More than one repos can share the same commits if one is a fork of the other. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :-------------- | :------------- | -| `repo_id` | varchar | 255 | Repo id | FK_repos.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### refs - -A ref is the abstraction of a branch or tag. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------- | -| `id` | varchar | 255 | A ref's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github ref is composed of "github:GithubRepos:< GithubRepoId >:< RefUrl >". Eg. The id of release v5.3.0 of PingCAP/TiDB project is 'github:GithubRepos:384111310:refs/tags/v5.3.0' A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." | PK | -| `ref_name` | varchar | 255 | The name of ref. Eg. '[refs/tags/v0.9.3](https://github.com/merico-dev/lake/tree/v0.9.3)' | | -| `repo_id` | varchar | 255 | The id of repo this ref belongs to | FK_repos.id | -| `commit_sha` | char | 40 | The commit this ref points to at the time of collection | | -| `is_default` | int | |
  • 0: the ref is the default branch. By the definition of [Github](https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-branches-in-your-repository/changing-the-default-branch), the default branch is the base branch for pull requests and code commits.
  • 1: not the default branch
| | -| `merge_base` | char | 40 | The merge base commit of the main ref and the current ref | | -| `ref_type` | varchar | 64 | There're 2 typical types:
  • BRANCH
  • TAG
| | - -#### refs_commits_diffs - -This table shows the commits added in a new ref compared to an old ref. This table can be used to support tag-based analysis, for instance, '_No. of commits of a tag_', '_No. of merged pull request of a tag_', etc. - -The records of this table are computed by [RefDiff](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/refdiff) plugin. The computation should be manually triggered after using [GitRepoExtractor](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/gitextractor) to collect commits and refs. The algorithm behind is similar to [this](https://github.com/merico-dev/lake/compare/v0.8.0%E2%80%A6v0.9.0). - -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :-------------------------------------------------------------- | :------------- | -| `commit_sha` | char | 40 | One of the added commits in the new ref compared to the old ref | FK_commits.sha | -| `new_ref_id` | varchar | 255 | The new ref's id for comparison | FK_refs.id | -| `old_ref_id` | varchar | 255 | The old ref's id for comparison | FK_refs.id | -| `new_ref_commit_sha` | char | 40 | The commit new ref points to at the time of collection | | -| `old_ref_commit_sha` | char | 40 | The commit old ref points to at the time of collection | | -| `sorting_index` | varchar | 255 | An index for debugging, please skip it | | - -#### commits - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------- | -| `sha` | char | 40 | One of the added commits in the new ref compared to the old ref | FK_commits.sha | -| `message` | varchar | 255 | Commit message | | -| `author_name` | varchar | 255 | The value is set with command `git config user.name xxxxx` commit | | -| `author_email` | varchar | 255 | The value is set with command `git config user.email xxxxx` author | | -| `authored_date` | datetime | 3 | The date when this commit was originally made | | -| `author_id` | varchar | 255 | The id of commit author | FK_accounts.id | -| `committer_name` | varchar | 255 | The name of committer | | -| `committer_email` | varchar | 255 | The email of committer | | -| `committed_date` | datetime | 3 | The last time the commit gets modified.
For example, when rebasing the branch where the commit is in on another branch, the committed_date changes. | | -| `committer_id` | varchar | 255 | The id of committer | FK_accounts.id | -| `additions` | int | | Added lines of code | | -| `deletions` | int | | Deleted lines of code | | -| `dev_eq` | int | | A metric that quantifies the amount of code contribution. The data can be retrieved from [AE plugin](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/ae). | | - -#### commit_files - -The files have been changed via commits. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :----------------------------------------------------- | :------------- | -| `id` | varchar | 255 | The `id` is composed of "< Commit_sha >:< file_path >" | FK_commits.sha | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | -| `file_path` | varchar | 255 | Path of a changed file in a commit | | -| `additions` | int | | The added lines of code in this file by the commit | | -| `deletions` | int | | The deleted lines of code in this file by the commit | | - -#### components - -The components of files extracted from the file paths. This can be used to analyze Git metrics by component. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :----------------------------------------------------- | :---------- | -| `repo_id` | varchar | 255 | The repo id | FK_repos.id | -| `name` | varchar | 255 | The name of component | | -| `path_regex` | varchar | 255 | The regex to extract components from this repo's paths | | - -#### commit_file_components - -The relationship between commit_file and component_name. - -| **field** | **type** | **length** | **description** | **key** | -| :--------------- | :------- | :--------- | :--------------------------- | :----------------- | -| `commit_file_id` | varchar | 255 | The id of commit file | FK_commit_files.id | -| `component_name` | varchar | 255 | The component name of a file | | - -#### commit_parents - -The parent commit(s) for each commit, as specified by Git. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :---------------- | :------------- | -| `commit_sha` | char | 40 | commit sha | FK_commits.sha | -| `parent` | char | 40 | Parent commit sha | FK_commits.sha | - -
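A note on the composite `id` convention used throughout these tables: "< plugin >:< Entity >:< PK0 >[:PK1]..." is what lets records collected from different tools (a GitHub ref and a GitLab ref, for example) coexist in a single domain-layer table. The sketch below only illustrates the idea; `buildDomainId` and `splitDomainId` are hypothetical helpers, not part of DevLake's API, and real plugins rely on the framework to generate these ids.

```go
package main

import (
	"fmt"
	"strings"
)

// buildDomainId assembles an id following the "< plugin >:< Entity >:< PK0 >[:PK1]..."
// convention. Hypothetical helper for illustration only; it ignores escaping of
// ':' inside primary-key parts.
func buildDomainId(plugin, entity string, pks ...string) string {
	return strings.Join(append([]string{plugin, entity}, pks...), ":")
}

// splitDomainId recovers the plugin, entity and primary-key parts from a composite id.
func splitDomainId(id string) (plugin, entity string, pks []string) {
	parts := strings.Split(id, ":")
	if len(parts) < 3 {
		return "", "", nil
	}
	return parts[0], parts[1], parts[2:]
}

func main() {
	// The ref example from the table above: release v5.3.0 of PingCAP/TiDB.
	id := buildDomainId("github", "GithubRepos", "384111310", "refs/tags/v5.3.0")
	fmt.Println(id) // github:GithubRepos:384111310:refs/tags/v5.3.0

	plugin, entity, pks := splitDomainId(id)
	fmt.Println(plugin, entity, pks) // github GithubRepos [384111310 refs/tags/v5.3.0]
}
```

This is also why foreign-key columns such as `repo_id` above are plain varchar columns rather than integers.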
- -### Domain 3 - Code Review - -#### pull_requests - -A pull request is the abstraction of Github pull request and Gitlab merge request. - -| **field** | **type** | **length** | **description** | **key** | -| :----------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------- | -| `id` | char | 40 | A pull request's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." Eg. For 'github:GithubPullRequests:1347' | FK_commits.sha | -| `title` | varchar | 255 | The title of pull request | | -| `description` | longtext | | The body/description of pull request | | -| `status` | varchar | 255 | the status of pull requests. For a Github pull request, the status can either be 'open' or 'closed'. | | -| `parent_pr_id` | varchar | 255 | The id of the parent PR | | -| `pull_request_key` | varchar | 255 | The key of PR. Eg, 1536 is the key of this [PR](https://github.com/apache/incubator-devlake/pull/1563) | | -| `base_repo_id` | varchar | 255 | The repo that will be updated. | | -| `head_reop_id` | varchar | 255 | The repo containing the changes that will be added to the base. If the head repository is NULL, this means that the corresponding project had been deleted when DevLake processed the pull request. | | -| `base_ref` | varchar | 255 | The branch name in the base repo that will be updated | | -| `head_ref` | varchar | 255 | The branch name in the head repo that contains the changes that will be added to the base | | -| `author_name` | varchar | 255 | The author's name of the pull request | | -| `author_id` | varchar | 255 | The author's id of the pull request | | -| `url` | varchar | 255 | the web link of the pull request | | -| `type` | varchar | 255 | The work-type of a pull request. For example: feature-development, bug-fix, docs, etc.
The value is transformed from Github pull request labels by configuring `GITHUB_PR_TYPE` in the `.env` file during installation. | | -| `component` | varchar | 255 | The component this PR affects.
The value is transformed from Github/Gitlab pull request labels by configuring `GITHUB_PR_COMPONENT` in `.env` file during installation. | | -| `created_date` | datetime | 3 | The time PR created. | | -| `merged_date` | datetime | 3 | The time PR gets merged. Null when the PR is not merged. | | -| `closed_date` | datetime | 3 | The time PR closed. Null when the PR is not closed. | | -| `merge_commit_sha` | char | 40 | the merge commit of this PR. By the definition of [Github](https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-branches-in-your-repository/changing-the-default-branch), when you click the default Merge pull request option on a pull request on Github, all commits from the feature branch are added to the base branch in a merge commit. | | -| `base_commit_sha` | char | 40 | The base commit of this PR. | | -| `head_commit_sha` | char | 40 | The head commit of this PR. | | - - -#### pull_request_labels - -This table shows the labels of pull request. Multiple entries can exist per pull request. This table can be used to filter pull requests by label name. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :-------------- | :------------------ | -| `name` | varchar | 255 | Label name | | -| `pull_request_id` | varchar | 255 | Pull request ID | FK_pull_requests.id | - -#### pull_request_commits - -A commit associated with a pull request - -The list is additive. This means if a rebase with commit squashing takes place after the commits of a pull request have been processed, the old commits will not be deleted. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :-------------- | :------------------ | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### pull_request_comments - -Normal comments, review bodies, reviews' inline comments of GitHub's pull requests or GitLab's merge requests. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :--------------------------------------------------------- | :------------------ | -| `id` | varchar | 255 | Comment id | PK | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `body` | longtext | | The body of the comments | | -| `account_id` | varchar | 255 | The account who made the comment | FK_accounts.id | -| `created_date` | datetime | 3 | Comment creation time | | -| `position` | int | | Deprecated | | -| `type` | varchar | 255 | - For normal comments: NORMAL
- For review comments, i.e. diff/inline comments: DIFF
- For reviews' body (exist in GitHub but not GitLab): REVIEW | | -| `review_id` | varchar | 255 | Review_id of the comment if the type is `REVIEW` or `DIFF` | | -| `status` | varchar | 255 | Status of the comment | | - - -#### pull_request_events(WIP) - -Events of pull requests. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------ | -| `id` | varchar | 255 | Event id | PK | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `action` | varchar | 255 | The action to be taken, some values:
  • `opened`: When the pull request has been opened
  • `closed`: When the pull request has been closed
  • `merged`: When Github detected that the pull request has been merged. No merges outside Github (i.e. Git based) are reported
  • `reopened`: When a pull request is reopened after being closed
  • `synchronize`: When new commits are added to or removed from the head repository
| | -| `actor_id` | varchar | 255 | The account id of the event performer | FK_accounts.id | -| `created_date` | datetime | 3 | Event creation time | | - -
- -### Domain 4 - CI/CD(WIP) - -#### jobs - -The CI/CD schedule, not a specific task. - -| **field** | **type** | **length** | **description** | **key** | -| :-------- | :------- | :--------- | :-------------- | :------ | -| `id` | varchar | 255 | Job id | PK | -| `name` | varchar | 255 | Name of job | | - -#### builds - -A build is an execution of a job. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :--------------------------------------------------------------- | :--------- | -| `id` | varchar | 255 | Build id | PK | -| `job_id` | varchar | 255 | Id of the job this build belongs to | FK_jobs.id | -| `name` | varchar | 255 | Name of build | | -| `duration_sec` | bigint | | The duration of build in seconds | | -| `started_date` | datetime | 3 | Started time of the build | | -| `status` | varchar | 255 | The result of build. The values may be 'success', 'failed', etc. | | -| `commit_sha` | char | 40 | The specific commit being built on. Nullable. | | - -#### cicd_pipelines - -A cicd_pipeline is a series of builds that have connections or a standalone build. - -| **field** | **type** | **length** | **description** | **key** | -| :----------------- | :-------------- | :--------- | :-------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | This key is generated based on details from the original plugin | PK | -| `created_at` | datetime | 3 | | | -| `updated_at` | datetime | 3 | | | -| `name` | varchar | 255 | | | -| `commit_sha` | varchar | 255 | | | -| `branch` | varchar | 255 | | | -| `repo` | varchar | 255 | | | -| `result` | varchar | 100 | | | -| `status` | varchar | 100 | | | -| `type` | varchar | 100 | to indicate this is CI or CD | | -| `duration_sec` | bigint unsigned | | | | -| `created_date` | datetime | 3 | | | -| `finished_date` | datetime | 3 | | | - -#### cicd_pipeline_repos - -A map between cic_pipeline and repo info. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :-------------------------------------------------------------- | :------ | -| `commit_sha` | varchar | 255 | | PK | -| `branch` | varchar | 255 | | | -| `repo_url` | varchar | 255 | | | -| `id` | varchar | 255 | This key is generated based on details from the original plugin | PK | -| `created_at` | datetime | 3 | | | -| `updated_at` | datetime | 3 | | | - - -#### cicd_tasks - -A cicd_task is a single job of ci/cd. - -| **field** | **type** | **length** | **description** | **key** | -| :----------------- | :-------------- | :--------- | :-------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | This key is generated based on details from the original plugin | PK | -| `created_at` | datetime | 3 | | | -| `updated_at` | datetime | 3 | | | -| `name` | varchar | 255 | | | -| `pipeline_id` | varchar | 255 | | | -| `result` | varchar | 100 | | | -| `status` | varchar | 100 | | | -| `type` | varchar | 100 | to indicate this is CI or CD | | -| `duration_sec` | bigint unsigned | | | | -| `started_date` | datetime | 3 | | | -| `finished_date` | datetime | 3 | | | - - -### Cross-Domain Entities - -These entities are used to map entities between different domains. They are the key players to break data isolation. 
- -There're low-level entities such as issue_commits, users, and higher-level cross domain entities such as board_repos - -#### issue_commits - -A low-level mapping between "issue tracking" and "source code management" domain by mapping `issues` and `commits`. Issue(n): Commit(n). - -The original connection between these two entities lies in either issue tracking tools like Jira or source code management tools like GitLab. You have to use tools to accomplish this. - -For example, a common method to connect Jira issue and GitLab commit is a GitLab plugin [Jira Integration](https://docs.gitlab.com/ee/integration/jira/). With this plugin, the Jira issue key in the commit message written by the committers will be parsed. Then, the plugin will add the commit urls under this jira issue. Hence, DevLake's [Jira plugin](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/jira) can get the related commits (including repo, commit_id, url) of an issue. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :-------------- | :------------- | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### pull_request_issues - -This table shows the issues closed by pull requests. It's a medium-level mapping between "issue tracking" and "source code management" domain by mapping issues and commits. Issue(n): Commit(n). - -The data is extracted from the body of pull requests conforming to certain regular expression. The regular expression can be defined in GITHUB_PR_BODY_CLOSE_PATTERN in the .env file - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------- | :------- | :--------- | :------------------ | :------------------ | -| `pull_request_id` | char | 40 | Pull request id | FK_pull_requests.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `pull_request_number` | varchar | 255 | Pull request key | | -| `issue_number` | varchar | 255 | Issue key | | - -#### board_repos (Deprecated) - -A way to link "issue tracking" and "source code management" domain by mapping `boards` and `repos`. Board(n): Repo(n). - -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `repo_id` | varchar | 255 | Repo id | FK_repos.id | - -#### accounts - -This table stores of user accounts across different tools such as GitHub, Jira, GitLab, etc. This table can be joined to get the metadata of all accounts. - metrics, such as _'No. of Issue closed by contributor', 'No. of commits by contributor',_ - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :------ | -| `id` | varchar | 255 | An account's `id` is the identifier of the account of a specific tool. It is composed of "< Plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github account's id is composed of "< github >:< GithubAccounts >:< GithubUserId >)". Eg. 'github:GithubUsers:14050754' | PK | -| `email` | varchar | 255 | Email of the account | | -| `full_name` | varchar | 255 | Full name | | -| `user_name` | varchar | 255 | Username, nickname or Github login of an account | | -| `avatar_url` | varchar | 255 | | | -| `organization` | varchar | 255 | User's organization(s) | | -| `created_date` | datetime | 3 | User creation time | | -| `status` | int | | 0: default, the user is active. 1: the user is not active | | - -#### users -| **field** | **type** | **length** | **description** | **key** | -| --------- | -------- | ---------- | ----------------------------- | ------- | -| `id` | varchar | 255 | id of a person | PK | -| `email` | varchar | 255 | the primary email of a person | | -| `name` | varchar | 255 | name of a person | | - -#### user_accounts -| **field** | **type** | **length** | **description** | **key** | -| ------------ | -------- | ---------- | --------------- | ---------------- | -| `user_id` | varchar | 255 | users.id | Composite PK, FK | -| `account_id` | varchar | 255 | accounts.id | Composite PK, FK | - -#### teams -| **field** | **type** | **length** | **description** | **key** | -| --------------- | -------- | ---------- | -------------------------------------------------- | ------- | -| `id` | varchar | 255 | id from the data sources, decided by DevLake users | PK | -| `name` | varchar | 255 | name of the team. Eg. team A, team B, etc. | | -| `alias` | varchar | 255 | alias or abbreviation of a team | | -| `parent_id` | varchar | 255 | teams.id, default to null | FK | -| `sorting_index` | int | 255 | the field to sort team | | - -#### team_users -| **field** | **type** | **length** | **description** | **key** | -| --------- | -------- | ---------- | ----------------------------------------------- | ---------------- | -| `team_id` | varchar | 255 | Full name of the team. Eg. team A, team B, etc. | Composite PK, FK | -| `user_id` | varchar | 255 | users.id | Composite PK, FK | - - -
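Taken together, `accounts`, `users` and `user_accounts` allow several tool-specific accounts (a GitHub login, a Jira account, etc.) to resolve to one person, which is what contributor-level metrics need. The toy, in-memory illustration below only mirrors the table shapes above with made-up sample data; none of it is DevLake code.

```go
package main

import "fmt"

// Minimal mirrors of the table shapes above, for illustration only.
type Account struct{ ID, UserName string }
type User struct{ ID, Name string }
type UserAccount struct{ UserID, AccountID string }

func main() {
	users := []User{{ID: "u1", Name: "Jane Doe"}}
	accounts := []Account{
		{ID: "github:GithubUsers:14050754", UserName: "jane-gh"},
		{ID: "jira:JiraUsers:10001", UserName: "jane.doe"}, // made-up id for illustration
	}
	// user_accounts rows map each tool account back to the same person.
	userAccounts := []UserAccount{
		{UserID: "u1", AccountID: "github:GithubUsers:14050754"},
		{UserID: "u1", AccountID: "jira:JiraUsers:10001"},
	}

	// Resolve each tool account to the person behind it, i.e. the join
	// behind metrics like "No. of commits by contributor".
	accountToUser := map[string]string{}
	for _, ua := range userAccounts {
		accountToUser[ua.AccountID] = ua.UserID
	}
	userByID := map[string]User{}
	for _, u := range users {
		userByID[u.ID] = u
	}
	for _, a := range accounts {
		person := userByID[accountToUser[a.ID]]
		fmt.Printf("%s (%s) -> %s\n", a.UserName, a.ID, person.Name)
	}
}
```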
- -## DWM Entities - (Data Warehouse Middle) - -DWM entities are the slight aggregation and operation of DWD to store more organized details or middle-level metrics. - - -#### refs_issues_diffs - -This table shows the issues fixed by commits added in a new ref compared to an old one. The data is computed from [table.ref_commits_diff](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#yJOyqa), [table.pull_requests](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#Uc849c), [table.pull_request_commits](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#G9cPfj), and [table.pull_request_issues](https://merico.feishu.cn/docs/doccnvyuG9YpVc6lvmWkmmbZtUc#we6Uac). - -This table can support tag-based analysis, for instance, '_No. of bugs closed in a tag_'. - -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :----------------------------------------------------- | :----------- | -| `new_ref_id` | varchar | 255 | The new ref's id for comparison | FK_refs.id | -| `old_ref_id` | varchar | 255 | The old ref's id for comparison | FK_refs.id | -| `new_ref_commit_sha` | char | 40 | The commit new ref points to at the time of collection | | -| `old_ref_commit_sha` | char | 40 | The commit old ref points to at the time of collection | | -| `issue_number` | varchar | 255 | Issue number | | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | diff --git a/versioned_docs/version-v0.13/DataModels/_category_.json b/versioned_docs/version-v0.13/DataModels/_category_.json deleted file mode 100644 index ae28c626ea0..00000000000 --- a/versioned_docs/version-v0.13/DataModels/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Data Models", - "position": 6, - "link":{ - "type": "generated-index", - "slug": "DataModels" - } -} diff --git a/versioned_docs/version-v0.13/DeveloperManuals/DBMigration.md b/versioned_docs/version-v0.13/DeveloperManuals/DBMigration.md deleted file mode 100644 index 531604981f9..00000000000 --- a/versioned_docs/version-v0.13/DeveloperManuals/DBMigration.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: "DB Migration" -description: > - DB Migration -sidebar_position: 3 ---- - -## Summary -Starting in v0.10.0, DevLake provides a lightweight migration tool for executing migration scripts. -Both framework itself and plugins define their migration scripts in their own migration folder. -The migration scripts are written with gorm in Golang to support different SQL dialects. - - -## Migration Script -Migration script describes how to do database migration. -They implement the `Script` interface. -When DevLake starts, scripts register themselves to the framework by invoking the `Register` function - -```go -type Script interface { - // this function will contain the business logic of the migration (e.g. DDL logic) - Up(ctx context.Context, db *gorm.DB) error - // the version number of the migration. typically in date format (YYYYMMDDHHMMSS), e.g. 20220728000001. Migrations are executed sequentially based on this number. - Version() uint64 - // The name of this migration - Name() string -} -``` - -## Migration Model - -For each migration we define a "snapshot" datamodel of the model that we wish to perform the migration on. -The fields on this model shall be identical to the actual model, but unlike the actual one, this one will -never change in the future. 
The naming convention of these models is `YYYYMMDD` and they must implement -the `func TableName() string` method, and consumed by the `Script::Up` method. - -## Table `migration_history` - -The table tracks migration scripts execution and schemas changes. -From which, DevLake could figure out the current state of database schemas. - -## Execution - -Each plugin has a `migrationscripts` subpackage that lists all the migrations to be executed for that plugin. You -will need to add your migration to that list for the framework to pick it up. Similarly, there is such a package -for the framework-only migrations defined under the `models` package. - - -## How It Works -1. Check `migration_history` table, calculate all the migration scripts need to be executed. -2. Sort scripts by Version in ascending order. -3. Execute scripts. -4. Save results in the `migration_history` table. diff --git a/versioned_docs/version-v0.13/DeveloperManuals/Dal.md b/versioned_docs/version-v0.13/DeveloperManuals/Dal.md deleted file mode 100644 index 9b085425ae2..00000000000 --- a/versioned_docs/version-v0.13/DeveloperManuals/Dal.md +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: "Dal" -sidebar_position: 5 -description: > - The Dal (Data Access Layer) is designed to decouple the hard dependency on `gorm` in v0.12 ---- - -## Summary - -The Dal (Data Access Layer) is designed to decouple the hard dependency on `gorm` in v0.12. The advantages of introducing this isolation are: - - - Unit Test: Mocking an Interface is easier and more reliable than Patching a Pointer. - - Clean Code: DBS operations are more consistence than using `gorm ` directly. - - Replaceable: It would be easier to replace `gorm` in the future if needed. - -## The Dal Interface - -```go -type Dal interface { - AutoMigrate(entity interface{}, clauses ...Clause) error - Exec(query string, params ...interface{}) error - RawCursor(query string, params ...interface{}) (*sql.Rows, error) - Cursor(clauses ...Clause) (*sql.Rows, error) - Fetch(cursor *sql.Rows, dst interface{}) error - All(dst interface{}, clauses ...Clause) error - First(dst interface{}, clauses ...Clause) error - Count(clauses ...Clause) (int64, error) - Pluck(column string, dest interface{}, clauses ...Clause) error - Create(entity interface{}, clauses ...Clause) error - Update(entity interface{}, clauses ...Clause) error - CreateOrUpdate(entity interface{}, clauses ...Clause) error - CreateIfNotExist(entity interface{}, clauses ...Clause) error - Delete(entity interface{}, clauses ...Clause) error - AllTables() ([]string, error) -} -``` - - -## How to use - -### Query -```go -// Get a database cursor -user := &models.User{} -cursor, err := db.Cursor( - dal.From(user), - dal.Where("department = ?", "R&D"), - dal.Orderby("id DESC"), -) -if err != nil { - return err -} -for cursor.Next() { - err = dal.Fetch(cursor, user) // fetch one record at a time - ... -} - -// Get a database cursor by raw sql query -cursor, err := db.Raw("SELECT * FROM users") - -// USE WITH CAUTIOUS: loading a big table at once is slow and dangerous -// Load all records from database at once. -users := make([]models.Users, 0) -err := db.All(&users, dal.Where("department = ?", "R&D")) - -// Load a column as Scalar or Slice -var email string -err := db.Pluck("email", &username, dal.Where("id = ?", 1)) -var emails []string -err := db.Pluck("email", &emails) - -// Execute query -err := db.Exec("UPDATE users SET department = ? 
WHERE department = ?", "Research & Development", "R&D") -``` - -### Insert -```go -err := db.Create(&models.User{ - Email: "hello@example.com", // assumming this the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Update -```go -err := db.Create(&models.User{ - Email: "hello@example.com", // assumming this the Primarykey - Name: "hello", - Department: "R&D", -}) -``` -### Insert or Update -```go -err := db.CreateOrUpdate(&models.User{ - Email: "hello@example.com", // assuming this is the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Insert if record(by PrimaryKey) didn't exist -```go -err := db.CreateIfNotExist(&models.User{ - Email: "hello@example.com", // assuming this is the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Delete -```go -err := db.CreateIfNotExist(&models.User{ - Email: "hello@example.com", // assuming this is the Primary key -}) -``` - -### DDL and others -```go -// Returns all table names -allTables, err := db.AllTables() - -// Automigrate: create/add missing table/columns -// Note: it won't delete any existing columns, nor does it update the column definition -err := db.AutoMigrate(&models.User{}) -``` - -## How to do Unit Test -First, run the command `make mock` to generate the Mocking Stubs, the generated source files should appear in `mocks` folder. -``` -mocks -├── ApiResourceHandler.go -├── AsyncResponseHandler.go -├── BasicRes.go -├── CloseablePluginTask.go -├── ConfigGetter.go -├── Dal.go -├── DataConvertHandler.go -├── ExecContext.go -├── InjectConfigGetter.go -├── InjectLogger.go -├── Iterator.go -├── Logger.go -├── Migratable.go -├── PluginApi.go -├── PluginBlueprintV100.go -├── PluginInit.go -├── PluginMeta.go -├── PluginTask.go -├── RateLimitedApiClient.go -├── SubTaskContext.go -├── SubTaskEntryPoint.go -├── SubTask.go -└── TaskContext.go -``` -With these Mocking stubs, you may start writing your TestCases using the `mocks.Dal`. -```go -import "github.com/apache/incubator-devlake/mocks" - -func TestCreateUser(t *testing.T) { - mockDal := new(mocks.Dal) - mockDal.On("Create", mock.Anything, mock.Anything).Return(nil).Once() - userService := &services.UserService{ - Dal: mockDal, - } - userService.Post(map[string]interface{}{ - "email": "helle@example.com", - "name": "hello", - "department": "R&D", - }) - mockDal.AssertExpectations(t) -``` - diff --git a/versioned_docs/version-v0.13/DeveloperManuals/DeveloperSetup.md b/versioned_docs/version-v0.13/DeveloperManuals/DeveloperSetup.md deleted file mode 100644 index ef7ffa2a083..00000000000 --- a/versioned_docs/version-v0.13/DeveloperManuals/DeveloperSetup.md +++ /dev/null @@ -1,131 +0,0 @@ ---- -title: "Developer Setup" -description: > - The steps to install DevLake in developer mode. -sidebar_position: 1 ---- - - -## Requirements - -- Docker v19.03.10+ -- Golang v1.19+ -- Make - - Mac (Already installed) - - Windows: [Download](http://gnuwin32.sourceforge.net/packages/make.htm) - - Ubuntu: `sudo apt-get install build-essential libssl-dev` - -## How to setup dev environment - -The following guide will walk you through the procedure to run local config-ui and devlake servers against dockerized -MySQL and Grafana containers. - -1. Navigate to where you would like to install this project and clone the repository: - - ```sh - git clone https://github.com/apache/incubator-devlake - cd incubator-devlake - ``` - -2. Install dependencies for plugins: - - - [RefDiff](../Plugins/refdiff.md#development) - -3. Install Go packages - - ```sh - go get - ``` - -4. 
Copy the sample config file to new local file: - - ```sh - cp .env.example .env - ``` - -5. Update the following variables in the file `.env`: - - * `DB_URL`: Replace `mysql:3306` with `127.0.0.1:3306` - -6. Start the MySQL and Grafana containers: - - > Make sure the Docker daemon is running before this step. - - ```sh - docker-compose up -d mysql grafana - ``` - -7. Run lake and config UI in dev mode in two separate terminals: - - ```sh - # install mockery - go install github.com/vektra/mockery/v2@latest - # generate mocking stubs - make mock - # run lake - make dev - # run config UI - make configure-dev - ``` - - Q: I got an error saying: `libgit2.so.1.3: cannot open share object file: No such file or directory` - - A: This library is needed by the git-extractor plugin. Make sure your program can find `libgit2.so.1.3`. `LD_LIBRARY_PATH` can be assigned like this if your `libgit2.so.1.3` is located at `/usr/local/lib`: - - ```sh - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib - ``` - - Note that the version has to be pinned to 1.3.0. If you don't have it, you may need to build it manually with CMake from [source](https://github.com/libgit2/libgit2/releases/tag/v1.3.0). - -8. Visit config UI at `localhost:4000` to configure data connections. - - Please follow the [tutorial](UserManuals/ConfigUI/Tutorial.md) - - Submit the form to update the values by clicking on the **Save Connection** button on each form page - -9. Visit `localhost:4000/pipelines/create` to RUN a Pipeline and trigger data collection. - - - Pipelines Runs can be initiated by the new "Create Run" Interface. Simply enable the **Data Connection Providers** you wish to run collection for, and specify the data you want to collect, for instance, **Project ID** for Gitlab and **Repository Name** for GitHub. - - Once a valid pipeline configuration has been created, press **Create Run** to start/run the pipeline. - After the pipeline starts, you will be automatically redirected to the **Pipeline Activity** screen to monitor collection activity. - - **Pipelines** is accessible from the main menu of the config-ui for easy access. - - - Manage All Pipelines: `http://localhost:4000/pipelines` - - Create Pipeline RUN: `http://localhost:4000/pipelines/create` - - Track Pipeline Activity: `http://localhost:4000/pipelines/activity/[RUN_ID]` - - For advanced use cases and complex pipelines, please use the Raw JSON API to manually initiate a run using **cURL** or graphical API tool such as **Postman**. `POST` the following request to the DevLake API Endpoint. - - ```json - [ - [ - { - "plugin": "github", - "options": { - "repo": "lake", - "owner": "merico-dev" - } - } - ] - ] - ``` - - Please refer to [Pipeline Advanced Mode](../UserManuals/ConfigUI/AdvancedMode.md) for in-depth explanation. - - -10. Click *View Dashboards* button in the top left when done, or visit `localhost:3002` (username: `admin`, password: `admin`). - - We use Grafana as a visualization tool to build charts for the data stored in our database. Using SQL queries, we can add panels to build, save, and edit customized dashboards. - - All the details on provisioning and customizing a dashboard can be found in the [Grafana Doc](../UserManuals/Dashboards/GrafanaUserGuide.md). - -11. (Optional) To run the tests: - - ```sh - make test - ``` - -12. For DB migrations, please refer to [Migration Doc](../DeveloperManuals/DBMigration.md). 
- diff --git a/versioned_docs/version-v0.13/DeveloperManuals/E2E-Test-Guide.md b/versioned_docs/version-v0.13/DeveloperManuals/E2E-Test-Guide.md deleted file mode 100644 index 9e28fef1218..00000000000 --- a/versioned_docs/version-v0.13/DeveloperManuals/E2E-Test-Guide.md +++ /dev/null @@ -1,212 +0,0 @@ ---- -title: "E2E Test Guide" -description: > - The steps to write E2E tests for plugins. ---- - -# How to write E2E tests for plugins - -## Why write E2E tests - -E2E testing, as a part of automated testing, generally refers to black-box testing at the file and module level or unit testing that allows the use of some external services such as databases. The purpose of writing E2E tests is to shield some internal implementation logic and see whether the same external input can output the same result in terms of data aspects. In addition, compared to the black-box integration tests, it can avoid some chance problems caused by network and other factors. More information about the plugin can be found here: Why write E2E tests (incomplete). -In DevLake, E2E testing consists of interface testing and input/output result validation for the plugin Extract/Convert subtask. This article only describes the process of writing the latter. As the Collectors invoke external -services we typically do not write E2E tests for them. - -## Preparing data - -Let's take a simple plugin - Feishu Meeting Hours Collection as an example here. Its directory structure looks like this. -![image](https://user-images.githubusercontent.com/3294100/175061114-53404aac-16ca-45d1-a0ab-3f61d84922ca.png) -Next, we will write the E2E tests of the sub-tasks. - -The first step in writing the E2E test is to run the Collect task of the corresponding plugin to complete the data collection; that is, to have the corresponding data saved in the table starting with `_raw_feishu_` in the database. -This data will be presumed to be the "source of truth" for our tests. Here are the logs and database tables using the DirectRun (cmd) run method. -``` -$ go run plugins/feishu/main.go --numOfDaysToCollect 2 --connectionId 1 (Note: command may change with version upgrade) -[2022-06-22 23:03:29] INFO failed to create dir logs: mkdir logs: file exists -press `c` to send cancel signal -[2022-06-22 23:03:29] INFO [feishu] start plugin -[2022-06-22 23:03:33] INFO [feishu] scheduler for api https://open.feishu.cn/open-apis/vc/v1 worker: 13, request: 10000, duration: 1h0m0s -[2022-06-22 23:03:33] INFO [feishu] total step: 2 -[2022-06-22 23:03:33] INFO [feishu] executing subtask collectMeetingTopUserItem -[2022-06-22 23:03:33] INFO [feishu] [collectMeetingTopUserItem] start api collection -[2022-06-22 23:03:34] INFO [feishu] [collectMeetingTopUserItem] finished records: 1 -[2022-06-22 23:03:34] INFO [feishu] [collectMeetingTopUserItem] end api collection error: %!w() -[2022-06-22 23:03:34] INFO [feishu] finished step: 1 / 2 -[2022-06-22 23:03:34] INFO [feishu] executing subtask extractMeetingTopUserItem -[2022-06-22 23:03:34] INFO [feishu] [extractMeetingTopUserItem] get data from _raw_feishu_meeting_top_user_item where params={"connectionId":1} and got 148 -[2022-06-22 23:03:34] INFO [feishu] [extractMeetingTopUserItem] finished records: 1 -[2022-06-22 23:03:34] INFO [feishu] finished step: 2 / 2 -``` - -image -Ok, the data has now been saved to the `_raw_feishu_*` table, and the `data` column is the return information from the plugin. Here we only collected data for the last 2 days. 
The data information is not much, but it also covers a variety of situations. That is, the same person has data on different days. - -It is also worth mentioning that the plugin runs two tasks, `collectMeetingTopUserItem` and `extractMeetingTopUserItem`. The former is the task of collecting, which is needed to run this time, and the latter is the task of extracting data. It doesn't matter whether the extractor runs in the prepared data session. - -Next, we need to export the data to .csv format. This step can be done in a variety of different ways - you can show your skills. I will only introduce a few common methods here. - -### DevLake Code Generator Export - -Run `go run generator/main.go create-e2e-raw` directly and follow the guidelines to complete the export. This solution is the simplest, but has some limitations, such as the exported fields being fixed. You can refer to the next solutions if you need more customisation options. - -![usage](https://user-images.githubusercontent.com/3294100/175849225-12af5251-6181-4cd9-ba72-26087b05ee73.gif) - -### GoLand Database export - -![image](https://user-images.githubusercontent.com/3294100/175067303-7e5e1c4d-2430-4eb5-ad00-e38d86bbd108.png) - -This solution is very easy to use and will not cause problems using Postgres or MySQL. -![image](https://user-images.githubusercontent.com/3294100/175068178-f1c1c290-e043-4672-b43e-54c4b954c685.png) -The success criteria for csv export is that the go program can read it without errors, so several points are worth noticing. - -1. the values in the csv file should be wrapped in double quotes to avoid special symbols such as commas in the values that break the csv format -2. double quotes in csv files are escaped. generally `""` represents a double quote -3. pay attention to whether the column `data` is the actual value, not the value after base64 or hex - -After exporting, move the .csv file to `plugins/feishu/e2e/raw_tables/_raw_feishu_meeting_top_user_item.csv`. - -### MySQL Select Into Outfile - -This is MySQL's solution for exporting query results to a file. The MySQL currently started in docker-compose.yml comes with the --security parameter, so it does not allow `select ... into outfile`. The first step is to turn off the security parameter, which is done roughly as follows. -![origin_img_v2_c809c901-01bc-4ec9-b52a-ab4df24c376g](https://user-images.githubusercontent.com/3294100/175070770-9b7d5b75-574b-49ed-9bca-e9f611f60795.jpg) -After closing it, use `select ... into outfile` to export the csv file. The export result is rough as follows. -![origin_img_v2_ccfdb260-668f-42b4-b249-6c2dd45816ag](https://user-images.githubusercontent.com/3294100/175070866-2204ae13-c058-4a16-bc20-93ab7c95f832.jpg) -Notice that the data field has extra hexsha fields, which need to be manually converted to literal quantities. - -### Vscode Database - -This is Vscode's solution for exporting query results to a file, but it is not easy to use. Here is the export result without any configuration changes -![origin_img_v2_c9eaadaa-afbc-4c06-85bc-e78235f7eb3g](https://user-images.githubusercontent.com/3294100/175071987-760c2537-240c-4314-bbd6-1a0cd85ddc0f.jpg) -However, it is obvious that the escape symbol does not conform to the csv specification, and the data is not successfully exported. After adjusting the configuration and manually replacing `\"` with `""`, we get the following result. 
-![image](https://user-images.githubusercontent.com/3294100/175072314-954c6794-3ebd-45bb-98e7-60ddbb5a7da9.png) -The data field of this file is encoded in base64, so it needs to be decoded manually to a literal amount before using it. - -### MySQL workerbench - -This tool must write the SQL yourself to complete the data export, which can be rewritten by imitating the following SQL. -```sql -SELECT id, params, CAST(`data` as char) as data, url, input,created_at FROM _raw_feishu_meeting_top_user_item; -``` -![image](https://user-images.githubusercontent.com/3294100/175080866-1631a601-cbe6-40c0-9d3a-d23ca3322a50.png) -Select csv as the save format and export it for use. - -### Postgres Copy with csv header - -`Copy(SQL statement) to '/var/lib/postgresql/data/raw.csv' with csv header;` is a common export method for PG to export csv, which can also be used here. -```sql -COPY ( -SELECT id, params, convert_from(data, 'utf-8') as data, url, input,created_at FROM _raw_feishu_meeting_top_user_item -) to '/var/lib/postgresql/data/raw.csv' with csv header; -``` -Use the above statement to complete the export of the file. If pg runs in docker, just use the command `docker cp` to export the file to the host. - -## Writing E2E tests - -First, create a test environment. For example, let's create `meeting_test.go`. -![image](https://user-images.githubusercontent.com/3294100/175091380-424974b9-15f3-457b-af5c-03d3b5d17e73.png) -Then enter the test preparation code in it as follows. The code is to create an instance of the `feishu` plugin and then call `ImportCsvIntoRawTable` to import the data from the csv file into the `_raw_feishu_meeting_top_user_item` table. - -```go -func TestMeetingDataFlow(t *testing.T) { - var plugin impl.Feishu - dataflowTester := e2ehelper.NewDataFlowTester(t, "feishu", plugin) - - // import raw data table - dataflowTester.ImportCsvIntoRawTable("./raw_tables/_raw_feishu_meeting_top_user_item.csv", "_raw_feishu_meeting_top_user_item") -} -``` -The signature of the import function is as follows. -```func (t *DataFlowTester) ImportCsvIntoRawTable(csvRelPath string, rawTableName string)``` -It has a twin, with only slight differences in parameters. -```func (t *DataFlowTester) ImportCsvIntoTabler(csvRelPath string, dst schema.Tabler)``` -The former is used to import tables in the raw layer. The latter is used to import arbitrary tables. -**Note:** These two functions will delete the db table and use `gorm.AutoMigrate` to re-create a new table to clear data in it. -After importing the data is complete, run this tester and it must be PASS without any test logic at this moment. Then write the logic for calling the call to the extractor task in `TestMeetingDataFlow`. - -```go -func TestMeetingDataFlow(t *testing.T) { - var plugin impl.Feishu - dataflowTester := e2ehelper.NewDataFlowTester(t, "feishu", plugin) - - taskData := &tasks.FeishuTaskData{ - Options: &tasks.FeishuOptions{ - ConnectionId: 1, - }, - } - - // import raw data table - dataflowTester.ImportCsvIntoRawTable("./raw_tables/_raw_feishu_meeting_top_user_item.csv", "_raw_feishu_meeting_top_user_item") - - // verify extraction - dataflowTester.FlushTabler(&models.FeishuMeetingTopUserItem{}) - dataflowTester.Subtask(tasks.ExtractMeetingTopUserItemMeta, taskData) - -} -``` -The added code includes a call to `dataflowTester.FlushTabler` to clear the table `_tool_feishu_meeting_top_user_items` and a call to `dataflowTester.Subtask` to simulate the running of the subtask `ExtractMeetingTopUserItemMeta`. 
- -Now run it and see if the subtask `ExtractMeetingTopUserItemMeta` completes without errors. The data results of the `extract` run generally come from the raw table, so the plugin subtask will run correctly if written without errors. We can observe if the data is successfully parsed in the db table in the tool layer. In this case the `_tool_feishu_meeting_top_user_items` table has the correct data. - -If the run is incorrect, maybe you can troubleshoot the problem with the plugin itself before moving on to the next step. - -## Verify that the results of the task are correct - -Let's continue writing the test and add the following code at the end of the test function -```go -func TestMeetingDataFlow(t *testing.T) { - ...... - - dataflowTester.VerifyTable( - models.FeishuMeetingTopUserItem{}, - "./snapshot_tables/_tool_feishu_meeting_top_user_items.csv", - []string{ - "meeting_count", - "meeting_duration", - "user_type", - "_raw_data_params", - "_raw_data_table", - "_raw_data_id", - "_raw_data_remark", - }, - ) -} -``` -Its purpose is to call `dataflowTester.VerifyTable` to complete the validation of the data results. The third parameter is all the fields of the table that need to be verified. -The data used for validation exists in `. /snapshot_tables/_tool_feishu_meeting_top_user_items.csv`, but of course, this file does not exist yet. - -There is a twin, more generalized function, that could be used instead: -```go -dataflowTester.VerifyTableWithOptions(models.FeishuMeetingTopUserItem{}, - dataflowTester.TableOptions{ - CSVRelPath: "./snapshot_tables/_tool_feishu_meeting_top_user_items.csv" - }, - ) - -``` -The above usage will default to validating against all fields of the ```models.FeishuMeetingTopUserItem``` model. There are additional fields on ```TableOptions``` that can be specified -to limit which fields on that model to perform validation on. - -To facilitate the generation of the file mentioned above, DevLake has adopted a testing technique called `Snapshot`, which will automatically generate the file based on the run results when the `VerifyTable` or `VerifyTableWithOptions` functions are called without the csv existing. - -But note! Please do two things after the snapshot is created: 1. check if the file is generated correctly 2. re-run it to make sure there are no errors between the generated results and the re-run results. -These two operations are critical and directly related to the quality of test writing. We should treat the snapshot file in `.csv' format like a code file. - -If there is a problem with this step, there are usually 2 ways to solve it. -1. The validated fields contain fields like create_at runtime or self-incrementing ids, which cannot be repeatedly validated and should be excluded. -2. there is `\n` or `\r\n` or other escape mismatch fields in the run results. Generally, when parsing the `httpResponse` error, you can follow these solutions: - 1. modify the field type of the content in the api model to `json. - 2. convert it to string when parsing - 3. so that the `\n` symbol can be kept intact, avoiding the parsing of line breaks by the database or the operating system - - -For example, in the `github` plugin, this is how it is handled. -![image](https://user-images.githubusercontent.com/3294100/175098219-c04b810a-deaf-4958-9295-d5ad4ec152e6.png) -![image](https://user-images.githubusercontent.com/3294100/175098273-e4a18f9a-51c8-4637-a80c-3901a3c2934e.png) - -Well, at this point, the E2E writing is done. 
We have added a total of 3 new files to complete the testing of the meeting length collection task. It's pretty easy. -![image](https://user-images.githubusercontent.com/3294100/175098574-ae6c7fb7-7123-4d80-aa85-790b492290ca.png) - -## Run E2E tests for all plugins like CI - -It's straightforward. Just run `make e2e-plugins` because DevLake has already solidified it into a script~ - diff --git a/versioned_docs/version-v0.13/DeveloperManuals/Notifications.md b/versioned_docs/version-v0.13/DeveloperManuals/Notifications.md deleted file mode 100644 index 23456b4f1e7..00000000000 --- a/versioned_docs/version-v0.13/DeveloperManuals/Notifications.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Notifications" -description: > - Notifications -sidebar_position: 4 ---- - -## Request -Example request -``` -POST /lake/notify?nouce=3-FDXxIootApWxEVtz&sign=424c2f6159bd9e9828924a53f9911059433dc14328a031e91f9802f062b495d5 - -{"TaskID":39,"PluginName":"jenkins","CreatedAt":"2021-09-30T15:28:00.389+08:00","UpdatedAt":"2021-09-30T15:28:00.785+08:00"} -``` - -## Configuration -If you want to use the notification feature, you should add two configuration key to `.env` file. -```shell -# .env -# notification request url, e.g.: http://example.com/lake/notify -NOTIFICATION_ENDPOINT= -# secret is used to calculate signature -NOTIFICATION_SECRET= -``` - -## Signature -You should check the signature before accepting the notification request. We use sha256 algorithm to calculate the checksum. -```go -// calculate checksum -sum := sha256.Sum256([]byte(requestBody + NOTIFICATION_SECRET + nouce)) -return hex.EncodeToString(sum[:]) -``` diff --git a/versioned_docs/version-v0.13/DeveloperManuals/PluginImplementation.md b/versioned_docs/version-v0.13/DeveloperManuals/PluginImplementation.md deleted file mode 100644 index b478a4ddd43..00000000000 --- a/versioned_docs/version-v0.13/DeveloperManuals/PluginImplementation.md +++ /dev/null @@ -1,339 +0,0 @@ ---- -title: "Plugin Implementation" -sidebar_position: 2 -description: > - Plugin Implementation ---- - -If your favorite DevOps tool is not yet supported by DevLake, don't worry. It's not difficult to implement a DevLake plugin. In this post, we'll go through the basics of DevLake plugins and build an example plugin from scratch together. - -## What is a plugin? - -A DevLake plugin is a shared library built with Go's `plugin` package that hooks up to DevLake core at run-time. - -A plugin may extend DevLake's capability in three ways: - -1. Integrating with new data sources -2. Transforming/enriching existing data -3. Exporting DevLake data to other data systems - - -## How do plugins work? - -A plugin mainly consists of a collection of subtasks that can be executed by DevLake core. For data source plugins, a subtask may be collecting a single entity from the data source (e.g., issues from Jira). Besides the subtasks, there're hooks that a plugin can implement to customize its initialization, migration, and more. See below for a list of the most important interfaces: - -1. [PluginMeta](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_meta.go) contains the minimal interface that a plugin should implement, with only two functions - - Description() returns the description of a plugin - - RootPkgPath() returns the root package path of a plugin -2. [PluginInit](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_init.go) allows a plugin to customize its initialization -3. 
[PluginTask](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_task.go) enables a plugin to prepare data prior to subtask execution -4. [PluginApi](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_api.go) lets a plugin exposes some self-defined APIs -5. [PluginMigration](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_migration.go) is where a plugin manages its database migrations -6. [PluginModel](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_model.go) allows other plugins to get the model information of all database tables of the current plugin through the GetTablesInfo() method.If you need to access Domain Layer Models,please visit [DomainLayerSchema](https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema/) - -The diagram below shows the control flow of executing a plugin: - -```mermaid -flowchart TD; - subgraph S4[Step4 sub-task extractor running process]; - direction LR; - D4[DevLake]; - D4 -- "Step4.1 create a new\n ApiExtractor\n and execute it" --> E["ExtractXXXMeta.\nEntryPoint"]; - E <-- "Step4.2 read from\n raw table" --> E2["RawDataSubTaskArgs\n.Table"]; - E -- "Step4.3 call with RawData" --> ApiExtractor.Extract; - ApiExtractor.Extract -- "decode and return gorm models" --> E - end - subgraph S3[Step3 sub-task collector running process] - direction LR - D3[DevLake] - D3 -- "Step3.1 create a new\n ApiCollector\n and execute it" --> C["CollectXXXMeta.\nEntryPoint"]; - C <-- "Step3.2 create\n raw table" --> C2["RawDataSubTaskArgs\n.RAW_BBB_TABLE"]; - C <-- "Step3.3 build query\n before sending requests" --> ApiCollectorArgs.\nQuery/UrlTemplate; - C <-. "Step3.4 send requests by ApiClient \n and return HTTP response" .-> A1["HTTP APIs"]; - C <-- "Step3.5 call and \nreturn decoded data \nfrom HTTP response" --> ResponseParser; - end - subgraph S2[Step2 DevLake register custom plugin] - direction LR - D2[DevLake] - D2 <-- "Step2.1 function \`Init\` \nneed to do init jobs" --> plugin.Init; - D2 <-- "Step2.2 (Optional) call \nand return migration scripts" --> plugin.MigrationScripts; - D2 <-- "Step2.3 (Optional) call \nand return taskCtx" --> plugin.PrepareTaskData; - D2 <-- "Step2.4 call and \nreturn subTasks for execting" --> plugin.SubTaskContext; - end - subgraph S1[Step1 Run DevLake] - direction LR - main -- "Transfer of control \nby \`runner.DirectRun\`" --> D1[DevLake]; - end - S1-->S2-->S3-->S4 -``` -There's a lot of information in the diagram but we don't expect you to digest it right away, simply use it as a reference when you go through the example below. - -## A step-by-step guide towards your first plugin - -In this section, we will describe how to create a data collection plugin from scratch. The data to be collected is the information about all Committers and Contributors of the Apache project, in order to check whether they have signed the CLA. We are going to - -* request `https://people.apache.org/public/icla-info.json` to get the Committers' information -* request the `mailing list` to get the Contributors' information - -We will focus on demonstrating how to request and cache information about all Committers through the Apache API and extract structured data from it. The collection of Contributors will only be briefly described. - -### Step 1: Bootstrap the new plugin - -**Note:** Please make sure you have DevLake up and running before proceeding. 
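Before scaffolding anything, it may help to see how small the minimal plugin surface described above really is. A rough sketch of a type satisfying the two-method PluginMeta-style interface could look like the following; the plain-string return types and exact package layout are assumptions here, so check the framework's plugin package for the authoritative definitions.

```go
package main

import "fmt"

// Icla is a bare-bones plugin type. The two methods mirror the minimal
// PluginMeta interface described earlier: a human-readable description and
// the plugin's root package path. Treat this as an illustration, not as the
// framework's exact contract.
type Icla struct{}

func (i Icla) Description() string {
	return "collect ICLA signer data from people.apache.org"
}

func (i Icla) RootPkgPath() string {
	return "github.com/apache/incubator-devlake/plugins/icla"
}

func main() {
	var plugin Icla
	fmt.Println(plugin.Description())
	fmt.Println(plugin.RootPkgPath())
}
```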
- -> More info about plugin: -> Generally, we need these folders in plugin folders: `api`, `models` and `tasks` -> `api` interacts with `config-ui` for test/get/save connection of data source -> - connection [example](https://github.com/apache/incubator-devlake/blob/main/plugins/gitlab/api/connection.go) -> - connection model [example](https://github.com/apache/incubator-devlake/blob/main/plugins/gitlab/models/connection.go) -> `models` stores all `data entities` and `data migration scripts`. -> - entity -> - data migrations [template](https://github.com/apache/incubator-devlake/tree/main/generator/template/migrationscripts) -> `tasks` contains all of our `sub tasks` for a plugin -> - task data [template](https://github.com/apache/incubator-devlake/blob/main/generator/template/plugin/tasks/task_data.go-template) -> - api client [template](https://github.com/apache/incubator-devlake/blob/main/generator/template/plugin/tasks/task_data_with_api_client.go-template) - -Don't worry if you cannot figure out what these concepts mean immediately. We'll explain them one by one later. - -DevLake provides a generator to create a plugin conveniently. Let's scaffold our new plugin by running `go run generator/main.go create-plugin icla`, which would ask for `with_api_client` and `Endpoint`. - -* `with_api_client` is used for choosing if we need to request HTTP APIs by api_client. -* `Endpoint` use in which site we will request, in our case, it should be `https://people.apache.org/`. - -![](https://i.imgur.com/itzlFg7.png) - -Now we have three files in our plugin. `api_client.go` and `task_data.go` are in subfolder `tasks/`. -![plugin files](https://i.imgur.com/zon5waf.png) - -Have a try to run this plugin by function `main` in `plugin_main.go`. When you see result like this: -``` -$go run plugins/icla/plugin_main.go -[2022-06-02 18:07:30] INFO failed to create dir logs: mkdir logs: file exists -press `c` to send cancel signal -[2022-06-02 18:07:30] INFO [icla] start plugin -invalid ICLA_TOKEN, but ignore this error now -[2022-06-02 18:07:30] INFO [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s -[2022-06-02 18:07:30] INFO [icla] total step: 0 -``` -How exciting. It works! The plugin defined and initiated in `plugin_main.go` use some options in `task_data.go`. They are made up as the most straightforward plugin in Apache DevLake, and `api_client.go` will be used in the next step to request HTTP APIs. - -### Step 2: Create a sub-task for data collection -Before we start, it is helpful to know how collection task is executed: -1. First, Apache DevLake would call `plugin_main.PrepareTaskData()` to prepare needed data before any sub-tasks. We need to create an API client here. -2. Then Apache DevLake will call the sub-tasks returned by `plugin_main.SubTaskMetas()`. Sub-task is an independent task to do some job, like requesting API, processing data, etc. - -> Each sub-task must be defined as a SubTaskMeta, and implement SubTaskEntryPoint of SubTaskMeta. SubTaskEntryPoint is defined as -> ```go -> type SubTaskEntryPoint func(c SubTaskContext) error -> ``` -> More info at: https://devlake.apache.org/blog/how-DevLake-is-up-and-running/ - -#### Step 2.1: Create a sub-task(Collector) for data collection - -Let's run `go run generator/main.go create-collector icla committer` and confirm it. This sub-task is activated by registering in `plugin_main.go/SubTaskMetas` automatically. 
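For reference, the registration the generator wires up boils down to a metadata entry per sub-task plus a slice returned from `plugin_main.go`. The sketch below uses simplified stand-in types to show the shape only; field names such as `EnabledByDefault` are assumptions, and the generated code plus the framework's core package are the source of truth.

```go
package main

import "fmt"

// Simplified stand-ins for the framework types, to show the shape of the
// registration only. In a real plugin these come from the framework, and the
// generator fills them in for you.
type SubTaskContext interface{}

type SubTaskEntryPoint func(c SubTaskContext) error

type SubTaskMeta struct {
	Name             string
	EntryPoint       SubTaskEntryPoint
	EnabledByDefault bool
	Description      string
}

// CollectCommitter is a stub for the entry point generated in tasks/.
func CollectCommitter(c SubTaskContext) error {
	fmt.Println("collecting committers ...")
	return nil
}

var CollectCommitterMeta = SubTaskMeta{
	Name:             "CollectCommitter",
	EntryPoint:       CollectCommitter,
	EnabledByDefault: true,
	Description:      "collect committer data from people.apache.org",
}

// SubTaskMetas is what the framework calls to learn which sub-tasks to run.
func SubTaskMetas() []SubTaskMeta {
	return []SubTaskMeta{CollectCommitterMeta}
}

func main() {
	for _, meta := range SubTaskMetas() {
		fmt.Println("registered sub-task:", meta.Name)
	}
}
```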
- -![](https://i.imgur.com/tkDuofi.png) - -> - Collector will collect data from HTTP or other data sources, and save the data into the raw layer. -> - Inside the func `SubTaskEntryPoint` of `Collector`, we use `helper.NewApiCollector` to create an object of [ApiCollector](https://github.com/apache/incubator-devlake/blob/main/backend/generator/template/plugin/tasks/api_collector.go-template), then call `execute()` to do the job. - -Now you can notice `data.ApiClient` is initiated in `plugin_main.go/PrepareTaskData.ApiClient`. `PrepareTaskData` create a new `ApiClient`, which is a tool Apache DevLake suggests to request data from HTTP Apis. This tool support some valuable features for HttpApi, like rateLimit, proxy and retry. Of course, if you like, you may use the lib `http` instead, but it will be more tedious. - -Let's move forward to use it. - -1. To collect data from `https://people.apache.org/public/icla-info.json`, -we have filled `https://people.apache.org/` into `tasks/api_client.go/ENDPOINT` in Step 1. - -![](https://i.imgur.com/q8Zltnl.png) - -2. Fill `public/icla-info.json` into `UrlTemplate`, delete the unnecessary iterator and add `println("receive data:", res)` in `ResponseParser` to see if collection was successful. - -![](https://i.imgur.com/ToLMclH.png) - -Ok, now the collector sub-task has been added to the plugin, and we can kick it off by running `main` again. If everything goes smoothly, the output should look like this: -```bash -[2022-06-06 12:24:52] INFO [icla] start plugin -invalid ICLA_TOKEN, but ignore this error now -[2022-06-06 12:24:52] INFO [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s -[2022-06-06 12:24:52] INFO [icla] total step: 1 -[2022-06-06 12:24:52] INFO [icla] executing subtask CollectCommitter -[2022-06-06 12:24:52] INFO [icla] [CollectCommitter] start api collection -receive data: 0x140005763f0 -[2022-06-06 12:24:55] INFO [icla] [CollectCommitter] finished records: 1 -[2022-06-06 12:24:55] INFO [icla] [CollectCommitter] end api collection -[2022-06-06 12:24:55] INFO [icla] finished step: 1 / 1 -``` - -Great! Now we can see data pulled from the server without any problem. The last step is to decode the response body in `ResponseParser` and return it to the framework, so it can be stored in the database. -```go -ResponseParser: func(res *http.Response) ([]json.RawMessage, error) { - body := &struct { - LastUpdated string `json:"last_updated"` - Committers json.RawMessage `json:"committers"` - }{} - err := helper.UnmarshalResponse(res, body) - if err != nil { - return nil, err - } - println("receive data:", len(body.Committers)) - return []json.RawMessage{body.Committers}, nil -}, - -``` -Ok, run the function `main` once again, then it turned out like this, and we should be able see some records show up in the table `_raw_icla_committer`. -```bash -…… -receive data: 272956 /* <- the number means 272956 models received */ -[2022-06-06 13:46:57] INFO [icla] [CollectCommitter] finished records: 1 -[2022-06-06 13:46:57] INFO [icla] [CollectCommitter] end api collection -[2022-06-06 13:46:57] INFO [icla] finished step: 1 / 1 -``` - -![](https://i.imgur.com/aVYNMRr.png) - -#### Step 2.2: Create a sub-task(Extractor) to extract data from the raw layer - -> - Extractor will extract data from raw layer and save it into tool db table. -> - Except for some pre-processing, the main flow is similar to the collector. - -We have already collected data from HTTP API and saved them into the DB table `_raw_XXXX`. 
#### Step 2.2: Create a sub-task (Extractor) to extract data from the raw layer

> - The Extractor extracts data from the raw layer and saves it into the tool-layer DB tables.
> - Apart from some pre-processing, the main flow is similar to the collector.

We have already collected data from the HTTP API and saved it into the DB table `_raw_XXXX`. In this step, we will extract the names of committers from the raw data. As you may infer from the name, raw tables are temporary and not easy to use directly.

Apache DevLake recommends saving data with [gorm](https://gorm.io/docs/index.html), so we will create a gorm model and register it in `plugin_main.go/AutoMigrate()`.

plugins/icla/models/committer.go
```go
package models

import (
	"github.com/apache/incubator-devlake/models/common"
)

type IclaCommitter struct {
	UserName string `gorm:"primaryKey;type:varchar(255)"`
	Name     string `gorm:"primaryKey;type:varchar(255)"`
	common.NoPKModel
}

func (IclaCommitter) TableName() string {
	return "_tool_icla_committer"
}
```

plugins/icla/plugin_main.go
![](https://i.imgur.com/4f0zJty.png)


Ok, run the plugin, and the table `_tool_icla_committer` will be created automatically, just like the snapshot below:
![](https://i.imgur.com/7Z324IX.png)

Next, let's run `go run generator/main.go create-extractor icla committer` and type in what the command prompt asks for to create a new sub-task.

![](https://i.imgur.com/UyDP9Um.png)

Let's look at the function `extract` in the newly created `committer_extractor.go` and the code that needs to be written here. `resData.Data` holds the raw data, so we can JSON-decode each row, create an `IclaCommitter` for it, and save them all.
```go
Extract: func(resData *helper.RawData) ([]interface{}, error) {
    names := &map[string]string{}
    err := json.Unmarshal(resData.Data, names)
    if err != nil {
        return nil, err
    }
    extractedModels := make([]interface{}, 0)
    for userName, name := range *names {
        extractedModels = append(extractedModels, &models.IclaCommitter{
            UserName: userName,
            Name: name,
        })
    }
    return extractedModels, nil
},
```

Ok, run it and we get:
```
[2022-06-06 15:39:40] INFO  [icla] start plugin
invalid ICLA_TOKEN, but ignore this error now
[2022-06-06 15:39:40] INFO  [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s
[2022-06-06 15:39:40] INFO  [icla] total step: 2
[2022-06-06 15:39:40] INFO  [icla] executing subtask CollectCommitter
[2022-06-06 15:39:40] INFO  [icla] [CollectCommitter] start api collection
receive data: 272956
[2022-06-06 15:39:44] INFO  [icla] [CollectCommitter] finished records: 1
[2022-06-06 15:39:44] INFO  [icla] [CollectCommitter] end api collection
[2022-06-06 15:39:44] INFO  [icla] finished step: 1 / 2
[2022-06-06 15:39:44] INFO  [icla] executing subtask ExtractCommitter
[2022-06-06 15:39:46] INFO  [icla] [ExtractCommitter] finished records: 1
[2022-06-06 15:39:46] INFO  [icla] finished step: 2 / 2
```
Now the committer data has been saved in `_tool_icla_committer`.
![](https://i.imgur.com/6svX0N2.png)

#### Step 2.3: Convertor

Note: the goal of Converters is to create a vendor-agnostic model out of the vendor-dependent ones created by the Extractors. They are not strictly required, but we encourage writing them because converters and the domain layer significantly help with building dashboards. More info about the domain layer is available [here](https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema/).

In short:

> - The Convertor converts data from the tool layer and saves it into the domain layer.
> - We use `helper.NewDataConverter` to create a DataConverter object, then call `execute()`.
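Step 2.3 has no code in this tutorial, so here is a minimal, hypothetical sketch of the conversion idea: turn each tool-layer `IclaCommitter` row into a domain-layer record. The `DomainCommitter` type below is a placeholder invented for illustration, not a real DevLake domain model, and the callback shape is only an approximation of what you would hand to `helper.NewDataConverter`; check an existing plugin such as `gitlab` for the authoritative pattern.

```go
// A hypothetical domain-layer shape, for illustration only; DevLake's real
// domain models live under models/domainlayer (see the Domain Layer Schema doc).
type DomainCommitter struct {
	Id   string
	Name string
}

// Convert is the kind of callback a DataConverter would invoke: it receives one
// tool-layer row and returns the domain-layer record(s) to be saved.
func Convert(inputRow interface{}) ([]interface{}, error) {
	committer := inputRow.(*models.IclaCommitter)
	return []interface{}{
		&DomainCommitter{
			// prefix the id with the plugin name so records from different
			// platforms can co-exist in one domain table
			Id:   "icla:" + committer.UserName,
			Name: committer.Name,
		},
	}, nil
}
```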
#### Step 2.4: Let's try it
Sometimes an open API is protected by a token or another auth mechanism, and we need to log in to obtain a token before we can access it. For example, only after logging in to `private@apache.com` could we gather the data about contributors signing the ICLA. Here we briefly introduce how to authorize DevLake to collect data.

Let's look at `api_client.go`. `NewIclaApiClient` loads the config value `ICLA_TOKEN` from `.env`, so we can add `ICLA_TOKEN=XXXXXX` to `.env` and use it in `apiClient.SetHeaders()` to mock the logged-in status. The code is shown below:
![](https://i.imgur.com/dPxooAx.png)

Of course, we could also use a `username/password` login to obtain a token instead of mocking one. Just try it and adjust according to your actual situation.

For more details, see https://github.com/apache/incubator-devlake

#### Step 2.5: Implement the GetTablesInfo() method of the PluginModel interface

As shown in the following GitLab plugin example, add every model that needs to be accessed by external plugins to the return value.

```go
var _ core.PluginModel = (*Gitlab)(nil)

func (plugin Gitlab) GetTablesInfo() []core.Tabler {
	return []core.Tabler{
		&models.GitlabConnection{},
		&models.GitlabAccount{},
		&models.GitlabCommit{},
		&models.GitlabIssue{},
		&models.GitlabIssueLabel{},
		&models.GitlabJob{},
		&models.GitlabMergeRequest{},
		&models.GitlabMrComment{},
		&models.GitlabMrCommit{},
		&models.GitlabMrLabel{},
		&models.GitlabMrNote{},
		&models.GitlabPipeline{},
		&models.GitlabProject{},
		&models.GitlabProjectCommit{},
		&models.GitlabReviewer{},
		&models.GitlabTag{},
	}
}
```

You can use it as follows:

```go
if pm, ok := plugin.(core.PluginModel); ok {
	tables := pm.GetTablesInfo()
	for _, table := range tables {
		// do something
	}
}
```

#### Final step: Submit the code as open source code
We encourage ideas and contributions. Let's use migration scripts, domain layers and the other concepts discussed above to write standardized and platform-neutral code. More info is available [here](https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema), or contact us if you need help.


## Done!

Congratulations! The first plugin has been created! 🎖 diff --git a/versioned_docs/version-v0.13/DeveloperManuals/Release-SOP.md b/versioned_docs/version-v0.13/DeveloperManuals/Release-SOP.md deleted file mode 100644 index 9e020d4a8b3..00000000000 --- a/versioned_docs/version-v0.13/DeveloperManuals/Release-SOP.md +++ /dev/null @@ -1,111 +0,0 @@
# DevLake release guide

**Please make sure your public key is included in https://downloads.apache.org/incubator/devlake/KEYS; if not, please update https://downloads.apache.org/incubator/devlake/KEYS first.**
## How to update KEYS
1. Clone the svn repository
   ```shell
   svn co https://dist.apache.org/repos/dist/dev/incubator/devlake
   ```
2. Append your public key to the KEYS file
   ```shell
   cd devlake
   (gpg --list-sigs && gpg --armor --export ) >> KEYS
   ```
3. Upload
   ```shell
   svn add KEYS
   svn commit -m "update KEYS"
   svn cp https://dist.apache.org/repos/dist/dev/incubator/devlake/KEYS https://dist.apache.org/repos/dist/release/incubator/devlake/ -m "update KEYS"
   ```
We will use `v0.12.0` as an example to demonstrate the release process.
- -## ASF Release Policy -https://www.apache.org/legal/release-policy.html -https://incubator.apache.org/guides/releasemanagement.html - -## Tools: -`gpg` creating and verifying the signature -`shasum` creating and verifying the checksum -`git` checkout and pack the codebase -`svn` uploading the code to the Apache code hosting server - -## Prepare -- Check against the Incubator Release Checklist -- Create folder `releases/lake-v0.12.0` and put the two files `docker-compose.yml` and `env.example` in there. -- Update the file `.github/ISSUE_TEMPLATE/bug-report.yml` to include the version `v0.12.0` - - -## Pack -- Checkout to the branch/commit - ```shell - git clone https://github.com/apache/incubator-devlake.git - cd incubator-devlake - git checkout 25b718a5cc0c6a782c441965e3cbbce6877747d0 - ``` - -- Tag the commit and push to origin - ```shell - git tag v0.12.0-rc2 - git push origin v0.12.0-rc2 - ``` - -- Pack the code - ```shell - git archive --format=tar.gz --output="/apache-devlake-0.12.0-incubating-src.tar.gz" --prefix="apache-devlake-0.12.0-incubating-src/" v0.12.0-rc2 - ``` -- Before proceeding to the next step, please make sure your public key was included in the https://downloads.apache.org/incubator/devlake/KEYS -- Create signature and checksum - ```shell - cd - gpg -s --armor --output apache-devlake-0.12.0-incubating-src.tar.gz.asc --detach-sig apache-devlake-0.12.0-incubating-src.tar.gz - shasum -a 512 apache-devlake-0.12.0-incubating-src.tar.gz > apache-devlake-0.12.0-incubating-src.tar.gz.sha512 - ``` -- Verify signature and checksum - ```shell - gpg --verify apache-devlake-0.12.0-incubating-src.tar.gz.asc apache-devlake-0.12.0-incubating-src.tar.gz - shasum -a 512 --check apache-devlake-0.12.0-incubating-src.tar.gz.sha512 - ``` -## Upload -- Clone the svn repository - ```shell - svn co https://dist.apache.org/repos/dist/dev/incubator/devlake - ``` -- Copy the files into the svn local directory - ```shell - cd devlake - mkdir -p 0.12.0-incubating-rc2 - cp /apache-devlake-0.12.0-incubating-src.tar.gz* 0.12.0-incubating-rc2/ - - Upload local files - svn add 0.12.0-incubating-rc2 - svn commit -m "add 0.12.0-incubating-rc2" - ``` -## Vote -1. Devlake community vote: - - Start the vote by sending an email to - [[VOTE] Release Apache DevLake (Incubating) v0.12.0-rc2](https://lists.apache.org/thread/yxy3kokhhhxlkxcr4op0pwslts7d8tcy) - - Announce the vote result - [[RESULT][VOTE] Release Apache DevLake (Incubating) v0.12.0-rc2](https://lists.apache.org/thread/qr3fj42tmryztt919jsy5q8hbpmcztky) - -2. 
Apache incubator community vote: - - Start the vote by sending an email to general@incubator.apache.org - [[VOTE] Release Apache DevLake (Incubating) v0.12.0-rc2](https://lists.apache.org/thread/0bjroykzcyoj7pnjt7gjh1v3yofm901o) - - Announce the vote result - [[RESULT][VOTE] Release Apache DevLake (Incubating) v0.12.0-rc2](https://lists.apache.org/thread/y2pqg0c2hhgp0pcqolv19s27db190xsh) - -## Release -### Apache -- Move the release to the ASF content distribution system - ```shell - svn mv https://dist.apache.org/repos/dist/dev/incubator/devlake/0.12.0-incubating-rc2 https://dist.apache.org/repos/dist/release/incubator/devlake/0.12.0-incubating -m "transfer packages for 0.12.0-incubating-rc2" - ``` -- Wait until the directory 0.12.0-incubating in https://downloads.apache.org/incubator/devlake/ was created -- Announce release by sending an email to general@incubator.apache.org - [[ANNOUNCE] Release Apache Devlake(incubating) 0.12.0-incubating](https://lists.apache.org/thread/7h6og1y6nhh4xr4r6rqbnswjoj3msxjk) -### GitHub -- Create tag v0.12.0 and push - ```shell - git checkout v0.12.0-rc2 - git tag v0.12.0 - git push origin v0.12.0 - ``` -- Create release v0.12.0 https://github.com/apache/incubator-devlake/releases/tag/v0.12.0 diff --git a/versioned_docs/version-v0.13/DeveloperManuals/TagNamingConventions.md b/versioned_docs/version-v0.13/DeveloperManuals/TagNamingConventions.md deleted file mode 100644 index 7195070f6cb..00000000000 --- a/versioned_docs/version-v0.13/DeveloperManuals/TagNamingConventions.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: "Tag Naming Conventions" -description: > - Tag Naming Conventions -sidebar_position: 6 ---- - -Please refer to the rules when creating a new tag for Apache DevLake -- alpha: internal testing/preview, i.e. v0.12.0-alpha1 -- beta: communtity/customer testing/preview, i.e. v0.12.0-beta1 -- rc: asf release candidate, i.e. v0.12.0-rc1 - - diff --git a/versioned_docs/version-v0.13/DeveloperManuals/_category_.json b/versioned_docs/version-v0.13/DeveloperManuals/_category_.json deleted file mode 100644 index f921ae47152..00000000000 --- a/versioned_docs/version-v0.13/DeveloperManuals/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Developer Manuals", - "position": 8, - "link":{ - "type": "generated-index", - "slug": "DeveloperManuals" - } -} diff --git a/versioned_docs/version-v0.13/GettingStarted/DockerComposeSetup.md b/versioned_docs/version-v0.13/GettingStarted/DockerComposeSetup.md deleted file mode 100644 index c17bb9f505b..00000000000 --- a/versioned_docs/version-v0.13/GettingStarted/DockerComposeSetup.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: "Install via Docker Compose" -description: > - The steps to install DevLake via Docker Compose -sidebar_position: 1 ---- - - -## Prerequisites - -- [Docker v19.03.10+](https://docs.docker.com/get-docker) -- [docker-compose v2.2.3+](https://docs.docker.com/compose/install/) (If you have Docker Desktop installed then you already have the Compose plugin installed) - -## Launch DevLake - -- Commands written `like this` are to be run in your terminal. - -1. Download `docker-compose.yml` and `env.example` from [latest release page](https://github.com/apache/incubator-devlake/releases/latest) into a folder. -2. Rename `env.example` to `.env`. For Mac/Linux users, please run `mv env.example .env` in the terminal. This file contains the environment variables that the Devlake server will use. Additional ones can be found in the compose file(s). -3. Run `docker-compose up -d` to launch DevLake. 
## Configure and collect data

1. Visit `config-ui` at `http://localhost:4000` in your browser to configure and collect data.
   - Please follow the [tutorial](UserManuals/ConfigUI/Tutorial.md)
   - `devlake` takes a while to fully boot up. If `config-ui` complains about the API being unreachable, please wait a few seconds and try refreshing the page.
2. Click the *View Dashboards* button in the top left when done, or visit `localhost:3002` (username: `admin`, password: `admin`).
   - We use [Grafana](https://grafana.com/) as a visualization tool to build charts for the [data](../SupportedDataSources.md) stored in our database.
   - Using SQL queries, we can add panels to build, save, and edit customized dashboards.
   - All the details on provisioning and customizing a dashboard can be found in the [Grafana Doc](../UserManuals/Dashboards/GrafanaUserGuide.md).


## Upgrade to a newer version

Support for database schema migration was introduced to DevLake in v0.10.0. From v0.10.0 onwards, users can upgrade their instance smoothly to a newer version. However, versions prior to v0.10.0 do not support upgrading to a newer version with a different database schema, so we recommend deploying a new instance in that case.
diff --git a/versioned_docs/version-v0.13/GettingStarted/HelmSetup.md b/versioned_docs/version-v0.13/GettingStarted/HelmSetup.md deleted file mode 100644 index d1c83494da2..00000000000 --- a/versioned_docs/version-v0.13/GettingStarted/HelmSetup.md +++ /dev/null @@ -1,116 +0,0 @@
---
title: "Install via Helm"
description: >
  The steps to install Apache DevLake via Helm for Kubernetes
sidebar_position: 2
---

## Prerequisites

- Helm >= 3.6.0
- Kubernetes >= 1.19.0


## Quick Install

Clone the code and enter the deployment/helm folder.
```
helm install devlake .
```

Then visit DevLake from the node port (32001 by default).

http://YOUR-NODE-IP:32001


## Some example deployments

### Deploy with NodePort

Conditions:
 - IP Address of Kubernetes node: 192.168.0.6
 - Want to visit DevLake on port 30000.

```
helm install devlake . --set service.uiPort=30000
```

Once deployed, visit DevLake at http://192.168.0.6:30000

### Deploy with Ingress

Conditions:
 - A default ingress is already configured for the Kubernetes cluster
 - Use http://devlake.example.com to visit DevLake

```
helm install devlake . --set "ingress.enabled=true,ingress.hostname=devlake.example.com"
```

Once deployed, visit DevLake at http://devlake.example.com, and Grafana at http://devlake.example.com/grafana

### Deploy with Ingress (HTTPS)

Conditions:
 - An ingress (class: nginx) is already configured for the Kubernetes cluster, with HTTPS served on port 8443.
 - Use https://devlake-0.example.com:8443 to visit DevLake.
 - The HTTPS certificate and key files, generated by letsencrypt.org, are `cert.pem` and `key.pem`

First, create the secret:
```
kubectl create secret tls ssl-certificate --cert cert.pem --key key.pem
```

Then, deploy DevLake:
```
helm install devlake . 
\ - --set "ingress.enabled=true,ingress.enableHttps=true,ingress.hostname=devlake-0.example.com" \ - --set "ingress.className=nginx,ingress.httpsPort=8443" \ - --set "ingress.tlsSecretName=ssl-certificate" -``` - -After deployed, visit devlake: https://devlake-0.example.com:8443, and grafana at https://devlake-0.example.com:8443/grafana - - -## Parameters - -Some useful parameters for the chart, you could also check them in values.yaml - -| Parameter | Description | Default | -|-----------|-------------|---------| -| replicaCount | Replica Count for devlake, currently not used | 1 | -| mysql.useExternal | If use external mysql server, currently not used | false | -| mysql.externalServer | External mysql server address | 127.0.0.1 | -| mysql.externalPort | External mysql server port | 3306 | -| mysql.username | username for mysql | merico | -| mysql.password | password for mysql | merico | -| mysql.database | database for mysql | lake | -| mysql.rootPassword | root password for mysql | admin | -| mysql.storage.class | storage class for mysql's volume | "" | -| mysql.storage.size | volume size for mysql's data | 5Gi | -| mysql.image.repository | repository for mysql's image | mysql | -| mysql.image.tag | image tag for mysql's image | 8.0.26 | -| mysql.image.pullPolicy | pullPolicy for mysql's image | IfNotPresent | -| grafana.image.repository | repository for grafana's image | mericodev/grafana | -| grafana.image.tag | image tag for grafana's image | latest | -| grafana.image.pullPolicy | pullPolicy for grafana's image | Always | -| lake.storage.class | storage class for lake's volume | "" | -| lake.storage.size | volume size for lake's data | 100Mi | -| lake.image.repository | repository for lake's image | mericodev/lake | -| lake.image.tag | image tag for lake's image | latest | -| lake.image.pullPolicy | pullPolicy for lake's image | Always | -| lake.loggingDir | the root logging directory of Devlake | /app/logs | -| ui.image.repository | repository for ui's image | mericodev/config-ui | -| ui.image.tag | image tag for ui's image | latest | -| ui.image.pullPolicy | pullPolicy for ui's image | Always | -| service.type | Service type for exposed service | NodePort | -| service.uiPort | Service port for config ui | 32001 | -| service.ingress.enabled | If enable ingress | false | -| service.ingress.enableHttps | If enable https | false | -| service.ingress.className | The class name for ingressClass. 
If leave empty, the default IngressClass will be used | "" | -| service.ingress.hostname | The hostname/domainname for ingress | localhost | -| service.ingress.prefix | The prefix for endpoints, currently not supported due to devlake's implementation | / | -| service.ingress.tlsSecretName | The secret name for tls's certificate, required when https enabled | "" | -| service.ingress.httpPort | The http port for ingress | 80 | -| service.ingress.httpsPort | The https port for ingress | 443 | -| option.localtime | The hostpath for mount as /etc/localtime | /etc/localtime | diff --git a/versioned_docs/version-v0.13/GettingStarted/KubernetesSetup.md b/versioned_docs/version-v0.13/GettingStarted/KubernetesSetup.md deleted file mode 100644 index 065c3b97e78..00000000000 --- a/versioned_docs/version-v0.13/GettingStarted/KubernetesSetup.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: "Install via Kubernetes" -description: > - The steps to install Apache DevLake via Kubernetes -sidebar_position: 3 ---- - -:::caution - -We highly recommend the [helm approach](./HelmSetup.md), this page is for Advanced Installation only - -::: - -We provide a sample [k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) to help deploy DevLake to Kubernetes - -[k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) will create a namespace `devlake` on your k8s cluster, and use `nodePort 30004` for `config-ui`, `nodePort 30002` for `grafana` dashboards. If you would like to use a specific version of Apache DevLake, please update the image tag of `grafana`, `devlake` and `config-ui` deployments. - -## Step-by-step guide - -1. Download [k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) -2. Customize the settings (`devlake-config` config map): - - Settings shared between `grafana` and `mysql` - * `MYSQL_ROOT_PASSWORD`: set root password for `mysql` - * `MYSQL_USER`: shared between `mysql` and `grafana` - * `MYSQL_PASSWORD`: shared between `mysql` and `grafana` - * `MYSQL_DATABASE`: shared between `mysql` and `grafana` - - Settings used by `grafana` - * `MYSQL_URL`: set MySQL URL for `grafana` in `$HOST:$PORT` format - * `GF_SERVER_ROOT_URL`: Public URL to the `grafana` - - Settings used by `config-ui`: - * `GRAFANA_ENDPOINT`: FQDN of grafana which can be reached within k8s cluster, normally you don't need to change it unless namespace was changed - * `DEVLAKE_ENDPOINT`: FQDN of devlake which can be reached within k8s cluster, normally you don't need to change it unless namespace was changed - * `ADMIN_USER`/`ADMIN_PASS`: Not required, but highly recommended - - Settings used by `devlake`: - * `DB_URL`: update this value if `MYSQL_USER`, `MYSQL_PASSWORD` or `MYSQL_DATABASE` were changed - * `LOGGING_DIR`: the directory of logs for Devlake - you likely don't need to change it. -3. The `devlake` deployment store its configuration in `/app/.env`. In our sample yaml, we use `hostPath` volume, so please make sure directory `/var/lib/devlake` exists on your k8s workers, or employ other techniques to persist `/app/.env` file. Please do NOT mount the entire `/app` directory, because plugins are located in `/app/bin` folder. -4. Finally, execute the following command and DevLake should be up and running: - ```sh - kubectl apply -f k8s-deploy.yaml - ``` - - -## FAQ - -1. Can I use a managed Cloud database service instead of running database in k8s? 
- - Yes, it only takes a few changes in the sample yaml file. Below we'll use MySQL on AWS RDS as an example. - 1. (Optional) Create a MySQL instance on AWS RDS following this [doc](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_GettingStarted.CreatingConnecting.MySQL.html), skip this step if you'd like to use an existing instance - 2. Remove the `mysql` deployment and service sections from `k8s-deploy.yaml` - 3. Update `devlake-config` configmap according to your RDS instance setup: - * `MYSQL_ROOT_PASSWORD`: remove this line - * `MYSQL_USER`: use your RDS instance's master username - * `MYSQL_PASSWORD`: use your RDS instance's password - * `MYSQL_DATABASE`: use your RDS instance's DB name, you may need to create a database first with `CREATE DATABASE ;` - * `MYSQL_URL`: set this for `grafana` in `$HOST:$PORT` format, where $HOST and $PORT should be your RDS instance's endpoint and port respectively - * `DB_URL`: update the connection string with your RDS instance's info for `devlake` diff --git a/versioned_docs/version-v0.13/GettingStarted/TemporalSetup.md b/versioned_docs/version-v0.13/GettingStarted/TemporalSetup.md deleted file mode 100644 index 58132999f81..00000000000 --- a/versioned_docs/version-v0.13/GettingStarted/TemporalSetup.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "Install via Temporal" -sidebar_position: 6 -description: > - The steps to install DevLake in Temporal mode. ---- - - -Normally, DevLake would execute pipelines on a local machine (we call it `local mode`), it is sufficient most of the time. However, when you have too many pipelines that need to be executed in parallel, it can be problematic, as the horsepower and throughput of a single machine is limited. - -`temporal mode` was added to support distributed pipeline execution, you can fire up arbitrary workers on multiple machines to carry out those pipelines in parallel to overcome the limitations of a single machine. - -But, be careful, many API services like JIRA/GITHUB have a request rate limit mechanism. Collecting data in parallel against the same API service with the same identity would most likely hit such limit. - -## How it works - -1. DevLake Server and Workers connect to the same temporal server by setting up `TEMPORAL_URL` -2. DevLake Server sends a `pipeline` to the temporal server, and one of the Workers pick it up and execute it - - -**IMPORTANT: This feature is in early stage of development. Please use with caution** - - -## Temporal Demo - -### Requirements - -- [Docker](https://docs.docker.com/get-docker) -- [docker-compose](https://docs.docker.com/compose/install/) -- [temporalio](https://temporal.io/) - -### How to setup - -1. Clone and fire up the [temporalio](https://temporal.io/) services -2. 
Clone this repo, and fire up DevLake with command `docker-compose -f deployment/temporal/docker-compose-temporal.yml up -d` \ No newline at end of file diff --git a/versioned_docs/version-v0.13/GettingStarted/_category_.json b/versioned_docs/version-v0.13/GettingStarted/_category_.json deleted file mode 100644 index 063400ae119..00000000000 --- a/versioned_docs/version-v0.13/GettingStarted/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Getting Started", - "position": 2, - "link":{ - "type": "generated-index", - "slug": "GettingStarted" - } -} diff --git a/versioned_docs/version-v0.13/Glossary.md b/versioned_docs/version-v0.13/Glossary.md deleted file mode 100644 index be814870a3f..00000000000 --- a/versioned_docs/version-v0.13/Glossary.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -sidebar_position: 7 -title: "Glossary" -linkTitle: "Glossary" -description: > - DevLake Glossary ---- - -*Last updated: May 16 2022* - - -## In Configuration UI (Regular Mode) - -The following terms are arranged in the order of their appearance in the actual user workflow. - -### Blueprints -**A blueprint is the plan that covers all the work to get your raw data ready for query and metric computation in the dashboards.** Creating a blueprint consists of four steps: -1. **Adding [Data Connections](Glossary.md#data-connections)**: For each [data source](Glossary.md#data-sources), one or more data connections can be added to a single blueprint, depending on the data you want to sync to DevLake. -2. **Setting the [Data Scope](Glossary.md#data-scope)**: For each data connection, you need to configure the scope of data, such as GitHub projects, Jira boards, and their corresponding [data entities](Glossary.md#data-entities). -3. **Adding [Transformation Rules](Glossary.md#transformation-rules) (optional)**: You can optionally apply transformation for the data scope you have just selected, in order to view more advanced metrics. -3. **Setting the Sync Frequency**: You can specify the sync frequency for your blueprint to achieve recurring data syncs and transformation. Alternatively, you can set the frequency to manual if you wish to run the tasks in the blueprint manually. - -The relationship among Blueprint, Data Connections, Data Scope and Transformation Rules is explained as follows: - -![Blueprint ERD](/img/Glossary/blueprint-erd.svg) -- Each blueprint can have multiple data connections. -- Each data connection can have multiple sets of data scope. -- Each set of data scope only consists of one GitHub/GitLab project or Jira board, along with their corresponding data entities. -- Each set of data scope can only have one set of transformation rules. - -### Data Sources -**A data source is a specific DevOps tool from which you wish to sync your data, such as GitHub, GitLab, Jira and Jenkins.** - -DevLake normally uses one [data plugin](Glossary.md#data-plugins) to pull data for a single data source. However, in some cases, DevLake uses multiple data plugins for one data source for the purpose of improved sync speed, among many other advantages. For instance, when you pull data from GitHub or GitLab, aside from the GitHub or GitLab plugin, Git Extractor is also used to pull data from the repositories. In this case, DevLake still refers GitHub or GitLab as a single data source. - -### Data Connections -**A data connection is a specific instance of a data source that stores information such as `endpoint` and `auth`.** A single data source can have one or more data connections (e.g. two Jira instances). 
Currently, DevLake supports one data connection for GitHub, GitLab and Jenkins, and multiple connections for Jira. - -You can set up a new data connection either during the first step of creating a blueprint, or in the Connections page that can be accessed from the navigation bar. Because one single data connection can be reused in multiple blueprints, you can update the information of a particular data connection in Connections, to ensure all its associated blueprints will run properly. For example, you may want to update your GitHub token in a data connection if it goes expired. - -### Data Scope -**In a blueprint, each data connection can have multiple sets of data scope configurations, including GitHub or GitLab projects, Jira boards and their corresponding [data entities](Glossary.md#data-entities).** The fields for data scope configuration vary according to different data sources. - -Each set of data scope refers to one GitHub or GitLab project, or one Jira board and the data entities you would like to sync for them, for the convenience of applying transformation in the next step. For instance, if you wish to sync 5 GitHub projects, you will have 5 sets of data scope for GitHub. - -To learn more about the default data scope of all data sources and data plugins, please refer to [Supported Data Sources](./SupportedDataSources.md). - -### Data Entities -**Data entities refer to the data fields from one of the five data domains: Issue Tracking, Source Code Management, Code Review, CI/CD and Cross-Domain.** - -For instance, if you wish to pull Source Code Management data from GitHub and Issue Tracking data from Jira, you can check the corresponding data entities during setting the data scope of these two data connections. - -To learn more details, please refer to [Domain Layer Schema](./DataModels/DevLakeDomainLayerSchema.md). - -### Transformation Rules -**Transformation rules are a collection of methods that allow you to customize how DevLake normalizes raw data for query and metric computation.** Each set of data scope is strictly accompanied with one set of transformation rules. However, for your convenience, transformation rules can also be duplicated across different sets of data scope. - -DevLake uses these normalized values in the transformation to design more advanced dashboards, such as the Weekly Bug Retro dashboard. Although configuring transformation rules is not mandatory, if you leave the rules blank or have not configured correctly, only the basic dashboards (e.g. GitHub Basic Metrics) will be displayed as expected, while the advanced dashboards will not. - -### Historical Runs -**A historical run of a blueprint is an actual execution of the data collection and transformation [tasks](Glossary.md#tasks) defined in the blueprint at its creation.** A list of historical runs of a blueprint is the entire running history of that blueprint, whether executed automatically or manually. Historical runs can be triggered in three ways: -- By the blueprint automatically according to its schedule in the Regular Mode of the Configuration UI -- By running the JSON in the Advanced Mode of the Configuration UI -- By calling the API `/pipelines` endpoint manually - -However, the name Historical Runs is only used in the Configuration UI. In DevLake API, they are called [pipelines](Glossary.md#pipelines). 
- -## In Configuration UI (Advanced Mode) and API - -The following terms have not appeared in the Regular Mode of Configuration UI for simplification, but can be very useful if you want to learn about the underlying framework of DevLake or use Advanced Mode and the DevLake API. - -### Data Plugins -**A data plugin is a specific module that syncs or transforms data.** There are two types of data plugins: Data Collection Plugins and Data Transformation Plugins. - -Data Collection Plugins pull data from one or more data sources. DevLake supports 8 data plugins in this category: `ae`, `feishu`, `gitextractor`, `github`, `gitlab`, `jenkins`, `jira` and `tapd`. - -Data Transformation Plugins transform the data pulled by other Data Collection Plugins. `refdiff` is currently the only plugin in this category. - -Although the names of the data plugins are not displayed in the regular mode of DevLake Configuration UI, they can be used directly in JSON in the Advanced Mode. - -For detailed information about the relationship between data sources and data plugins, please refer to [Supported Data Sources](./SupportedDataSources.md). - - -### Pipelines -**A pipeline is an orchestration of [tasks](Glossary.md#tasks) of data `collection`, `extraction`, `conversion` and `enrichment`, defined in the DevLake API.** A pipeline is composed of one or multiple [stages](Glossary.md#stages) that are executed in a sequential order. Any error occurring during the execution of any stage, task or subtask will cause the immediate fail of the pipeline. - -The composition of a pipeline is explained as follows: -![Blueprint ERD](/img/Glossary/pipeline-erd.svg) -Notice: **You can manually orchestrate the pipeline in Configuration UI Advanced Mode and the DevLake API; whereas in Configuration UI regular mode, an optimized pipeline orchestration will be automatically generated for you.** - - -### Stages -**A stages is a collection of tasks performed by data plugins.** Stages are executed in a sequential order in a pipeline. - -### Tasks -**A task is a collection of [subtasks](Glossary.md#subtasks) that perform any of the `collection`, `extraction`, `conversion` and `enrichment` jobs of a particular data plugin.** Tasks are executed in a parallel order in any stages. - -### Subtasks -**A subtask is the minimal work unit in a pipeline that performs in any of the four roles: `Collectors`, `Extractors`, `Converters` and `Enrichers`.** Subtasks are executed in sequential orders. -- `Collectors`: Collect raw data from data sources, normally via DevLake API and stored into `raw data table` -- `Extractors`: Extract data from `raw data table` to `domain layer tables` -- `Converters`: Convert data from `tool layer tables` into `domain layer tables` -- `Enrichers`: Enrich data from one domain to other domains. For instance, the Fourier Transformation can examine `issue_changelog` to show time distribution of an issue on every assignee. diff --git a/versioned_docs/version-v0.13/Metrics/AddedLinesOfCode.md b/versioned_docs/version-v0.13/Metrics/AddedLinesOfCode.md deleted file mode 100644 index 2921ea65bea..00000000000 --- a/versioned_docs/version-v0.13/Metrics/AddedLinesOfCode.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "Added Lines of Code" -description: > - Added Lines of Code -sidebar_position: 7 ---- - -## What is this metric? -The accumulated number of added lines of code. - -## Why is it important? -1. identify potential bottlenecks that may affect the output -2. 
Encourage the team to implement a development model that matches the business requirements; develop excellent coding habits - -## Which dashboard(s) does it exist in -N/A - -## How is it calculated? -This metric is calculated by summing the additions of commits in the given data range. - -Data Sources Required - -This metric relies on commits collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - - -## How to improve? -1. From the project/team dimension, observe the accumulated change in Added lines to assess the team activity and code growth rate -2. From version cycle dimension, observe the active time distribution of code changes, and evaluate the effectiveness of project development model. -3. From the member dimension, observe the trend and stability of code output of each member, and identify the key points that affect code output by comparison. diff --git a/versioned_docs/version-v0.13/Metrics/BugAge.md b/versioned_docs/version-v0.13/Metrics/BugAge.md deleted file mode 100644 index 66cdcbad547..00000000000 --- a/versioned_docs/version-v0.13/Metrics/BugAge.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "Bug Age" -description: > - Bug Age -sidebar_position: 9 ---- - -## What is this metric? -The amount of time it takes a bug to fix. - -## Why is it important? -1. Help the team to establish an effective hierarchical response mechanism for bugs. Focus on the resolution of important problems in the backlog. -2. Improve team's and individual's bug fixing efficiency. Identify good/to-be-improved practices that affect bug age age - -## Which dashboard(s) does it exist in -- Jira -- GitHub -- Weekly Bug Retro - - -## How is it calculated? -This metric equals to `resolution_date` - `created_date` of issues in type "BUG". - -Data Sources Required - -This metric relies on issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-bug' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Bugs`. - - -## How to improve? -1. Observe the trend of bug age and locate the key reasons. -2. According to the severity level, type (business, functional classification), affected module, source of bugs, count and observe the length of bug age. \ No newline at end of file diff --git a/versioned_docs/version-v0.13/Metrics/BugCountPer1kLinesOfCode.md b/versioned_docs/version-v0.13/Metrics/BugCountPer1kLinesOfCode.md deleted file mode 100644 index 0c252e530d9..00000000000 --- a/versioned_docs/version-v0.13/Metrics/BugCountPer1kLinesOfCode.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: "Bug Count per 1k Lines of Code" -description: > - Bug Count per 1k Lines of Code -sidebar_position: 12 ---- - -## What is this metric? -Amount of bugs per 1,000 lines of code. - -## Why is it important? -1. Defect drill-down analysis to inform the development of design and code review strategies and to improve the internal QA process -2. Assist teams to locate projects/modules with higher defect severity and density, and clean up technical debts -3. Analyze critical points, identify good/to-be-improved practices that affect defect count or defect rate, to reduce the amount of future defects - -## Which dashboard(s) does it exist in -N/A - - -## How is it calculated? -The number of bugs divided by total accumulated lines of code (additions + deletions) in the given data range. - -Data Sources Required - -This metric relies on -- issues collected from Jira, GitHub or TAPD. 
-- commits collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -This metric relies on -- "Issue type mapping" in Jira, GitHub or TAPD's transformation rules page to let DevLake know what type(s) of issues can be regarded as bugs. -- "PR-Issue Mapping" in GitHub, GitLab's transformation rules page to let DevLake know the bugs are fixed by which PR/MRs. - - -## How to improve? -1. From the project or team dimension, observe the statistics on the total number of defects, the distribution of the number of defects in each severity level/type/owner, the cumulative trend of defects, and the change trend of the defect rate in thousands of lines, etc. -2. From version cycle dimension, observe the statistics on the cumulative trend of the number of defects/defect rate, which can be used to determine whether the growth rate of defects is slowing down, showing a flat convergence trend, and is an important reference for judging the stability of software version quality -3. From the time dimension, analyze the trend of the number of test defects, defect rate to locate the key items/key points -4. Evaluate whether the software quality and test plan are reasonable by referring to CMMI standard values diff --git a/versioned_docs/version-v0.13/Metrics/BuildCount.md b/versioned_docs/version-v0.13/Metrics/BuildCount.md deleted file mode 100644 index 50352bbc1c1..00000000000 --- a/versioned_docs/version-v0.13/Metrics/BuildCount.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Build Count" -description: > - Build Count -sidebar_position: 15 ---- - -## What is this metric? -The number of successful builds. - -## Why is it important? -1. As a process indicator, it reflects the value flow efficiency of upstream production and research links -2. Identify excellent/to-be-improved practices that impact the build, and drive the team to precipitate reusable tools and mechanisms to build infrastructure for fast and high-frequency delivery - -## Which dashboard(s) does it exist in -- Jenkins - - -## How is it calculated? -This metric is calculated by counting the number of successful CI builds/pipelines/runs in the given data range. - -Data Sources Required - -This metric relies on CI builds/pipelines/runs collected from Jenkins, GitLab or GitHub. - -Transformation Rules Required - -N/A - -## How to improve? -1. From the project dimension, compare the number of builds and success rate by combining the project phase and the complexity of tasks. -2. From the time dimension, analyze the trend of the number of builds and success rate to see if it has improved over time. diff --git a/versioned_docs/version-v0.13/Metrics/BuildDuration.md b/versioned_docs/version-v0.13/Metrics/BuildDuration.md deleted file mode 100644 index 1aa95385fd0..00000000000 --- a/versioned_docs/version-v0.13/Metrics/BuildDuration.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Build Duration" -description: > - Build Duration -sidebar_position: 16 ---- - -## What is this metric? -The duration of successful builds. - -## Why is it important? -1. As a process indicator, it reflects the value flow efficiency of upstream production and research links -2. Identify excellent/to-be-improved practices that impact the build, and drive the team to precipitate reusable tools and mechanisms to build infrastructure for fast and high-frequency delivery - -## Which dashboard(s) does it exist in -- Jenkins - - -## How is it calculated? 
-This metric is calculated by getting the duration of successful CI builds/pipelines/runs in the given data range. - -Data Sources Required - -This metric relies on CI builds/pipelines/runs collected from Jenkins, GitLab or GitHub. - -Transformation Rules Required - -N/A - -## How to improve? -1. From the project dimension, compare the number of builds and success rate by combining the project phase and the complexity of tasks. -2. From the time dimension, analyze the trend of the number of builds and success rate to see if it has improved over time. diff --git a/versioned_docs/version-v0.13/Metrics/BuildSuccessRate.md b/versioned_docs/version-v0.13/Metrics/BuildSuccessRate.md deleted file mode 100644 index 401086d9632..00000000000 --- a/versioned_docs/version-v0.13/Metrics/BuildSuccessRate.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Build Success Rate" -description: > - Build Success Rate -sidebar_position: 17 ---- - -## What is this metric? -The ratio of successful builds to all builds. - -## Why is it important? -1. As a process indicator, it reflects the value flow efficiency of upstream production and research links -2. Identify excellent/to-be-improved practices that impact the build, and drive the team to precipitate reusable tools and mechanisms to build infrastructure for fast and high-frequency delivery - -## Which dashboard(s) does it exist in -- Jenkins - - -## How is it calculated? -The number of successful builds divided by the total number of builds in the given data range. - -Data Sources Required - -This metric relies on CI builds/pipelines/runs collected from Jenkins, GitLab or GitHub. - -Transformation Rules Required - -N/A - -## How to improve? -1. From the project dimension, compare the number of builds and success rate by combining the project phase and the complexity of tasks. -2. From the time dimension, analyze the trend of the number of builds and success rate to see if it has improved over time. diff --git a/versioned_docs/version-v0.13/Metrics/CFR.md b/versioned_docs/version-v0.13/Metrics/CFR.md deleted file mode 100644 index c09782e04ba..00000000000 --- a/versioned_docs/version-v0.13/Metrics/CFR.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: "DORA - Change Failure Rate(WIP)" -description: > - DORA - Change Failure Rate -sidebar_position: 21 ---- - -## What is this metric? -The percentage of changes that were made to a code that then resulted in incidents, rollbacks, or any type of production failure. - -## Why is it important? -Unlike Deployment Frequency and Lead Time for Changes that measure the throughput, Change Failure Rate measures the stability and quality of software delivery. A low CFR reflects a bad end-user experience as the production failure is relatively high. - -## Which dashboard(s) does it exist in -N/A - - -## How is it calculated? -The number of failures per the number of deployments. For example, if there are five deployments in a day and one causes a failure, that is a 20% change failure rate. - -As you can see, there is not much distinction between performance benchmarks for CFR: - -| Groups | Benchmarks | -| -----------------| ----------------| -| Elite performers | 0%-15% | -| High performers | 16%-30% | -| Medium performers| 16%-30% | -| Low performers | 16%-30% | - -

Source: 2021 Accelerate State of DevOps, Google

- -Data Sources Required - -This metric relies on: -- `Deployments` collected in one of the following ways: - - Open APIs of Jenkins, GitLab, GitHub, etc. - - Webhook for general CI tools. - - Releases and PR/MRs from GitHub, GitLab APIs, etc. -- `Incidents` collected in one of the following ways: - - Issue tracking tools such as Jira, TAPD, GitHub, etc. - - Incident or Service Monitoring tools such as PagerDuty, ServiceNow, etc. - -Transformation Rules Required - -This metric relies on: -- Deployment configuration in Jenkins, GitLab or GitHub transformation rules to let DevLake know what CI builds/jobs can be regarded as `Deployments`. -- Incident configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Incidents`. - -## How to improve? -- Add unit tests for all new feature -- "Shift left", start QA early and introduce more automated tests -- Enforce code review if it's not strictly executed diff --git a/versioned_docs/version-v0.13/Metrics/CodingTime.md b/versioned_docs/version-v0.13/Metrics/CodingTime.md deleted file mode 100644 index d788474810c..00000000000 --- a/versioned_docs/version-v0.13/Metrics/CodingTime.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "PR Coding Time" -description: > - PR Coding Time -sidebar_position: 2 ---- - -## What is this metric? -The time it takes from the first commit until a PR is issued. - -## Why is it important? -It is recommended that you keep every task on a workable and manageable scale for a reasonably short amount of coding time. The average coding time of most engineering teams is around 3-4 days. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -Divide coding tasks into workable and manageable pieces. diff --git a/versioned_docs/version-v0.13/Metrics/CommitAuthorCount.md b/versioned_docs/version-v0.13/Metrics/CommitAuthorCount.md deleted file mode 100644 index 3be4ad20633..00000000000 --- a/versioned_docs/version-v0.13/Metrics/CommitAuthorCount.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Commit Author Count" -description: > - Commit Author Count -sidebar_position: 14 ---- - -## What is this metric? -The number of commit authors who have committed code. - -## Why is it important? -Take inventory of project/team R&D resource inputs, assess input-output ratio, and rationalize resource deployment. - - -## Which dashboard(s) does it exist in -N/A - - -## How is it calculated? -This metric is calculated by counting the number of commit authors in the given data range. - -Data Sources Required - -This metric relies on commits collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - - -## How to improve? -As a secondary indicator, this helps assess the labor cost of participating in coding. diff --git a/versioned_docs/version-v0.13/Metrics/CommitCount.md b/versioned_docs/version-v0.13/Metrics/CommitCount.md deleted file mode 100644 index ae85af8d2cd..00000000000 --- a/versioned_docs/version-v0.13/Metrics/CommitCount.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: "Commit Count" -description: > - Commit Count -sidebar_position: 6 ---- - -## What is this metric? -The number of commits created. - -## Why is it important? -1. Identify potential bottlenecks that may affect output -2. 
Encourage R&D practices of small step submissions and develop excellent coding habits - -## Which dashboard(s) does it exist in -- GitHub Release Quality and Contribution Analysis -- Demo-Is this month more productive than last? -- Demo-Commit Count by Author - -## How is it calculated? -This metric is calculated by counting the number of commits in the given data range. - -Data Sources Required - -This metric relies on commits collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - -SQL Queries - -If you want to see the monthly trend, run the following SQL -``` - with _commits as( - SELECT - DATE_ADD(date(authored_date), INTERVAL -DAY(date(authored_date))+1 DAY) as time, - count(*) as commit_count - FROM commits - WHERE - message not like '%Merge%' - and $__timeFilter(authored_date) - group by 1 - ) - - SELECT - date_format(time,'%M %Y') as month, - commit_count as "Commit Count" - FROM _commits - ORDER BY time -``` - -## How to improve? -1. Identify the main reasons for the unusual number of commits and the possible impact on the number of commits through comparison -2. Evaluate whether the number of commits is reasonable in conjunction with more microscopic workload metrics (e.g. lines of code/code equivalents) diff --git a/versioned_docs/version-v0.13/Metrics/CycleTime.md b/versioned_docs/version-v0.13/Metrics/CycleTime.md deleted file mode 100644 index bbc98349ab8..00000000000 --- a/versioned_docs/version-v0.13/Metrics/CycleTime.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: "PR Cycle Time" -description: > - PR Cycle Time -sidebar_position: 2 ---- - -## What is this metric? -PR Cycle Time is the sum of PR Coding Time, Pickup TIme, Review Time and Deploy Time. It is the total time from the first commit to when the PR is deployed. - -## Why is it important? -PR Cycle Time indicate the overall speed of the delivery progress in terms of PR. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -You can define `deployment` based on your actual practice. For a full list of `deployment`'s definitions that DevLake support, please refer to [Deployment Frequency](/docs/Metrics/DeploymentFrequency.md). - -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Divide coding tasks into workable and manageable pieces; -2. Use DevLake's dashboards to monitor your delivery progress; -3. Have a habit to check for hanging PRs regularly; -4. Set up alerts for your communication tools (e.g. Slack, Lark) when new PRs are issued; -2. Use automated tests for the initial work; -5. Reduce PR size; -6. Analyze the causes for long reviews. \ No newline at end of file diff --git a/versioned_docs/version-v0.13/Metrics/DeletedLinesOfCode.md b/versioned_docs/version-v0.13/Metrics/DeletedLinesOfCode.md deleted file mode 100644 index 218ceae0c54..00000000000 --- a/versioned_docs/version-v0.13/Metrics/DeletedLinesOfCode.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Deleted Lines of Code" -description: > - Deleted Lines of Code -sidebar_position: 8 ---- - -## What is this metric? -The accumulated number of deleted lines of code. - -## Why is it important? -1. identify potential bottlenecks that may affect the output -2. 
Encourage the team to implement a development model that matches the business requirements; develop excellent coding habits - -## Which dashboard(s) does it exist in -N/A - -## How is it calculated? -This metric is calculated by summing the deletions of commits in the given data range. - -Data Sources Required - -This metric relies on commits collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - -## How to improve? -1. From the project/team dimension, observe the accumulated change in Added lines to assess the team activity and code growth rate -2. From version cycle dimension, observe the active time distribution of code changes, and evaluate the effectiveness of project development model. -3. From the member dimension, observe the trend and stability of code output of each member, and identify the key points that affect code output by comparison. diff --git a/versioned_docs/version-v0.13/Metrics/DeployTime.md b/versioned_docs/version-v0.13/Metrics/DeployTime.md deleted file mode 100644 index d908480829f..00000000000 --- a/versioned_docs/version-v0.13/Metrics/DeployTime.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: "PR Deploy Time" -description: > - PR Deploy Time -sidebar_position: 2 ---- - -## What is this metric? -The time it takes from when a PR is merged to when it is deployed. - -## Why is it important? -1. Based on historical data, establish a baseline of the delivery capacity of a single iteration to improve the organization and planning of R&D resources. -2. Evaluate whether the delivery capacity matches the business phase and demand scale. Identify key bottlenecks and reasonably allocate resources. - -## Which dashboard(s) does it exist in? - - -## How is it calculated? -You can define `deployment` based on your actual practice. For a full list of `deployment`'s definitions that DevLake support, please refer to [Deployment Frequency](/docs/Metrics/DeploymentFrequency.md). - -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -## How to improve? - diff --git a/versioned_docs/version-v0.13/Metrics/DeploymentFrequency.md b/versioned_docs/version-v0.13/Metrics/DeploymentFrequency.md deleted file mode 100644 index 6b318535c51..00000000000 --- a/versioned_docs/version-v0.13/Metrics/DeploymentFrequency.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: "DORA - Deployment Frequency(WIP)" -description: > - DORA - Deployment Frequency -sidebar_position: 18 ---- - -## What is this metric? -How often an organization deploys code to production or release it to end users. - -## Why is it important? -Deployment frequency reflects the efficiency of a team's deployment. A team that deploys more frequently can deliver the product faster and users' feature requirements can be met faster. - -## Which dashboard(s) does it exist in -N/A - - -## How is it calculated? -Deployment frequency is calculated based on the number of deployment days, not the number of deployments, e.g.,daily, weekly, monthly, yearly. - -| Groups | Benchmarks | -| -----------------| -------------------------------------| -| Elite performers | Multiple times a day | -| High performers | Once a week to once a month | -| Medium performers| Once a month to once every six months| -| Low performers | Less than once every six months | - -

Source: 2021 Accelerate State of DevOps, Google

- - -Data Sources Required - -This metric relies on deployments collected in multiple ways: -- Open APIs of Jenkins, GitLab, GitHub, etc. -- Webhook for general CI tools. -- Releases and PR/MRs from GitHub, GitLab APIs, etc. - -Transformation Rules Required - -This metric relies on the deployment configuration in Jenkins, GitLab or GitHub transformation rules to let DevLake know what CI builds/jobs can be regarded as deployments. - -## How to improve? -- Trunk development. Work in small batches and often merge their work into shared trunks. -- Integrate CI/CD tools for automated deployment -- Improve automated test coverage diff --git a/versioned_docs/version-v0.13/Metrics/IncidentAge.md b/versioned_docs/version-v0.13/Metrics/IncidentAge.md deleted file mode 100644 index 4cd5e60cbb5..00000000000 --- a/versioned_docs/version-v0.13/Metrics/IncidentAge.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "Incident Age" -description: > - Incident Age -sidebar_position: 10 ---- - -## What is this metric? -The amount of time it takes a incident to fix. - -## Why is it important? -1. Help the team to establish an effective hierarchical response mechanism for incidents. Focus on the resolution of important problems in the backlog. -2. Improve team's and individual's incident fixing efficiency. Identify good/to-be-improved practices that affect incident age - -## Which dashboard(s) does it exist in -- Jira -- GitHub - - -## How is it calculated? -This metric equals to `resolution_date` - `created_date` of issues in type "INCIDENT". - -Data Sources Required - -This metric relies on issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-incident' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Incidents`. - - -## How to improve? -1. Observe the trend of incident age and locate the key reasons. -2. According to the severity level, type (business, functional classification), affected module, source of bugs, count and observe the length of incident age. \ No newline at end of file diff --git a/versioned_docs/version-v0.13/Metrics/IncidentCountPer1kLinesOfCode.md b/versioned_docs/version-v0.13/Metrics/IncidentCountPer1kLinesOfCode.md deleted file mode 100644 index 9ad92787780..00000000000 --- a/versioned_docs/version-v0.13/Metrics/IncidentCountPer1kLinesOfCode.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Incident Count per 1k Lines of Code" -description: > - Incident Count per 1k Lines of Code -sidebar_position: 13 ---- - -## What is this metric? -Amount of incidents per 1,000 lines of code. - -## Why is it important? -1. Defect drill-down analysis to inform the development of design and code review strategies and to improve the internal QA process -2. Assist teams to locate projects/modules with higher defect severity and density, and clean up technical debts -3. Analyze critical points, identify good/to-be-improved practices that affect defect count or defect rate, to reduce the amount of future defects - -## Which dashboard(s) does it exist in -N/A - - -## How is it calculated? -The number of incidents divided by total accumulated lines of code (additions + deletions) in the given data range. - -Data Sources Required - -This metric relies on -- issues collected from Jira, GitHub or TAPD. -- commits collected from GitHub, GitLab or BitBucket. 
- -Transformation Rules Required - -This metric relies on -- "Issue type mapping" in Jira, GitHub or TAPD's transformation rules page to let DevLake know what type(s) of issues can be regarded as incidents. -- "PR-Issue Mapping" in GitHub, GitLab's transformation rules page to let DevLake know the bugs are fixed by which PR/MRs. - -## How to improve? -1. From the project or team dimension, observe the statistics on the total number of defects, the distribution of the number of defects in each severity level/type/owner, the cumulative trend of defects, and the change trend of the defect rate in thousands of lines, etc. -2. From version cycle dimension, observe the statistics on the cumulative trend of the number of defects/defect rate, which can be used to determine whether the growth rate of defects is slowing down, showing a flat convergence trend, and is an important reference for judging the stability of software version quality -3. From the time dimension, analyze the trend of the number of test defects, defect rate to locate the key items/key points -4. Evaluate whether the software quality and test plan are reasonable by referring to CMMI standard values diff --git a/versioned_docs/version-v0.13/Metrics/LeadTimeForChanges.md b/versioned_docs/version-v0.13/Metrics/LeadTimeForChanges.md deleted file mode 100644 index b964f2009e0..00000000000 --- a/versioned_docs/version-v0.13/Metrics/LeadTimeForChanges.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: "DORA - Lead Time for Changes(WIP)" -description: > - DORA - Lead Time for Changes -sidebar_position: 19 ---- - -## What is this metric? -The median amount of time for a commit to be deployed into production. - -## Why is it important? -This metric measures the time it takes to commit code to the production environment and reflects the speed of software delivery. A lower average change preparation time means that your team is efficient at coding and deploying your project. - -## Which dashboard(s) does it exist in -N/A - - -## How is it calculated? -This metric can be calculated in two ways: -- If a deployment can be linked to PRs, then the lead time for changes of a deployment is the average cycle time of its associated PRs. For instance, - - Compared to the previous deployment `deploy-1`, `deploy-2` deployed three new commits `commit-1`, `commit-2` and `commit-3`. - - `commit-1` is linked to `pr-1`, `commit-2` is linked to `pr-2` and `pr-3`, `commit-3` is not linked to any PR. Then, `deploy-2` is associated with `pr-1`, `pr-2` and `pr-3`. - - `Deploy-2`'s lead time for changes = average cycle time of `pr-1`, `pr-2` and `pr-3`. -- If a deployment can't be linked to PRs, then the lead time for changes is computed based on its associated commits. For instance, - - Compared to the previous deployment `deploy-1`, `deploy-2` deployed three new commits `commit-1`, `commit-2` and `commit-3`. - - None of `commit-1`, `commit-2` and `commit-3` is linked to any PR. - - Calculate each commit's lead time for changes, which equals to `deploy-2`'s deployed_at - commit's authored_date - - `Deploy-2`'s Lead time for changes = average lead time for changes of `commit-1`, `commit-2` and `commit-3`. - -Below are the benchmarks for different development teams: - -| Groups | Benchmarks | -| -----------------| -------------------------------------| -| Elite performers | Less than one hour | -| High performers | Between one day and one week | -| Medium performers| Between one month and six months | -| Low performers | More than six months | - -

Source: 2021 Accelerate State of DevOps, Google
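
Since this metric is still WIP, the exact tables may change, but the commit-based fallback can be sketched in Grafana SQL as below. The `deployment_commits` mapping table and its `deployed_at`/`commit_sha` columns are hypothetical placeholders for however deployments get linked to commits in your setup.

```
select
  avg(timestampdiff(hour, c.authored_date, dc.deployed_at)) as "Lead Time for Changes (in hours)"
from deployment_commits dc
join commits c on c.sha = dc.commit_sha
where
  -- this is the default variable in Grafana
  $__timeFilter(dc.deployed_at)
```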

- -Data Sources Required - -This metric relies on deployments collected in multiple ways: -- Open APIs of Jenkins, GitLab, GitHub, etc. -- Webhook for general CI tools. -- Releases and PR/MRs from GitHub, GitLab APIs, etc. - -Transformation Rules Required - -This metric relies on the deployment configuration in Jenkins, GitLab or GitHub transformation rules to let DevLake know what CI builds/jobs can be regarded as deployments. - -## How to improve? -- Break requirements into smaller, more manageable deliverables -- Optimize the code review process -- "Shift left", start QA early and introduce more automated tests -- Integrate CI/CD tools to automate the deployment process diff --git a/versioned_docs/version-v0.13/Metrics/MTTR.md b/versioned_docs/version-v0.13/Metrics/MTTR.md deleted file mode 100644 index f76be2490f8..00000000000 --- a/versioned_docs/version-v0.13/Metrics/MTTR.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: "DORA - Mean Time to Restore Service" -description: > - DORA - Mean Time to Restore Service -sidebar_position: 20 ---- - -## What is this metric? -The time to restore service after service incidents, rollbacks, or any type of production failure happened. - -## Why is it important? -This metric is essential to measure the disaster control capability of your team and the robustness of the software. - -## Which dashboard(s) does it exist in -N/A - - -## How is it calculated? -MTTR = Total [incident age](./IncidentAge.md) (in hours)/number of incidents. - -If you have three incidents that happened in the given data range, one lasting 1 hour, one lasting 2 hours and one lasting 3 hours. Your MTTR will be: (1 + 2 + 3) / 3 = 2 hours. - -Below are the benchmarks for different development teams: - -| Groups | Benchmarks | -| -----------------| -------------------------------------| -| Elite performers | Less than one hour | -| High performers | Less one day | -| Medium performers| Between one day and one week | -| Low performers | More than six months | - -

Source: 2021 Accelerate State of DevOps, Google
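
If your incidents are collected as issues, the calculation can be sketched in Grafana SQL as below, reusing the `issues` and `board_issues` tables that other metric queries in these docs rely on; swap in your own incident source if it differs.

```
select
  avg(timestampdiff(hour, i.created_date, i.resolution_date)) as "Mean Time to Restore Service (in hours)"
from issues i
join board_issues bi on i.id = bi.issue_id
where
  i.type = 'INCIDENT'
  and i.resolution_date is not null
  -- this is the default variable in Grafana
  and $__timeFilter(i.created_date)
  and bi.board_id in ($board_id)
```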

- -Data Sources Required - -This metric relies on: -- `Deployments` collected in one of the following ways: - - Open APIs of Jenkins, GitLab, GitHub, etc. - - Webhook for general CI tools. - - Releases and PR/MRs from GitHub, GitLab APIs, etc. -- `Incidents` collected in one of the following ways: - - Issue tracking tools such as Jira, TAPD, GitHub, etc. - - Incident or Service Monitoring tools such as PagerDuty, ServiceNow, etc. - -Transformation Rules Required - -This metric relies on: -- Deployment configuration in Jenkins, GitLab or GitHub transformation rules to let DevLake know what CI builds/jobs can be regarded as `Deployments`. -- Incident configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Incidents`. - -## How to improve? -- Use automated tools to quickly report failure -- Prioritize recovery when a failure happens -- Establish a go-to action plan to respond to failures immediately -- Reduce the deployment time for failure-fixing diff --git a/versioned_docs/version-v0.13/Metrics/MergeRate.md b/versioned_docs/version-v0.13/Metrics/MergeRate.md deleted file mode 100644 index c8c274338c9..00000000000 --- a/versioned_docs/version-v0.13/Metrics/MergeRate.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: "PR Merge Rate" -description: > - Pull Request Merge Rate -sidebar_position: 12 ---- - -## What is this metric? -The ratio of PRs/MRs that get merged. - -## Why is it important? -1. Code review metrics are process indicators to provide quick feedback on developers' code quality -2. Promote the team to establish a unified coding specification and standardize the code review criteria -3. Identify modules with low-quality risks in advance, optimize practices, and precipitate into reusable knowledge and tools to avoid technical debt accumulation - -## Which dashboard(s) does it exist in -- Jira -- GitHub -- GitLab -- Weekly Community Retro -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -The number of merged PRs divided by the number of all PRs in the given data range. - -Data Sources Required - -This metric relies on PRs/MRs collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - - -## How to improve? -1. From the developer dimension, we evaluate the code quality of developers by combining the task complexity with the metrics related to the number of review passes and review rounds. -2. From the reviewer dimension, we observe the reviewer's review style by taking into account the task complexity, the number of passes and the number of review rounds. -3. From the project/team dimension, we combine the project phase and team task complexity to aggregate the metrics related to the number of review passes and review rounds, and identify the modules with abnormal code review process and possible quality risks. diff --git a/versioned_docs/version-v0.13/Metrics/PRCount.md b/versioned_docs/version-v0.13/Metrics/PRCount.md deleted file mode 100644 index 4521e78617a..00000000000 --- a/versioned_docs/version-v0.13/Metrics/PRCount.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Pull Request Count" -description: > - Pull Request Count -sidebar_position: 11 ---- - -## What is this metric? -The number of pull requests created. - -## Why is it important? -1. Code review metrics are process indicators to provide quick feedback on developers' code quality -2. 
Promote the team to establish a unified coding specification and standardize the code review criteria -3. Identify modules with low-quality risks in advance, optimize practices, and precipitate into reusable knowledge and tools to avoid technical debt accumulation - -## Which dashboard(s) does it exist in -- Jira -- GitHub -- GitLab -- Weekly Community Retro -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -This metric is calculated by counting the number of PRs in the given data range. - -Data Sources Required - -This metric relies on PRs/MRs collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - -## How to improve? -1. From the developer dimension, we evaluate the code quality of developers by combining the task complexity with the metrics related to the number of review passes and review rounds. -2. From the reviewer dimension, we observe the reviewer's review style by taking into account the task complexity, the number of passes and the number of review rounds. -3. From the project/team dimension, we combine the project phase and team task complexity to aggregate the metrics related to the number of review passes and review rounds, and identify the modules with abnormal code review process and possible quality risks. diff --git a/versioned_docs/version-v0.13/Metrics/PRSize.md b/versioned_docs/version-v0.13/Metrics/PRSize.md deleted file mode 100644 index bf6a87d82d9..00000000000 --- a/versioned_docs/version-v0.13/Metrics/PRSize.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "PR Size" -description: > - PR Size -sidebar_position: 2 ---- - -## What is this metric? -The average code changes (in Lines of Code) of PRs in the selected time range. - -## Why is it important? -Small PRs can reduce risks of introducing new bugs and increase code review quality, as problems may often be hidden in big chuncks of code and difficult to identify. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -This metric is calculated by counting the total number of code changes (in LOC) divided by the total number of PRs in the selected time range. - -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Divide coding tasks into workable and manageable pieces; -1. Encourage developers to submit small PRs and only keep related changes in the same PR. diff --git a/versioned_docs/version-v0.13/Metrics/PickupTime.md b/versioned_docs/version-v0.13/Metrics/PickupTime.md deleted file mode 100644 index 07242ae772b..00000000000 --- a/versioned_docs/version-v0.13/Metrics/PickupTime.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "PR Pickup Time" -description: > - PR Pickup Time -sidebar_position: 2 ---- - -## What is this metric? -The time it takes from when a PR is issued until the first comment is added to that PR. - -## Why is it important? -PR Pickup Time shows how engaged your team is in collaborative work by identifying the delay in picking up PRs. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. 
Use DevLake's dashboard to monitor your delivery progress; -2. Have a habit to check for hanging PRs regularly; -3. Set up alerts for your communication tools (e.g. Slack, Lark) when new PRs are issued. diff --git a/versioned_docs/version-v0.13/Metrics/RequirementCount.md b/versioned_docs/version-v0.13/Metrics/RequirementCount.md deleted file mode 100644 index e9a6bd32981..00000000000 --- a/versioned_docs/version-v0.13/Metrics/RequirementCount.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: "Requirement Count" -description: > - Requirement Count -sidebar_position: 2 ---- - -## What is this metric? -The number of delivered requirements or features. - -## Why is it important? -1. Based on historical data, establish a baseline of the delivery capacity of a single iteration to improve the organization and planning of R&D resources. -2. Evaluate whether the delivery capacity matches the business phase and demand scale. Identify key bottlenecks and reasonably allocate resources. - -## Which dashboard(s) does it exist in -- Jira -- GitHub - - -## How is it calculated? -This metric is calculated by counting the number of delivered issues in type "REQUIREMENT" in the given data range. - -Data Sources Required - -This metric relies on the issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-requirement' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Requirements`. - -SQL Queries - -If you want to see a single count, run the following SQL in Grafana -``` - select - count(*) as "Requirement Count" - from issues i - join board_issues bi on i.id = bi.issue_id - where - i.type = 'REQUIREMENT' - and i.status = 'DONE' - -- this is the default variable in Grafana - and $__timeFilter(i.created_date) - and bi.board_id in ($board_id) -``` - -If you want to see the monthly trend, run the following SQL -``` - SELECT - DATE_ADD(date(i.created_date), INTERVAL -DAYOFMONTH(date(i.created_date))+1 DAY) as time, - count(distinct case when status != 'DONE' then i.id else null end) as "Number of Open Issues", - count(distinct case when status = 'DONE' then i.id else null end) as "Number of Delivered Issues" - FROM issues i - join board_issues bi on i.id = bi.issue_id - join boards b on bi.board_id = b.id - WHERE - i.type = 'REQUIREMENT' - and i.status = 'DONE' - and $__timeFilter(i.created_date) - and bi.board_id in ($board_id) - GROUP by 1 -``` - -## How to improve? -1. Analyze the number of requirements and delivery rate of different time cycles to find the stability and trend of the development process. -2. Analyze and compare the number of requirements delivered and delivery rate of each project/team, and compare the scale of requirements of different projects. -3. Based on historical data, establish a baseline of the delivery capacity of a single iteration (optimistic, probable and pessimistic values) to provide a reference for iteration estimation. -4. Drill down to analyze the number and percentage of requirements in different phases of SDLC. Analyze rationality and identify the requirements stuck in the backlog. 
diff --git a/versioned_docs/version-v0.13/Metrics/RequirementDeliveryRate.md b/versioned_docs/version-v0.13/Metrics/RequirementDeliveryRate.md deleted file mode 100644 index eb0a03133d5..00000000000 --- a/versioned_docs/version-v0.13/Metrics/RequirementDeliveryRate.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: "Requirement Delivery Rate" -description: > - Requirement Delivery Rate -sidebar_position: 3 ---- - -## What is this metric? -The ratio of delivered requirements to all requirements. - -## Why is it important? -1. Based on historical data, establish a baseline of the delivery capacity of a single iteration to improve the organization and planning of R&D resources. -2. Evaluate whether the delivery capacity matches the business phase and demand scale. Identify key bottlenecks and reasonably allocate resources. - -## Which dashboard(s) does it exist in -- Jira -- GitHub - - -## How is it calculated? -The number of delivered requirements divided by the total number of requirements in the given data range. - -Data Sources Required - -This metric relies on the issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-requirement' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Requirements`. - - -## How to improve? -1. Analyze the number of requirements and delivery rate of different time cycles to find the stability and trend of the development process. -2. Analyze and compare the number of requirements delivered and delivery rate of each project/team, and compare the scale of requirements of different projects. -3. Based on historical data, establish a baseline of the delivery capacity of a single iteration (optimistic, probable and pessimistic values) to provide a reference for iteration estimation. -4. Drill down to analyze the number and percentage of requirements in different phases of SDLC. Analyze rationality and identify the requirements stuck in the backlog. diff --git a/versioned_docs/version-v0.13/Metrics/RequirementGranularity.md b/versioned_docs/version-v0.13/Metrics/RequirementGranularity.md deleted file mode 100644 index 03bb91767f5..00000000000 --- a/versioned_docs/version-v0.13/Metrics/RequirementGranularity.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "Requirement Granularity" -description: > - Requirement Granularity -sidebar_position: 5 ---- - -## What is this metric? -The average number of story points per requirement. - -## Why is it important? -1. Promote product teams to split requirements carefully, improve requirements quality, help developers understand requirements clearly, deliver efficiently and with high quality, and improve the project management capability of the team. -2. Establish a data-supported workload estimation model to help R&D teams calibrate their estimation methods and more accurately assess the granularity of requirements, which is useful to achieve better issue planning in project management. - -## Which dashboard(s) does it exist in -- Jira -- GitHub - - -## How is it calculated? -The average story points of issues in type "REQUIREMENT" in the given data range. - -Data Sources Required - -This metric relies on issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-requirement' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Requirements`. - - -## How to improve? -1. 
Analyze the story points/requirement lead time of requirements to evaluate whether the ticket size, ie. requirement complexity is optimal. -2. Compare the estimated requirement granularity with the actual situation and evaluate whether the difference is reasonable by combining more microscopic workload metrics (e.g. lines of code/code equivalents) diff --git a/versioned_docs/version-v0.13/Metrics/RequirementLeadTime.md b/versioned_docs/version-v0.13/Metrics/RequirementLeadTime.md deleted file mode 100644 index 74061d63dec..00000000000 --- a/versioned_docs/version-v0.13/Metrics/RequirementLeadTime.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: "Requirement Lead Time" -description: > - Requirement Lead Time -sidebar_position: 4 ---- - -## What is this metric? -The amount of time it takes a requirement to deliver. - -## Why is it important? -1. Analyze key projects and critical points, identify good/to-be-improved practices that affect requirement lead time, and reduce the risk of delays -2. Focus on the end-to-end velocity of value delivery process; coordinate different parts of R&D to avoid efficiency shafts; make targeted improvements to bottlenecks. - -## Which dashboard(s) does it exist in -- Jira -- GitHub -- Community Experience - - -## How is it calculated? -This metric equals to `resolution_date` - `created_date` of issues in type "REQUIREMENT". - -Data Sources Required - -This metric relies on issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-requirement' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Requirements`. - - -## How to improve? -1. Analyze the trend of requirement lead time to observe if it has improved over time. -2. Analyze and compare the requirement lead time of each project/team to identify key projects with abnormal lead time. -3. Drill down to analyze a requirement's staying time in different phases of SDLC. Analyze the bottleneck of delivery velocity and improve the workflow. \ No newline at end of file diff --git a/versioned_docs/version-v0.13/Metrics/ReviewDepth.md b/versioned_docs/version-v0.13/Metrics/ReviewDepth.md deleted file mode 100644 index 59bcfbe876c..00000000000 --- a/versioned_docs/version-v0.13/Metrics/ReviewDepth.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "PR Review Depth" -description: > - PR Review Depth -sidebar_position: 2 ---- - -## What is this metric? -The average number of comments of PRs in the selected time range. - -## Why is it important? -PR Review Depth (in Comments per RR) is related to the quality of code review, indicating how thorough your team reviews PRs. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - -## How is it calculated? -This metric is calculated by counting the total number of PR comments divided by the total number of PRs in the selected time range. - -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Encourage multiple reviewers to review a PR; -2. Review Depth is an indicator for generally how thorough your PRs are reviewed, but it does not mean the deeper the better. In some cases, spending an excessive amount of resources on reviewing PRs is also not recommended. 
\ No newline at end of file diff --git a/versioned_docs/version-v0.13/Metrics/ReviewTime.md b/versioned_docs/version-v0.13/Metrics/ReviewTime.md deleted file mode 100644 index 8cfe080b0cc..00000000000 --- a/versioned_docs/version-v0.13/Metrics/ReviewTime.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "PR Review Time" -description: > - PR Review Time -sidebar_position: 2 ---- - -## What is this metric? -The time it takes to complete a code review of a PR before it gets merged. - -## Why is it important? -Code review should be conducted almost in real-time and usually take less than two days. Abnormally long PR Review Time may indicate one or more of the following problems: -1. The PR size is too large that makes it difficult to review. -2. The team is too busy to review code. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -This metric is the time frame between when the first comment is added to a PR, to when the PR is merged. - -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Use DevLake's dashboards to monitor your delivery progress; -2. Use automated tests for the initial work; -3. Reduce PR size; -4. Analyze the causes for long reviews. \ No newline at end of file diff --git a/versioned_docs/version-v0.13/Metrics/TimeToMerge.md b/versioned_docs/version-v0.13/Metrics/TimeToMerge.md deleted file mode 100644 index 04a39225fe0..00000000000 --- a/versioned_docs/version-v0.13/Metrics/TimeToMerge.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: "PR Time To Merge" -description: > - PR Time To Merge -sidebar_position: 2 ---- - -## What is this metric? -The time it takes from when a PR is issued to when it is merged. Essentially, PR Time to Merge = PR Pickup Time + PR Review Time. - -## Why is it important? -The delay of reviewing and waiting to review PRs has large impact on delivery speed, while reasonably short PR Time to Merge can indicate frictionless teamwork. Improving on this metric is the key to reduce PR cycle time. - -## Which dashboard(s) does it exist in? -- GitHub Basic Metrics -- Bi-weekly Community Retro - - -## How is it calculated? -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Use DevLake's dashboards to monitor your delivery progress; -2. Have a habit to check for hanging PRs regularly; -3. Set up alerts for your communication tools (e.g. Slack, Lark) when new PRs are issued; -4. Reduce PR size; -5. Analyze the causes for long reviews. diff --git a/versioned_docs/version-v0.13/Metrics/_category_.json b/versioned_docs/version-v0.13/Metrics/_category_.json deleted file mode 100644 index e944147d528..00000000000 --- a/versioned_docs/version-v0.13/Metrics/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Metrics", - "position": 5, - "link":{ - "type": "generated-index", - "slug": "Metrics" - } -} diff --git a/versioned_docs/version-v0.13/Overview/Architecture.md b/versioned_docs/version-v0.13/Overview/Architecture.md deleted file mode 100755 index d4c6a9c5340..00000000000 --- a/versioned_docs/version-v0.13/Overview/Architecture.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Architecture" -description: > - Understand the architecture of Apache DevLake -sidebar_position: 2 ---- - -## Architecture Overview - -

[Figure: DevLake Components]

- -A DevLake installation typically consists of the following components: - -- Config UI: A handy user interface to create, trigger, and debug Blueprints. A Blueprint specifies the where (data connection), what (data scope), how (transformation rule), and when (sync frequency) of a data pipeline. -- API Server: The main programmatic interface of DevLake. -- Runner: The runner does all the heavy-lifting for executing tasks. In the default DevLake installation, it runs within the API Server, but DevLake provides a temporal-based runner (beta) for production environments. -- Database: The database stores both DevLake's metadata and user data collected by data pipelines. DevLake supports MySQL and PostgreSQL as of v0.11. -- Plugins: Plugins enable DevLake to collect and analyze dev data from any DevOps tools with an accessible API. DevLake community is actively adding plugins for popular DevOps tools, but if your preferred tool is not covered yet, feel free to open a GitHub issue to let us know or check out our doc on how to build a new plugin by yourself. -- Dashboards: Dashboards deliver data and insights to DevLake users. A dashboard is simply a collection of SQL queries along with corresponding visualization configurations. DevLake's official dashboard tool is Grafana and pre-built dashboards are shipped in Grafana's JSON format. Users are welcome to swap for their own choice of dashboard/BI tool if desired. - -## Dataflow - -

[Figure: DevLake Dataflow]

- -A typical plugin's dataflow is illustrated below: - -1. The Raw layer stores the API responses from data sources (DevOps tools) in JSON. This saves developers' time if the raw data is to be transformed differently later on. Please note that communicating with data sources' APIs is usually the most time-consuming step. -2. The Tool layer extracts raw data from JSONs into a relational schema that's easier to consume by analytical tasks. Each DevOps tool would have a schema that's tailored to their data structure, hence the name, the Tool layer. -3. The Domain layer attempts to build a layer of abstraction on top of the Tool layer so that analytics logics can be re-used across different tools. For example, GitHub's Pull Request (PR) and GitLab's Merge Request (MR) are similar entities. They each have their own table name and schema in the Tool layer, but they're consolidated into a single entity in the Domain layer, so that developers only need to implement metrics like Cycle Time and Code Review Rounds once against the domain layer schema. - -## Principles - -1. Extensible: DevLake's plugin system allows users to integrate with any DevOps tool. DevLake also provides a dbt plugin that enables users to define their own data transformation and analysis workflows. -2. Portable: DevLake has a modular design and provides multiple options for each module. Users of different setups can freely choose the right configuration for themselves. -3. Robust: DevLake provides an SDK to help plugins efficiently and reliably collect data from data sources while respecting their API rate limits and constraints. - -
diff --git a/versioned_docs/version-v0.13/Overview/Introduction.md b/versioned_docs/version-v0.13/Overview/Introduction.md deleted file mode 100755 index 4b692ff2bb5..00000000000 --- a/versioned_docs/version-v0.13/Overview/Introduction.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Introduction" -description: General introduction of Apache DevLake -sidebar_position: 1 ---- - -## What is Apache DevLake? -Apache DevLake is an open-source dev data platform that ingests, analyzes, and visualizes the fragmented data from DevOps tools to distill insights for engineering productivity. - -Apache DevLake is designed for developer teams looking to make better sense of their development process and to bring a more data-driven approach to their own practices. You can ask Apache DevLake many questions regarding your development process. Just connect and query. - -## What can be accomplished with DevLake? -1. Collect DevOps data across the entire Software Development Life Cycle (SDLC) and connect the siloed data with a standard [data model](../DataModels/DevLakeDomainLayerSchema.md). -2. Visualize out-of-the-box [engineering metrics](../Metrics) in a series of use-case driven dashboards -3. Easily extend DevLake to support your data sources, metrics, and dashboards with a flexible [framework](Architecture.md) for data collection and ETL (Extract, Transform, Load). - -## How do I use DevLake? -### 1. Set up DevLake -You can easily set up Apache DevLake by following our step-by step instructions for [Docker Compose setup](../GettingStarted/DockerComposeSetup.md) or [Kubernetes setup](../GettingStarted/KubernetesSetup.md). - -### 2. Create a Blueprint -The DevLake Configuration UI will guide you through the process (a Blueprint) to define the data connections, data scope, transformation and sync frequency of the data you wish to collect. - -![img](/img/Introduction/userflow1.svg) - -### 3. Track the Blueprint's progress -You can track the progress of the Blueprint you have just set up. - -![img](/img/Introduction/userflow2.svg) - -### 4. View the pre-built dashboards -Once the first run of the Blueprint is completed, you can view the corresponding dashboards. - -![img](/img/Introduction/userflow3.png) - -### 5. Customize the dashboards with SQL -If the pre-built dashboards are limited for your use cases, you can always customize or create your own metrics or dashboards with SQL. - -![img](/img/Introduction/userflow4.png) diff --git a/versioned_docs/version-v0.13/Overview/Roadmap.md b/versioned_docs/version-v0.13/Overview/Roadmap.md deleted file mode 100644 index 6695584eb4d..00000000000 --- a/versioned_docs/version-v0.13/Overview/Roadmap.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "Roadmap" -description: > - The goals and roadmap for DevLake in 2022 -sidebar_position: 3 ---- - - -## Goals -DevLake has joined the Apache Incubator and is aiming to become a top-level project. To achieve this goal, the Apache DevLake (Incubating) community will continue to make efforts in helping development teams to analyze and improve their engineering productivity. In the 2022 Roadmap, we have summarized three major goals followed by the feature breakdown to invite the broader community to join us and grow together. - -1. As a dev data analysis application, discover and implement 3 (or even more!) 
usage scenarios: - - A collection of metrics to track the contribution, quality and growth of open-source projects - - DORA metrics for DevOps engineers - - To be decided ([let us know](https://join.slack.com/t/devlake-io/shared_invite/zt-17b6vuvps-x98pqseoUagM7EAmKC82xQ) if you have any suggestions!) -2. As dev data infrastructure, provide robust data collection modules, customizable data models, and data extensibility. -3. Design better user experience for end-users and contributors. - -## Feature Breakdown -Apache DevLake is currently under rapid development. You are more than welcome to use the following table to explore your intereted features and make contributions. We deeply appreciate the collective effort of our community to make this project possible! - -| Category | Features| -| --- | --- | -| More data sources across different [DevOps domains](../DataModels/DevLakeDomainLayerSchema.md) (Goal No.1 & 2)| Features in **bold** are of higher priority

Issue/Task Management:
  • **Jira server** [#886 (closed)](https://github.com/apache/incubator-devlake/issues/886)
  • **Jira data center** [#1687 (closed)](https://github.com/apache/incubator-devlake/issues/1687)
  • GitLab Issues [#715 (closed)](https://github.com/apache/incubator-devlake/issues/715)
  • Trello [#1881 (open)](https://github.com/apache/incubator-devlake/issues/1881)
  • **TAPD** [#560 (closed)](https://github.com/apache/incubator-devlake/issues/560)
  • Teambition [#1882 (open)](https://github.com/apache/incubator-devlake/issues/1882)
  • Ones [#1884 (open)](https://github.com/apache/incubator-devlake/issues/1884)
Source Code Management:
  • BitBucket
  • Gitee [#1883 (open)](https://github.com/apache/incubator-devlake/issues/1883)
  • Coder
Code Review:
  • Gerrit
CI/CD:
  • GitHub Action
  • ArgoCI
  • ArgoCD
  • TeamCity
Quality:
  • **SonarQube**
  • Coverity
QA:
  • Selenium
  • JUnit
  • JMeter
  • Cucumber Test
Calendar:
  • Google Calendar
  • Zoom Calendar
  • Lark Calendar
  • Tencent Calendar
OSS Community Metrics:
  • GitHub stars, clones, watches
| -| Improved data collection, [data models](../DataModels/DevLakeDomainLayerSchema.md) and data extensibility (Goal No.2)| Data Collection:
  • Complete the logging system
  • Implement a good error handling mechanism during data collection
Data Models:
  • Introduce DBT to allow users to create and modify the domain layer schema. [#1479 (closed)](https://github.com/apache/incubator-devlake/issues/1479)
  • Design the data models for 5 new domains; please refer to the data models of the tools under each domain (see the cell above):
    • Quality
    • Testing
    • Calendar
    • Documentation
    • OSS Community Metrics
  • Polish the data models for [existing domains](../DataModels/DevLakeDomainLayerSchema.md): Issue/Task Management, Source Code Management, Code Review and CI/CD.
Data Extensibility:
  • Enhance the performance of data applications under large-scale usage scenarios
  • Support OLAP databases for more flexible data storage options
| -| Better user experience (Goal No.3) | For new users:
  • Iterate on a clearer step-by-step guide to improve the pre-configuration experience.
  • Provide a new Config UI to reduce friction in data configuration [#1700 (in-progress)](https://github.com/apache/incubator-devlake/issues/1700)
  • Showcase dashboard live demos to let users explore and learn about the dashboards. [#1784 (open)](https://github.com/apache/incubator-devlake/issues/1784)
For returning users:
  • Provide detailed guides to help users customize Grafana dashboards.
  • Work on the documentation for advanced features in the Config UI, such as the usage of Advanced Mode and the replacement of old auth tokens for data connections.
For contributors:
  • Add more guides on setting up DevLake on different operating systems.
  • Provide clearer docs so that contributors can get on board more easily.
  • Add Swagger to document the API [#292 (closed)](https://github.com/apache/incubator-devlake/issues/292)
  • More docs about raw/tool/domain data models
| - - -## How to Influence the Roadmap -A roadmap is only useful when it captures real user needs. We are glad to hear from you if you have specific use cases, feedback, or ideas. You can submit an issue to let us know! -Also, if you plan to work (or are already working) on a new or existing feature, tell us, so that we can update the roadmap accordingly. We are happy to share knowledge and context to help your feature land successfully. -


- diff --git a/versioned_docs/version-v0.13/Overview/_category_.json b/versioned_docs/version-v0.13/Overview/_category_.json deleted file mode 100644 index 3e819ddc4ff..00000000000 --- a/versioned_docs/version-v0.13/Overview/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Overview", - "position": 1, - "link":{ - "type": "generated-index", - "slug": "Overview" - } -} diff --git a/versioned_docs/version-v0.13/Plugins/_category_.json b/versioned_docs/version-v0.13/Plugins/_category_.json deleted file mode 100644 index bbea8d5910c..00000000000 --- a/versioned_docs/version-v0.13/Plugins/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Plugins", - "position": 9, - "link":{ - "type": "generated-index", - "slug": "Plugins" - } -} diff --git a/versioned_docs/version-v0.13/Plugins/dbt.md b/versioned_docs/version-v0.13/Plugins/dbt.md deleted file mode 100644 index 059bf12c61d..00000000000 --- a/versioned_docs/version-v0.13/Plugins/dbt.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "DBT" -description: > - DBT Plugin ---- - - -## Summary - -dbt (data build tool) enables analytics engineers to transform data in their warehouses by simply writing select statements. dbt handles turning these select statements into tables and views. -dbt does the T in ELT (Extract, Load, Transform) processes – it doesn’t extract or load data, but it’s extremely good at transforming data that’s already loaded into your warehouse. - -## User setup -- If you plan to use this product, you need to install some environments first. - -#### Required Packages to Install -- [python3.7+](https://www.python.org/downloads/) -- [dbt-mysql](https://pypi.org/project/dbt-mysql/#configuring-your-profile) - -#### Commands to run or create in your terminal and the dbt project -1. pip install dbt-mysql -2. dbt init demoapp (demoapp is project name) -3. create your SQL transformations and data models - -## Convert Data By DBT - -Use the Raw JSON API to manually initiate a run using **cURL** or graphical API tool such as **Postman**. `POST` the following request to the DevLake API Endpoint. - -```json -[ - [ - { - "plugin": "dbt", - "options": { - "projectPath": "/Users/abeizn/demoapp", - "projectName": "demoapp", - "projectTarget": "dev", - "selectedModels": ["my_first_dbt_model","my_second_dbt_model"], - "projectVars": { - "demokey1": "demovalue1", - "demokey2": "demovalue2" - } - } - } - ] -] -``` - -- `projectPath`: the absolute path of the dbt project. (required) -- `projectName`: the name of the dbt project. (required) -- `projectTarget`: this is the default target your dbt project will use. (optional) -- `selectedModels`: a model is a select statement. Models are defined in .sql files, and typically in your models directory. (required) -And selectedModels accepts one or more arguments. Each argument can be one of: -1. a package name, runs all models in your project, example: example -2. a model name, runs a specific model, example: my_fisrt_dbt_model -3. a fully-qualified path to a directory of models. - -- `projectVars`: variables to parametrize dbt models. (optional) -example: -`select * from events where event_type = '{{ var("event_type") }}'` -To execute this SQL query in your model, you need set a value for `event_type`. - -### Resources: -- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) -- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers - -


diff --git a/versioned_docs/version-v0.13/Plugins/feishu.md b/versioned_docs/version-v0.13/Plugins/feishu.md deleted file mode 100644 index 306f3bd9893..00000000000 --- a/versioned_docs/version-v0.13/Plugins/feishu.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: "Feishu" -description: > - Feishu Plugin ---- - -## Summary - -This plugin collects Feishu meeting data through [Feishu Openapi](https://open.feishu.cn/document/home/user-identity-introduction/introduction). - -## Configuration - -In order to fully use this plugin, you will need to get `app_id` and `app_secret` from a Feishu administrator (for help on App info, please see [official Feishu Docs](https://open.feishu.cn/document/ukTMukTMukTM/ukDNz4SO0MjL5QzM/auth-v3/auth/tenant_access_token_internal)), - -A connection should be created before you can collection any data. Currently, this plugin supports creating connection by requesting `connections` API: - -``` -curl 'http://localhost:8080/plugins/feishu/connections' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "feishu", - "endpoint": "https://open.feishu.cn/open-apis/vc/v1/", - "proxy": "http://localhost:1080", - "rateLimitPerHour": 20000, - "appId": "", - "appSecret": "" -} -' -``` - -## Collect data from Feishu - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - - -```json -[ - [ - { - "plugin": "feishu", - "options": { - "connectionId": 1, - "numOfDaysToCollect" : 80 - } - } - ] -] -``` - -> `numOfDaysToCollect`: The number of days you want to collect - -> `rateLimitPerSecond`: The number of requests to send(Maximum is 8) - -You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "feishu 20211126", - "plan": [[{ - "plugin": "feishu", - "options": { - "connectionId": 1, - "numOfDaysToCollect" : 80 - } - }]] -} -' -``` diff --git a/versioned_docs/version-v0.13/Plugins/gitee.md b/versioned_docs/version-v0.13/Plugins/gitee.md deleted file mode 100644 index 79c3c907dac..00000000000 --- a/versioned_docs/version-v0.13/Plugins/gitee.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: "Gitee(WIP)" -description: > - Gitee Plugin ---- - -## Summary - -This plugin collects `Gitee` data through [Gitee Openapi](https://gitee.com/api/v5/swagger). - -## Configuration - -In order to fully use this plugin, you will need to get `token` on the Gitee website. - -A connection should be created before you can collection any data. Currently, this plugin supports creating connection by requesting `connections` API: - -``` -curl 'http://localhost:8080/plugins/gitee/connections' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee", - "endpoint": "https://gitee.com/api/v5/", - "proxy": "http://localhost:1080", - "rateLimitPerHour": 20000, - "token": "" -} -' -``` - - - -## Collect data from Gitee - -In order to collect data, you have to compose a JSON looks like following one, and send it by selecting `Advanced Mode` on `Create Pipeline Run` page: - -1. Configure-UI Mode -```json -[ - [ - { - "plugin": "gitee", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` -and if you want to perform certain subtasks. 
-```json -[ - [ - { - "plugin": "gitee", - "subtasks": ["collectXXX", "extractXXX", "convertXXX"], - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` - -2. Curl Mode: - You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee 20211126", - "plan": [[{ - "plugin": "gitee", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` -and if you want to perform certain subtasks. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee 20211126", - "plan": [[{ - "plugin": "gitee", - "subtasks": ["collectXXX", "extractXXX", "convertXXX"], - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` diff --git a/versioned_docs/version-v0.13/Plugins/gitextractor.md b/versioned_docs/version-v0.13/Plugins/gitextractor.md deleted file mode 100644 index d4c10ca7770..00000000000 --- a/versioned_docs/version-v0.13/Plugins/gitextractor.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: "GitExtractor" -description: > - GitExtractor Plugin ---- - -## Summary -This plugin extracts commits and references from a remote or local git repository. It then saves the data into the database or csv files. - -## Steps to make this plugin work - -1. Use the Git repo extractor to retrieve data about commits and branches from your repository. -2. Use the GitHub plugin to retrieve data about Github issues and PRs from your repository. -NOTE: you can run only one issue collection stage as described in the Github Plugin README. -3. Use the [RefDiff](./refdiff.md) plugin to calculate version diff, which will be stored in `refs_commits_diffs` table. - -## Sample Request - -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "git repo extractor", - "plan": [ - [ - { - "Plugin": "gitextractor", - "Options": { - "url": "https://github.com/merico-dev/lake.git", - "repoId": "github:GithubRepo:384111310" - } - } - ] - ] -} -' -``` -- `url`: the location of the git repository. It should start with `http`/`https` for a remote git repository and with `/` for a local one. -- `repoId`: column `id` of `repos`. - Note : For GitHub, to find the repo id run `$("meta[name=octolytics-dimension-repository_id]").getAttribute('content')` in browser console. -- `proxy`: optional, http proxy, e.g. `http://your-proxy-server.com:1080`. -- `user`: optional, for cloning private repository using HTTP/HTTPS -- `password`: optional, for cloning private repository using HTTP/HTTPS -- `privateKey`: optional, for SSH cloning, base64 encoded `PEM` file -- `passphrase`: optional, passphrase for the private key - - -## Standalone Mode - -You call also run this plugin in a standalone mode without any DevLake service running using the following command: - -``` -go run plugins/gitextractor/main.go -url https://github.com/merico-dev/lake.git -id github:GithubRepo:384111310 -db "merico:merico@tcp(127.0.0.1:3306)/lake?charset=utf8mb4&parseTime=True" -``` - -For more options (e.g., saving to a csv file instead of a db), please read `plugins/gitextractor/main.go`. - -## Development - -This plugin depends on `libgit2`, you need to install version 1.3.0 in order to run and debug this plugin on your local -machine. 
[Click here](./refdiff.md#Development) for a brief guide. - -


diff --git a/versioned_docs/version-v0.13/Plugins/github-connection-in-config-ui.png b/versioned_docs/version-v0.13/Plugins/github-connection-in-config-ui.png deleted file mode 100644 index 5359fb1551b..00000000000 Binary files a/versioned_docs/version-v0.13/Plugins/github-connection-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.13/Plugins/github.md b/versioned_docs/version-v0.13/Plugins/github.md deleted file mode 100644 index fd804a14569..00000000000 --- a/versioned_docs/version-v0.13/Plugins/github.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "GitHub" -description: > - GitHub Plugin ---- - - - -## Summary - -This plugin gathers data from `GitHub` to display information to the user in `Grafana`. We can help tech leaders answer such questions as: - -- Is this month more productive than last? -- How fast do we respond to customer requirements? -- Was our quality improved or not? - -## Metrics - -Here are some examples metrics using `GitHub` data: -- Avg Requirement Lead Time By Assignee -- Bug Count per 1k Lines of Code -- Commit Count over Time - -## Screenshot - -![image](/img/Plugins/github-demo.png) - - -## Configuration -- Configuring GitHub via [config-ui](/UserManuals/ConfigUI/GitHub.md). - -## Sample Request -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -```json -[ - [ - { - "plugin": "github", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` - -You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "github 20211126", - "plan": [[{ - "plugin": "github", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` diff --git a/versioned_docs/version-v0.13/Plugins/gitlab-connection-in-config-ui.png b/versioned_docs/version-v0.13/Plugins/gitlab-connection-in-config-ui.png deleted file mode 100644 index 7aacee8d828..00000000000 Binary files a/versioned_docs/version-v0.13/Plugins/gitlab-connection-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.13/Plugins/gitlab.md b/versioned_docs/version-v0.13/Plugins/gitlab.md deleted file mode 100644 index 05f8ba5ec34..00000000000 --- a/versioned_docs/version-v0.13/Plugins/gitlab.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: "GitLab" -description: > - GitLab Plugin ---- - - -## Metrics - -| Metric Name | Description | -|:----------------------------|:-------------------------------------------------------------| -| Pull Request Count | Number of Pull/Merge Requests | -| Pull Request Pass Rate | Ratio of Pull/Merge Review requests to merged | -| Pull Request Reviewer Count | Number of Pull/Merge Reviewers | -| Pull Request Review Time | Time from Pull/Merge created time until merged | -| Commit Author Count | Number of Contributors | -| Commit Count | Number of Commits | -| Added Lines | Accumulated Number of New Lines | -| Deleted Lines | Accumulated Number of Removed Lines | -| Pull Request Review Rounds | Number of cycles of commits followed by comments/final merge | - -## Configuration -Configuring GitLab via [config-ui](/UserManuals/ConfigUI/GitLab.md). 
- -## Gathering Data with GitLab - -To collect data, you can make a POST request to `/pipelines` - -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitlab 20211126", - "plan": [[{ - "plugin": "gitlab", - "options": { - "projectId": - } - }]] -} -' -``` - -


diff --git a/versioned_docs/version-v0.13/Plugins/jenkins.md b/versioned_docs/version-v0.13/Plugins/jenkins.md deleted file mode 100644 index 9bb0177d0b6..00000000000 --- a/versioned_docs/version-v0.13/Plugins/jenkins.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: "Jenkins" -description: > - Jenkins Plugin ---- - -## Summary - -This plugin collects Jenkins data through [Remote Access API](https://www.jenkins.io/doc/book/using/remote-access-api/). It then computes and visualizes various DevOps metrics from the Jenkins data. - -![image](https://user-images.githubusercontent.com/61080/141943122-dcb08c35-cb68-4967-9a7c-87b63c2d6988.png) - -## Metrics - -| Metric Name | Description | -|:-------------------|:------------------------------------| -| Build Count | The number of builds created | -| Build Success Rate | The percentage of successful builds | - -## Configuration - -In order to fully use this plugin, you will need to set various configurations via Dev Lake's `config-ui`. - -### By `config-ui` - -The connection section of the configuration screen requires the following key fields to connect to the Jenkins API. - -## Collect Data From Jenkins - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -```json -[ - [ - { - "plugin": "jenkins", - "options": { - "connectionId": 1 - } - } - ] -] -``` - -## Relationship between job and build - -Build is kind of a snapshot of job. Running job each time creates a build. diff --git a/versioned_docs/version-v0.13/Plugins/jira-connection-config-ui.png b/versioned_docs/version-v0.13/Plugins/jira-connection-config-ui.png deleted file mode 100644 index df2e8e39875..00000000000 Binary files a/versioned_docs/version-v0.13/Plugins/jira-connection-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.13/Plugins/jira-more-setting-in-config-ui.png b/versioned_docs/version-v0.13/Plugins/jira-more-setting-in-config-ui.png deleted file mode 100644 index dffb0c994d2..00000000000 Binary files a/versioned_docs/version-v0.13/Plugins/jira-more-setting-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.13/Plugins/jira.md b/versioned_docs/version-v0.13/Plugins/jira.md deleted file mode 100644 index 7ac79ad065d..00000000000 --- a/versioned_docs/version-v0.13/Plugins/jira.md +++ /dev/null @@ -1,196 +0,0 @@ ---- -title: "Jira" -description: > - Jira Plugin ---- - - -## Summary - -This plugin collects Jira data through Jira Cloud REST API. It then computes and visualizes various engineering metrics from the Jira data. - -jira metric display - -## Project Metrics This Covers - -| Metric Name | Description | -|:------------------------------------|:--------------------------------------------------------------------------------------------------| -| Requirement Count | Number of issues with type "Requirement" | -| Requirement Lead Time | Lead time of issues with type "Requirement" | -| Requirement Delivery Rate | Ratio of delivered requirements to all requirements | -| Requirement Granularity | Number of story points associated with an issue | -| Bug Count | Number of issues with type "Bug"
bugs are found during testing | -| Bug Age | Lead time of issues with type "Bug"
both new and deleted lines count | -| Bugs Count per 1k Lines of Code | Amount of bugs per 1000 lines of code | -| Incident Count | Number of issues with type "Incident"
incidents are found when running in production | -| Incident Age | Lead time of issues with type "Incident" | -| Incident Count per 1k Lines of Code | Amount of incidents per 1000 lines of code | - -## Configuration -Configuring Jira via [config-ui](/UserManuals/ConfigUI/Jira.md). - -## Collect Data From JIRA - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -> Warning: Data collection only supports single-task execution, and the results of concurrent multi-task execution may not meet expectations. - -``` -[ - [ - { - "plugin": "jira", - "options": { - "connectionId": 1, - "boardId": 8, - "since": "2006-01-02T15:04:05Z" - } - } - ] -] -``` - -- `connectionId`: The `ID` field from **JIRA Integration** page. -- `boardId`: JIRA board id, see "Find Board Id" for details. -- `since`: optional, download data since a specified date only. - - -## API - -### Data Connections - -1. Get all data connection - -```GET /plugins/jira/connections -[ - { - "ID": 14, - "CreatedAt": "2021-10-11T11:49:19.029Z", - "UpdatedAt": "2021-10-11T11:49:19.029Z", - "name": "test-jira-connection", - "endpoint": "https://merico.atlassian.net/rest", - "basicAuthEncoded": "basicAuth", - "epicKeyField": "epicKeyField", - "storyPointField": "storyPointField" - } -] -``` - -2. Create a new data connection - -```POST /plugins/jira/connections -{ - "name": "jira data connection name", - "endpoint": "jira api endpoint, i.e. https://merico.atlassian.net/rest", - "basicAuthEncoded": "generated by `echo -n {jira login email}:{jira token} | base64`", - "epicKeyField": "name of customfield of epic key", - "storyPointField": "name of customfield of story point", - "typeMappings": { // optional, send empty object to delete all typeMappings of the data connection - "userType": { - "standardType": "devlake standard type" - } - } -} -``` - - -3. Update data connection - -```PUT /plugins/jira/connections/:connectionId -{ - "name": "jira data connection name", - "endpoint": "jira api endpoint, i.e. https://merico.atlassian.net/rest", - "basicAuthEncoded": "generated by `echo -n {jira login email}:{jira token} | base64`", - "epicKeyField": "name of customfield of epic key", - "storyPointField": "name of customfield of story point", - "typeMappings": { // optional, send empty object to delete all typeMappings of the data connection - "userType": { - "standardType": "devlake standard type", - } - } -} -``` - -4. Get data connection detail -```GET /plugins/jira/connections/:connectionId -{ - "name": "jira data connection name", - "endpoint": "jira api endpoint, i.e. https://merico.atlassian.net/rest", - "basicAuthEncoded": "generated by `echo -n {jira login email}:{jira token} | base64`", - "epicKeyField": "name of customfield of epic key", - "storyPointField": "name of customfield of story point", - "typeMappings": { // optional, send empty object to delete all typeMappings of the data connection - "userType": { - "standardType": "devlake standard type", - } - } -} -``` - -5. Delete data connection - -```DELETE /plugins/jira/connections/:connectionId -``` - - -### Type mappings - -1. Get all type mappings -```GET /plugins/jira/connections/:connectionId/type-mappings -[ - { - "jiraConnectionId": 16, - "userType": "userType", - "standardType": "standardType" - } -] -``` - -2. Create a new type mapping - -```POST /plugins/jira/connections/:connectionId/type-mappings -{ - "userType": "userType", - "standardType": "standardType" -} -``` - -3. 
Update type mapping - -```PUT /plugins/jira/connections/:connectionId/type-mapping/:userType -{ - "standardType": "standardTypeUpdated" -} -``` - - -4. Delete type mapping - -```DELETE /plugins/jira/connections/:connectionId/type-mapping/:userType -``` - -5. API forwarding -For example: -Requests to `http://your_devlake_host/plugins/jira/connections/1/proxy/rest/agile/1.0/board/8/sprint` -would be forwarded to `https://your_jira_host/rest/agile/1.0/board/8/sprint` - -```GET /plugins/jira/connections/:connectionId/proxy/rest/*path -{ - "maxResults": 1, - "startAt": 0, - "isLast": false, - "values": [ - { - "id": 7, - "self": "https://merico.atlassian.net/rest/agile/1.0/sprint/7", - "state": "closed", - "name": "EE Sprint 7", - "startDate": "2020-06-12T00:38:51.882Z", - "endDate": "2020-06-26T00:38:00.000Z", - "completeDate": "2020-06-22T05:59:58.980Z", - "originBoardId": 8, - "goal": "" - } - ] -} -``` diff --git a/versioned_docs/version-v0.13/Plugins/refdiff.md b/versioned_docs/version-v0.13/Plugins/refdiff.md deleted file mode 100644 index 788cf995a7e..00000000000 --- a/versioned_docs/version-v0.13/Plugins/refdiff.md +++ /dev/null @@ -1,139 +0,0 @@ ---- -title: "RefDiff" -description: > - RefDiff Plugin ---- - - -## Summary - -For development workload analysis, we often need to know how many commits have been created between 2 releases. This plugin calculates which commits differ between 2 Ref (branch/tag), and the result will be stored back into database for further analysis. - -## Important Note - -You need to run gitextractor before the refdiff plugin. The gitextractor plugin should create records in the `refs` table in your DB before this plugin can be run. - -## Configuration - -This is a enrichment plugin based on Domain Layer data, no configuration needed - -## How to use - -In order to trigger the enrichment, you need to insert a new task into your pipeline. - -1. Make sure `commits` and `refs` are collected into your database, `refs` table should contain records like following: -``` -id ref_type -github:GithubRepo:384111310:refs/tags/0.3.5 TAG -github:GithubRepo:384111310:refs/tags/0.3.6 TAG -github:GithubRepo:384111310:refs/tags/0.5.0 TAG -github:GithubRepo:384111310:refs/tags/v0.0.1 TAG -github:GithubRepo:384111310:refs/tags/v0.2.0 TAG -github:GithubRepo:384111310:refs/tags/v0.3.0 TAG -github:GithubRepo:384111310:refs/tags/v0.4.0 TAG -github:GithubRepo:384111310:refs/tags/v0.6.0 TAG -github:GithubRepo:384111310:refs/tags/v0.6.1 TAG -``` -2. If you want to run calculateIssuesDiff, please configure GITHUB_PR_BODY_CLOSE_PATTERN in .env, you can check the example in .env.example(we have a default value, please make sure your pattern is disclosed by single quotes '') -3. If you want to run calculatePrCherryPick, please configure GITHUB_PR_TITLE_PATTERN in .env, you can check the example in .env.example(we have a default value, please make sure your pattern is disclosed by single quotes '') -4. 
And then, trigger a pipeline like following, you can also define sub tasks, calculateRefDiff will calculate commits between two ref, and creatRefBugStats will create a table to show bug list between two ref: -``` -curl -v -XPOST http://localhost:8080/pipelines --data @- <<'JSON' -{ - "name": "test-refdiff", - "plan": [ - [ - { - "plugin": "refdiff", - "options": { - "repoId": "github:GithubRepo:384111310", - "pairs": [ - { "newRef": "refs/tags/v0.6.0", "oldRef": "refs/tags/0.5.0" }, - { "newRef": "refs/tags/0.5.0", "oldRef": "refs/tags/0.4.0" } - ], - "tasks": [ - "calculateCommitsDiff", - "calculateIssuesDiff", - "calculatePrCherryPick", - ] - } - } - ] - ] -} -JSON -``` -Or if you prefered calculating latest releases -``` -curl -v -XPOST http://localhost:8080/pipelines --data @- <<'JSON' -{ - "name": "test-refdiff", - "plan": [ - [ - { - "plugin": "refdiff", - "options": { - "repoId": "github:GithubRepo:384111310", - "tagsPattern": "v\d+\.\d+.\d+", - "tagsLimit": 10, - "tagsOrder": "reverse semver", - "tasks": [ - "calculateCommitsDiff", - "calculateIssuesDiff", - "calculatePrCherryPick", - ] - } - } - ] - ] -} -JSON -``` - -## Development - -This plugin depends on `libgit2`, you need to install version 1.3.0 in order to run and debug this plugin on your local -machine. - -### Ubuntu - -``` -apt install cmake -git clone https://github.com/libgit2/libgit2.git -cd libgit2 -git checkout v1.3.0 -mkdir build -cd build -cmake .. -make -make install -``` - -### MacOS - -NOTE:Do **NOT** install libgit2 via `MadPorts` or `homebrew`, install from source instead. -``` -brew install cmake -git clone https://github.com/libgit2/libgit2.git -cd libgit2 -git checkout v1.3.0 -mkdir build -cd build -cmake .. -make -make install -``` - -#### Troubleshooting (MacOS) - -> Q: I got an error saying: `pkg-config: exec: "pkg-config": executable file not found in $PATH` - -> A: -> 1. Make sure you have pkg-config installed: -> -> `brew install pkg-config` -> -> 2. Make sure your pkg config path covers the installation: -> `export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/lib:/usr/local/lib/pkgconfig` - -
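As a quick sanity check for the "How to use" steps above, you can confirm that gitextractor has already populated the `refs` table before triggering a refdiff pipeline. The query below is only a minimal sketch: the `id` and `ref_type` columns come from the sample rows shown earlier, and the repo id filter is that same example value, which should be replaced with your own `repoId`.
```
-- Minimal pre-flight check (sketch): refdiff can only pair refs that already exist in this table.
-- Replace the repo id below with the repoId you plan to pass to the plugin.
SELECT id, ref_type
FROM refs
WHERE id LIKE 'github:GithubRepo:384111310:refs/tags/%'
ORDER BY id;
```
If this returns no rows, run the gitextractor plugin first as noted in the "Important Note" section.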


diff --git a/versioned_docs/version-v0.13/Plugins/tapd.md b/versioned_docs/version-v0.13/Plugins/tapd.md deleted file mode 100644 index b8db89fca87..00000000000 --- a/versioned_docs/version-v0.13/Plugins/tapd.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: "TAPD" -description: > - TAPD Plugin ---- - -## Summary - -This plugin collects TAPD data. - -This plugin is in development so you can't modify settings in config-ui. - -## Configuration - -In order to fully use this plugin, you will need to get endpoint/basic_auth_encoded/rate_limit and insert it into table `_tool_tapd_connections`. - diff --git a/versioned_docs/version-v0.13/SupportedDataSources.md b/versioned_docs/version-v0.13/SupportedDataSources.md deleted file mode 100644 index 12bdc1a3db3..00000000000 --- a/versioned_docs/version-v0.13/SupportedDataSources.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: "Supported Data Sources" -description: > - Data sources that DevLake supports -sidebar_position: 4 ---- - - -## Data Sources and Data Plugins -DevLake supports the following data sources. The data from each data source is collected with one or more plugins. There are 9 data plugins in total: `ae`, `feishu`, `gitextractor`, `github`, `gitlab`, `jenkins`, `jira`, `refdiff` and `tapd`. - - -| Data Source | Versions | Plugins | -|-------------|--------------------------------------|-------- | -| AE | | `ae` | -| Feishu | Cloud |`feishu` | -| GitHub | Cloud |`github`, `gitextractor`, `refdiff` | -| Gitlab | Cloud, Community Edition 13.x+ |`gitlab`, `gitextractor`, `refdiff` | -| Jenkins | 2.263.x+ |`jenkins` | -| Jira | Cloud, Server 8.x+, Data Center 8.x+ |`jira` | -| TAPD | Cloud | `tapd` | - - - -## Data Collection Scope By Each Plugin -This table shows the entities collected by each plugin. Domain layer entities in this table are consistent with the entities [here](./DataModels/DevLakeDomainLayerSchema.md). 
- -| Domain Layer Entities | ae | gitextractor | github | gitlab | jenkins | jira | refdiff | tapd | -| --------------------- | -------------- | ------------ | -------------- | ------- | ------- | ------- | ------- | ------- | -| commits | update commits | default | not-by-default | default | | | | | -| commit_parents | | default | | | | | | | -| commit_files | | default | | | | | | | -| pull_requests | | | default | default | | | | | -| pull_request_commits | | | default | default | | | | | -| pull_request_comments | | | default | default | | | | | -| pull_request_labels | | | default | | | | | | -| refs | | default | | | | | | | -| refs_commits_diffs | | | | | | | default | | -| refs_issues_diffs | | | | | | | default | | -| ref_pr_cherry_picks | | | | | | | default | | -| repos | | | default | default | | | | | -| repo_commits | | default | default | | | | | | -| board_repos | | | | | | | | | -| issue_commits | | | | | | | | | -| issue_repo_commits | | | | | | | | | -| pull_request_issues | | | | | | | | | -| refs_issues_diffs | | | | | | | | | -| boards | | | default | | | default | | default | -| board_issues | | | default | | | default | | default | -| issue_changelogs | | | | | | default | | default | -| issues | | | default | | | default | | default | -| issue_comments | | | | | | default | | default | -| issue_labels | | | default | | | | | | -| sprints | | | | | | default | | default | -| issue_worklogs | | | | | | default | | default | -| users o | | | default | | | default | | default | -| builds | | | | | default | | | | -| jobs | | | | | default | | | | - diff --git a/versioned_docs/version-v0.13/UserManuals/ConfigUI/AdvancedMode.md b/versioned_docs/version-v0.13/UserManuals/ConfigUI/AdvancedMode.md deleted file mode 100644 index b4fc0057288..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/ConfigUI/AdvancedMode.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -title: "Using Advanced Mode" -sidebar_position: 6 -description: > - Using the advanced mode of Config-UI ---- - - -## Why advanced mode? - -Advanced mode allows users to create any pipeline by writing JSON. This is useful for users who want to: - -1. Collect multiple GitHub/GitLab repos or Jira projects within a single pipeline -2. Have fine-grained control over what entities to collect or what subtasks to run for each plugin -3. Orchestrate a complex pipeline that consists of multiple stages of plugins. - -Advanced mode gives utmost flexibility to users by exposing the JSON API. - -## How to use advanced mode to create pipelines? - -1. Click on "+ New Blueprint" on the Blueprint page. - -![image](/img/AdvancedMode/AdvancedMode1.png) - -2. In step 1, click on the "Advanced Mode" link. - -![image](/img/AdvancedMode/AdvancedMode2.png) - -3. The pipeline editor expects a 2D array of plugins. The first dimension represents different stages of the pipeline and the second dimension describes the plugins in each stage. Stages run in sequential order and plugins within the same stage runs in parallel. We provide some templates for users to get started. Please also see the next section for some examples. - -![image](/img/AdvancedMode/AdvancedMode3.png) - -4. You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. After setting up the Blueprint, you will be prompted to the Blueprint's activity detail page, where you can track the progress of the current run and wait for it to finish before the dashboards become available. 
You can also view all historical runs of previously created Blueprints from the list on the Blueprint page. - -## Examples - -1. Collect multiple GitLab repos sequentially. - ->When there're multiple collection tasks against a single data source, we recommend running these tasks sequentially since the collection speed is mostly limited by the API rate limit of the data source. ->Running multiple tasks against the same data source is unlikely to speed up the process and may overwhelm the data source. - - -Below is an example for collecting 2 GitLab repos sequentially. It has 2 stages, each contains a GitLab task. - - -``` -[ - [ - { - "Plugin": "gitlab", - "Options": { - "projectId": 15238074 - } - } - ], - [ - { - "Plugin": "gitlab", - "Options": { - "projectId": 11624398 - } - } - ] -] -``` - - -2. Collect a GitHub repo and a Jira board in parallel - -Below is an example for collecting a GitHub repo and a Jira board in parallel. It has a single stage with a GitHub task and a Jira task. Since users can configure multiple Jira connection, it's required to pass in a `connectionId` for Jira task to specify which connection to use. - -``` -[ - [ - { - "Plugin": "github", - "Options": { - "repo": "lake", - "owner": "merico-dev" - } - }, - { - "Plugin": "jira", - "Options": { - "connectionId": 1, - "boardId": 76 - } - } - ] -] -``` -## Editing a Blueprint (Advanced Mode) -This section is for editing a Blueprint in the Advanced Mode. To edit in the Normal mode, please refer to [this guide](Tutorial.md#editing-a-blueprint-normal-mode). - -To edit a Blueprint created in the Advanced mode, you can simply go the Settings page of that Blueprint and click on Edit JSON to edit its configuration. - -![img](/img/ConfigUI/BlueprintEditing/blueprint-edit2.png) - diff --git a/versioned_docs/version-v0.13/UserManuals/ConfigUI/GitHub.md b/versioned_docs/version-v0.13/UserManuals/ConfigUI/GitHub.md deleted file mode 100644 index 09b2c8c8f76..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/ConfigUI/GitHub.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: "Configuring GitHub" -sidebar_position: 2 -description: Config UI instruction for GitHub ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![github-add-data-connections](/img/ConfigUI/github-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint, eg. `https://api.github.com/`. The url should end with `/`. - -#### Auth Token(s) -GitHub personal access tokens are required to add a connection. -- Learn about [how to create a GitHub personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) -- The data collection speed is relatively slow for GitHub since they have a **rate limit of [5,000 requests](https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting) per hour** (15,000 requests/hour if you pay for GitHub enterprise). You can accelerate the process by configuring _multiple_ personal access tokens. Please note that multiple tokens should be created by different GitHub accounts. Tokens belonging to the same GitHub account share the rate limit. - -#### Proxy URL (Optional) -If you are behind a corporate firewall or VPN you may need to utilize a proxy server. Enter a valid proxy server address on your network, e.g. 
`http://your-proxy-server.com:1080` - -#### Test and Save Connection -Click `Test Connection`, if the connection is successful, click `Save Connection` to add the connection. - - -### Step 2 - Setting Data Scope -![github-set-data-scope](/img/ConfigUI/github-set-data-scope.png) - -#### Projects -Enter the GitHub repos to collect. If you want to collect more than 1 repo, please separate repos with comma. For example, "apache/incubator-devlake,apache/incubator-devlake-website". - -#### Data Entities -Usually, you don't have to modify this part. However, if you don't want to collect certain GitHub entities, you can unselect some entities to accelerate the collection speed. -- Issue Tracking: GitHub issues, issue comments, issue labels, etc. -- Source Code Management: GitHub repos, refs, commits, etc. -- Code Review: GitHub PRs, PR comments and reviews, etc. -- Cross Domain: GitHub accounts, etc. - -### Step 3 - Adding Transformation Rules (Optional) -![github-add-transformation-rules-list](/img/ConfigUI/github-add-transformation-rules-list.png) -![github-add-transformation-rules](/img/ConfigUI/github-add-transformation-rules.png) - -Without adding transformation rules, you can still view the "[GitHub Metrics](/livedemo/DataSources/GitHub)" dashboard. However, if you want to view "[Weekly Bug Retro](/livedemo/QAEngineers/WeeklyBugRetro)", "[Weekly Community Retro](/livedemo/OSSMaintainers/WeeklyCommunityRetro)" or other pre-built dashboards, the following transformation rules, especially "Type/Bug", should be added.
- -Each GitHub repo has at most ONE set of transformation rules. - -#### Issue Tracking - -- Severity: Parse the value of `severity` from issue labels. - - when your issue labels for severity level are like 'severity/p0', 'severity/p1', 'severity/p2', then input 'severity/(.*)$' - - when your issue labels for severity level are like 'p0', 'p1', 'p2', then input '(p0|p1|p2)$' - -- Component: Same as "Severity". - -- Priority: Same as "Severity". - -- Type/Requirement: The `type` of issues with labels that match given regular expression will be set to "REQUIREMENT". Unlike "PR.type", submatch does nothing, because for issue management analysis, users tend to focus on 3 kinds of types (Requirement/Bug/Incident), however, the concrete naming varies from repo to repo, time to time, so we decided to standardize them to help analysts metrics. - -- Type/Bug: Same as "Type/Requirement", with `type` setting to "BUG". - -- Type/Incident: Same as "Type/Requirement", with `type` setting to "INCIDENT". - -#### Code Review - -- Type: The `type` of pull requests will be parsed from PR labels by given regular expression. For example: - - when your labels for PR types are like 'type/feature-development', 'type/bug-fixing' and 'type/docs', please input 'type/(.*)$' - - when your labels for PR types are like 'feature-development', 'bug-fixing' and 'docs', please input '(feature-development|bug-fixing|docs)$' - -- Component: The `component` of pull requests will be parsed from PR labels by given regular expression. - -#### Additional Settings (Optional) - -- Tags Limit: It'll compare the last N pairs of tags to get the "commit diff', "issue diff" between tags. N defaults to 10. - - commit diff: new commits for a tag relative to the previous one - - issue diff: issues solved by the new commits for a tag relative to the previous one - -- Tags Pattern: Only tags that meet given regular expression will be counted. - -- Tags Order: Only "reverse semver" order is supported for now. - -Please click `Save` to save the transformation rules for the repo. In the data scope list, click `Next Step` to continue configuring. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. diff --git a/versioned_docs/version-v0.13/UserManuals/ConfigUI/GitLab.md b/versioned_docs/version-v0.13/UserManuals/ConfigUI/GitLab.md deleted file mode 100644 index 74c9e41f107..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/ConfigUI/GitLab.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: "Configuring GitLab" -sidebar_position: 3 -description: Config UI instruction for GitLab ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![gitlab-add-data-connections](/img/ConfigUI/gitlab-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint. - - If you are using gitlab.com, the endpoint will be `https://gitlab.com/api/v4/` - - If you are self-hosting GitLab, the endpoint will look like `https://gitlab.example.com/api/v4/` -The endpoint url should end with `/`. - -#### Auth Token(s) -GitLab personal access tokens are required to add a connection. Learn about [how to create a GitLab personal access token](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html). - - -#### Proxy URL (Optional) -If you are behind a corporate firewall or VPN you may need to utilize a proxy server. 
Enter a valid proxy server address on your network, e.g. `http://your-proxy-server.com:1080` - -#### Test and Save Connection -Click `Test Connection`, if the connection is successful, click `Save Connection` to add the connection. - - -### Step 2 - Setting Data Scope - -#### Projects -Enter the GitLab repos to collect. How to get `GitLab` repos? -- Visit the repository page on GitLab -- Find the project id below the title - -![Get GitLab projects](https://user-images.githubusercontent.com/3789273/128568416-a47b2763-51d8-4a6a-8a8b-396512bffb03.png) - -If you want to collect more than 1 repo, please separate repos with comma. For example, "apache/incubator-devlake,apache/incubator-devlake-website". - -#### Data Entities -Usually, you don't have to modify this part. However, if you don't want to collect certain GitLab entities, you can unselect some entities to accerlerate the collection speed. -- Issue Tracking: GitLab issues, issue comments, issue labels, etc. -- Source Code Management: GitLab repos, refs, commits, etc. -- Code Review: GitLab MRs, MR comments and reviews, etc. -- Cross Domain: GitLab accounts, etc. - -### Step 3 - Adding Transformation Rules (Optional) -There are no transformation rules for GitLab repos. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. diff --git a/versioned_docs/version-v0.13/UserManuals/ConfigUI/Jenkins.md b/versioned_docs/version-v0.13/UserManuals/ConfigUI/Jenkins.md deleted file mode 100644 index 07d1ed2952a..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/ConfigUI/Jenkins.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "Configuring Jenkins" -sidebar_position: 5 -description: Config UI instruction for Jenkins ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![jenkins-add-data-connections](/img/ConfigUI/jenkins-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint. Eg. `https://ci.jenkins.io/`. The endpoint url should end with `/`. - -#### Username (E-mail) -Your User ID for the Jenkins Instance. - -#### Password -For help on Username and Password, please see Jenkins docs on [using credentials](https://www.jenkins.io/doc/book/using/using-credentials/). You can also use "API Access Token" for this field, which can be generated at `User` -> `Configure` -> `API Token` section on Jenkins. - -#### Test and Save Connection -Click `Test Connection`, if the connection is successful, click `Save Connection` to add the connection. - -### Step 2 - Setting Data Scope -There is no data cope setting for Jenkins. - -### Step 3 - Adding Transformation Rules (Optional) -There are no transformation rules for Jenkins. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. diff --git a/versioned_docs/version-v0.13/UserManuals/ConfigUI/Jira.md b/versioned_docs/version-v0.13/UserManuals/ConfigUI/Jira.md deleted file mode 100644 index 952ecddea6a..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/ConfigUI/Jira.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "Configuring Jira" -sidebar_position: 4 -description: Config UI instruction for Jira ---- - -Visit config-ui: `http://localhost:4000`. 
-### Step 1 - Add Data Connections -![jira-add-data-connections](/img/ConfigUI/jira-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint - - If you are using Jira Cloud, the endpoint will be `https://.atlassian.net/rest/` - - If you are self-hosting Jira v8+, the endpoint will look like `https://jira..com/rest/` -The endpoint url should end with `/`. - -#### Username / Email -Input the username or email of your Jira account. - - -#### Password -- If you are using Jira Cloud, please input the [Jira personal access token](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html). -- If you are using Jira Server v8+, please input the password of your Jira account. - -#### Proxy URL (Optional) -If you are behind a corporate firewall or VPN you may need to utilize a proxy server. Enter a valid proxy server address on your network, e.g. `http://your-proxy-server.com:1080` - -#### Test and Save Connection -Click `Test Connection`, if the connection is successful, click `Save Connection` to add the connection. - - -### Step 2 - Setting Data Scope -![jira-set-data-scope](/img/ConfigUI/jira-set-data-scope.png) - -#### Projects -Choose the Jira boards to collect. - -#### Data Entities -Usually, you don't have to modify this part. However, if you don't want to collect certain Jira entities, you can unselect some entities to accerlerate the collection speed. -- Issue Tracking: Jira issues, issue comments, issue labels, etc. -- Cross Domain: Jira accounts, etc. - -### Step 3 - Adding Transformation Rules (Optional) -![jira-add-transformation-rules-list](/img/ConfigUI/jira-add-transformation-rules-list.png) - -Without adding transformation rules, you can not view all charts in "Jira" or "Engineering Throughput and Cycle Time" dashboards.
- -Each Jira board has at most ONE set of transformation rules. - -![jira-add-transformation-rules](/img/ConfigUI/jira-add-transformation-rules.png) - -#### Issue Tracking - -- Requirement: choose the issue types to be transformed to "REQUIREMENT". -- Bug: choose the issue types to be transformed to "BUG". -- Incident: choose the issue types to be transformed to "INCIDENT". -- Epic Key: choose the custom field that represents Epic key. In most cases, it is "Epic Link". -- Story Point: choose the custom field that represents story points. In most cases, it is "Story Points". - -#### Additional Settings -- Remotelink Commit SHA: parse the commits from an issue's remote links by the given regular expression so that the relationship between `issues` and `commits` can be created. You can directly use the regular expression `/commit/([0-9a-f]{40})$`. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. diff --git a/versioned_docs/version-v0.13/UserManuals/ConfigUI/Tutorial.md b/versioned_docs/version-v0.13/UserManuals/ConfigUI/Tutorial.md deleted file mode 100644 index 5c61e930c79..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/ConfigUI/Tutorial.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: "Tutorial" -sidebar_position: 1 -description: Config UI instruction ---- - -## Overview -The Apache DevLake Config UI allows you to configure the data you wish to collect through a graphical user interface. Visit config-ui at `http://localhost:4000`. - -## Creating a Blueprint - -### Introduction -A Blueprint is the plan that covers all the work to get your raw data ready for query and metric computaion in the dashboards. We have designed the Blueprint to help you with data collection within only one workflow. Creating a Blueprint consists of four steps: - -1. Adding Data Connections: Add new or select from existing data connections for the data you wish to collect -2. Setting Data Scope: Select the scope of data (e.g. GitHub projects or Jira boards) for your data connections -3. Adding Transformation (Optional): Add transformation rules for the data scope you have selected in order to view corresponding metrics -4. Setting Sync Frequency: Set up a schedule for how often you wish your data to be synced - -### Step 1 - Adding Data Connections -There are two ways to add data connections to your Blueprint: adding them during the creation of a Blueprint and adding them separately on the Data Integrations page. There is no difference between these two ways. - -When adding data connections from the Blueprint, you can either create a new or select from an exisitng data connections. - -![img](/img/ConfigUI/BlueprintCreation/step1.png) - -### Step 2 - Setting Data Scope -After adding data connections, click on "Next Step" and you will be prompted to select the data scope of each data connections. For instance, for a GitHub connection, you will need to enter the projects you wish to sync and for Jira, you will need to select the boards. - -![img](/img/ConfigUI/BlueprintCreation/step2.png) - -### Step 3 - Adding Transformation (Optional) -This step is only required for viewing certain metrics in the pre-built dashboards that require data transformation. Without adding transformation rules, you can still view the basic metrics. - -Currently, DevLake only supports transformation for GitHub and Jira connections. 
- -![img](/img/ConfigUI/BlueprintCreation/step3.png) - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. - -After setting up the Blueprint, you will be prompted to the Blueprint's activity detail page, where you can track the progress of the current run and wait for it to finish before the dashboards become available. You can also view all historical runs of previously created Blueprints from the list on the Blueprint page. - -![img](/img/ConfigUI/BlueprintCreation/step4.png) - -## Editing a Blueprint (Normal Mode) -On the Blueprint list page, clicking on any Blueprint will lead you to the detail page of the blueprint. If you switch to the Settings tab on the detail page, you can see the settings of your Blueprint and edit parts of it seperately. - -In the current version, the Blueprint editing feature **allows** editing: -- The Blueprint's name -- The sync frequency -- The data scope of a connection -- The data entities of the data scope -- The transformation rules of any data scope - -and does **NOT allow**: -- Adding or deleting connections to an existing blueprint (will be available in the future) -- Editing any connections - -Please note: -1. The connections of some data sources, such as Jenkins, do not have an editing button, because their configuration do not contain data scope, data entities and/or transformation. -2. If you have created the Blueprint in the Normal mode, you will only be able to edit it in the Normal Mode; if you have created it in the Advanced Mode, please refer to [this guide](AdvancedMode.md#editing-a-blueprint-advanced-mode) for editing. - -The Settings page for editing Blueprints: -![img](/img/ConfigUI/BlueprintEditing/blueprint-edit1.png) - -## Creating and Managing Data Connections -The Data Connections page allows you to view, create and manage all your data connections at one place. diff --git a/versioned_docs/version-v0.13/UserManuals/ConfigUI/_category_.json b/versioned_docs/version-v0.13/UserManuals/ConfigUI/_category_.json deleted file mode 100644 index 62f99d484f6..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/ConfigUI/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Config UI", - "position": 4 -} diff --git a/versioned_docs/version-v0.13/UserManuals/Dashboards/GrafanaUserGuide.md b/versioned_docs/version-v0.13/UserManuals/Dashboards/GrafanaUserGuide.md deleted file mode 100644 index 41a8e37f78f..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/Dashboards/GrafanaUserGuide.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -title: "Grafana User Guide" -sidebar_position: 2 -description: > - Grafana User Guide ---- - - -# Grafana - - - -When first visiting Grafana, you will be provided with a sample dashboard with some basic charts setup from the database. - -## Contents - -Section | Link -:------------ | :------------- -Logging In | [View Section](#logging-in) -Viewing All Dashboards | [View Section](#viewing-all-dashboards) -Customizing a Dashboard | [View Section](#customizing-a-dashboard) -Dashboard Settings | [View Section](#dashboard-settings) -Provisioning a Dashboard | [View Section](#provisioning-a-dashboard) -Troubleshooting DB Connection | [View Section](#troubleshooting-db-connection) - -## Logging In - -Once the app is up and running, visit `http://localhost:3002` to view the Grafana dashboard. 
- -Default login credentials are: - -- Username: `admin` -- Password: `admin` - -## Viewing All Dashboards - -To see all dashboards created in Grafana visit `/dashboards` - -Or, use the sidebar and click on **Manage**: - -![Screen Shot 2021-08-06 at 11 27 08 AM](https://user-images.githubusercontent.com/3789273/128534617-1992c080-9385-49d5-b30f-be5c96d5142a.png) - - -## Customizing a Dashboard - -When viewing a dashboard, click the top bar of a panel, and go to **edit** - -![Screen Shot 2021-08-06 at 11 35 36 AM](https://user-images.githubusercontent.com/3789273/128535505-a56162e0-72ad-46ac-8a94-70f1c7a910ed.png) - -**Edit Dashboard Panel Page:** - -![grafana-sections](https://user-images.githubusercontent.com/3789273/128540136-ba36ee2f-a544-4558-8282-84a7cb9df27a.png) - -### 1. Preview Area -- **Top Left** is the variable select area (custom dashboard variables, used for switching projects, or grouping data) -- **Top Right** we have a toolbar with some buttons related to the display of the data: - - View data results in a table - - Time range selector - - Refresh data button -- **The Main Area** will display the chart and should update in real time - -> Note: Data should refresh automatically, but may require a refresh using the button in some cases - -### 2. Query Builder -Here we form the SQL query to pull data into our chart, from our database -- Ensure the **Data Source** is the correct database - - ![Screen Shot 2021-08-06 at 10 14 22 AM](https://user-images.githubusercontent.com/3789273/128545278-be4846e0-852d-4bc8-8994-e99b79831d8c.png) - -- Select **Format as Table**, and **Edit SQL** buttons to write/edit queries as SQL - - ![Screen Shot 2021-08-06 at 10 17 52 AM](https://user-images.githubusercontent.com/3789273/128545197-a9ff9cb3-f12d-4331-bf6a-39035043667a.png) - -- The **Main Area** is where the queries are written, and in the top right is the **Query Inspector** button (to inspect returned data) - - ![Screen Shot 2021-08-06 at 10 18 23 AM](https://user-images.githubusercontent.com/3789273/128545557-ead5312a-e835-4c59-b9ca-dd5c08f2a38b.png) - -### 3. Main Panel Toolbar -In the top right of the window are buttons for: -- Dashboard settings (regarding entire dashboard) -- Save/apply changes (to specific panel) - -### 4. Grafana Parameter Sidebar -- Change chart style (bar/line/pie chart etc) -- Edit legends, chart parameters -- Modify chart styling -- Other Grafana specific settings - -## Dashboard Settings - -When viewing a dashboard click on the settings icon to view dashboard settings. Here are 2 important sections to use: - -![Screen Shot 2021-08-06 at 1 51 14 PM](https://user-images.githubusercontent.com/3789273/128555763-4d0370c2-bd4d-4462-ae7e-4b140c4e8c34.png) - -- Variables - - Create variables to use throughout the dashboard panels, that are also built on SQL queries - - ![Screen Shot 2021-08-06 at 2 02 40 PM](https://user-images.githubusercontent.com/3789273/128553157-a8e33042-faba-4db4-97db-02a29036e27c.png) - -- JSON Model - - Copy `json` code here and save it to a new file in `/grafana/dashboards/` with a unique name in the `lake` repo. This will allow us to persist dashboards when we load the app - - ![Screen Shot 2021-08-06 at 2 02 52 PM](https://user-images.githubusercontent.com/3789273/128553176-65a5ae43-742f-4abf-9c60-04722033339e.png) - -## Provisioning a Dashboard - -To save a dashboard in the `lake` repo and load it: - -1. Create a dashboard in browser (visit `/dashboard/new`, or use sidebar) -2. Save dashboard (in top right of screen) -3. 
Go to dashboard settings (in top right of screen) -4. Click on _JSON Model_ in sidebar -5. Copy code into a new `.json` file in `/grafana/dashboards` - -## Troubleshooting DB Connection - -To ensure we have properly connected our database to the data source in Grafana, check database settings in `./grafana/datasources/datasource.yml`, specifically: -- `database` -- `user` -- `secureJsonData/password` diff --git a/versioned_docs/version-v0.13/UserManuals/Dashboards/_category_.json b/versioned_docs/version-v0.13/UserManuals/Dashboards/_category_.json deleted file mode 100644 index 0db83c6e9b8..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/Dashboards/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Dashboards", - "position": 5 -} diff --git a/versioned_docs/version-v0.13/UserManuals/TeamConfiguration.md b/versioned_docs/version-v0.13/UserManuals/TeamConfiguration.md deleted file mode 100644 index c8ade3eabcf..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/TeamConfiguration.md +++ /dev/null @@ -1,188 +0,0 @@ ---- -title: "Team Configuration" -sidebar_position: 7 -description: > - Team Configuration ---- -## What is 'Team Configuration' and how it works? - -To organize and display metrics by `team`, Apache DevLake needs to know about the team configuration in an organization, specifically: - -1. What are the teams? -2. Who are the users(unified identities)? -3. Which users belong to a team? -4. Which accounts(identities in specific tools) belong to the same user? - -Each of the questions above corresponds to a table in DevLake's schema, illustrated below: - -![image](/img/Team/teamflow0.png) - -1. `teams` table stores all the teams in the organization. -2. `users` table stores the organization's roster. An entry in the `users` table corresponds to a person in the org. -3. `team_users` table stores which users belong to a team. -4. `user_accounts` table stores which accounts belong to a user. An `account` refers to an identiy in a DevOps tool and is automatically created when importing data from that tool. For example, a `user` may have a GitHub `account` as well as a Jira `account`. - -Apache DevLake uses a simple heuristic algorithm based on emails and names to automatically map accounts to users and populate the `user_accounts` table. -When Apache DevLake cannot confidently map an `account` to a `user` due to insufficient information, it allows DevLake users to manually configure the mapping to ensure accuracy and integrity. - -## A step-by-step guide - -In the following sections, we'll walk through how to configure teams and create the five aforementioned tables (`teams`, `users`, `team_users`, `accounts`, and `user_accounts`). -The overall workflow is: - -1. Create the `teams` table -2. Create the `users` and `team_users` table -3. Populate the `accounts` table via data collection -4. Run a heuristic algorithm to populate `user_accounts` table -5. Manually update `user_accounts` when the algorithm can't catch everything - -Note: - -1. Please replace `/path/to/*.csv` with the absolute path of the CSV file you'd like to upload. -2. Please replace `127.0.0.1:4000` with your actual Apache DevLake ConfigUI service IP and port number. - -## Step 1 - Create the `teams` table - -You can create the `teams` table by sending a PUT request to `/plugins/org/teams.csv` with a `teams.csv` file. To jumpstart the process, you can download a template `teams.csv` from `/plugins/org/teams.csv?fake_data=true`. Below are the detailed instructions: - -a. 
Download the template `teams.csv` file - - i. GET http://127.0.0.1:4000/api/plugins/org/teams.csv?fake_data=true (pasting the URL into your browser will download the template) - - ii. If you prefer using curl: - curl --location --request GET 'http://127.0.0.1:4000/api/plugins/org/teams.csv?fake_data=true' - - -b. Fill out `teams.csv` file and upload it to DevLake - - i. Fill out `teams.csv` with your org data. Please don't modify the column headers or the file suffix. - - ii. Upload `teams.csv` to DevLake with the following curl command: - curl --location --request PUT 'http://127.0.0.1:4000/api/plugins/org/teams.csv' --form 'file=@"/path/to/teams.csv"' - - iii. The PUT request would populate the `teams` table with data from `teams.csv` file. - You can connect to the database and verify the data in the `teams` table. - See Appendix for how to connect to the database. - -![image](/img/Team/teamflow3.png) - - -## Step 2 - Create the `users` and `team_users` table - -You can create the `users` and `team_users` table by sending a single PUT request to `/plugins/org/users.csv` with a `users.csv` file. To jumpstart the process, you can download a template `users.csv` from `/plugins/org/users.csv?fake_data=true`. Below are the detailed instructions: - -a. Download the template `users.csv` file - - i. GET http://127.0.0.1:4000/api/plugins/org/users.csv?fake_data=true (pasting the URL into your browser will download the template) - - ii. If you prefer using curl: - curl --location --request GET 'http://127.0.0.1:4000/api/plugins/org/users.csv?fake_data=true' - - -b. Fill out `users.csv` and upload to DevLake - - i. Fill out `users.csv` with your org data. Please don't modify the column headers or the file suffix - - ii. Upload `users.csv` to DevLake with the following curl command: - curl --location --request PUT 'http://127.0.0.1:4000/api/plugins/org/users.csv' --form 'file=@"/path/to/users.csv"' - - iii. The PUT request would populate the `users` table along with the `team_users` table with data from `users.csv` file. - You can connect to the database and verify these two tables. - -![image](/img/Team/teamflow1.png) - -![image](/img/Team/teamflow2.png) - -c. If you ever want to update `team_users` or `users` table, simply upload the updated `users.csv` to DevLake again following step b. - -## Step 3 - Populate the `accounts` table via data collection - -The `accounts` table is automatically populated when you collect data from data sources like GitHub and Jira through DevLake. - -For example, the GitHub plugin would create one entry in the `accounts` table for each GitHub user involved in your repository. -For demo purposes, we'll insert some mock data into the `accounts` table using SQL: - -``` -INSERT INTO `accounts` (`id`, `created_at`, `updated_at`, `_raw_data_params`, `_raw_data_table`, `_raw_data_id`, `_raw_data_remark`, `email`, `full_name`, `user_name`, `avatar_url`, `organization`, `created_date`, `status`) -VALUES - ('github:GithubAccount:1:1234', '2022-07-12 10:54:09.632', '2022-07-12 10:54:09.632', '{\"ConnectionId\":1,\"Owner\":\"apache\",\"Repo\":\"incubator-devlake\"}', '_raw_github_api_pull_request_reviews', 28, '', 'TyroneKCummings@teleworm.us', '', 'Tyrone K. 
Cummings', 'https://avatars.githubusercontent.com/u/101256042?u=a6e460fbaffce7514cbd65ac739a985f5158dabc&v=4', '', NULL, 0), - ('jira:JiraAccount:1:629cdf', '2022-07-12 10:54:09.632', '2022-07-12 10:54:09.632', '{\"ConnectionId\":1,\"BoardId\":\"76\"}', '_raw_jira_api_users', 5, '', 'DorothyRUpdegraff@dayrep.com', '', 'Dorothy R. Updegraff', 'https://avatars.jiraxxxx158dabc&v=4', '', NULL, 0); - -``` - -![image](/img/Team/teamflow4.png) - -## Step 4 - Run a heuristic algorithm to populate `user_accounts` table - -Now that we have data in both the `users` and `accounts` table, we can tell DevLake to infer the mappings between `users` and `accounts` with a simple heuristic algorithm based on names and emails. - -a. Send an API request to DevLake to run the mapping algorithm - -``` -curl --location --request POST '127.0.0.1:4000/api/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "name": "test", - "plan":[ - [ - { - "plugin": "org", - "subtasks":["connectUserAccountsExact"], - "options":{ - "connectionId":1 - } - } - ] - ] -}' -``` - -b. After successful execution, you can verify the data in `user_accounts` in the database. - -![image](/img/Team/teamflow5.png) - -## Step 5 - Manually update `user_accounts` when the algorithm can't catch everything - -It is recommended to examine the generated `user_accounts` table after running the algorithm. -We'll demonstrate how to manually update `user_accounts` when the mapping is inaccurate/incomplete in this section. -To make manual verification easier, DevLake provides an API for users to download `user_accounts` as a CSV file. -Alternatively, you can verify and modify `user_accounts` all by SQL, see Appendix for more info. - -a. GET http://127.0.0.1:4000/api/plugins/org/user_account_mapping.csv(pasting the URL into your browser will download the file). If you prefer using curl: -``` -curl --location --request GET 'http://127.0.0.1:4000/api/plugins/org/user_account_mapping.csv' -``` - -![image](/img/Team/teamflow6.png) - -b. If you find the mapping inaccurate or incomplete, you can modify the `user_account_mapping.csv` file and then upload it to DevLake. -For example, here we change the `UserId` of row 'Id=github:GithubAccount:1:1234' in the `user_account_mapping.csv` file to 2. -Then we upload the updated `user_account_mapping.csv` file with the following curl command: - -``` -curl --location --request PUT 'http://127.0.0.1:4000/api/plugins/org/user_account_mapping.csv' --form 'file=@"/path/to/user_account_mapping.csv"' -``` - -c. You can verify the data in the `user_accounts` table has been updated. - -![image](/img/Team/teamflow7.png) - -## Appendix A: how to connect to the database - -Here we use MySQL as an example. You can install database management tools like Sequel Ace, DataGrip, MySQLWorkbench, etc. 
- - -Or through the command line: - -``` -mysql -h -u -p -P -``` - -## Appendix B: how to examine `user_accounts` via SQL - -``` -SELECT a.id as account_id, a.email, a.user_name as account_user_name, u.id as user_id, u.name as real_name -FROM accounts a - join user_accounts ua on a.id = ua.account_id - join users u on ua.user_id = u.id -``` diff --git a/versioned_docs/version-v0.13/UserManuals/_category_.json b/versioned_docs/version-v0.13/UserManuals/_category_.json deleted file mode 100644 index 23ce768a59c..00000000000 --- a/versioned_docs/version-v0.13/UserManuals/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "User Manuals", - "position": 3, - "link":{ - "type": "generated-index", - "slug": "UserManuals" - } -} diff --git a/versioned_docs/version-v0.14/DataModels/DevLakeDomainLayerSchema.md b/versioned_docs/version-v0.14/DataModels/DevLakeDomainLayerSchema.md deleted file mode 100644 index 2476555456c..00000000000 --- a/versioned_docs/version-v0.14/DataModels/DevLakeDomainLayerSchema.md +++ /dev/null @@ -1,602 +0,0 @@ ---- -title: "Domain Layer Schema" -description: > - DevLake Domain Layer Schema -sidebar_position: 2 ---- - -## Summary - -This document describes Apache DevLake's domain layer schema. - -Referring to DevLake's [architecture](../Overview/Architecture.md), the data in the domain layer is transformed from the data in the tool layer. The tool layer schema is based on the data from specific tools such as Jira, GitHub, Gitlab, Jenkins, etc. The domain layer schema can be regarded as an abstraction of tool-layer schemas. - -Domain layer schema itself includes 2 logical layers: a `DWD` layer and a `DWM` layer. The DWD layer stores the detailed data points, while the DWM is the slight aggregation and operation of DWD to store more organized details or middle-level metrics. - - -## Use Cases -1. [All metrics](../Metrics) from pre-built dashboards are based on this data schema. -2. As a user, you can create your own customized dashboards based on this data schema. -3. As a contributor, you can refer to this data schema while working on the ETL logic when adding/updating data source plugins. - - -## Data Models - -This is the up-to-date domain layer schema for DevLake v0.10.x. Tables (entities) are categorized into 5 domains. -1. Issue tracking domain entities: Jira issues, GitHub issues, GitLab issues, etc. -2. Source code management domain entities: Git/GitHub/Gitlab commits and refs(tags and branches), etc. -3. Code review domain entities: GitHub PRs, Gitlab MRs, etc. -4. CI/CD domain entities: Jenkins jobs & builds, etc. -5. Cross-domain entities: entities that map entities from different domains to break data isolation. - - -### Schema Diagram -[![Domain Layer Schema](/img/DomainLayerSchema/schema-diagram-v0.14.png)](/img/DomainLayerSchema/schema-diagram-v0.14.png) - -When reading the schema, you'll notice that many tables' primary key is called `id`. Unlike auto-increment id or UUID, `id` is a string composed of several parts to uniquely identify similar entities (e.g. repo) from different platforms (e.g. Github/Gitlab) and allow them to co-exist in a single table. - -Tables that end with WIP are still under development. - - -### Naming Conventions - -1. The name of a table is in plural form. Eg. boards, issues, etc. -2. The name of a table which describe the relation between 2 entities is in the form of [BigEntity in singular form]\_[SmallEntity in plural form]. Eg. board_issues, sprint_issues, pull_request_comments, etc. -3. 
Values of fields in enum type are in capital letters. Eg. [table.issues.type](#issues) has 3 values, REQUIREMENT, BUG, INCIDENT. Values that are phrases, such as 'IN_PROGRESS' of [table.issues.status](#issues), are separated with underscore '\_'. - -## How to Customize Data Models -Apache DevLake provides 2 plugins: -- [customize](https://devlake.apache.org/docs/Plugins/customize): to create/delete columns in the domain layer schema with the data extracted from [raw layer tables](https://devlake.apache.org/docs/Overview/Architecture/#dataflow) -- [dbt](https://devlake.apache.org/docs/Plugins/dbt): to transform data based on the domain layer schema and generate new tables - -
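To illustrate the kind of transformation the `dbt` plugin is intended for, below is a minimal SQL sketch that derives a new table from the domain layer schema. The output table name `bug_count_by_month` is hypothetical; the `issues` columns it relies on (`type`, `created_date`) are documented in the [issues](#issues) table below.
```
-- Hypothetical derived table (sketch): monthly count of issues whose standard type is BUG.
CREATE TABLE IF NOT EXISTS bug_count_by_month AS
SELECT
  DATE_FORMAT(created_date, '%Y-%m') AS month,
  COUNT(*) AS bug_count
FROM issues
WHERE type = 'BUG'
GROUP BY DATE_FORMAT(created_date, '%Y-%m');
```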
- - -## DWD Entities - (Data Warehouse Detail) - -### Domain 1 - Issue Tracking - -#### issues - -An `issue` is the abstraction of Jira/Github/GitLab/TAPD/... issues. - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------------- | :------- | :--------- |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :------ | -| `id` | varchar | 255 | An issue's `id` is composed of < plugin >:< Entity >:< PK0 >[:PK1]..."
  • For Github issues, a Github issue's id is like "github:GithubIssues:< GithubIssueId >". Eg. 'github:GithubIssues:1049355647'
  • For Jira issues, a Jira issue's id is like "jira:JiraIssues:< JiraSourceId >:< JiraIssueId >". Eg. 'jira:JiraIssues:1:10063'. < JiraSourceId > is used to identify which Jira source the issue came from, since DevLake users can import data from several different Jira instances at the same time.
| PK | -| `issue_key` | varchar | 255 | The key of this issue. For example, the key of this Github [issue](https://github.com/apache/incubator-devlake/issues/1145) is 1145. | | -| `url` | varchar | 255 | The url of the issue. It's a web address in most cases. | | -| `title` | varchar | 255 | The title of an issue | | -| `description` | longtext | | The detailed description/summary of an issue | | -| `type` | varchar | 255 | The standard type of this issue. There're 3 standard types:
  • REQUIREMENT: this issue is a feature
  • BUG: this issue is a bug found during test
  • INCIDENT: this issue is a bug found after release
The 3 standard types are transformed from the original types of an issue. The transformation rule is set in the '.env' file or 'config-ui' before data collection. For issues with an original type that has not been mapped to a standard type, the value of `type` will be the issue's original type. | | -| `status` | varchar | 255 | The standard statuses of this issue. There're 3 standard statuses:
  • TODO: this issue is in backlog or to-do list
  • IN_PROGRESS: this issue is in progress
  • DONE: this issue is resolved or closed
The 3 standard statuses are transformed from the original statuses of an issue. The transformation rule:
  • For Jira issue status: transformed from the Jira issue's `statusCategory`. Jira issue has 3 default status categories: 'To Do', 'In Progress', 'Done'.
  • For Github issue status:
    • open -> TODO
    • closed -> DONE
| | -| `original_status` | varchar | 255 | The original status of an issue. | | -| `story_point` | int | | The story point of this issue. It defaults to an empty string for data sources such as Github issues and Gitlab issues. | | -| `priority` | varchar | 255 | The priority of the issue | | -| `component` | varchar | 255 | The component a bug-issue affects. This field only supports Github plugin for now. The value is transformed from Github issue labels by the rules set according to the user's configuration of .env by end users during DevLake installation. | | -| `severity` | varchar | 255 | The severity level of a bug-issue. This field only supports Github plugin for now. The value is transformed from Github issue labels by the rules set according to the user's configuration of .env by end users during DevLake installation. | | -| `parent_issue_id` | varchar | 255 | The id of its parent issue | | -| `epic_key` | varchar | 255 | The key of the epic this issue belongs to. For tools with no epic-type issues such as Github and Gitlab, this field defaults to an empty string | | -| `original_estimate_minutes` | int | | The original estimation of the time allocated for this issue | | -| `time_spent_minutes` | int | | The time spent on this issue | | -| `time_remaining_minutes` | int | | The remaining time to resolve the issue | | -| `creator_id` | varchar | 255 | The id of issue creator | | -| `creator_name` | varchar | 255 | The name of the creator | | -| `assignee_id` | varchar | 255 | The id of issue assignee.
  • For Github issues: this is the last assignee of an issue if the issue has multiple assignees
  • For Jira issues: this is the assignee of the issue at the time of collection
| | -| `assignee_name` | varchar | 255 | The name of the assignee | | -| `created_date` | datetime | 3 | The time issue created | | -| `updated_date` | datetime | 3 | The last time issue gets updated | | -| `resolution_date` | datetime | 3 | The time the issue changes to 'DONE'. | | -| `lead_time_minutes` | int | | Describes the cycle time from issue creation to issue resolution.
  • For issues whose type = 'REQUIREMENT' and status = 'DONE', lead_time_minutes = resolution_date - created_date. The unit is minute.
  • For issues whose type != 'REQUIREMENT' or status != 'DONE', lead_time_minutes is null
| | -| `deployment_id` | varchar | 255 | The cicd_task that relates to this issue | | - -#### issue_labels - -This table shows the labels of issues. Multiple entries can exist per issue. This table can be used to filter issues by label name. - -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `name` | varchar | 255 | Label name | | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | - - -#### issue_comments(WIP) - -This table shows the comments of issues. Issues with multiple comments are shown as multiple records. This table can be used to calculate _metric - issue response time_. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------- | -| `id` | varchar | 255 | The unique id of a comment | PK | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | -| `account_id` | varchar | 255 | The id of the account who made the comment | FK_accounts.id | -| `body` | longtext | | The body/detail of the comment | | -| `created_date` | datetime | 3 | The creation date of the comment | | -| `updated_date` | datetime | 3 | The last time comment gets updated | | - -#### issue_changelogs - -This table shows the changelogs of issues. Issues with multiple changelogs are shown as multiple records. This is transformed from Jira or TAPD changelogs. - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------- | :------- | :--------- | :--------------------------------------------------------------- | :------------- | -| `id` | varchar | 255 | The unique id of an issue changelog | PK | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | -| `author_id` | varchar | 255 | The id of the user who made the change | FK_accounts.id | -| `author_name` | varchar | 255 | The id of the user who made the change | FK_accounts.id | -| `field_id` | varchar | 255 | The id of changed field | | -| `field_name` | varchar | 255 | The id of changed field | | -| `original_from_value` | varchar | 255 | The original value of the changed field | | -| `original_to_value` | varchar | 255 | The new value of the changed field | | -| `from_value` | varchar | 255 | The transformed/standardized original value of the changed field | | -| `to_value` | varchar | 255 | The transformed/standardized new value of the changed field | | -| `created_date` | datetime | 3 | The creation date of the changelog | | - - -#### issue_worklogs - -This table shows the work logged under issues. Usually, an issue has multiple worklogs logged by different developers. - -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :------------------------------------------------------------------------------------------- |:----------------| -| `id` | varchar | 255 | The id of the worklog | PK | -| `author_id` | varchar | 255 | The id of the author who logged the work | FK_accounts.id | -| `comment` | longtext | 255 | The comment made while logging the work. | | -| `time_spent_minutes` | int | | The time logged. The unit of value is normalized to minute. Eg. 
1d =) 480, 4h30m =) 270 | | -| `logged_date` | datetime | 3 | The time of this logging action | | -| `started_date` | datetime | 3 | Start time of the worklog | | -| `issue_id` | varchar | 255 | Issue ID | FK_issues.id | - - -#### boards - -A `board` is an issue list or a collection of issues. It's the abstraction of a Jira board, a Jira project, a [GitHub issue list](https://github.com/apache/incubator-devlake/issues) or a GitLab issue list. This table can be used to filter issues by the boards they belong to. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | A board's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
  • For a Github repo's issue list, the board id is like "< github >:< GithubRepos >:< GithubRepoId >". Eg. "github:GithubRepo:384111310"
  • For a Jira board, the board id is like "< jira >:< JiraSourceId >:< JiraBoards >:< JiraBoardsId >". Eg. "jira:1:JiraBoards:12"
| PK | -| `name` | varchar | 255 | The name of the board. Note: the board name of a Github project 'apache/incubator-devlake' is 'apache/incubator-devlake', representing the [default issue list](https://github.com/apache/incubator-devlake/issues). | | -| `description` | varchar | 255 | The description of the board. | | -| `url` | varchar | 255 | The url of the board. Eg. https://github.com/apache/incubator-devlake | | -| `created_date` | datetime | 3 | Board creation time | | -| `type` | varchar | 255 | Identify scrum and non-scrum board | | - -#### board_issues - -This table shows the relation between boards and issues. This table can be used to filter issues by board. - -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | - -#### sprints - -A `sprint` is the abstraction of Jira sprints, TAPD iterations and GitHub milestones. A sprint contains a list of issues. - -| **field** | **type** | **length** | **description** | **key** | -| :------------------ | :------- | :--------- |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :----------- | -| `id` | varchar | 255 | A sprint's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
  • A sprint in a Github repo is a milestone; its id is like "< github >:< GithubRepos >:< GithubRepoId >:< milestoneNumber >".
    Eg. The id for this [sprint](https://github.com/apache/incubator-devlake/milestone/5) is "github:GithubRepo:384111310:5"
  • For a Jira board, the id is like "< jira >:< JiraSourceId >:< JiraBoards >:< JiraBoardsId >".
    Eg. "jira:1:JiraBoards:12"
| PK | -| `name` | varchar | 255 | The name of the sprint.
For Github projects, the sprint name is the milestone name. For instance, 'v0.10.0 - Introduce Temporal to DevLake' is the name of this [sprint](https://github.com/apache/incubator-devlake/milestone/5). | | -| `url` | varchar | 255 | The url of the sprint. | | -| `status` | varchar | 255 | There are 3 statuses of a sprint:
  • CLOSED: a completed sprint
  • ACTIVE: a sprint started but not completed
  • FUTURE: a sprint that has not started
| | -| `started_date` | datetime | 3 | The start time of a sprint | | -| `ended_date` | datetime | 3 | The planned/estimated end time of a sprint. It's usually set when planning a sprint. | | -| `completed_date` | datetime | 3 | The actual time to complete a sprint. | | -| `original_board_id` | varchar | 255 | The id of the board where the sprint was first created. This field is not null only when this entity is transformed from Jira sprints.
In Jira, sprint and board entities have 2 types of relations:
  • A sprint is created based on a specific board. In this case, board(1):(n)sprint. The `original_board_id` is used to show the relation.
  • A sprint can be mapped to multiple boards, and a board can also contain multiple sprints. In this case, board(n):(n)sprint. This relation is shown in [table.board_sprints](#board_sprints).
| FK_boards.id | - -#### sprint_issues - -This table shows the relation between sprints and issues that have been added to sprints. This table can be used to show metrics such as _'ratio of unplanned issues'_, _'completion rate of sprint issues'_, etc - -| **field** | **type** | **length** | **description** | **key** | -| :--------------- | :------- | :--------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------ | -| `sprint_id` | varchar | 255 | Sprint id | FK_sprints.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `is_removed` | bool | | If the issue is removed from this sprint, then TRUE; else FALSE | | -| `added_date` | datetime | 3 | The time this issue added to the sprint. If an issue is added to a sprint multiple times, the latest time will be the value. | | -| `removed_date` | datetime | 3 | The time this issue gets removed from the sprint. If an issue is removed multiple times, the latest time will be the value. | | -| `added_stage` | varchar | 255 | The stage when issue is added to this sprint. There're 3 possible values:
  • BEFORE_SPRINT
    Planning before the sprint starts.
    Condition: sprint_issues.added_date <= sprints.started_date
  • DURING_SPRINT
    Planning during a sprint.
    Condition: sprints.started_date < sprint_issues.added_date <= sprints.ended_date
  • AFTER_SPRINT
    Planning after a sprint. This is caused by improper operation - adding issues to a completed sprint.
    Condition: sprint_issues.added_date > sprints.ended_date
| | -| `resolved_stage` | varchar | 255 | The stage when an issue is resolved (issue status turns to 'DONE'). There are 3 possible values:
  • BEFORE_SPRINT
    Condition: issues.resolution_date <= sprints.started_date
  • DURING_SPRINT
    Condition: sprints.started_date < issues.resolution_date <= sprints.ended_date
  • AFTER_SPRINT
    Condition: issues.resolution_date > sprints.ended_date
| | - -#### board_sprints - -| **field** | **type** | **length** | **description** | **key** | -| :---------- | :------- | :--------- | :-------------- | :------------ | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `sprint_id` | varchar | 255 | Sprint id | FK_sprints.id | - -
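The issue tracking tables above can already be combined into simple metrics. The following is a minimal, MySQL-flavored sketch of a per-sprint completion rate; it assumes only the `sprints`, `sprint_issues` and `issues` columns documented in this section and is an illustrative query, not part of the schema itself.

```sql
-- Hypothetical sketch: completion rate of issues planned into each sprint.
-- Assumes the domain layer tables described above (sprints, sprint_issues, issues).
SELECT
  s.name                                             AS sprint_name,
  COUNT(si.issue_id)                                 AS planned_issues,
  SUM(CASE WHEN i.status = 'DONE' THEN 1 ELSE 0 END) AS done_issues,
  SUM(CASE WHEN i.status = 'DONE' THEN 1 ELSE 0 END)
    / COUNT(si.issue_id)                             AS completion_rate
FROM sprints s
JOIN sprint_issues si ON si.sprint_id = s.id AND si.is_removed = FALSE
JOIN issues i         ON i.id = si.issue_id
GROUP BY s.id, s.name;
```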
- -### Domain 2 - Source Code Management - -#### repos - -Information about GitHub or Gitlab repositories. A repository is always owned by a user. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------- | -| `id` | varchar | 255 | A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github repo's id is like "< github >:< GithubRepos >< GithubRepoId >". Eg. 'github:GithubRepos:384111310' | PK | -| `name` | varchar | 255 | The name of repo. | | -| `description` | varchar | 255 | The description of repo. | | -| `url` | varchar | 255 | The url of repo. Eg. https://github.com/apache/incubator-devlake | | -| `owner_id` | varchar | 255 | The id of the owner of repo | FK_accounts.id | -| `language` | varchar | 255 | The major language of repo. Eg. The language for apache/incubator-devlake is 'Go' | | -| `forked_from` | varchar | 255 | Empty unless the repo is a fork in which case it contains the `id` of the repo the repo is forked from. | | -| `deleted` | tinyint | 255 | 0: repo is active 1: repo has been deleted | | -| `created_date` | datetime | 3 | Repo creation date | | -| `updated_date` | datetime | 3 | Last full update was done for this repo | | - -#### repo_languages(WIP) - -Languages that are used in the repository along with byte counts for all files in those languages. This is in line with how GitHub calculates language percentages in a repository. Multiple entries can exist per repo. - -The table is filled in when the repo has been first inserted on when an update round for all repos is made. - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | -| `id` | varchar | 255 | A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github repo's id is like "< github >:< GithubRepos >< GithubRepoId >". Eg. 'github:GithubRepos:384111310' | PK | -| `language` | varchar | 255 | The language of repo.
These are the [languages](https://api.github.com/repos/apache/incubator-devlake/languages) for apache/incubator-devlake | | -| `bytes` | int | | The byte counts for all files in those languages | | -| `created_date` | datetime | 3 | The field is filled in with the latest timestamp the query for a specific `repo_id` was done. | | - -#### repo_commits - -The commits belong to the history of a repository. More than one repos can share the same commits if one is a fork of the other. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :-------------- | :------------- | -| `repo_id` | varchar | 255 | Repo id | FK_repos.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### refs - -A ref is the abstraction of a branch or tag. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------- | -| `id` | varchar | 255 | A ref's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github ref is composed of "github:GithubRepos:< GithubRepoId >:< RefUrl >". Eg. The id of release v5.3.0 of PingCAP/TiDB project is 'github:GithubRepos:384111310:refs/tags/v5.3.0' A repo's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." | PK | -| `ref_name` | varchar | 255 | The name of ref. Eg. '[refs/tags/v0.9.3](https://github.com/apache/incubator-devlake/tree/v0.9.3)' | | -| `repo_id` | varchar | 255 | The id of repo this ref belongs to | FK_repos.id | -| `commit_sha` | char | 40 | The commit this ref points to at the time of collection | | -| `is_default` | int | |
  • 0: the ref is the default branch. By the definition of [Github](https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-branches-in-your-repository/changing-the-default-branch), the default branch is the base branch for pull requests and code commits.
  • 1: not the default branch
| | -| `merge_base` | char | 40 | The merge base commit of the main ref and the current ref | | -| `ref_type` | varchar | 64 | There are 2 typical types:
  • BRANCH
  • TAG
| | - -#### refs_commits_diffs - -This table shows the commits added in a new ref compared to an old ref. This table can be used to support tag-based analysis, for instance, '_No. of commits of a tag_', '_No. of merged pull request of a tag_', etc. - -The records of this table are computed by [RefDiff](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/refdiff) plugin. The computation should be manually triggered after using [GitRepoExtractor](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/gitextractor) to collect commits and refs. The algorithm behind is similar to [this](https://github.com/apache/incubator-devlake/compare/v0.8.0%E2%80%A6v0.9.0). - -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :-------------------------------------------------------------- | :------------- | -| `commit_sha` | char | 40 | One of the added commits in the new ref compared to the old ref | FK_commits.sha | -| `new_ref_id` | varchar | 255 | The new ref's id for comparison | FK_refs.id | -| `old_ref_id` | varchar | 255 | The old ref's id for comparison | FK_refs.id | -| `new_ref_commit_sha` | char | 40 | The commit new ref points to at the time of collection | | -| `old_ref_commit_sha` | char | 40 | The commit old ref points to at the time of collection | | -| `sorting_index` | varchar | 255 | An index for debugging, please skip it | | - -#### commits - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------- | -| `sha` | char | 40 | One of the added commits in the new ref compared to the old ref | FK_commits.sha | -| `message` | varchar | 255 | Commit message | | -| `author_name` | varchar | 255 | The value is set with command `git config user.name xxxxx` commit | | -| `author_email` | varchar | 255 | The value is set with command `git config user.email xxxxx` author | | -| `authored_date` | datetime | 3 | The date when this commit was originally made | | -| `author_id` | varchar | 255 | The id of commit author | FK_accounts.id | -| `committer_name` | varchar | 255 | The name of committer | | -| `committer_email` | varchar | 255 | The email of committer | | -| `committed_date` | datetime | 3 | The last time the commit gets modified.
For example, when rebasing the branch where the commit is in on another branch, the committed_date changes. | | -| `committer_id` | varchar | 255 | The id of committer | FK_accounts.id | -| `additions` | int | | Added lines of code | | -| `deletions` | int | | Deleted lines of code | | -| `dev_eq` | int | | A metric that quantifies the amount of code contribution. The data can be retrieved from [AE plugin](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/ae). | | - -#### commit_files - -The files have been changed via commits. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :----------------------------------------------------- | :------------- | -| `id` | varchar | 255 | The `id` is composed of "< Commit_sha >:< file_path >" | FK_commits.sha | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | -| `file_path` | varchar | 255 | Path of a changed file in a commit | | -| `additions` | int | | The added lines of code in this file by the commit | | -| `deletions` | int | | The deleted lines of code in this file by the commit | | - -#### components - -The components of files extracted from the file paths. This can be used to analyze Git metrics by component. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :----------------------------------------------------- | :---------- | -| `repo_id` | varchar | 255 | The repo id | FK_repos.id | -| `name` | varchar | 255 | The name of component | | -| `path_regex` | varchar | 255 | The regex to extract components from this repo's paths | | - -#### commit_file_components - -The relationship between commit_file and component_name. - -| **field** | **type** | **length** | **description** | **key** | -| :--------------- | :------- | :--------- | :--------------------------- | :----------------- | -| `commit_file_id` | varchar | 255 | The id of commit file | FK_commit_files.id | -| `component_name` | varchar | 255 | The component name of a file | | - -#### commit_parents - -The parent commit(s) for each commit, as specified by Git. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :---------------- | :------------- | -| `commit_sha` | char | 40 | commit sha | FK_commits.sha | -| `parent` | char | 40 | Parent commit sha | FK_commits.sha | - -
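As a quick illustration of the source code management tables above, here is a minimal MySQL-flavored sketch that aggregates added and deleted lines per commit author for a single repo. It assumes only the `commits` and `repo_commits` columns documented in this section; the repo id literal is just an example value.

```sql
-- Hypothetical sketch: monthly added/deleted lines per commit author in one repo.
-- Assumes the commits and repo_commits tables described above; the repo id is an example value.
SELECT
  c.author_name,
  DATE_FORMAT(c.authored_date, '%Y-%m') AS month,
  SUM(c.additions)                      AS lines_added,
  SUM(c.deletions)                      AS lines_deleted
FROM commits c
JOIN repo_commits rc ON rc.commit_sha = c.sha
WHERE rc.repo_id = 'github:GithubRepos:384111310'
GROUP BY c.author_name, month
ORDER BY month, c.author_name;
```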
- -### Domain 3 - Code Review - -#### pull_requests - -A pull request is the abstraction of GitHub pull request and Gitlab merge request. - -| **field** | **type** | **length** | **description** | **key** | -| :----------------- | :------- | :--------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------- | -| `id` | char | 40 | A pull request's `id` is composed of "< plugin >:< Entity >:< PK0 >[:PK1]..." Eg. For 'github:GithubPullRequests:1347' | FK_commits.sha | -| `title` | varchar | 255 | The title of pull request | | -| `description` | longtext | | The body/description of pull request | | -| `status` | varchar | 255 | the status of pull requests. For a Github pull request, the status can either be 'open' or 'closed'. | | -| `parent_pr_id` | varchar | 255 | The id of the parent PR | | -| `pull_request_key` | varchar | 255 | The key of PR. Eg, 1536 is the key of this [PR](https://github.com/apache/incubator-devlake/pull/1563) | | -| `base_repo_id` | varchar | 255 | The repo that will be updated. | | -| `head_reop_id` | varchar | 255 | The repo containing the changes that will be added to the base. If the head repository is NULL, this means that the corresponding project had been deleted when DevLake processed the pull request. | | -| `base_ref` | varchar | 255 | The branch name in the base repo that will be updated | | -| `head_ref` | varchar | 255 | The branch name in the head repo that contains the changes that will be added to the base | | -| `author_name` | varchar | 255 | The author's name of the pull request | | -| `author_id` | varchar | 255 | The author's id of the pull request | | -| `url` | varchar | 255 | the web link of the pull request | | -| `type` | varchar | 255 | The work-type of a pull request. For example: feature-development, bug-fix, docs, etc.
The value is transformed from Github pull request labels by configuring `GITHUB_PR_TYPE` in `.env` file during installation. | | -| `component` | varchar | 255 | The component this PR affects.
The value is transformed from Github/Gitlab pull request labels by configuring `GITHUB_PR_COMPONENT` in `.env` file during installation. | | -| `created_date` | datetime | 3 | The time PR created. | | -| `merged_date` | datetime | 3 | The time PR gets merged. Null when the PR is not merged. | | -| `closed_date` | datetime | 3 | The time PR closed. Null when the PR is not closed. | | -| `merge_commit_sha` | char | 40 | the merge commit of this PR. By the definition of [Github](https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-branches-in-your-repository/changing-the-default-branch), when you click the default Merge pull request option on a pull request on Github, all commits from the feature branch are added to the base branch in a merge commit. | | -| `base_commit_sha` | char | 40 | The base commit of this PR. | | -| `head_commit_sha` | char | 40 | The head commit of this PR. | | -| `coding_timespan`| bigint | | PR created_date - PR's first commit's authored_date | | -| `review_lag` | bigint | | PR's first comment time - PR's created_date | | -| `review_timespan`| bigint | | PR merged_date - PR's first comment time | | -| `deploy_timespan` | bigint | | PR deployed date - PR merged_date | | -| `change_timespan` | bigint | | PR cycle time, equals to coding_time_span + review_lag + review_time_span + deploy_timespan | | - - -#### pull_request_labels - -This table shows the labels of pull request. Multiple entries can exist per pull request. This table can be used to filter pull requests by label name. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :-------------- | :------------------ | -| `name` | varchar | 255 | Label name | | -| `pull_request_id` | varchar | 255 | Pull request ID | FK_pull_requests.id | - -#### pull_request_commits - -A commit associated with a pull request - -The list is additive. This means if a rebase with commit squashing takes place after the commits of a pull request have been processed, the old commits will not be deleted. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :-------------- | :------------------ | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### pull_request_comments - -Normal comments, review bodies, reviews' inline comments of GitHub's pull requests or GitLab's merge requests. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :--------------------------------------------------------- | :------------------ | -| `id` | varchar | 255 | Comment id | PK | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `body` | longtext | | The body of the comments | | -| `account_id` | varchar | 255 | The account who made the comment | FK_accounts.id | -| `created_date` | datetime | 3 | Comment creation time | | -| `position` | int | | Deprecated | | -| `type` | varchar | 255 | - For normal comments: NORMAL
- For review comments, i.e. diff/inline comments: DIFF
- For reviews' body (exist in GitHub but not GitLab): REVIEW | | -| `review_id` | varchar | 255 | Review_id of the comment if the type is `REVIEW` or `DIFF` | | -| `status` | varchar | 255 | Status of the comment | | - - -#### pull_request_events(WIP) - -Events of pull requests. - -| **field** | **type** | **length** | **description** | **key** | -| :---------------- | :------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------ | -| `id` | varchar | 255 | Event id | PK | -| `pull_request_id` | varchar | 255 | Pull request id | FK_pull_requests.id | -| `action` | varchar | 255 | The action to be taken, some values:
  • `opened`: When the pull request has been opened
  • `closed`: When the pull request has been closed
  • `merged`: When Github detected that the pull request has been merged. No merges outside Github (i.e. Git based) are reported
  • `reopened`: When a pull request is reopened after being closed
  • `synchronize`: When new commits are added to or removed from the head repository
| | -| `actor_id` | varchar | 255 | The account id of the event performer | FK_accounts.id | -| `created_date` | datetime | 3 | Event creation time | | - -
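For the code review tables above, a pull request lead time metric could be sketched as follows. This is a MySQL-flavored illustration that assumes only the `pull_requests` columns documented in this section.

```sql
-- Hypothetical sketch: average time from PR creation to merge, per base repository.
-- Assumes the pull_requests table described above.
SELECT
  pr.base_repo_id,
  COUNT(*)                                                    AS merged_prs,
  AVG(TIMESTAMPDIFF(MINUTE, pr.created_date, pr.merged_date)) AS avg_minutes_to_merge
FROM pull_requests pr
WHERE pr.merged_date IS NOT NULL
GROUP BY pr.base_repo_id;
```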
- -### Domain 4 - CI/CD(WIP) - -#### cicd_pipelines - -A cicd_pipeline is a series of builds that have connections or a standalone build. - -| **field** | **type** | **length** | **description** | **key** | -| :-------- | :-------- | :-------- |:----------------------------------------------------------------------------------------------| :-------- | -| `id` | varchar | 255 | This key is generated based on details from the original plugin | PK | -| `name` | varchar | 255 | For gitlab, as there is no name for pipeline, so we use projectId, others have their own name | | -| `result` | varchar | 100 | The result of this task | | -| `status` | varchar | 100 | The status of this task | | -| `type` | varchar | 100 | To indicate if this is a DEPLOYMENT | | -| `duration_sec` | bigint unsigned | | how long does this task take | | -| `started_date` | datetime | 3 | when did this task start | | -| `finished_date` | datetime | 3 | when did this task finish | | -| `environment` | varchar | 255 | To indicate the environment in which the task is running | | - -#### cicd_pipeline_commits - -| **field** | **type** | **length** | **description** | **key** | -| :-------- | :-------- | :-------- |:----------------------------------------------------------------| :-------- | -| `pipeline_id` | varchar | 255 | This key is generated based on details from the original plugin | PK | -| `commit_sha` | varchar | 255 | The commit that trigger this pipeline | PK | -| `branch` | varchar | 255 | The branch that trigger this pipeline | | -| `repo` | varchar | 255 | | | -| `repo_id` | varchar | 255 | The repo that this pipeline belongs to | | -| `repo_url` | longtext | | | | - -#### cicd_pipeline_relationships - -| **field** | **type** | **length** | **description** | **key** | -| :-------- | :-------- | :-------- |:--------------------------------------------------| :-------- | -| `parent_pipeline_id` | varchar | 255 | The pipeline trigger child_pipeline | PK | -| `child_pipeline_id` | varchar | 255 | The pipeline that is triggered by parent_pipeline | PK | - -#### cicd_tasks - -A cicd_task is a single job of ci/cd. - -| **field** | **type** | **length** | **description** | **key** | -| :-------- | :-------- |:-------------------------------|:----------------------------------------------------------------| :-------- | -| `id` | varchar | 255 | This key is generated based on details from the original plugin | PK | -| `name` | varchar | 255 | | | -| `pipeline_id` | varchar | 255 | The id of pipeline | | -| `result` | varchar | 100 | The result of this task | | -| `status` | varchar | 100 | The status of this task | | -| `type` | varchar | 100 | To indicate if this is a DEPLOYMENT | | -| `duration_sec` | bigint unsigned | | how long does this task take | | -| `started_date` | datetime | 3 | when did this task start | | -| `finished_date` | datetime | 3 | when did this task finish | | -| `environment` | varchar | 255 | To indicate the environment in which the task is running | | - - -### Cross-Domain Entities - -These entities are used to map entities between different domains. They are the key players to break data isolation. - -There're low-level entities such as issue_commits, users, and higher-level cross domain entities such as board_repos - -#### issue_commits - -A low-level mapping between "issue tracking" and "source code management" domain by mapping `issues` and `commits`. Issue(n): Commit(n). 
- -The original connection between these two entities lies in either issue tracking tools like Jira or source code management tools like GitLab. You have to use tools to accomplish this. - -For example, a common method to connect Jira issue and GitLab commit is a GitLab plugin [Jira Integration](https://docs.gitlab.com/ee/integration/jira/). With this plugin, the Jira issue key in the commit message written by the committers will be parsed. Then, the plugin will add the commit urls under this jira issue. Hence, DevLake's [Jira plugin](https://github.com/apache/incubator-devlake/tree/main/backend/plugins/jira) can get the related commits (including repo, commit_id, url) of an issue. - -| **field** | **type** | **length** | **description** | **key** | -| :----------- | :------- | :--------- | :-------------- | :------------- | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `commit_sha` | char | 40 | Commit sha | FK_commits.sha | - -#### pull_request_issues - -This table shows the issues closed by pull requests. It's a medium-level mapping between "issue tracking" and "source code management" domain by mapping issues and commits. Issue(n): Commit(n). - -The data is extracted from the body of pull requests conforming to certain regular expression. The regular expression can be defined in GITHUB_PR_BODY_CLOSE_PATTERN in the .env file - -| **field** | **type** | **length** | **description** | **key** | -| :-------------------- | :------- | :--------- | :------------------ | :------------------ | -| `pull_request_id` | char | 40 | Pull request id | FK_pull_requests.id | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | -| `pull_request_number` | varchar | 255 | Pull request key | | -| `issue_number` | varchar | 255 | Issue key | | - -#### board_repos (Deprecated) - -A way to link "issue tracking" and "source code management" domain by mapping `boards` and `repos`. Board(n): Repo(n). - -| **field** | **type** | **length** | **description** | **key** | -| :--------- | :------- | :--------- | :-------------- | :----------- | -| `board_id` | varchar | 255 | Board id | FK_boards.id | -| `repo_id` | varchar | 255 | Repo id | FK_repos.id | - -#### accounts - -This table stores of user accounts across different tools such as GitHub, Jira, GitLab, etc. This table can be joined to get the metadata of all accounts. - metrics, such as _'No. of Issue closed by contributor', 'No. of commits by contributor',_ - -| **field** | **type** | **length** | **description** | **key** | -| :------------- | :------- | :--------- |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :------ | -| `id` | varchar | 255 | An account's `id` is the identifier of the account of a specific tool. It is composed of "< Plugin >:< Entity >:< PK0 >[:PK1]..."
For example, a Github account's id is composed of "< github >:< GithubAccounts >:< GithubUserId >)". Eg. 'github:GithubUsers:14050754' | PK | -| `email` | varchar | 255 | Email of the account | | -| `full_name` | varchar | 255 | Full name | | -| `user_name` | varchar | 255 | Username, nickname or Github login of an account | | -| `avatar_url` | varchar | 255 | | | -| `organization` | varchar | 255 | User's organization(s) | | -| `created_date` | datetime | 3 | User creation time | | -| `status` | int | | 0: default, the user is active. 1: the user is not active | | - -#### users -| **field** | **type** | **length** | **description** | **key** | -| --------- | -------- | ---------- | ----------------------------- | ------- | -| `id` | varchar | 255 | id of a person | PK | -| `email` | varchar | 255 | the primary email of a person | | -| `name` | varchar | 255 | name of a person | | - -#### user_accounts -| **field** | **type** | **length** | **description** | **key** | -| ------------ | -------- | ---------- | --------------- | ---------------- | -| `user_id` | varchar | 255 | users.id | Composite PK, FK | -| `account_id` | varchar | 255 | accounts.id | Composite PK, FK | - -#### teams -| **field** | **type** | **length** | **description** | **key** | -| --------------- | -------- | ---------- | -------------------------------------------------- | ------- | -| `id` | varchar | 255 | id from the data sources, decided by DevLake users | PK | -| `name` | varchar | 255 | name of the team. Eg. team A, team B, etc. | | -| `alias` | varchar | 255 | alias or abbreviation of a team | | -| `parent_id` | varchar | 255 | teams.id, default to null | FK | -| `sorting_index` | int | 255 | the field to sort team | | - -#### team_users -| **field** | **type** | **length** | **description** | **key** | -| --------- | -------- | ---------- | ----------------------------------------------- | ---------------- | -| `team_id` | varchar | 255 | Full name of the team. Eg. team A, team B, etc. | Composite PK, FK | -| `user_id` | varchar | 255 | users.id | Composite PK, FK | - - -
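The cross-domain identity tables above (`accounts`, `users`, `user_accounts`) are typically used to de-duplicate contributors across tools. A minimal MySQL-flavored sketch of commits per merged user could look like this, assuming only the columns documented in this document:

```sql
-- Hypothetical sketch: commit count per de-duplicated user, resolved via user_accounts.
-- Assumes the users, user_accounts and commits tables described in this document.
SELECT
  u.name       AS user_name,
  COUNT(c.sha) AS commit_count
FROM users u
JOIN user_accounts ua ON ua.user_id = u.id
JOIN commits c        ON c.author_id = ua.account_id
GROUP BY u.id, u.name
ORDER BY commit_count DESC;
```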
- -## DWM Entities - (Data Warehouse Middle) - -DWM entities are the slight aggregation and operation of DWD to store more organized details or middle-level metrics. - - -#### refs_issues_diffs - -This table shows the issues fixed by commits added in a new ref compared to an old one. The data is computed from [table.ref_commits_diff](#refs_commits_diffs), [table.pull_requests](#pull_requests), [table.pull_request_commits](#pull_request_commits), and [table.pull_request_issues](#pull_request_issues). - -This table can support tag-based analysis, for instance, '_No. of bugs closed in a tag_'. - -| **field** | **type** | **length** | **description** | **key** | -| :------------------- | :------- | :--------- | :----------------------------------------------------- | :----------- | -| `new_ref_id` | varchar | 255 | The new ref's id for comparison | FK_refs.id | -| `old_ref_id` | varchar | 255 | The old ref's id for comparison | FK_refs.id | -| `new_ref_commit_sha` | char | 40 | The commit new ref points to at the time of collection | | -| `old_ref_commit_sha` | char | 40 | The commit old ref points to at the time of collection | | -| `issue_number` | varchar | 255 | Issue number | | -| `issue_id` | varchar | 255 | Issue id | FK_issues.id | - - -## Get Domain Layer Models in Developer Mode - -When developing a new plugin, you need to refer to domain layer models, as all raw data should be transformed to domain layer data to provide standardized metrics across tools. Please use the following method to access the domain data models. - -```golang -import "github.com/apache/incubator-devlake/models/domainlayer/domaininfo" - -domaininfo := domaininfo.GetDomainTablesInfo() -for _, table := range domaininfo { - // do something -} -``` - -If you want to learn more about plugin models, please visit [PluginImplementation](https://devlake.apache.org/docs/DeveloperManuals/PluginImplementation) diff --git a/versioned_docs/version-v0.14/DataModels/_category_.json b/versioned_docs/version-v0.14/DataModels/_category_.json deleted file mode 100644 index ae28c626ea0..00000000000 --- a/versioned_docs/version-v0.14/DataModels/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Data Models", - "position": 6, - "link":{ - "type": "generated-index", - "slug": "DataModels" - } -} diff --git a/versioned_docs/version-v0.14/DeveloperManuals/DBMigration.md b/versioned_docs/version-v0.14/DeveloperManuals/DBMigration.md deleted file mode 100644 index b4394d33c7a..00000000000 --- a/versioned_docs/version-v0.14/DeveloperManuals/DBMigration.md +++ /dev/null @@ -1,166 +0,0 @@ ---- -title: "DB Migration" -description: > - DB Migration -sidebar_position: 3 ---- - -## Summary -Starting in v0.10.0, DevLake provides a lightweight migration tool for executing migration scripts. -Both framework itself and plugins define their migration scripts in their own migration folder. -The migration scripts are written with gorm in Golang to support different SQL dialects. - - -## Migration Script -Migration script describes how to do database migration. -They implement the `Script` interface. -When DevLake starts, scripts register themselves to the framework by invoking the `Register` function. -The method `Up` contains the steps of migration. - -```go -type Script interface { - // this function will contain the business logic of the migration (e.g. DDL logic) - Up(ctx context.Context, db *gorm.DB) errors.Error - // the version number of the migration. typically in date format (YYYYMMDDHHMMSS), e.g. 20220728000001. 
Migrations are executed sequentially based on this number. - Version() uint64 - // The name of this migration - Name() string -} -``` - -## Migration Model - -For each migration we define a "snapshot" datamodel of the model that we wish to perform the migration on. -The fields on this model shall be identical to the actual model, but unlike the actual one, this one will -never change in the future. The naming convention of these models is `YYYYMMDD` and they must implement -the `func TableName() string` method, and consumed by the `Script::Up` method. - -## Table `migration_history` - -The table tracks migration scripts execution and schemas changes. -From which, DevLake could figure out the current state of database schemas. - -## Execution - -Each plugin has a `migrationscripts` subpackage that lists all the migrations to be executed for that plugin. You -will need to add your migration to that list for the framework to pick it up. Similarly, there is such a package -for the framework-only migrations defined under the `models` package. - - -## How It Works -1. Check `migration_history` table, calculate all the migration scripts need to be executed. -2. Sort scripts by Version in ascending order. -3. Execute scripts. -4. Save results in the `migration_history` table. - - -## Best Practices -When you write a new migration script, please pay attention to the fault tolerance and the side effect. It would be better if the failed script could be safely retry, in case of something goes wrong during the migration. For this purpose, the migration scripts should be well-designed. For example, if you created a temporary table in the Up method, it should be dropped before exiting, regardless of success or failure. Using the defer statement to do some cleanup is a good idea. Let's demonstrate this idea with a concrete example. - -Suppose we want to recalculate the column `name` of the table `user` - -1. rename `user` to `user_bak` (stop if error, define `defer` to rename back on error) -2. create new `user` (stop if error, define `defer` to drop TABLE on error) -3. convert data from `user_bak` to `user` (stop if error) -4. drop `user_bak` - -```golang - -type User struct { - name string `gorm:"type:varchar(255)"` -} - -func (User) TableName() string { - return "user" -} - -type NewUser struct { - name string `gorm:"type:text"` -} - -func (NewUser) TableName() string { - return "user" -} - -type UserBak struct { - name string `gorm:"type:varchar(255)"` -} - -func (UserBak) TableName() string { - return "user_bak" -} - -func (*exampleScript) Up(ctx context.Context, db *gorm.DB) (errs errors.Error) { - var err error - - // rename the user_bak to cache old table - err = db.Migrator().RenameTable(&User{}, &UserBak{}) - if err != nil { - return errors.Default.Wrap(err, "error no rename user to user_bak") - } - - // rollback for rename back - defer func() { - if errs != nil { - err = db.Migrator().RenameTable(&UserBak{}, &User{}) - if err != nil { - errs = errors.Default.Wrap(err, fmt.Sprintf("fail to rollback table user_bak , you must to rollback by yourself. %s", err.Error())) - } - } - }() - - // create new user table - err = db.Migrator().AutoMigrate(&NewUser{}) - - if err != nil { - return errors.Default.Wrap(err, "error on auto migrate user") - } - - // rollback for create new table - defer func() { - if errs != nil { - err = db.Migrator().DropTable(&User{}) - if err != nil { - errs = errors.Default.Wrap(err, fmt.Sprintf("fail to rollback table OldTable , you must to rollback by yourself. 
%s", err.Error())) - } - } - }() - - // update old id to new id and write to the new table - cursor, err := db.Model(&UserBak{}).Rows() - if err != nil { - return errors.Default.Wrap(err, "error on select NewTable") - } - defer cursor.Close() - - // caculate and save the data to new table - batch, err := helper.NewBatchSave(api.BasicRes, reflect.TypeOf(&NewUser{}), 200) - if err != nil { - return errors.Default.Wrap(err, "error getting batch from table user") - } - defer batch.Close() - for cursor.Next() { - ot := UserBak{} - err = db.ScanRows(cursor, &ot) - if err != nil { - return errors.Default.Wrap(err, "error scan rows from table user_bak") - } - nt := NewUser(ot) - - nt.name = nt.name + "new" - - err = batch.Add(&nt) - if err != nil { - return errors.Default.Wrap(err, "error on user batch add") - } - } - - // drop the old table - err = db.Migrator().DropTable(&UserBak{}) - if err != nil { - return errors.Default.Wrap(err, "error no drop user_bak") - } -} - -``` - diff --git a/versioned_docs/version-v0.14/DeveloperManuals/Dal.md b/versioned_docs/version-v0.14/DeveloperManuals/Dal.md deleted file mode 100644 index 3e1d397e5ef..00000000000 --- a/versioned_docs/version-v0.14/DeveloperManuals/Dal.md +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: "Dal" -sidebar_position: 5 -description: > - The Dal (Data Access Layer) is designed to decouple the hard dependency on `gorm` in v0.12 ---- - -## Summary - -The Dal (Data Access Layer) is designed to decouple the hard dependency on `gorm` in v0.12. The advantages of introducing this isolation are: - - - Unit Test: Mocking an Interface is easier and more reliable than Patching a Pointer. - - Clean Code: DBS operations are more consistence than using `gorm ` directly. - - Replaceable: It would be easier to replace `gorm` in the future if needed. - -## The Dal Interface - -```go -type Dal interface { - AutoMigrate(entity interface{}, clauses ...Clause) error - Exec(query string, params ...interface{}) error - RawCursor(query string, params ...interface{}) (*sql.Rows, error) - Cursor(clauses ...Clause) (*sql.Rows, error) - Fetch(cursor *sql.Rows, dst interface{}) error - All(dst interface{}, clauses ...Clause) error - First(dst interface{}, clauses ...Clause) error - Count(clauses ...Clause) (int64, error) - Pluck(column string, dest interface{}, clauses ...Clause) error - Create(entity interface{}, clauses ...Clause) error - Update(entity interface{}, clauses ...Clause) error - CreateOrUpdate(entity interface{}, clauses ...Clause) error - CreateIfNotExist(entity interface{}, clauses ...Clause) error - Delete(entity interface{}, clauses ...Clause) error - AllTables() ([]string, error) -} -``` - - -## How to use - -### Query -```go -// Get a database cursor -user := &models.User{} -cursor, err := db.Cursor( - dal.From(user), - dal.Where("department = ?", "R&D"), - dal.Orderby("id DESC"), -) -if err != nil { - return err -} -for cursor.Next() { - err = dal.Fetch(cursor, user) // fetch one record at a time - ... -} - -// Get a database cursor by raw sql query -cursor, err := db.Raw("SELECT * FROM users") - -// USE WITH CAUTIOUS: loading a big table at once is slow and dangerous -// Load all records from database at once. -users := make([]models.Users, 0) -err := db.All(&users, dal.Where("department = ?", "R&D")) - -// Load a column as Scalar or Slice -var email string -err := db.Pluck("email", &username, dal.Where("id = ?", 1)) -var emails []string -err := db.Pluck("email", &emails) - -// Execute query -err := db.Exec("UPDATE users SET department = ? 
WHERE department = ?", "Research & Development", "R&D") -``` - -### Insert -```go -err := db.Create(&models.User{ - Email: "hello@example.com", // assuming this the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Update -```go -err := db.Create(&models.User{ - Email: "hello@example.com", // assuming this the Primarykey - Name: "hello", - Department: "R&D", -}) -``` -### Insert or Update -```go -err := db.CreateOrUpdate(&models.User{ - Email: "hello@example.com", // assuming this is the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Insert if record(by PrimaryKey) didn't exist -```go -err := db.CreateIfNotExist(&models.User{ - Email: "hello@example.com", // assuming this is the Primarykey - Name: "hello", - Department: "R&D", -}) -``` - -### Delete -```go -err := db.CreateIfNotExist(&models.User{ - Email: "hello@example.com", // assuming this is the Primary key -}) -``` - -### DDL and others -```go -// Returns all table names -allTables, err := db.AllTables() - -// Automigrate: create/add missing table/columns -// Note: it won't delete any existing columns, nor does it update the column definition -err := db.AutoMigrate(&models.User{}) -``` - -## How to do Unit Test -First, run the command `make mock` to generate the Mocking Stubs, the generated source files should appear in `mocks` folder. -``` -mocks -├── ApiResourceHandler.go -├── AsyncResponseHandler.go -├── BasicRes.go -├── CloseablePluginTask.go -├── ConfigGetter.go -├── Dal.go -├── DataConvertHandler.go -├── ExecContext.go -├── InjectConfigGetter.go -├── InjectLogger.go -├── Iterator.go -├── Logger.go -├── Migratable.go -├── PluginApi.go -├── PluginBlueprintV100.go -├── PluginInit.go -├── PluginMeta.go -├── PluginTask.go -├── RateLimitedApiClient.go -├── SubTaskContext.go -├── SubTaskEntryPoint.go -├── SubTask.go -└── TaskContext.go -``` -With these Mocking stubs, you may start writing your TestCases using the `mocks.Dal`. -```go -import "github.com/apache/incubator-devlake/mocks" - -func TestCreateUser(t *testing.T) { - mockDal := new(mocks.Dal) - mockDal.On("Create", mock.Anything, mock.Anything).Return(nil).Once() - userService := &services.UserService{ - Dal: mockDal, - } - userService.Post(map[string]interface{}{ - "email": "helle@example.com", - "name": "hello", - "department": "R&D", - }) - mockDal.AssertExpectations(t) -``` - diff --git a/versioned_docs/version-v0.14/DeveloperManuals/DeveloperSetup.md b/versioned_docs/version-v0.14/DeveloperManuals/DeveloperSetup.md deleted file mode 100644 index eb33c703c93..00000000000 --- a/versioned_docs/version-v0.14/DeveloperManuals/DeveloperSetup.md +++ /dev/null @@ -1,122 +0,0 @@ ---- -title: "Developer Setup" -description: > - The steps to install DevLake in developer mode. -sidebar_position: 1 ---- - - -## Requirements - -- Docker v19.03.10+ -- Golang v1.19+ -- GNU Make - - Mac (Preinstalled) - - Windows: [Download](http://gnuwin32.sourceforge.net/packages/make.htm) - - Ubuntu: `sudo apt-get install build-essential libssl-dev` - -## How to setup dev environment - -The following guide will walk through how to run DevLake's frontend (`config-ui`) and backend in dev mode. - - -1. Navigate to where you would like to install this project and clone the repository: - - ```sh - git clone https://github.com/apache/incubator-devlake - cd incubator-devlake - ``` - -2. Install dependencies for plugins: - - - [RefDiff](../Plugins/refdiff.md#development) - -3. Install Go packages - - ```sh - go get - ``` - -4. 
Copy the sample config file to new local file: - - ```sh - cp .env.example .env - ``` - -5. Update the following variables in the file `.env`: - - * `DB_URL`: Replace `mysql:3306` with `127.0.0.1:3306` - -6. Start the MySQL and Grafana containers: - - > Make sure the Docker daemon is running before this step. - - ```sh - docker-compose up -d mysql grafana - ``` - -7. Run `devlake` and `config-ui` in dev mode in two separate terminals: - - ```sh - # run devlake - make dev - # run config-ui - make configure-dev - ``` - - For common errors, please see [Troubleshooting](#troubleshotting). - -8. Config UI is running at `localhost:4000` - - For how to use Config UI, please refer to our [tutorial](UserManuals/ConfigUI/Tutorial.md) - -## Running Tests - -```sh -# install mockery -go install github.com/vektra/mockery/v2@latest -# generate mocking stubs -make mock -# run tests -make test -``` - -## DB migrations - -Please refer to the [Migration Doc](../DeveloperManuals/DBMigration.md). - -## Using DevLake API - -All DevLake APIs (core service + plugin API) are documented with swagger. To see API doc live with swagger: - - - Install [swag](https://github.com/swaggo/swag). - - Run `make swag` to generate the swagger documentation. - - Visit `http://localhost:8080/swagger/index.html` while `devlake` is running. - - -## Developing dashboards - -To access Grafana, click *View Dashboards* button in the top left corner of Config UI, or visit `localhost:3002` (username: `admin`, password: `admin`). - -For provisioning, customizing, and creating dashboards, please refer to our [Grafana Doc](../UserManuals/Dashboards/GrafanaUserGuide.md). - - -## Troubleshooting - - - Q: Running `make dev` yields error: `libgit2.so.1.3: cannot open share object file: No such file or directory` - - A: `libgit2.so.1.3` is required by the gitextractor plugin and should be . Make sure your program can find `libgit2.so.1.3`. `LD_LIBRARY_PATH` can be assigned like this if your `libgit2.so.1.3` is located at `/usr/local/lib`: - - ```sh - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib - ``` - - Note that the version has to be pinned to 1.3.0. If you don't have it, you may need to build it manually with CMake from [source](https://github.com/libgit2/libgit2/releases/tag/v1.3.0). - - -## Compiling - - - Compile all plugins: `make build-plugin` - - Compile specific plugins: `PLUGIN= make build-plugin` - - Compile server: `make build` - - Compile worker: `make build-worker` diff --git a/versioned_docs/version-v0.14/DeveloperManuals/E2E-Test-Guide.md b/versioned_docs/version-v0.14/DeveloperManuals/E2E-Test-Guide.md deleted file mode 100644 index 1156e4cd24e..00000000000 --- a/versioned_docs/version-v0.14/DeveloperManuals/E2E-Test-Guide.md +++ /dev/null @@ -1,211 +0,0 @@ ---- -title: "E2E Test Guide" -description: > - The steps to write E2E tests for plugins. ---- - -# How to write E2E tests for plugins - -## Why write E2E tests - -E2E testing, as a part of automated testing, generally refers to black-box testing at the file and module level or unit testing that allows the use of some external services such as databases. The purpose of writing E2E tests is to shield some internal implementation logic and see whether the same external input can output the same result in terms of data aspects. In addition, compared to the black-box integration tests, it can avoid some chance problems caused by network and other factors. More information about the plugin can be found here: Why write E2E tests (incomplete). 
-In DevLake, E2E testing consists of interface testing and input/output result validation for the plugin Extract/Convert subtask. This article only describes the process of writing the latter. As the Collectors invoke external -services we typically do not write E2E tests for them. - -## Preparing data - -Let's take a simple plugin - Feishu Meeting Hours Collection as an example here. Its directory structure looks like this. -![image](https://user-images.githubusercontent.com/3294100/175061114-53404aac-16ca-45d1-a0ab-3f61d84922ca.png) -Next, we will write the E2E tests of the sub-tasks. - -The first step in writing the E2E test is to run the Collect task of the corresponding plugin to complete the data collection; that is, to have the corresponding data saved in the table starting with `_raw_feishu_` in the database. -This data will be presumed to be the "source of truth" for our tests. Here are the logs and database tables using the DirectRun (cmd) run method. -``` -$ go run plugins/feishu/main.go --numOfDaysToCollect 2 --connectionId 1 (Note: command may change with version upgrade) -[2022-06-22 23:03:29] INFO failed to create dir logs: mkdir logs: file exists -press `c` to send cancel signal -[2022-06-22 23:03:29] INFO [feishu] start plugin -[2022-06-22 23:03:33] INFO [feishu] scheduler for api https://open.feishu.cn/open-apis/vc/v1 worker: 13, request: 10000, duration: 1h0m0s -[2022-06-22 23:03:33] INFO [feishu] total step: 2 -[2022-06-22 23:03:33] INFO [feishu] executing subtask collectMeetingTopUserItem -[2022-06-22 23:03:33] INFO [feishu] [collectMeetingTopUserItem] start api collection -[2022-06-22 23:03:34] INFO [feishu] [collectMeetingTopUserItem] finished records: 1 -[2022-06-22 23:03:34] INFO [feishu] [collectMeetingTopUserItem] end api collection error: %!w() -[2022-06-22 23:03:34] INFO [feishu] finished step: 1 / 2 -[2022-06-22 23:03:34] INFO [feishu] executing subtask extractMeetingTopUserItem -[2022-06-22 23:03:34] INFO [feishu] [extractMeetingTopUserItem] get data from _raw_feishu_meeting_top_user_item where params={"connectionId":1} and got 148 -[2022-06-22 23:03:34] INFO [feishu] [extractMeetingTopUserItem] finished records: 1 -[2022-06-22 23:03:34] INFO [feishu] finished step: 2 / 2 -``` - -image -Ok, the data has now been saved to the `_raw_feishu_*` table, and the `data` column is the return information from the plugin. Here we only collected data for the last 2 days. The data information is not much, but it also covers a variety of situations. That is, the same person has data on different days. - -It is also worth mentioning that the plugin runs two tasks, `collectMeetingTopUserItem` and `extractMeetingTopUserItem`. The former is the task of collecting, which is needed to run this time, and the latter is the task of extracting data. It doesn't matter whether the extractor runs in the prepared data session. - -Next, we need to export the data to .csv format. This step can be done in a variety of different ways - you can show your skills. I will only introduce a few common methods here. - -### DevLake Code Generator Export - -Run `go run generator/main.go create-e2e-raw` directly and follow the guidelines to complete the export. This solution is the simplest, but has some limitations, such as the exported fields being fixed. You can refer to the next solutions if you need more customisation options. 
- -![usage](https://user-images.githubusercontent.com/3294100/175849225-12af5251-6181-4cd9-ba72-26087b05ee73.gif) - -### GoLand Database export - -![image](https://user-images.githubusercontent.com/3294100/175067303-7e5e1c4d-2430-4eb5-ad00-e38d86bbd108.png) - -This solution is very easy to use and will not cause problems using Postgres or MySQL. -![image](https://user-images.githubusercontent.com/3294100/175068178-f1c1c290-e043-4672-b43e-54c4b954c685.png) -The success criteria for csv export is that the go program can read it without errors, so several points are worth noticing. - -1. the values in the csv file should be wrapped in double quotes to avoid special symbols such as commas in the values that break the csv format -2. double quotes in csv files are escaped. generally `""` represents a double quote -3. pay attention to whether the column `data` is the actual value, not the value after base64 or hex - -After exporting, move the .csv file to `plugins/feishu/e2e/raw_tables/_raw_feishu_meeting_top_user_item.csv`. - -### MySQL Select Into Outfile - -This is MySQL's solution for exporting query results to a file. The MySQL currently started in docker-compose.yml comes with the --security parameter, so it does not allow `select ... into outfile`. The first step is to turn off the security parameter, which is done roughly as follows. -![origin_img_v2_c809c901-01bc-4ec9-b52a-ab4df24c376g](https://user-images.githubusercontent.com/3294100/175070770-9b7d5b75-574b-49ed-9bca-e9f611f60795.jpg) -After closing it, use `select ... into outfile` to export the csv file. The export result is rough as follows. -![origin_img_v2_ccfdb260-668f-42b4-b249-6c2dd45816ag](https://user-images.githubusercontent.com/3294100/175070866-2204ae13-c058-4a16-bc20-93ab7c95f832.jpg) -Notice that the data field has extra hexsha fields, which need to be manually converted to literal quantities. - -### Vscode Database - -This is Vscode's solution for exporting query results to a file, but it is not easy to use. Here is the export result without any configuration changes -![origin_img_v2_c9eaadaa-afbc-4c06-85bc-e78235f7eb3g](https://user-images.githubusercontent.com/3294100/175071987-760c2537-240c-4314-bbd6-1a0cd85ddc0f.jpg) -However, it is obvious that the escape symbol does not conform to the csv specification, and the data is not successfully exported. After adjusting the configuration and manually replacing `\"` with `""`, we get the following result. -![image](https://user-images.githubusercontent.com/3294100/175072314-954c6794-3ebd-45bb-98e7-60ddbb5a7da9.png) -The data field of this file is encoded in base64, so it needs to be decoded manually to a literal amount before using it. - -### MySQL workbench - -This tool must write the SQL yourself to complete the data export, which can be rewritten by imitating the following SQL. -```sql -SELECT id, params, CAST(`data` as char) as data, url, input,created_at FROM _raw_feishu_meeting_top_user_item; -``` -![image](https://user-images.githubusercontent.com/3294100/175080866-1631a601-cbe6-40c0-9d3a-d23ca3322a50.png) -Select csv as the save format and export it for use. - -### Postgres Copy with csv header - -`Copy(SQL statement) to '/var/lib/postgresql/data/raw.csv' with csv header;` is a common export method for PG to export csv, which can also be used here. 
-```sql -COPY ( -SELECT id, params, convert_from(data, 'utf-8') as data, url, input,created_at FROM _raw_feishu_meeting_top_user_item -) to '/var/lib/postgresql/data/raw.csv' with csv header; -``` -Use the above statement to complete the export of the file. If pg runs in docker, just use the command `docker cp` to export the file to the host. - -## Writing E2E tests - -First, create a test environment. For example, let's create `meeting_test.go`. -![image](https://user-images.githubusercontent.com/3294100/175091380-424974b9-15f3-457b-af5c-03d3b5d17e73.png) -Then enter the test preparation code in it as follows. The code is to create an instance of the `feishu` plugin and then call `ImportCsvIntoRawTable` to import the data from the csv file into the `_raw_feishu_meeting_top_user_item` table. - -```go -func TestMeetingDataFlow(t *testing.T) { - var plugin impl.Feishu - dataflowTester := e2ehelper.NewDataFlowTester(t, "feishu", plugin) - - // import raw data table - dataflowTester.ImportCsvIntoRawTable("./raw_tables/_raw_feishu_meeting_top_user_item.csv", "_raw_feishu_meeting_top_user_item") -} -``` -The signature of the import function is as follows. -```func (t *DataFlowTester) ImportCsvIntoRawTable(csvRelPath string, rawTableName string)``` -It has a twin, with only slight differences in parameters. -```func (t *DataFlowTester) ImportCsvIntoTabler(csvRelPath string, dst schema.Tabler)``` -The former is used to import tables in the raw layer. The latter is used to import arbitrary tables. -**Note:** These two functions will delete the db table and use `gorm.AutoMigrate` to re-create a new table to clear data in it. -After importing the data is complete, run this tester and it must be PASS without any test logic at this moment. Then write the logic for calling the call to the extractor task in `TestMeetingDataFlow`. - -```go -func TestMeetingDataFlow(t *testing.T) { - var plugin impl.Feishu - dataflowTester := e2ehelper.NewDataFlowTester(t, "feishu", plugin) - - taskData := &tasks.FeishuTaskData{ - Options: &tasks.FeishuOptions{ - ConnectionId: 1, - }, - } - - // import raw data table - dataflowTester.ImportCsvIntoRawTable("./raw_tables/_raw_feishu_meeting_top_user_item.csv", "_raw_feishu_meeting_top_user_item") - - // verify extraction - dataflowTester.FlushTabler(&models.FeishuMeetingTopUserItem{}) - dataflowTester.Subtask(tasks.ExtractMeetingTopUserItemMeta, taskData) - -} -``` -The added code includes a call to `dataflowTester.FlushTabler` to clear the table `_tool_feishu_meeting_top_user_items` and a call to `dataflowTester.Subtask` to simulate the running of the subtask `ExtractMeetingTopUserItemMeta`. - -Now run it and see if the subtask `ExtractMeetingTopUserItemMeta` completes without errors. The data results of the `extract` run generally come from the raw table, so the plugin subtask will run correctly if written without errors. We can observe if the data is successfully parsed in the db table in the tool layer. In this case the `_tool_feishu_meeting_top_user_items` table has the correct data. - -If the run is incorrect, maybe you can troubleshoot the problem with the plugin itself before moving on to the next step. - -## Verify that the results of the task are correct - -Let's continue writing the test and add the following code at the end of the test function -```go -func TestMeetingDataFlow(t *testing.T) { - ...... 
-

    dataflowTester.VerifyTable(
        models.FeishuMeetingTopUserItem{},
        "./snapshot_tables/_tool_feishu_meeting_top_user_items.csv",
        []string{
            "meeting_count",
            "meeting_duration",
            "user_type",
            "_raw_data_params",
            "_raw_data_table",
            "_raw_data_id",
            "_raw_data_remark",
        },
    )
}
```
This calls `dataflowTester.VerifyTable` to validate the data results. The third parameter lists all the fields of the table that need to be verified.
The data used for validation is expected in `./snapshot_tables/_tool_feishu_meeting_top_user_items.csv`, but of course, this file does not exist yet.

There is a twin, more generalized function that can be used instead:
```go
dataflowTester.VerifyTableWithOptions(models.FeishuMeetingTopUserItem{},
    e2ehelper.TableOptions{
        CSVRelPath: "./snapshot_tables/_tool_feishu_meeting_top_user_items.csv",
    },
)
```
The above usage defaults to validating against all fields of the `models.FeishuMeetingTopUserItem` model. Additional fields on `TableOptions` can be specified to limit which fields of that model are validated.

To facilitate the generation of the file mentioned above, DevLake has adopted a testing technique called `Snapshot`: when `VerifyTable` or `VerifyTableWithOptions` is called and the csv file does not exist yet, the file is generated automatically from the run results.

But note! Please do two things after the snapshot is created: 1. check that the file was generated correctly; 2. re-run the test to make sure the generated results and the re-run results are consistent.
These two operations are critical and directly related to the quality of the test. We should treat the `.csv` snapshot file like a code file.

If there is a problem with this step, it is usually one of the following 2 cases:
1. The validated fields include values that change between runs, such as `created_at` timestamps or self-incrementing ids; they cannot be validated repeatedly and should be excluded.
2. The run results contain `\n`, `\r\n` or other mismatched escape sequences, usually introduced when parsing the `httpResponse`. In that case you can:
   1. change the field type of the corresponding content in the api model to `json.RawMessage`
   2. convert it to a string when extracting
   3. this keeps the `\n` symbols intact and avoids having line breaks interpreted by the database or the operating system


For example, in the `github` plugin, this is how it is handled.
![image](https://user-images.githubusercontent.com/3294100/175098219-c04b810a-deaf-4958-9295-d5ad4ec152e6.png)
![image](https://user-images.githubusercontent.com/3294100/175098273-e4a18f9a-51c8-4637-a80c-3901a3c2934e.png)

Well, at this point, the E2E test is done. We added a total of 3 new files to complete the testing of the meeting hours collection task. It's pretty easy.
![image](https://user-images.githubusercontent.com/3294100/175098574-ae6c7fb7-7123-4d80-aa85-790b492290ca.png)

## Run E2E tests for all plugins like CI

It's straightforward.
Just run `make e2e-plugins` because DevLake has already solidified it into a script~ - diff --git a/versioned_docs/version-v0.14/DeveloperManuals/Notifications.md b/versioned_docs/version-v0.14/DeveloperManuals/Notifications.md deleted file mode 100644 index 23456b4f1e7..00000000000 --- a/versioned_docs/version-v0.14/DeveloperManuals/Notifications.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Notifications" -description: > - Notifications -sidebar_position: 4 ---- - -## Request -Example request -``` -POST /lake/notify?nouce=3-FDXxIootApWxEVtz&sign=424c2f6159bd9e9828924a53f9911059433dc14328a031e91f9802f062b495d5 - -{"TaskID":39,"PluginName":"jenkins","CreatedAt":"2021-09-30T15:28:00.389+08:00","UpdatedAt":"2021-09-30T15:28:00.785+08:00"} -``` - -## Configuration -If you want to use the notification feature, you should add two configuration key to `.env` file. -```shell -# .env -# notification request url, e.g.: http://example.com/lake/notify -NOTIFICATION_ENDPOINT= -# secret is used to calculate signature -NOTIFICATION_SECRET= -``` - -## Signature -You should check the signature before accepting the notification request. We use sha256 algorithm to calculate the checksum. -```go -// calculate checksum -sum := sha256.Sum256([]byte(requestBody + NOTIFICATION_SECRET + nouce)) -return hex.EncodeToString(sum[:]) -``` diff --git a/versioned_docs/version-v0.14/DeveloperManuals/PluginImplementation.md b/versioned_docs/version-v0.14/DeveloperManuals/PluginImplementation.md deleted file mode 100644 index b6264991c28..00000000000 --- a/versioned_docs/version-v0.14/DeveloperManuals/PluginImplementation.md +++ /dev/null @@ -1,339 +0,0 @@ ---- -title: "Plugin Implementation" -sidebar_position: 2 -description: > - Plugin Implementation ---- - -If your favorite DevOps tool is not yet supported by DevLake, don't worry. It's not difficult to implement a DevLake plugin. In this post, we'll go through the basics of DevLake plugins and build an example plugin from scratch together. - -## What is a plugin? - -A DevLake plugin is a shared library built with Go's `plugin` package that hooks up to DevLake core at run-time. - -A plugin may extend DevLake's capability in three ways: - -1. Integrating with new data sources -2. Transforming/enriching existing data -3. Exporting DevLake data to other data systems - - -## How do plugins work? - -A plugin mainly consists of a collection of subtasks that can be executed by DevLake core. For data source plugins, a subtask may be collecting a single entity from the data source (e.g., issues from Jira). Besides the subtasks, there're hooks that a plugin can implement to customize its initialization, migration, and more. See below for a list of the most important interfaces: - -1. [PluginMeta](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_meta.go) contains the minimal interface that a plugin should implement, with only two functions - - Description() returns the description of a plugin - - RootPkgPath() returns the root package path of a plugin -2. [PluginInit](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_init.go) allows a plugin to customize its initialization -3. [PluginTask](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_task.go) enables a plugin to prepare data prior to subtask execution -4. [PluginApi](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_api.go) lets a plugin exposes some self-defined APIs -5. 
[PluginMigration](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_migration.go) is where a plugin manages its database migrations -6. [PluginModel](https://github.com/apache/incubator-devlake/blob/main/backend/core/plugin/plugin_model.go) allows other plugins to get the model information of all database tables of the current plugin through the GetTablesInfo() method.If you need to access Domain Layer Models,please visit [DomainLayerSchema](https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema/) - -The diagram below shows the control flow of executing a plugin: - -```mermaid -flowchart TD; - subgraph S4[Step4 sub-task extractor running process]; - direction LR; - D4[DevLake]; - D4 -- "Step4.1 create a new\n ApiExtractor\n and execute it" --> E["ExtractXXXMeta.\nEntryPoint"]; - E <-- "Step4.2 read from\n raw table" --> E2["RawDataSubTaskArgs\n.Table"]; - E -- "Step4.3 call with RawData" --> ApiExtractor.Extract; - ApiExtractor.Extract -- "decode and return gorm models" --> E - end - subgraph S3[Step3 sub-task collector running process] - direction LR - D3[DevLake] - D3 -- "Step3.1 create a new\n ApiCollector\n and execute it" --> C["CollectXXXMeta.\nEntryPoint"]; - C <-- "Step3.2 create\n raw table" --> C2["RawDataSubTaskArgs\n.RAW_BBB_TABLE"]; - C <-- "Step3.3 build query\n before sending requests" --> ApiCollectorArgs.\nQuery/UrlTemplate; - C <-. "Step3.4 send requests by ApiClient \n and return HTTP response" .-> A1["HTTP APIs"]; - C <-- "Step3.5 call and \nreturn decoded data \nfrom HTTP response" --> ResponseParser; - end - subgraph S2[Step2 DevLake register custom plugin] - direction LR - D2[DevLake] - D2 <-- "Step2.1 function \`Init\` \nneed to do init jobs" --> plugin.Init; - D2 <-- "Step2.2 (Optional) call \nand return migration scripts" --> plugin.MigrationScripts; - D2 <-- "Step2.3 (Optional) call \nand return taskCtx" --> plugin.PrepareTaskData; - D2 <-- "Step2.4 call and \nreturn subTasks for execting" --> plugin.SubTaskContext; - end - subgraph S1[Step1 Run DevLake] - direction LR - main -- "Transfer of control \nby \`runner.DirectRun\`" --> D1[DevLake]; - end - S1-->S2-->S3-->S4 -``` -There's a lot of information in the diagram, but we don't expect you to digest it right away. You can simply use it as a reference when you go through the example below. - -## A step-by-step guide towards your first plugin - -In this section, we will describe how to create a data collection plugin from scratch. The data to be collected is the information about all Committers and Contributors of the Apache project, in order to check whether they have signed the CLA. We are going to - -* request `https://people.apache.org/public/icla-info.json` to get the Committers' information -* request the `mailing list` to get the Contributors' information - -We will focus on demonstrating how to request and cache information about all Committers through the Apache API and extract structured data from it. The collection of Contributors will only be briefly described. - -### Step 1: Bootstrap the new plugin - -**Note:** Please make sure you have DevLake up and running before proceeding. 
- -> More info about plugin: -> Generally, we need these folders in plugin folders: `api`, `models` and `tasks` -> `api` interacts with `config-ui` for test/get/save connection of data source -> - connection [example](https://github.com/apache/incubator-devlake/blob/main/plugins/gitlab/api/connection.go) -> - connection model [example](https://github.com/apache/incubator-devlake/blob/main/plugins/gitlab/models/connection.go) -> `models` stores all `data entities` and `data migration scripts`. -> - entity -> - data migrations [template](https://github.com/apache/incubator-devlake/tree/main/generator/template/migrationscripts) -> `tasks` contains all of our `sub tasks` for a plugin -> - task data [template](https://github.com/apache/incubator-devlake/blob/main/generator/template/plugin/tasks/task_data.go-template) -> - api client [template](https://github.com/apache/incubator-devlake/blob/main/generator/template/plugin/tasks/task_data_with_api_client.go-template) - -Don't worry if you cannot figure out what these concepts mean immediately. We'll explain them one by one later. - -DevLake provides a generator to create a plugin conveniently. Let's scaffold our new plugin by running `go run generator/main.go create-plugin icla`, which would ask for `with_api_client` and `Endpoint`. - -* `with_api_client` is used for choosing if we need to request HTTP APIs by api_client. -* `Endpoint` use in which site we will request, in our case, it should be `https://people.apache.org/`. - -![](https://i.imgur.com/itzlFg7.png) - -Now we have three files in our plugin. `api_client.go` and `task_data.go` are in subfolder `tasks/`. -![plugin files](https://i.imgur.com/zon5waf.png) - -Have a try to run this plugin by function `main` in `plugin_main.go`. When you see result like this: -``` -$go run plugins/icla/plugin_main.go -[2022-06-02 18:07:30] INFO failed to create dir logs: mkdir logs: file exists -press `c` to send cancel signal -[2022-06-02 18:07:30] INFO [icla] start plugin -invalid ICLA_TOKEN, but ignore this error now -[2022-06-02 18:07:30] INFO [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s -[2022-06-02 18:07:30] INFO [icla] total step: 0 -``` -How exciting. It works! The plugin defined and initiated in `plugin_main.go` use some options in `task_data.go`. They are made up as the most straightforward plugin in Apache DevLake, and `api_client.go` will be used in the next step to request HTTP APIs. - -### Step 2: Create a sub-task for data collection -Before we start, it is helpful to know how collection task is executed: -1. First, Apache DevLake would call `plugin_main.PrepareTaskData()` to prepare needed data before any sub-tasks. We need to create an API client here. -2. Then Apache DevLake will call the sub-tasks returned by `plugin_main.SubTaskMetas()`. Sub-task is an independent task to do some job, like requesting API, processing data, etc. - -> Each sub-task must be defined as a SubTaskMeta, and implement SubTaskEntryPoint of SubTaskMeta. SubTaskEntryPoint is defined as -> ```go -> type SubTaskEntryPoint func(c SubTaskContext) error -> ``` -> More info at: https://devlake.apache.org/blog/how-DevLake-is-up-and-running/ - -#### Step 2.1: Create a sub-task(Collector) for data collection - -Let's run `go run generator/main.go create-collector icla committer` and confirm it. This sub-task is activated by registering in `plugin_main.go/SubTaskMetas` automatically. 
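For reference, the registration that the generator performs amounts to something like the sketch below. This is an illustrative reconstruction rather than the exact generated code; it assumes the `core` and `tasks` packages that the scaffold already references, and the field names follow the `SubTaskMeta`/`SubTaskEntryPoint` definitions quoted above.

```go
// plugins/icla/tasks/committer_collector.go (sketch)
var CollectCommitterMeta = core.SubTaskMeta{
    Name:             "CollectCommitter",
    EntryPoint:       CollectCommitter, // the SubTaskEntryPoint implemented in this file
    EnabledByDefault: true,
    Description:      "Collect committer data from the Apache ICLA API",
}

// plugins/icla/plugin_main.go (sketch): expose the sub-task to the framework
func (plugin Icla) SubTaskMetas() []core.SubTaskMeta {
    return []core.SubTaskMeta{
        tasks.CollectCommitterMeta,
    }
}
```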
- -![](https://i.imgur.com/tkDuofi.png) - -> - Collector will collect data from HTTP or other data sources, and save the data into the raw layer. -> - Inside the func `SubTaskEntryPoint` of `Collector`, we use `helper.NewApiCollector` to create an object of [ApiCollector](https://github.com/apache/incubator-devlake/blob/main/backend/generator/template/plugin/tasks/api_collector.go-template), then call `execute()` to do the job. - -Now you can notice `data.ApiClient` is initiated in `plugin_main.go/PrepareTaskData.ApiClient`. `PrepareTaskData` create a new `ApiClient`, which is a tool Apache DevLake suggests to request data from HTTP Apis. This tool support some valuable features for HttpApi, like rateLimit, proxy and retry. Of course, if you like, you may use the lib `http` instead, but it will be more tedious. - -Let's move forward to use it. - -1. To collect data from `https://people.apache.org/public/icla-info.json`, -we have filled `https://people.apache.org/` into `tasks/api_client.go/ENDPOINT` in Step 1. - -![](https://i.imgur.com/q8Zltnl.png) - -2. Fill `public/icla-info.json` into `UrlTemplate`, delete the unnecessary iterator and add `println("receive data:", res)` in `ResponseParser` to see if collection was successful. - -![](https://i.imgur.com/ToLMclH.png) - -Ok, now the collector sub-task has been added to the plugin, and we can kick it off by running `main` again. If everything goes smoothly, the output should look like this: -```bash -[2022-06-06 12:24:52] INFO [icla] start plugin -invalid ICLA_TOKEN, but ignore this error now -[2022-06-06 12:24:52] INFO [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s -[2022-06-06 12:24:52] INFO [icla] total step: 1 -[2022-06-06 12:24:52] INFO [icla] executing subtask CollectCommitter -[2022-06-06 12:24:52] INFO [icla] [CollectCommitter] start api collection -receive data: 0x140005763f0 -[2022-06-06 12:24:55] INFO [icla] [CollectCommitter] finished records: 1 -[2022-06-06 12:24:55] INFO [icla] [CollectCommitter] end api collection -[2022-06-06 12:24:55] INFO [icla] finished step: 1 / 1 -``` - -Great! Now we can see data pulled from the server without any problem. The last step is to decode the response body in `ResponseParser` and return it to the framework, so it can be stored in the database. -```go -ResponseParser: func(res *http.Response) ([]json.RawMessage, error) { - body := &struct { - LastUpdated string `json:"last_updated"` - Committers json.RawMessage `json:"committers"` - }{} - err := helper.UnmarshalResponse(res, body) - if err != nil { - return nil, err - } - println("receive data:", len(body.Committers)) - return []json.RawMessage{body.Committers}, nil -}, - -``` -Ok, run the function `main` once again, then it turned out like this, and we should be able to see some records show up in the table `_raw_icla_committer`. -```bash -…… -receive data: 272956 /* <- the number means 272956 models received */ -[2022-06-06 13:46:57] INFO [icla] [CollectCommitter] finished records: 1 -[2022-06-06 13:46:57] INFO [icla] [CollectCommitter] end api collection -[2022-06-06 13:46:57] INFO [icla] finished step: 1 / 1 -``` - -![](https://i.imgur.com/aVYNMRr.png) - -#### Step 2.2: Create a sub-task(Extractor) to extract data from the raw layer - -> - Extractor will extract data from raw layer and save it into tool db table. -> - Except for some pre-processing, the main flow is similar to the collector. - -We have already collected data from HTTP API and saved them into the DB table `_raw_XXXX`. 
In this step, we will extract the names of committers from the raw data. As you may infer from the name, raw tables are temporary and not easy to use directly.

Apache DevLake recommends saving data with [gorm](https://gorm.io/docs/index.html), so we will create a gorm model and add it to `plugin_main.go/AutoMigrate()`.

plugins/icla/models/committer.go
```go
package models

import (
    "github.com/apache/incubator-devlake/models/common"
)

type IclaCommitter struct {
    UserName string `gorm:"primaryKey;type:varchar(255)"`
    Name     string `gorm:"primaryKey;type:varchar(255)"`
    common.NoPKModel
}

func (IclaCommitter) TableName() string {
    return "_tool_icla_committer"
}
```

plugins/icla/plugin_main.go
![](https://i.imgur.com/4f0zJty.png)


OK, run the plugin, and the table `_tool_icla_committer` will be created automatically, just like the snapshot below:
![](https://i.imgur.com/7Z324IX.png)

Next, let's run `go run generator/main.go create-extractor icla committer` and type in what the command prompt asks for to create a new sub-task.

![](https://i.imgur.com/UyDP9Um.png)

Let's look at the function `Extract` in the newly created `committer_extractor.go` and the code that needs to be written here. `resData.Data` is the raw data, so we can json-decode each row, create an `IclaCommitter` for each entry, and return them to be saved.
```go
Extract: func(resData *helper.RawData) ([]interface{}, error) {
    // the raw payload is a JSON object mapping username to full name
    names := &map[string]string{}
    err := json.Unmarshal(resData.Data, names)
    if err != nil {
        return nil, err
    }
    // convert each entry into a tool-layer IclaCommitter record
    extractedModels := make([]interface{}, 0)
    for userName, name := range *names {
        extractedModels = append(extractedModels, &models.IclaCommitter{
            UserName: userName,
            Name:     name,
        })
    }
    return extractedModels, nil
},
```

OK, run it and we get:
```
[2022-06-06 15:39:40] INFO [icla] start plugin
invalid ICLA_TOKEN, but ignore this error now
[2022-06-06 15:39:40] INFO [icla] scheduler for api https://people.apache.org/ worker: 25, request: 18000, duration: 1h0m0s
[2022-06-06 15:39:40] INFO [icla] total step: 2
[2022-06-06 15:39:40] INFO [icla] executing subtask CollectCommitter
[2022-06-06 15:39:40] INFO [icla] [CollectCommitter] start api collection
receive data: 272956
[2022-06-06 15:39:44] INFO [icla] [CollectCommitter] finished records: 1
[2022-06-06 15:39:44] INFO [icla] [CollectCommitter] end api collection
[2022-06-06 15:39:44] INFO [icla] finished step: 1 / 2
[2022-06-06 15:39:44] INFO [icla] executing subtask ExtractCommitter
[2022-06-06 15:39:46] INFO [icla] [ExtractCommitter] finished records: 1
[2022-06-06 15:39:46] INFO [icla] finished step: 2 / 2
```
Now the committer data has been saved in `_tool_icla_committer`.
![](https://i.imgur.com/6svX0N2.png)

#### Step 2.3: Convertor

Notes: The goal of Converters is to create a vendor-agnostic model out of the vendor-dependent ones created by the Extractors.
They are not strictly necessary, but we encourage writing them because converters and the domain layer significantly help with building dashboards. More info about the domain layer [here](https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema/).

In short:

> - Convertor will convert data from the tool layer and save it into the domain layer.
> - We use `helper.NewDataConverter` to create a DataConverter object, then call `execute()`.

#### Step 2.4: Let's try it
Sometimes an open API is protected by a token or another auth mechanism, and we need to log in to obtain a token before visiting it.
For example, only after logging in `private@apahce.com` could we gather the data about contributors signing ICLA. Here we briefly introduce how to authorize DevLake to collect data. - -Let's look at `api_client.go`. `NewIclaApiClient` load config `ICLA_TOKEN` by `.env`, so we can add `ICLA_TOKEN=XXXXXX` in `.env` and use it in `apiClient.SetHeaders()` to mock the login status. Code as below: -![](https://i.imgur.com/dPxooAx.png) - -Of course, we can use `username/password` to get a token after login mockery. Just try and adjust according to the actual situation. - -Look for more related details at https://github.com/apache/incubator-devlake - -#### Step 2.5: Implement the GetTablesInfo() method of the PluginModel interface - -As shown in the following gitlab plugin example, -add all models that need to be accessed by external plugins to the return value. - -```go -var _ core.PluginModel = (*Gitlab)(nil) - -func (plugin Gitlab) GetTablesInfo() []core.Tabler { - return []core.Tabler{ - &models.GitlabConnection{}, - &models.GitlabAccount{}, - &models.GitlabCommit{}, - &models.GitlabIssue{}, - &models.GitlabIssueLabel{}, - &models.GitlabJob{}, - &models.GitlabMergeRequest{}, - &models.GitlabMrComment{}, - &models.GitlabMrCommit{}, - &models.GitlabMrLabel{}, - &models.GitlabMrNote{}, - &models.GitlabPipeline{}, - &models.GitlabProject{}, - &models.GitlabProjectCommit{}, - &models.GitlabReviewer{}, - &models.GitlabTag{}, - } -} -``` - -You can use it as follows: - -```go -if pm, ok := plugin.(core.PluginModel); ok { - tables := pm.GetTablesInfo() - for _, table := range tables { - // do something - } -} - -``` - -#### Final step: Submit the code as open source code -We encourage ideas and contributions ~ Let's use migration scripts, domain layers and other discussed concepts to write normative and platform-neutral code. More info at [here](https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema) or contact us for ebullient help. - - -## Done! - -Congratulations! The first plugin has been created! 🎖 diff --git a/versioned_docs/version-v0.14/DeveloperManuals/Release-SOP.md b/versioned_docs/version-v0.14/DeveloperManuals/Release-SOP.md deleted file mode 100644 index e63f317f5bd..00000000000 --- a/versioned_docs/version-v0.14/DeveloperManuals/Release-SOP.md +++ /dev/null @@ -1,117 +0,0 @@ -# DevLake Release Guide - -**Please make sure your public key was included in the https://downloads.apache.org/incubator/devlake/KEYS , if not, please update this file first.** -## How to update KEYS -1. Clone the svn repository - ```shell - svn co https://dist.apache.org/repos/dist/dev/incubator/devlake - ``` -2. Append your public key to the KEYS file - ```shell - cd devlake - (gpg --list-sigs && gpg --armor --export ) >> KEYS - ``` -3. Upload - ```shell - svn add KEYS - svn commit -m "update KEYS" - svn cp https://dist.apache.org/repos/dist/dev/incubator/devlake/KEYS https://dist.apache.org/repos/dist/release/incubator/devlake/ -m "update KEYS" - ``` -We will use `v0.14.0` as an example to demonstrate the release process. 
- -## ASF Release Policy -- https://www.apache.org/legal/release-policy.html -- https://incubator.apache.org/guides/releasemanagement.html - -## Tools: -- `gpg` creating and verifying the signature -- `shasum` creating and verifying the checksum -- `git` checkout and pack the codebase -- `svn` uploading the code to the Apache code hosting server - -## Prepare -- Check against the Incubator Release Checklist -- Create folder `releases/lake-v0.14.0` and put the two files `docker-compose.yml` and `env.example` in there. -- Update the file `.github/ISSUE_TEMPLATE/bug-report.yml` to include the version `v0.14.0` - - -## Pack -- Checkout to the branch/commit - ```shell - git clone https://github.com/apache/incubator-devlake.git - cd incubator-devlake - git checkout b268d53a48edb26d3c9b73b782798703f068f655 - ``` - -- Tag the commit and push to origin - ```shell - git tag v0.14.0-rc1 - git push origin v0.14.0-rc1 - ``` - -- Pack the code - ```shell - git archive --format=tar.gz --output="/apache-devlake-0.14.0-incubating-src.tar.gz" --prefix="apache-devlake-0.14.0-incubating-src/" v0.14.0-rc1 - ``` -- Before proceeding to the next step, please make sure your public key was included in the https://downloads.apache.org/incubator/devlake/KEYS -- Create signature and checksum - ```shell - cd - gpg -s --armor --output apache-devlake-0.14.0-incubating-src.tar.gz.asc --detach-sig apache-devlake-0.14.0-incubating-src.tar.gz - shasum -a 512 apache-devlake-0.14.0-incubating-src.tar.gz > apache-devlake-0.14.0-incubating-src.tar.gz.sha512 - ``` -- Verify signature and checksum - ```shell - gpg --verify apache-devlake-0.14.0-incubating-src.tar.gz.asc apache-devlake-0.14.0-incubating-src.tar.gz - shasum -a 512 --check apache-devlake-0.14.0-incubating-src.tar.gz.sha512 - ``` -## Upload -- Clone the svn repository - ```shell - svn co https://dist.apache.org/repos/dist/dev/incubator/devlake - ``` -- Copy the files into the svn local directory - ```shell - cd devlake - mkdir -p 0.14.0-incubating-rc1 - cp /apache-devlake-0.14.0-incubating-src.tar.gz* 0.14.0-incubating-rc1/ - ``` -- Upload local files - ```shell - svn add 0.14.0-incubating-rc1 - svn commit -m "add 0.14.0-incubating-rc1" - ``` -## Vote -1. Devlake community vote: - - Start the vote by sending an email to - [[VOTE] Release Apache DevLake (Incubating) v0.14.0-rc1](https://lists.apache.org/thread/s6jj2tl5mlyb8jpdd88jmo5woydzhp54) - - Announce the vote result: - [[RESULT][VOTE] Release Apache DevLake (Incubating) v0.14.0-rc1](https://lists.apache.org/thread/mb5sxdopprqksf1ppfggkvkwxs6110zk) - -2. 
Apache incubator community vote: - - Start the vote by sending an email to general@incubator.apache.org - [[VOTE] Release Apache DevLake (Incubating) v0.14.0-rc1](https://lists.apache.org/thread/lgfrsv0ymfk1c19ngnkkn46cspkf76lg) - - Announce the vote result: - [[RESULT][VOTE] Release Apache DevLake (Incubating) v0.14.0-rc1](https://lists.apache.org/thread/2xoqzymgvnrvrbn9dwsby39olotvt6oj) - -## Release -### Apache -- Move the release to the ASF content distribution system - ```shell - svn mv https://dist.apache.org/repos/dist/dev/incubator/devlake/0.14.0-incubating-rc1 https://dist.apache.org/repos/dist/release/incubator/devlake/0.14.0-incubating -m "transfer packages for 0.14.0-incubating-rc1" - ``` -- Wait until the directory `https://downloads.apache.org/incubator/devlake/0.14.0-incubating/` was created -- Remove the last release from `https://downloads.apache.org/` (according the Apache release policy, this link should be pointing to the current release) - ```shell - svn rm https://dist.apache.org/repos/dist/release/incubator/devlake/0.11.0-incubating -m "remove 0.11.0-incubating" - ``` -- Announce release by sending an email to general@incubator.apache.org - [[ANNOUNCE] Release Apache Devlake(incubating) 0.14.0-incubating](https://lists.apache.org/thread/401p8xm8tcp9tplh2sdht7dnrbs03rht) -### GitHub -- Create tag v0.14.0 and push - ```shell - git checkout v0.14.0-rc1 - git tag v0.14.0 - git push origin v0.14.0 - ``` -- Open the URL `https://github.com/apache/incubator-devlake/releases/`, draft a new release, fill in the form and upload two files `docker-compose.yml` and `env.example` diff --git a/versioned_docs/version-v0.14/DeveloperManuals/TagNamingConventions.md b/versioned_docs/version-v0.14/DeveloperManuals/TagNamingConventions.md deleted file mode 100644 index 3417c29b638..00000000000 --- a/versioned_docs/version-v0.14/DeveloperManuals/TagNamingConventions.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: "Tag Naming Conventions" -description: > - Tag Naming Conventions -sidebar_position: 6 ---- - -Please refer to the rules when creating a new tag for Apache DevLake -- alpha: internal testing/preview, i.e. v0.12.0-alpha1 -- beta: community/customer testing/preview, i.e. v0.12.0-beta1 -- rc: asf release candidate, i.e. v0.12.0-rc1 - - diff --git a/versioned_docs/version-v0.14/DeveloperManuals/_category_.json b/versioned_docs/version-v0.14/DeveloperManuals/_category_.json deleted file mode 100644 index f921ae47152..00000000000 --- a/versioned_docs/version-v0.14/DeveloperManuals/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Developer Manuals", - "position": 8, - "link":{ - "type": "generated-index", - "slug": "DeveloperManuals" - } -} diff --git a/versioned_docs/version-v0.14/GettingStarted/DockerComposeSetup.md b/versioned_docs/version-v0.14/GettingStarted/DockerComposeSetup.md deleted file mode 100644 index 71e822bbc46..00000000000 --- a/versioned_docs/version-v0.14/GettingStarted/DockerComposeSetup.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: "Install via Docker Compose" -description: > - The steps to install DevLake via Docker Compose -sidebar_position: 1 ---- - - -## Prerequisites - -- [Docker v19.03.10+](https://docs.docker.com/get-docker) -- [docker-compose v2.2.3+](https://docs.docker.com/compose/install/) (If you have Docker Desktop installed then you already have the Compose plugin installed) - -## Launch DevLake - -- Commands written `like this` are to be run in your terminal. 
-

NOTE: Before step 3, make sure your `.env` file has read/write permissions for the current user on your machine; otherwise the [API-Server](https://devlake.apache.org/docs/Overview/Architecture) will not start properly.

1. Download `docker-compose.yml` and `env.example` from the [latest release page](https://github.com/apache/incubator-devlake/releases/latest) into a folder.
2. Rename `env.example` to `.env`. For Mac/Linux users, please run `mv env.example .env` in the terminal. This file contains the environment variables that the DevLake server will use. Additional ones can be found in the compose file(s).
3. Run `docker-compose up -d` to launch DevLake.

## Collect data and view dashboards

1. Visit `config-ui` at `http://localhost:4000` in your browser to configure DevLake and collect data.
   - Please follow the [tutorial](UserManuals/ConfigUI/Tutorial.md)
   - The `devlake` container takes a while to fully boot up. If `config-ui` complains about the API being unreachable, please wait a few seconds and refresh the page.
2. To view dashboards, click the *View Dashboards* button in the top left corner, or visit `localhost:3002` (username: `admin`, password: `admin`).
   - We use [Grafana](https://grafana.com/) to visualize the DevOps [data](../SupportedDataSources.md) and build dashboards.
   - For how to customize and provision dashboards, please see our [Grafana doc](../UserManuals/Dashboards/GrafanaUserGuide.md).


## Upgrade to a newer version

Support for database schema migration was introduced to DevLake in v0.10.0. From v0.10.0 onwards, users can upgrade their instance smoothly to a newer version. However, versions prior to v0.10.0 do not support upgrading to a newer version with a different database schema.

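As a minimal sketch of the upgrade itself (assuming you installed via Docker Compose as above and have downloaded the newer release's `docker-compose.yml` and `env.example`, merging any new variables into your `.env`):

```shell
# pull the images referenced by the updated docker-compose.yml
docker-compose pull
# recreate the containers with the new images; existing data volumes are kept
docker-compose up -d
```
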
diff --git a/versioned_docs/version-v0.14/GettingStarted/HelmSetup.md b/versioned_docs/version-v0.14/GettingStarted/HelmSetup.md deleted file mode 100644 index 0e57039da6f..00000000000 --- a/versioned_docs/version-v0.14/GettingStarted/HelmSetup.md +++ /dev/null @@ -1,151 +0,0 @@ ---- -title: "Install via Helm" -description: > - The steps to install Apache DevLake via Helm for Kubernetes -sidebar_position: 2 ---- - -## Prerequisites - -- Helm >= 3.6.0 -- Kubernetes >= 1.19.0 - - -## Quick Start - -#### You can also check You can also check https://github.com/apache/incubator-devlake-helm-chart to make contribution - -### Install - -To install the chart with release name `devlake`: - -```shell -helm repo add devlake https://apache.github.io/incubator-devlake-helm-chart -helm repo update -helm install devlake devlake/devlake -``` -And visit your devlake from the node port (32001 by default). - -http://YOUR-NODE-IP:32001 - -#### Tips: -If you are using minikube inside your mac, please use the following command to forward the port: -```shell -kubectl port-forward service/devlake-ui 30090:4000 -``` -and open another terminal: -```shell -kubectl port-forward service/devlake-grafana 30091:3000 -``` - -Then you can visit: -config-ui by url `http://YOUR-NODE-IP:30090` -grafana by url `http://YOUR-NODE-IP:30091` - -### Update - -```shell -helm repo update -helm upgrade --install devlake devlake/devlake -``` - -### Uninstall - -To uninstall/delete the `devlake` release: - -```shell -helm uninstall devlake -``` - - -## Some example deployments - -### Deploy with NodePort - -Conditions: - - IP Address of Kubernetes node: 192.168.0.6 - - Want to visit devlake with port 30000. - -``` -helm install devlake . --set service.uiPort=30000 -``` - -After deployed, visit devlake: http://192.168.0.6:30000 - -### Deploy with Ingress - -Conditions: - - I have already configured default ingress for the Kubernetes cluster - - I want to use http://devlake.example.com for visiting devlake - -``` -helm install devlake . --set "ingress.enabled=true,ingress.hostname=devlake.example.com" -``` - -After deployed, visit devlake: http://devlake.example.com, and grafana at http://devlake.example.com/grafana - -### Deploy with Ingress (Https) - -Conditions: - - I have already configured ingress(class: nginx) for the Kubernetes cluster, and the https using 8443 port. - - I want to use https://devlake-0.example.com:8443 for visiting devlake. - - The https certificates are generated by letsencrypt.org, and the certificate and key files: `cert.pem` and `key.pem` - -First, create the secret: -``` -kubectl create secret tls ssl-certificate --cert cert.pem --key secret.pem -``` - -Then, deploy the devlake: -``` -helm install devlake . 
\ - --set "ingress.enabled=true,ingress.enableHttps=true,ingress.hostname=devlake-0.example.com" \ - --set "ingress.className=nginx,ingress.httpsPort=8443" \ - --set "ingress.tlsSecretName=ssl-certificate" -``` - -After deployed, visit devlake: https://devlake-0.example.com:8443, and grafana at https://devlake-0.example.com:8443/grafana - - -## Parameters - -Some useful parameters for the chart, you could also check them in values.yaml - -| Parameter | Description | Default | -|-----------|-------------|---------| -| replicaCount | Replica Count for devlake, currently not used | 1 | -| mysql.useExternal | If use external mysql server, currently not used | false | -| mysql.externalServer | External mysql server address | 127.0.0.1 | -| mysql.externalPort | External mysql server port | 3306 | -| mysql.username | username for mysql | merico | -| mysql.password | password for mysql | merico | -| mysql.database | database for mysql | lake | -| mysql.rootPassword | root password for mysql | admin | -| mysql.storage.class | storage class for mysql's volume | "" | -| mysql.storage.size | volume size for mysql's data | 5Gi | -| mysql.image.repository | repository for mysql's image | mysql | -| mysql.image.tag | image tag for mysql's image | 8.0.26 | -| mysql.image.pullPolicy | pullPolicy for mysql's image | IfNotPresent | -| grafana.image.repository | repository for grafana's image | mericodev/grafana | -| grafana.image.tag | image tag for grafana's image | latest | -| grafana.image.pullPolicy | pullPolicy for grafana's image | Always | -| lake.storage.class | storage class for lake's volume | "" | -| lake.storage.size | volume size for lake's data | 100Mi | -| lake.image.repository | repository for lake's image | mericodev/lake | -| lake.image.tag | image tag for lake's image | latest | -| lake.image.pullPolicy | pullPolicy for lake's image | Always | -| lake.loggingDir | the root logging directory of Devlake | /app/logs | -| ui.image.repository | repository for ui's image | mericodev/config-ui | -| ui.image.tag | image tag for ui's image | latest | -| ui.image.pullPolicy | pullPolicy for ui's image | Always | -| service.type | Service type for exposed service | NodePort | -| service.uiPort | Service port for config ui | 32001 | -| service.ingress.enabled | If enable ingress | false | -| service.ingress.enableHttps | If enable https | false | -| service.ingress.className | The class name for ingressClass. 
If leave empty, the default IngressClass will be used | "" | -| service.ingress.hostname | The hostname/domainname for ingress | localhost | -| service.ingress.prefix | The prefix for endpoints, currently not supported due to devlake's implementation | / | -| service.ingress.tlsSecretName | The secret name for tls's certificate, required when https enabled | "" | -| service.ingress.httpPort | The http port for ingress | 80 | -| service.ingress.httpsPort | The https port for ingress | 443 | -| option.localtime | The hostpath for mount as /etc/localtime | /etc/localtime | diff --git a/versioned_docs/version-v0.14/GettingStarted/KubernetesSetup.md b/versioned_docs/version-v0.14/GettingStarted/KubernetesSetup.md deleted file mode 100644 index 065c3b97e78..00000000000 --- a/versioned_docs/version-v0.14/GettingStarted/KubernetesSetup.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: "Install via Kubernetes" -description: > - The steps to install Apache DevLake via Kubernetes -sidebar_position: 3 ---- - -:::caution - -We highly recommend the [helm approach](./HelmSetup.md), this page is for Advanced Installation only - -::: - -We provide a sample [k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) to help deploy DevLake to Kubernetes - -[k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) will create a namespace `devlake` on your k8s cluster, and use `nodePort 30004` for `config-ui`, `nodePort 30002` for `grafana` dashboards. If you would like to use a specific version of Apache DevLake, please update the image tag of `grafana`, `devlake` and `config-ui` deployments. - -## Step-by-step guide - -1. Download [k8s-deploy.yaml](https://github.com/apache/incubator-devlake/blob/main/devops/deployment/k8s/k8s-deploy.yaml) -2. Customize the settings (`devlake-config` config map): - - Settings shared between `grafana` and `mysql` - * `MYSQL_ROOT_PASSWORD`: set root password for `mysql` - * `MYSQL_USER`: shared between `mysql` and `grafana` - * `MYSQL_PASSWORD`: shared between `mysql` and `grafana` - * `MYSQL_DATABASE`: shared between `mysql` and `grafana` - - Settings used by `grafana` - * `MYSQL_URL`: set MySQL URL for `grafana` in `$HOST:$PORT` format - * `GF_SERVER_ROOT_URL`: Public URL to the `grafana` - - Settings used by `config-ui`: - * `GRAFANA_ENDPOINT`: FQDN of grafana which can be reached within k8s cluster, normally you don't need to change it unless namespace was changed - * `DEVLAKE_ENDPOINT`: FQDN of devlake which can be reached within k8s cluster, normally you don't need to change it unless namespace was changed - * `ADMIN_USER`/`ADMIN_PASS`: Not required, but highly recommended - - Settings used by `devlake`: - * `DB_URL`: update this value if `MYSQL_USER`, `MYSQL_PASSWORD` or `MYSQL_DATABASE` were changed - * `LOGGING_DIR`: the directory of logs for Devlake - you likely don't need to change it. -3. The `devlake` deployment store its configuration in `/app/.env`. In our sample yaml, we use `hostPath` volume, so please make sure directory `/var/lib/devlake` exists on your k8s workers, or employ other techniques to persist `/app/.env` file. Please do NOT mount the entire `/app` directory, because plugins are located in `/app/bin` folder. -4. Finally, execute the following command and DevLake should be up and running: - ```sh - kubectl apply -f k8s-deploy.yaml - ``` - - -## FAQ - -1. Can I use a managed Cloud database service instead of running database in k8s? 
-
   Yes, it only takes a few changes in the sample yaml file. Below we'll use MySQL on AWS RDS as an example.
   1. (Optional) Create a MySQL instance on AWS RDS following this [doc](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_GettingStarted.CreatingConnecting.MySQL.html); skip this step if you'd like to use an existing instance
   2. Remove the `mysql` deployment and service sections from `k8s-deploy.yaml`
   3. Update the `devlake-config` configmap according to your RDS instance setup:
     * `MYSQL_ROOT_PASSWORD`: remove this line
     * `MYSQL_USER`: use your RDS instance's master username
     * `MYSQL_PASSWORD`: use your RDS instance's password
     * `MYSQL_DATABASE`: use your RDS instance's DB name; you may need to create a database first with `CREATE DATABASE ;`
     * `MYSQL_URL`: set this for `grafana` in `$HOST:$PORT` format, where $HOST and $PORT should be your RDS instance's endpoint and port respectively
     * `DB_URL`: update the connection string with your RDS instance's info for `devlake` diff --git a/versioned_docs/version-v0.14/GettingStarted/RainbondSetup.md b/versioned_docs/version-v0.14/GettingStarted/RainbondSetup.md deleted file mode 100644 index 3b793c7ee92..00000000000 --- a/versioned_docs/version-v0.14/GettingStarted/RainbondSetup.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "Install via Rainbond" -sidebar_position: 7 -description: > - The steps to install DevLake in Rainbond. ----

This tutorial is for users who are not familiar with Kubernetes. [Rainbond](https://www.rainbond.com/) is a cloud-native application management platform built on Kubernetes. It is easy to use and requires no Kubernetes knowledge, so you can easily deploy applications on Kubernetes with it.

Installing DevLake on Rainbond is the easiest way to get started.

## Requirements

* Rainbond 5.8.x or above

## Deploy DevLake

1. Log in to the Rainbond console, click `Market` in the left menu, switch to the open source app store, search for `DevLake` in the search box, and click the `Install` button.

![](/img/GettingStarted/install-devlake.jpg)

2. Fill in the installation information and click the `Confirm` button to start the installation.
   * Team: select a team or create a new team
   * Cluster: select a cluster
   * Application: select an application or create a new application
   * Version: select a version

3. A moment later, DevLake will be installed successfully; use the `Access` button to open DevLake.

![](/img/GettingStarted/topology-devlake.jpg)

## Next Step

Create a Blueprint; see the [Tutorial](UserManuals/ConfigUI/Tutorial.md#creating-a-blueprint) \ No newline at end of file diff --git a/versioned_docs/version-v0.14/GettingStarted/TemporalSetup.md b/versioned_docs/version-v0.14/GettingStarted/TemporalSetup.md deleted file mode 100644 index 58132999f81..00000000000 --- a/versioned_docs/version-v0.14/GettingStarted/TemporalSetup.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "Install via Temporal" -sidebar_position: 6 -description: > - The steps to install DevLake in Temporal mode. ----


Normally, DevLake executes pipelines on a local machine (we call it `local mode`), which is sufficient most of the time. However, when you have too many pipelines that need to be executed in parallel, it can be problematic, as the horsepower and throughput of a single machine is limited.

`temporal mode` was added to support distributed pipeline execution: you can fire up arbitrary workers on multiple machines to carry out those pipelines in parallel and overcome the limitations of a single machine.
- -But, be careful, many API services like JIRA/GITHUB have a request rate limit mechanism. Collecting data in parallel against the same API service with the same identity would most likely hit such limit. - -## How it works - -1. DevLake Server and Workers connect to the same temporal server by setting up `TEMPORAL_URL` -2. DevLake Server sends a `pipeline` to the temporal server, and one of the Workers pick it up and execute it - - -**IMPORTANT: This feature is in early stage of development. Please use with caution** - - -## Temporal Demo - -### Requirements - -- [Docker](https://docs.docker.com/get-docker) -- [docker-compose](https://docs.docker.com/compose/install/) -- [temporalio](https://temporal.io/) - -### How to setup - -1. Clone and fire up the [temporalio](https://temporal.io/) services -2. Clone this repo, and fire up DevLake with command `docker-compose -f deployment/temporal/docker-compose-temporal.yml up -d` \ No newline at end of file diff --git a/versioned_docs/version-v0.14/GettingStarted/_category_.json b/versioned_docs/version-v0.14/GettingStarted/_category_.json deleted file mode 100644 index 063400ae119..00000000000 --- a/versioned_docs/version-v0.14/GettingStarted/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Getting Started", - "position": 2, - "link":{ - "type": "generated-index", - "slug": "GettingStarted" - } -} diff --git a/versioned_docs/version-v0.14/Glossary.md b/versioned_docs/version-v0.14/Glossary.md deleted file mode 100644 index be814870a3f..00000000000 --- a/versioned_docs/version-v0.14/Glossary.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -sidebar_position: 7 -title: "Glossary" -linkTitle: "Glossary" -description: > - DevLake Glossary ---- - -*Last updated: May 16 2022* - - -## In Configuration UI (Regular Mode) - -The following terms are arranged in the order of their appearance in the actual user workflow. - -### Blueprints -**A blueprint is the plan that covers all the work to get your raw data ready for query and metric computation in the dashboards.** Creating a blueprint consists of four steps: -1. **Adding [Data Connections](Glossary.md#data-connections)**: For each [data source](Glossary.md#data-sources), one or more data connections can be added to a single blueprint, depending on the data you want to sync to DevLake. -2. **Setting the [Data Scope](Glossary.md#data-scope)**: For each data connection, you need to configure the scope of data, such as GitHub projects, Jira boards, and their corresponding [data entities](Glossary.md#data-entities). -3. **Adding [Transformation Rules](Glossary.md#transformation-rules) (optional)**: You can optionally apply transformation for the data scope you have just selected, in order to view more advanced metrics. -3. **Setting the Sync Frequency**: You can specify the sync frequency for your blueprint to achieve recurring data syncs and transformation. Alternatively, you can set the frequency to manual if you wish to run the tasks in the blueprint manually. - -The relationship among Blueprint, Data Connections, Data Scope and Transformation Rules is explained as follows: - -![Blueprint ERD](/img/Glossary/blueprint-erd.svg) -- Each blueprint can have multiple data connections. -- Each data connection can have multiple sets of data scope. -- Each set of data scope only consists of one GitHub/GitLab project or Jira board, along with their corresponding data entities. -- Each set of data scope can only have one set of transformation rules. 
- -### Data Sources -**A data source is a specific DevOps tool from which you wish to sync your data, such as GitHub, GitLab, Jira and Jenkins.** - -DevLake normally uses one [data plugin](Glossary.md#data-plugins) to pull data for a single data source. However, in some cases, DevLake uses multiple data plugins for one data source for the purpose of improved sync speed, among many other advantages. For instance, when you pull data from GitHub or GitLab, aside from the GitHub or GitLab plugin, Git Extractor is also used to pull data from the repositories. In this case, DevLake still refers GitHub or GitLab as a single data source. - -### Data Connections -**A data connection is a specific instance of a data source that stores information such as `endpoint` and `auth`.** A single data source can have one or more data connections (e.g. two Jira instances). Currently, DevLake supports one data connection for GitHub, GitLab and Jenkins, and multiple connections for Jira. - -You can set up a new data connection either during the first step of creating a blueprint, or in the Connections page that can be accessed from the navigation bar. Because one single data connection can be reused in multiple blueprints, you can update the information of a particular data connection in Connections, to ensure all its associated blueprints will run properly. For example, you may want to update your GitHub token in a data connection if it goes expired. - -### Data Scope -**In a blueprint, each data connection can have multiple sets of data scope configurations, including GitHub or GitLab projects, Jira boards and their corresponding [data entities](Glossary.md#data-entities).** The fields for data scope configuration vary according to different data sources. - -Each set of data scope refers to one GitHub or GitLab project, or one Jira board and the data entities you would like to sync for them, for the convenience of applying transformation in the next step. For instance, if you wish to sync 5 GitHub projects, you will have 5 sets of data scope for GitHub. - -To learn more about the default data scope of all data sources and data plugins, please refer to [Supported Data Sources](./SupportedDataSources.md). - -### Data Entities -**Data entities refer to the data fields from one of the five data domains: Issue Tracking, Source Code Management, Code Review, CI/CD and Cross-Domain.** - -For instance, if you wish to pull Source Code Management data from GitHub and Issue Tracking data from Jira, you can check the corresponding data entities during setting the data scope of these two data connections. - -To learn more details, please refer to [Domain Layer Schema](./DataModels/DevLakeDomainLayerSchema.md). - -### Transformation Rules -**Transformation rules are a collection of methods that allow you to customize how DevLake normalizes raw data for query and metric computation.** Each set of data scope is strictly accompanied with one set of transformation rules. However, for your convenience, transformation rules can also be duplicated across different sets of data scope. - -DevLake uses these normalized values in the transformation to design more advanced dashboards, such as the Weekly Bug Retro dashboard. Although configuring transformation rules is not mandatory, if you leave the rules blank or have not configured correctly, only the basic dashboards (e.g. GitHub Basic Metrics) will be displayed as expected, while the advanced dashboards will not. 
-

### Historical Runs
**A historical run of a blueprint is an actual execution of the data collection and transformation [tasks](Glossary.md#tasks) defined in the blueprint at its creation.** A list of historical runs of a blueprint is the entire running history of that blueprint, whether executed automatically or manually. Historical runs can be triggered in three ways:
- By the blueprint automatically according to its schedule in the Regular Mode of the Configuration UI
- By running the JSON in the Advanced Mode of the Configuration UI
- By calling the API `/pipelines` endpoint manually

However, the name Historical Runs is only used in the Configuration UI. In the DevLake API, they are called [pipelines](Glossary.md#pipelines).

## In Configuration UI (Advanced Mode) and API

The following terms do not appear in the Regular Mode of the Configuration UI for the sake of simplicity, but they can be very useful if you want to learn about the underlying framework of DevLake or use the Advanced Mode and the DevLake API.

### Data Plugins
**A data plugin is a specific module that syncs or transforms data.** There are two types of data plugins: Data Collection Plugins and Data Transformation Plugins.

Data Collection Plugins pull data from one or more data sources. DevLake supports 8 data plugins in this category: `ae`, `feishu`, `gitextractor`, `github`, `gitlab`, `jenkins`, `jira` and `tapd`.

Data Transformation Plugins transform the data pulled by other Data Collection Plugins. `refdiff` is currently the only plugin in this category.

Although the names of the data plugins are not displayed in the regular mode of DevLake Configuration UI, they can be used directly in JSON in the Advanced Mode.

For detailed information about the relationship between data sources and data plugins, please refer to [Supported Data Sources](./SupportedDataSources.md).


### Pipelines
**A pipeline is an orchestration of [tasks](Glossary.md#tasks) of data `collection`, `extraction`, `conversion` and `enrichment`, defined in the DevLake API.** A pipeline is composed of one or multiple [stages](Glossary.md#stages) that are executed in a sequential order. Any error occurring during the execution of any stage, task or subtask will cause the pipeline to fail immediately.

The composition of a pipeline is explained as follows:
![Blueprint ERD](/img/Glossary/pipeline-erd.svg)
Notice: **You can manually orchestrate the pipeline in Configuration UI Advanced Mode and the DevLake API; whereas in Configuration UI regular mode, an optimized pipeline orchestration will be automatically generated for you.**


### Stages
**A stage is a collection of tasks performed by data plugins.** Stages are executed in a sequential order in a pipeline.

### Tasks
**A task is a collection of [subtasks](Glossary.md#subtasks) that perform any of the `collection`, `extraction`, `conversion` and `enrichment` jobs of a particular data plugin.** Within a stage, tasks are executed in parallel.

### Subtasks
**A subtask is the minimal work unit in a pipeline that performs any of the four roles: `Collectors`, `Extractors`, `Converters` and `Enrichers`.** Subtasks are executed in sequential order.
-- `Collectors`: Collect raw data from data sources, normally via DevLake API and stored into `raw data table` -- `Extractors`: Extract data from `raw data table` to `domain layer tables` -- `Converters`: Convert data from `tool layer tables` into `domain layer tables` -- `Enrichers`: Enrich data from one domain to other domains. For instance, the Fourier Transformation can examine `issue_changelog` to show time distribution of an issue on every assignee. diff --git a/versioned_docs/version-v0.14/Metrics/AddedLinesOfCode.md b/versioned_docs/version-v0.14/Metrics/AddedLinesOfCode.md deleted file mode 100644 index 2921ea65bea..00000000000 --- a/versioned_docs/version-v0.14/Metrics/AddedLinesOfCode.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "Added Lines of Code" -description: > - Added Lines of Code -sidebar_position: 7 ---- - -## What is this metric? -The accumulated number of added lines of code. - -## Why is it important? -1. identify potential bottlenecks that may affect the output -2. Encourage the team to implement a development model that matches the business requirements; develop excellent coding habits - -## Which dashboard(s) does it exist in -N/A - -## How is it calculated? -This metric is calculated by summing the additions of commits in the given data range. - -Data Sources Required - -This metric relies on commits collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - - -## How to improve? -1. From the project/team dimension, observe the accumulated change in Added lines to assess the team activity and code growth rate -2. From version cycle dimension, observe the active time distribution of code changes, and evaluate the effectiveness of project development model. -3. From the member dimension, observe the trend and stability of code output of each member, and identify the key points that affect code output by comparison. diff --git a/versioned_docs/version-v0.14/Metrics/BugAge.md b/versioned_docs/version-v0.14/Metrics/BugAge.md deleted file mode 100644 index 66cdcbad547..00000000000 --- a/versioned_docs/version-v0.14/Metrics/BugAge.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "Bug Age" -description: > - Bug Age -sidebar_position: 9 ---- - -## What is this metric? -The amount of time it takes a bug to fix. - -## Why is it important? -1. Help the team to establish an effective hierarchical response mechanism for bugs. Focus on the resolution of important problems in the backlog. -2. Improve team's and individual's bug fixing efficiency. Identify good/to-be-improved practices that affect bug age age - -## Which dashboard(s) does it exist in -- Jira -- GitHub -- Weekly Bug Retro - - -## How is it calculated? -This metric equals to `resolution_date` - `created_date` of issues in type "BUG". - -Data Sources Required - -This metric relies on issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-bug' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Bugs`. - - -## How to improve? -1. Observe the trend of bug age and locate the key reasons. -2. According to the severity level, type (business, functional classification), affected module, source of bugs, count and observe the length of bug age. 
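The Bug Age page above stops at the formula. Below is a minimal Grafana SQL sketch of that calculation, assuming the domain-layer `issues` table with the `type`, `created_date` and `resolution_date` columns referenced in the formula (MySQL syntax, resolved bugs only).

```
SELECT
  date_format(i.resolution_date,'%y/%m') as month,
  -- average age in days of bugs resolved in that month
  avg(timestampdiff(DAY, i.created_date, i.resolution_date)) as "Average Bug Age in Days"
FROM issues i
WHERE
  i.type = 'BUG'
  and i.resolution_date is not null
  -- default time filter variable in Grafana
  and $__timeFilter(i.resolution_date)
GROUP BY 1
ORDER BY 1
```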
\ No newline at end of file diff --git a/versioned_docs/version-v0.14/Metrics/BugCountPer1kLinesOfCode.md b/versioned_docs/version-v0.14/Metrics/BugCountPer1kLinesOfCode.md deleted file mode 100644 index 0c252e530d9..00000000000 --- a/versioned_docs/version-v0.14/Metrics/BugCountPer1kLinesOfCode.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: "Bug Count per 1k Lines of Code" -description: > - Bug Count per 1k Lines of Code -sidebar_position: 12 ---- - -## What is this metric? -Amount of bugs per 1,000 lines of code. - -## Why is it important? -1. Defect drill-down analysis to inform the development of design and code review strategies and to improve the internal QA process -2. Assist teams to locate projects/modules with higher defect severity and density, and clean up technical debts -3. Analyze critical points, identify good/to-be-improved practices that affect defect count or defect rate, to reduce the amount of future defects - -## Which dashboard(s) does it exist in -N/A - - -## How is it calculated? -The number of bugs divided by total accumulated lines of code (additions + deletions) in the given data range. - -Data Sources Required - -This metric relies on -- issues collected from Jira, GitHub or TAPD. -- commits collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -This metric relies on -- "Issue type mapping" in Jira, GitHub or TAPD's transformation rules page to let DevLake know what type(s) of issues can be regarded as bugs. -- "PR-Issue Mapping" in GitHub, GitLab's transformation rules page to let DevLake know the bugs are fixed by which PR/MRs. - - -## How to improve? -1. From the project or team dimension, observe the statistics on the total number of defects, the distribution of the number of defects in each severity level/type/owner, the cumulative trend of defects, and the change trend of the defect rate in thousands of lines, etc. -2. From version cycle dimension, observe the statistics on the cumulative trend of the number of defects/defect rate, which can be used to determine whether the growth rate of defects is slowing down, showing a flat convergence trend, and is an important reference for judging the stability of software version quality -3. From the time dimension, analyze the trend of the number of test defects, defect rate to locate the key items/key points -4. Evaluate whether the software quality and test plan are reasonable by referring to CMMI standard values diff --git a/versioned_docs/version-v0.14/Metrics/BuildCount.md b/versioned_docs/version-v0.14/Metrics/BuildCount.md deleted file mode 100644 index 50352bbc1c1..00000000000 --- a/versioned_docs/version-v0.14/Metrics/BuildCount.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Build Count" -description: > - Build Count -sidebar_position: 15 ---- - -## What is this metric? -The number of successful builds. - -## Why is it important? -1. As a process indicator, it reflects the value flow efficiency of upstream production and research links -2. Identify excellent/to-be-improved practices that impact the build, and drive the team to precipitate reusable tools and mechanisms to build infrastructure for fast and high-frequency delivery - -## Which dashboard(s) does it exist in -- Jenkins - - -## How is it calculated? -This metric is calculated by counting the number of successful CI builds/pipelines/runs in the given data range. - -Data Sources Required - -This metric relies on CI builds/pipelines/runs collected from Jenkins, GitLab or GitHub. 
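For the Build Count metric above, a hedged monthly-count sketch follows. It assumes the domain-layer `cicd_tasks` table and the `result`/`finished_date` columns used by the DORA queries elsewhere in these docs; adjust the filter if your CI tool reports a different success status.

```
SELECT
  date_format(finished_date,'%y/%m') as month,
  count(distinct id) as "Successful Build Count"
FROM cicd_tasks
WHERE
  result = 'SUCCESS'
  and $__timeFilter(finished_date)
GROUP BY 1
ORDER BY 1
```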
- -Transformation Rules Required - -N/A - -## How to improve? -1. From the project dimension, compare the number of builds and success rate by combining the project phase and the complexity of tasks. -2. From the time dimension, analyze the trend of the number of builds and success rate to see if it has improved over time. diff --git a/versioned_docs/version-v0.14/Metrics/BuildDuration.md b/versioned_docs/version-v0.14/Metrics/BuildDuration.md deleted file mode 100644 index 1aa95385fd0..00000000000 --- a/versioned_docs/version-v0.14/Metrics/BuildDuration.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Build Duration" -description: > - Build Duration -sidebar_position: 16 ---- - -## What is this metric? -The duration of successful builds. - -## Why is it important? -1. As a process indicator, it reflects the value flow efficiency of upstream production and research links -2. Identify excellent/to-be-improved practices that impact the build, and drive the team to precipitate reusable tools and mechanisms to build infrastructure for fast and high-frequency delivery - -## Which dashboard(s) does it exist in -- Jenkins - - -## How is it calculated? -This metric is calculated by getting the duration of successful CI builds/pipelines/runs in the given data range. - -Data Sources Required - -This metric relies on CI builds/pipelines/runs collected from Jenkins, GitLab or GitHub. - -Transformation Rules Required - -N/A - -## How to improve? -1. From the project dimension, compare the number of builds and success rate by combining the project phase and the complexity of tasks. -2. From the time dimension, analyze the trend of the number of builds and success rate to see if it has improved over time. diff --git a/versioned_docs/version-v0.14/Metrics/BuildSuccessRate.md b/versioned_docs/version-v0.14/Metrics/BuildSuccessRate.md deleted file mode 100644 index 401086d9632..00000000000 --- a/versioned_docs/version-v0.14/Metrics/BuildSuccessRate.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Build Success Rate" -description: > - Build Success Rate -sidebar_position: 17 ---- - -## What is this metric? -The ratio of successful builds to all builds. - -## Why is it important? -1. As a process indicator, it reflects the value flow efficiency of upstream production and research links -2. Identify excellent/to-be-improved practices that impact the build, and drive the team to precipitate reusable tools and mechanisms to build infrastructure for fast and high-frequency delivery - -## Which dashboard(s) does it exist in -- Jenkins - - -## How is it calculated? -The number of successful builds divided by the total number of builds in the given data range. - -Data Sources Required - -This metric relies on CI builds/pipelines/runs collected from Jenkins, GitLab or GitHub. - -Transformation Rules Required - -N/A - -## How to improve? -1. From the project dimension, compare the number of builds and success rate by combining the project phase and the complexity of tasks. -2. From the time dimension, analyze the trend of the number of builds and success rate to see if it has improved over time. diff --git a/versioned_docs/version-v0.14/Metrics/CFR.md b/versioned_docs/version-v0.14/Metrics/CFR.md deleted file mode 100644 index 91e4d4eb547..00000000000 --- a/versioned_docs/version-v0.14/Metrics/CFR.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: "DORA - Change Failure Rate" -description: > - DORA - Change Failure Rate -sidebar_position: 21 ---- - -## What is this metric? 
-The percentage of changes to the code that result in incidents, rollbacks, or any other type of production failure.
-
-## Why is it important?
-Unlike Deployment Frequency and Lead Time for Changes, which measure throughput, Change Failure Rate measures the stability and quality of software delivery. A high CFR reflects a poor end-user experience, as production failures are relatively frequent.
-
-## Which dashboard(s) does it exist in
-DORA dashboard. See [live demo](https://grafana-lake.demo.devlake.io/grafana/d/qNo8_0M4z/dora?orgId=1).
-
-
-## How is it calculated?
-The number of failures divided by the number of deployments. For example, if there are five deployments in a day and one causes a failure, that is a 20% change failure rate.
-
-Below are the benchmarks for different development teams from Google's report. However, it's difficult to tell which group a team falls into when the team's change failure rate is `18%` or `40%`. Therefore, DevLake provides its own benchmarks to address this problem:
-
-| Groups            | Benchmarks | DevLake Benchmarks |
-| ----------------- | ---------- | ------------------ |
-| Elite performers  | 0%-15%     | 0%-15%             |
-| High performers   | 16%-30%    | 16%-20%            |
-| Medium performers | 16%-30%    | 21%-30%            |
-| Low performers    | 16%-30%    | > 30%              |
-

Source: 2021 Accelerate State of DevOps, Google

- -Data Sources Required - -This metric relies on: -- `Deployments` collected in one of the following ways: - - Open APIs of Jenkins, GitLab, GitHub, etc. - - Webhook for general CI tools. - - Releases and PR/MRs from GitHub, GitLab APIs, etc. -- `Incidents` collected in one of the following ways: - - Issue tracking tools such as Jira, TAPD, GitHub, etc. - - Incident or Service Monitoring tools such as PagerDuty, ServiceNow, etc. - -Transformation Rules Required - -This metric relies on: -- Deployment configuration in Jenkins, GitLab or GitHub transformation rules to let DevLake know what CI builds/jobs can be regarded as `Deployments`. -- Incident configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Incidents`. - -SQL Queries - -If you want to measure the monthly trend of change failure rate as the picture shown below, run the following SQL in Grafana. - -![](/img/Metrics/cfr-monthly.jpeg) - -``` -with _deployments as ( --- get the deployment count each month - SELECT - date_format(finished_date,'%y/%m') as month, - COUNT(distinct id) AS deployment_count - FROM - cicd_tasks - WHERE - type = 'DEPLOYMENT' - and result = 'SUCCESS' - GROUP BY 1 -), - -_incidents as ( --- get the incident count each month - SELECT - date_format(created_date,'%y/%m') as month, - COUNT(distinct id) AS incident_count - FROM - issues - WHERE - type = 'INCIDENT' - GROUP BY 1 -), - -_calendar_months as( --- deal with the month with no incidents - SELECT date_format(CAST((SYSDATE()-INTERVAL (month_index) MONTH) AS date), '%y/%m') as month - FROM ( SELECT 0 month_index - UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 - UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 - UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 - UNION ALL SELECT 10 UNION ALL SELECT 11 - ) month_index - WHERE (SYSDATE()-INTERVAL (month_index) MONTH) > SYSDATE()-INTERVAL 6 MONTH -) - -SELECT - cm.month, - case - when d.deployment_count is null or i.incident_count is null then 0 - else i.incident_count/d.deployment_count end as change_failure_rate -FROM - _calendar_months cm - left join _incidents i on cm.month = i.month - left join _deployments d on cm.month = d.month -ORDER BY 1 -``` - -If you want to measure in which category your team falls into as the picture shown below, run the following SQL in Grafana. - -![](/img/Metrics/cfr-text.jpeg) - -``` -with _deployment_count as ( --- get the deployment deployed within the selected time period in the top-right corner - SELECT - COUNT(distinct id) AS deployment_count - FROM - cicd_tasks - WHERE - type = 'DEPLOYMENT' - and result = 'SUCCESS' - and $__timeFilter(finished_date) -), - -_incident_count as ( --- get the incident created within the selected time period in the top-right corner - SELECT - COUNT(distinct id) AS incident_count - FROM - issues - WHERE - type = 'INCIDENT' - and $__timeFilter(created_date) -) - -SELECT - case - when deployment_count is null or incident_count is null or deployment_count = 0 then NULL - when incident_count/deployment_count <= .15 then "0-15%" - when incident_count/deployment_count <= .20 then "16%-20%" - when incident_count/deployment_count <= .30 then "21%-30%" - else "> 30%" - end as change_failure_rate -FROM - _deployment_count, _incident_count -``` - -## How to improve? 
-- Add unit tests for all new feature -- "Shift left", start QA early and introduce more automated tests -- Enforce code review if it's not strictly executed diff --git a/versioned_docs/version-v0.14/Metrics/CodingTime.md b/versioned_docs/version-v0.14/Metrics/CodingTime.md deleted file mode 100644 index d788474810c..00000000000 --- a/versioned_docs/version-v0.14/Metrics/CodingTime.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "PR Coding Time" -description: > - PR Coding Time -sidebar_position: 2 ---- - -## What is this metric? -The time it takes from the first commit until a PR is issued. - -## Why is it important? -It is recommended that you keep every task on a workable and manageable scale for a reasonably short amount of coding time. The average coding time of most engineering teams is around 3-4 days. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -Divide coding tasks into workable and manageable pieces. diff --git a/versioned_docs/version-v0.14/Metrics/CommitAuthorCount.md b/versioned_docs/version-v0.14/Metrics/CommitAuthorCount.md deleted file mode 100644 index 3be4ad20633..00000000000 --- a/versioned_docs/version-v0.14/Metrics/CommitAuthorCount.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Commit Author Count" -description: > - Commit Author Count -sidebar_position: 14 ---- - -## What is this metric? -The number of commit authors who have committed code. - -## Why is it important? -Take inventory of project/team R&D resource inputs, assess input-output ratio, and rationalize resource deployment. - - -## Which dashboard(s) does it exist in -N/A - - -## How is it calculated? -This metric is calculated by counting the number of commit authors in the given data range. - -Data Sources Required - -This metric relies on commits collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - - -## How to improve? -As a secondary indicator, this helps assess the labor cost of participating in coding. diff --git a/versioned_docs/version-v0.14/Metrics/CommitCount.md b/versioned_docs/version-v0.14/Metrics/CommitCount.md deleted file mode 100644 index ae85af8d2cd..00000000000 --- a/versioned_docs/version-v0.14/Metrics/CommitCount.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: "Commit Count" -description: > - Commit Count -sidebar_position: 6 ---- - -## What is this metric? -The number of commits created. - -## Why is it important? -1. Identify potential bottlenecks that may affect output -2. Encourage R&D practices of small step submissions and develop excellent coding habits - -## Which dashboard(s) does it exist in -- GitHub Release Quality and Contribution Analysis -- Demo-Is this month more productive than last? -- Demo-Commit Count by Author - -## How is it calculated? -This metric is calculated by counting the number of commits in the given data range. - -Data Sources Required - -This metric relies on commits collected from GitHub, GitLab or BitBucket. 
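The Commit Author Count page above ships without a query. A hedged sketch is shown here, assuming the domain-layer `commits` table exposes an `author_email` column (swap in `author_name` or `author_id` if that is what your schema uses); the merge-commit filter mirrors the Commit Count query that follows.

```
SELECT
  count(distinct author_email) as "Commit Author Count"
FROM commits
WHERE
  -- exclude merge commits, as the Commit Count query below does
  message not like '%Merge%'
  and $__timeFilter(authored_date)
```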
- -Transformation Rules Required - -N/A - -SQL Queries - -If you want to see the monthly trend, run the following SQL -``` - with _commits as( - SELECT - DATE_ADD(date(authored_date), INTERVAL -DAY(date(authored_date))+1 DAY) as time, - count(*) as commit_count - FROM commits - WHERE - message not like '%Merge%' - and $__timeFilter(authored_date) - group by 1 - ) - - SELECT - date_format(time,'%M %Y') as month, - commit_count as "Commit Count" - FROM _commits - ORDER BY time -``` - -## How to improve? -1. Identify the main reasons for the unusual number of commits and the possible impact on the number of commits through comparison -2. Evaluate whether the number of commits is reasonable in conjunction with more microscopic workload metrics (e.g. lines of code/code equivalents) diff --git a/versioned_docs/version-v0.14/Metrics/CycleTime.md b/versioned_docs/version-v0.14/Metrics/CycleTime.md deleted file mode 100644 index bbc98349ab8..00000000000 --- a/versioned_docs/version-v0.14/Metrics/CycleTime.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: "PR Cycle Time" -description: > - PR Cycle Time -sidebar_position: 2 ---- - -## What is this metric? -PR Cycle Time is the sum of PR Coding Time, Pickup TIme, Review Time and Deploy Time. It is the total time from the first commit to when the PR is deployed. - -## Why is it important? -PR Cycle Time indicate the overall speed of the delivery progress in terms of PR. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -You can define `deployment` based on your actual practice. For a full list of `deployment`'s definitions that DevLake support, please refer to [Deployment Frequency](/docs/Metrics/DeploymentFrequency.md). - -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Divide coding tasks into workable and manageable pieces; -2. Use DevLake's dashboards to monitor your delivery progress; -3. Have a habit to check for hanging PRs regularly; -4. Set up alerts for your communication tools (e.g. Slack, Lark) when new PRs are issued; -2. Use automated tests for the initial work; -5. Reduce PR size; -6. Analyze the causes for long reviews. \ No newline at end of file diff --git a/versioned_docs/version-v0.14/Metrics/DeletedLinesOfCode.md b/versioned_docs/version-v0.14/Metrics/DeletedLinesOfCode.md deleted file mode 100644 index 218ceae0c54..00000000000 --- a/versioned_docs/version-v0.14/Metrics/DeletedLinesOfCode.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Deleted Lines of Code" -description: > - Deleted Lines of Code -sidebar_position: 8 ---- - -## What is this metric? -The accumulated number of deleted lines of code. - -## Why is it important? -1. identify potential bottlenecks that may affect the output -2. Encourage the team to implement a development model that matches the business requirements; develop excellent coding habits - -## Which dashboard(s) does it exist in -N/A - -## How is it calculated? -This metric is calculated by summing the deletions of commits in the given data range. - -Data Sources Required - -This metric relies on commits collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - -## How to improve? -1. From the project/team dimension, observe the accumulated change in Added lines to assess the team activity and code growth rate -2. 
From version cycle dimension, observe the active time distribution of code changes, and evaluate the effectiveness of project development model. -3. From the member dimension, observe the trend and stability of code output of each member, and identify the key points that affect code output by comparison. diff --git a/versioned_docs/version-v0.14/Metrics/DeployTime.md b/versioned_docs/version-v0.14/Metrics/DeployTime.md deleted file mode 100644 index d908480829f..00000000000 --- a/versioned_docs/version-v0.14/Metrics/DeployTime.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: "PR Deploy Time" -description: > - PR Deploy Time -sidebar_position: 2 ---- - -## What is this metric? -The time it takes from when a PR is merged to when it is deployed. - -## Why is it important? -1. Based on historical data, establish a baseline of the delivery capacity of a single iteration to improve the organization and planning of R&D resources. -2. Evaluate whether the delivery capacity matches the business phase and demand scale. Identify key bottlenecks and reasonably allocate resources. - -## Which dashboard(s) does it exist in? - - -## How is it calculated? -You can define `deployment` based on your actual practice. For a full list of `deployment`'s definitions that DevLake support, please refer to [Deployment Frequency](/docs/Metrics/DeploymentFrequency.md). - -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -## How to improve? - diff --git a/versioned_docs/version-v0.14/Metrics/DeploymentFrequency.md b/versioned_docs/version-v0.14/Metrics/DeploymentFrequency.md deleted file mode 100644 index 90459adc593..00000000000 --- a/versioned_docs/version-v0.14/Metrics/DeploymentFrequency.md +++ /dev/null @@ -1,169 +0,0 @@ ---- -title: "DORA - Deployment Frequency" -description: > - DORA - Deployment Frequency -sidebar_position: 18 ---- - -## What is this metric? -How often an organization deploys code to production or release it to end users. - -## Why is it important? -Deployment frequency reflects the efficiency of a team's deployment. A team that deploys more frequently can deliver the product faster and users' feature requirements can be met faster. - -## Which dashboard(s) does it exist in -DORA dashboard. See [live demo](https://grafana-lake.demo.devlake.io/grafana/d/qNo8_0M4z/dora?orgId=1). - - -## How is it calculated? -Deployment frequency is calculated based on the number of deployment days, not the number of deployments, e.g.,daily, weekly, monthly, yearly. - -Below are the benchmarks for different development teams from Google's report. DevLake uses the same benchmarks. - -| Groups | Benchmarks | DevLake Benchmarks | -| -----------------| --------------------------------------------- | ---------------------------------------------- | -| Elite performers | On-demand (multiple deploys per day) | On-demand | -| High performers | Between once per week and once per month | Between once per week and once per month | -| Medium performers| Between once per month and once every 6 months| Between once per month and once every 6 months | -| Low performers | Fewer than once per six months | Fewer than once per six months | - -

Source: 2021 Accelerate State of DevOps, Google

- - -Data Sources Required - -This metric relies on deployments collected in multiple ways: -- Open APIs of Jenkins, GitLab, GitHub, etc. -- Webhook for general CI tools. -- Releases and PR/MRs from GitHub, GitLab APIs, etc. - -Transformation Rules Required - -This metric relies on the deployment configuration in Jenkins, GitLab or GitHub transformation rules to let DevLake know what CI builds/jobs can be regarded as deployments. - -SQL Queries - -If you want to measure the monthly trend of deployment count as the picture shown below, run the following SQL in Grafana. - -![](/img/Metrics/deployment-frequency-monthly.jpeg) - -``` -with _deployments as ( --- get the deployment count each month - SELECT - date_format(finished_date,'%y/%m') as month, - COUNT(distinct id) AS deployment_count - FROM - cicd_tasks - WHERE - type = 'DEPLOYMENT' - and result = 'SUCCESS' - GROUP BY 1 -), - -_calendar_months as( --- deal with the month with no deployments - SELECT date_format(CAST((SYSDATE()-INTERVAL (month_index) MONTH) AS date), '%y/%m') as month - FROM ( SELECT 0 month_index - UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 - UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 - UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 - UNION ALL SELECT 10 UNION ALL SELECT 11 - ) month_index - WHERE (SYSDATE()-INTERVAL (month_index) MONTH) > SYSDATE()-INTERVAL 6 MONTH -) - -SELECT - cm.month, - case when d.deployment_count is null then 0 else d.deployment_count end as deployment_count -FROM - _calendar_months cm - left join _deployments d on cm.month = d.month -ORDER BY 1 -``` - -If you want to measure in which category your team falls into as the picture shown below, run the following SQL in Grafana. - -![](/img/Metrics/deployment-frequency-text.jpeg) - -``` -with last_few_calendar_months as( --- get the last few months within the selected time period in the top-right corner - SELECT CAST((SYSDATE()-INTERVAL (H+T+U) DAY) AS date) day - FROM ( SELECT 0 H - UNION ALL SELECT 100 UNION ALL SELECT 200 UNION ALL SELECT 300 - ) H CROSS JOIN ( SELECT 0 T - UNION ALL SELECT 10 UNION ALL SELECT 20 UNION ALL SELECT 30 - UNION ALL SELECT 40 UNION ALL SELECT 50 UNION ALL SELECT 60 - UNION ALL SELECT 70 UNION ALL SELECT 80 UNION ALL SELECT 90 - ) T CROSS JOIN ( SELECT 0 U - UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 - UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 - UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 - ) U - WHERE - (SYSDATE()-INTERVAL (H+T+U) DAY) > $__timeFrom() -), - -_days_weeks_deploy as( - SELECT - date(DATE_ADD(last_few_calendar_months.day, INTERVAL -WEEKDAY(last_few_calendar_months.day) DAY)) as week, - MAX(if(deployments.day is not null, 1, 0)) as week_deployed, - COUNT(distinct deployments.day) as days_deployed - FROM - last_few_calendar_months - LEFT JOIN( - SELECT - DATE(finished_date) AS day, - id - FROM cicd_tasks - WHERE - type = 'DEPLOYMENT' - and result = 'SUCCESS') deployments ON deployments.day = last_few_calendar_months.day - GROUP BY week - ), - -_monthly_deploy as( - SELECT - date(DATE_ADD(last_few_calendar_months.day, INTERVAL -DAY(last_few_calendar_months.day)+1 DAY)) as month, - MAX(if(deployments.day is not null, 1, 0)) as months_deployed - FROM - last_few_calendar_months - LEFT JOIN( - SELECT - DATE(finished_date) AS day, - id - FROM cicd_tasks - WHERE - type = 'DEPLOYMENT' - and result = 'SUCCESS') deployments ON deployments.day = last_few_calendar_months.day - GROUP BY month - ), - 
-_median_number_of_deployment_days_per_week as ( - SELECT x.days_deployed as median_number_of_deployment_days_per_week from _days_weeks_deploy x, _days_weeks_deploy y - GROUP BY x.days_deployed - HAVING SUM(SIGN(1-SIGN(y.days_deployed-x.days_deployed)))/COUNT(*) > 0.5 - LIMIT 1 -), - -_median_number_of_deployment_days_per_month as ( - SELECT x.months_deployed as median_number_of_deployment_days_per_month from _monthly_deploy x, _monthly_deploy y - GROUP BY x.months_deployed - HAVING SUM(SIGN(1-SIGN(y.months_deployed-x.months_deployed)))/COUNT(*) > 0.5 - LIMIT 1 -) - -SELECT - CASE - WHEN median_number_of_deployment_days_per_week >= 3 THEN 'On-demand' - WHEN median_number_of_deployment_days_per_week >= 1 THEN 'Between once per week and once per month' - WHEN median_number_of_deployment_days_per_month >= 1 THEN 'Between once per month and once every 6 months' - ELSE 'Fewer than once per six months' END AS 'Deployment Frequency' -FROM _median_number_of_deployment_days_per_week, _median_number_of_deployment_days_per_month -``` - -## How to improve? -- Trunk development. Work in small batches and often merge their work into shared trunks. -- Integrate CI/CD tools for automated deployment -- Improve automated test coverage diff --git a/versioned_docs/version-v0.14/Metrics/IncidentAge.md b/versioned_docs/version-v0.14/Metrics/IncidentAge.md deleted file mode 100644 index 4cd5e60cbb5..00000000000 --- a/versioned_docs/version-v0.14/Metrics/IncidentAge.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "Incident Age" -description: > - Incident Age -sidebar_position: 10 ---- - -## What is this metric? -The amount of time it takes a incident to fix. - -## Why is it important? -1. Help the team to establish an effective hierarchical response mechanism for incidents. Focus on the resolution of important problems in the backlog. -2. Improve team's and individual's incident fixing efficiency. Identify good/to-be-improved practices that affect incident age - -## Which dashboard(s) does it exist in -- Jira -- GitHub - - -## How is it calculated? -This metric equals to `resolution_date` - `created_date` of issues in type "INCIDENT". - -Data Sources Required - -This metric relies on issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-incident' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Incidents`. - - -## How to improve? -1. Observe the trend of incident age and locate the key reasons. -2. According to the severity level, type (business, functional classification), affected module, source of bugs, count and observe the length of incident age. \ No newline at end of file diff --git a/versioned_docs/version-v0.14/Metrics/IncidentCountPer1kLinesOfCode.md b/versioned_docs/version-v0.14/Metrics/IncidentCountPer1kLinesOfCode.md deleted file mode 100644 index 9ad92787780..00000000000 --- a/versioned_docs/version-v0.14/Metrics/IncidentCountPer1kLinesOfCode.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Incident Count per 1k Lines of Code" -description: > - Incident Count per 1k Lines of Code -sidebar_position: 13 ---- - -## What is this metric? -Amount of incidents per 1,000 lines of code. - -## Why is it important? -1. Defect drill-down analysis to inform the development of design and code review strategies and to improve the internal QA process -2. Assist teams to locate projects/modules with higher defect severity and density, and clean up technical debts -3. 
Analyze critical points, identify good/to-be-improved practices that affect defect count or defect rate, to reduce the amount of future defects - -## Which dashboard(s) does it exist in -N/A - - -## How is it calculated? -The number of incidents divided by total accumulated lines of code (additions + deletions) in the given data range. - -Data Sources Required - -This metric relies on -- issues collected from Jira, GitHub or TAPD. -- commits collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -This metric relies on -- "Issue type mapping" in Jira, GitHub or TAPD's transformation rules page to let DevLake know what type(s) of issues can be regarded as incidents. -- "PR-Issue Mapping" in GitHub, GitLab's transformation rules page to let DevLake know the bugs are fixed by which PR/MRs. - -## How to improve? -1. From the project or team dimension, observe the statistics on the total number of defects, the distribution of the number of defects in each severity level/type/owner, the cumulative trend of defects, and the change trend of the defect rate in thousands of lines, etc. -2. From version cycle dimension, observe the statistics on the cumulative trend of the number of defects/defect rate, which can be used to determine whether the growth rate of defects is slowing down, showing a flat convergence trend, and is an important reference for judging the stability of software version quality -3. From the time dimension, analyze the trend of the number of test defects, defect rate to locate the key items/key points -4. Evaluate whether the software quality and test plan are reasonable by referring to CMMI standard values diff --git a/versioned_docs/version-v0.14/Metrics/LeadTimeForChanges.md b/versioned_docs/version-v0.14/Metrics/LeadTimeForChanges.md deleted file mode 100644 index 0c8dfc764bf..00000000000 --- a/versioned_docs/version-v0.14/Metrics/LeadTimeForChanges.md +++ /dev/null @@ -1,167 +0,0 @@ ---- -title: "DORA - Lead Time for Changes" -description: > - DORA - Lead Time for Changes -sidebar_position: 19 ---- - -## What is this metric? -The median amount of time for a commit to be deployed into production. - -## Why is it important? -This metric measures the time it takes to commit code to the production environment and reflects the speed of software delivery. A lower average change preparation time means that your team is efficient at coding and deploying your project. - -## Which dashboard(s) does it exist in -DORA dashboard. See [live demo](https://grafana-lake.demo.devlake.io/grafana/d/qNo8_0M4z/dora?orgId=1). - - -## How is it calculated? -This metric is calculated by the median cycle time of the PRs deployed in a time range. A PR's cycle time is equal to the time a PR was deployed minus the PR's first commit's authored_date. - -![](https://i.imgur.com/edtqmRE.png) - -See the picture above, there were three deployments in the last month: Deploy-1, Deploy-2 and Deploy-3. Six PRs were deployed during the same period. 
- - Median Lead Time for Changes = The median cycle time of PR-1, PR-2, PR-3, PR-4, PR-5, PR-6 - -The way to calculate PR cycle time: -- PR-1 cycle time = Deploy-1's finished_date - PR-1's first commit's authored_date -- PR-2 cycle time = Deploy-2's finished_date - PR-2's first commit's authored_date -- PR-3 cycle time = Deploy-2's finished_date - PR-3's first commit's authored_date -- PR-4 cycle time = Deploy-3's finished_date - PR-4's first commit's authored_date -- PR-5 cycle time = Deploy-3's finished_date - PR-5's first commit's authored_date -- PR-6 cycle time = Deploy-3's finished_date - PR-6's first commit's authored_date - -PR cycle time is pre-calculated when dora plugin is triggered. You can connect to DevLake's database and find it in the field `change_timespan` in [table.pull_requests](https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema/#pull_requests). - - -Below are the benchmarks for different development teams from Google's report. However, it's difficult to tell which group a team falls into when the team's median lead time for changes is `between one week and one month`. Therefore, DevLake provides its own benchmarks to address this problem: - -| Groups | Benchmarks | DevLake Benchmarks -| -----------------| -------------------------------------| --------------------------------| -| Elite performers | Less than one hour | Less than one hour | -| High performers | Between one day and one week | Less than one week | -| Medium performers| Between one month and six months | Between one week and six months | -| Low performers | More than six months | More than six months | - -

Source: 2021 Accelerate State of DevOps, Google

- -Data Sources Required - -This metric relies on deployments collected in multiple ways: -- Open APIs of Jenkins, GitLab, GitHub, etc. -- Webhook for general CI tools. -- Releases and PR/MRs from GitHub, GitLab APIs, etc. - -Transformation Rules Required - -This metric relies on the deployment configuration in Jenkins, GitLab or GitHub transformation rules to let DevLake know what CI builds/jobs can be regarded as deployments. - -SQL Queries - -If you want to measure the monthly trend of median lead time for changes as the picture shown below, run the following SQL in Grafana. - -![](/img/Metrics/lead-time-for-changes-monthly.jpeg) - -``` -with _pr_stats as ( --- get PRs' cycle lead time in each month - SELECT - pr.id, - date_format(pr.merged_date,'%y/%m') as month, - pr.change_timespan as pr_cycle_time - FROM - pull_requests pr - WHERE - pr.merged_date is not null - and pr.change_timespan is not null - and $__timeFilter(pr.merged_date) -), - -_find_median_clt_each_month as ( - SELECT x.month, x.pr_cycle_time as med_change_lead_time - FROM _pr_stats x JOIN _pr_stats y ON x.month = y.month - GROUP BY x.month, x.pr_cycle_time - HAVING SUM(SIGN(1-SIGN(y.pr_cycle_time-x.pr_cycle_time)))/COUNT(*) > 0.5 -), - -_find_clt_rank_each_month as ( - SELECT - *, - rank() over(PARTITION BY month ORDER BY med_change_lead_time) as _rank - FROM - _find_median_clt_each_month -), - -_clt as ( - SELECT - month, - med_change_lead_time - from _find_clt_rank_each_month - WHERE _rank = 1 -), - -_calendar_months as( --- to deal with the month with no incidents - SELECT date_format(CAST((SYSDATE()-INTERVAL (month_index) MONTH) AS date), '%y/%m') as month - FROM ( SELECT 0 month_index - UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 - UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 - UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 - UNION ALL SELECT 10 UNION ALL SELECT 11 - ) month_index - WHERE (SYSDATE()-INTERVAL (month_index) MONTH) > SYSDATE()-INTERVAL 6 MONTH -) - -SELECT - cm.month, - case - when _clt.med_change_lead_time is null then 0 - else _clt.med_change_lead_time/60 end as med_change_lead_time_in_hour -FROM - _calendar_months cm - left join _clt on cm.month = _clt.month -ORDER BY 1 -``` - -If you want to measure in which category your team falls into as the picture shown below, run the following SQL in Grafana. - -![](/img/Metrics/lead-time-for-changes-text.jpeg) - -``` -with _pr_stats as ( --- get PRs' cycle time in the selected period - SELECT - pr.id, - pr.change_timespan as pr_cycle_time - FROM - pull_requests pr - WHERE - pr.merged_date is not null - and pr.change_timespan is not null - and $__timeFilter(pr.merged_date) -), - -_median_change_lead_time as ( --- use median PR cycle time as the median change lead time - SELECT x.pr_cycle_time as median_change_lead_time from _pr_stats x, _pr_stats y - GROUP BY x.pr_cycle_time - HAVING SUM(SIGN(1-SIGN(y.pr_cycle_time-x.pr_cycle_time)))/COUNT(*) > 0.5 - LIMIT 1 -) - -SELECT - CASE - WHEN median_change_lead_time < 60 then "Less than one hour" - WHEN median_change_lead_time < 7 * 24 * 60 then "Less than one week" - WHEN median_change_lead_time < 180 * 24 * 60 then "Between one week and six months" - ELSE "More than six months" - END as median_change_lead_time -FROM _median_change_lead_time -``` - -## How to improve? 
-- Break requirements into smaller, more manageable deliverables -- Optimize the code review process -- "Shift left", start QA early and introduce more automated tests -- Integrate CI/CD tools to automate the deployment process diff --git a/versioned_docs/version-v0.14/Metrics/MTTR.md b/versioned_docs/version-v0.14/Metrics/MTTR.md deleted file mode 100644 index 8fa33fb6b91..00000000000 --- a/versioned_docs/version-v0.14/Metrics/MTTR.md +++ /dev/null @@ -1,158 +0,0 @@ ---- -title: "DORA - Median Time to Restore Service" -description: > - DORA - Median Time to Restore Service -sidebar_position: 20 ---- - -## What is this metric? -The time to restore service after service incidents, rollbacks, or any type of production failure happened. - -## Why is it important? -This metric is essential to measure the disaster control capability of your team and the robustness of the software. - -## Which dashboard(s) does it exist in -DORA dashboard. See [live demo](https://grafana-lake.demo.devlake.io/grafana/d/qNo8_0M4z/dora?orgId=1). - - -## How is it calculated? -MTTR = Total [incident age](./IncidentAge.md) (in hours)/number of incidents. - -If you have three incidents that happened in the given data range, one lasting 1 hour, one lasting 2 hours and one lasting 3 hours. Your MTTR will be: (1 + 2 + 3) / 3 = 2 hours. - -Below are the benchmarks for different development teams from Google's report. However, it's difficult to tell which group a team falls into when the team's median time to restore service is `between one week and six months`. Therefore, DevLake provides its own benchmarks to address this problem: - -| Groups | Benchmarks | DevLake Benchmarks -| -----------------| -------------------------------------| -------------------------------| -| Elite performers | Less than one hour | Less than one hour | -| High performers | Less one day | Less than one day | -| Medium performers| Between one day and one week | Between one day and one week | -| Low performers | More than six months | More than one week | - -

Source: 2021 Accelerate State of DevOps, Google

- -Data Sources Required - -This metric relies on: -- `Deployments` collected in one of the following ways: - - Open APIs of Jenkins, GitLab, GitHub, etc. - - Webhook for general CI tools. - - Releases and PR/MRs from GitHub, GitLab APIs, etc. -- `Incidents` collected in one of the following ways: - - Issue tracking tools such as Jira, TAPD, GitHub, etc. - - Incident or Service Monitoring tools such as PagerDuty, ServiceNow, etc. - -Transformation Rules Required - -This metric relies on: -- Deployment configuration in Jenkins, GitLab or GitHub transformation rules to let DevLake know what CI builds/jobs can be regarded as `Deployments`. -- Incident configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Incidents`. - -SQL Queries - -If you want to measure the monthly trend of median time to restore service as the picture shown below, run the following SQL in Grafana. - -![](/img/Metrics/mttr-monthly.jpeg) - -``` -with _incidents as ( --- get the incident count each month - SELECT - date_format(created_date,'%y/%m') as month, - cast(lead_time_minutes as signed) as lead_time_minutes - FROM - issues - WHERE - type = 'INCIDENT' -), - -_find_median_mttr_each_month as ( - SELECT - x.* - from _incidents x join _incidents y on x.month = y.month - WHERE x.lead_time_minutes is not null and y.lead_time_minutes is not null - GROUP BY x.month, x.lead_time_minutes - HAVING SUM(SIGN(1-SIGN(y.lead_time_minutes-x.lead_time_minutes)))/COUNT(*) > 0.5 -), - -_find_mttr_rank_each_month as ( - SELECT - *, - rank() over(PARTITION BY month ORDER BY lead_time_minutes) as _rank - FROM - _find_median_mttr_each_month -), - -_mttr as ( - SELECT - month, - lead_time_minutes as med_time_to_resolve - from _find_mttr_rank_each_month - WHERE _rank = 1 -), - -_calendar_months as( --- deal with the month with no incidents - SELECT date_format(CAST((SYSDATE()-INTERVAL (month_index) MONTH) AS date), '%y/%m') as month - FROM ( SELECT 0 month_index - UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 - UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 - UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 - UNION ALL SELECT 10 UNION ALL SELECT 11 - ) month_index - WHERE (SYSDATE()-INTERVAL (month_index) MONTH) > SYSDATE()-INTERVAL 6 MONTH -) - -SELECT - cm.month, - case - when m.med_time_to_resolve is null then 0 - else m.med_time_to_resolve/60 end as med_time_to_resolve_in_hour -FROM - _calendar_months cm - left join _mttr m on cm.month = m.month -ORDER BY 1 -``` - -If you want to measure in which category your team falls into as the picture shown below, run the following SQL in Grafana. 
- -![](/img/Metrics/mttr-text.jpeg) - -``` -with _incidents as ( --- get the incidents created within the selected time period in the top-right corner - SELECT - cast(lead_time_minutes as signed) as lead_time_minutes - FROM - issues - WHERE - type = 'INCIDENT' - and $__timeFilter(created_date) -), - -_median_mttr as ( - SELECT - x.lead_time_minutes as med_time_to_resolve - from _incidents x, _incidents y - WHERE x.lead_time_minutes is not null and y.lead_time_minutes is not null - GROUP BY x.lead_time_minutes - HAVING SUM(SIGN(1-SIGN(y.lead_time_minutes-x.lead_time_minutes)))/COUNT(*) > 0.5 - LIMIT 1 -) - -SELECT - case - WHEN med_time_to_resolve < 60 then "Less than one hour" - WHEN med_time_to_resolve < 24 * 60 then "Less than one Day" - WHEN med_time_to_resolve < 7 * 24 * 60 then "Between one day and one week" - ELSE "More than one week" - END as med_time_to_resolve -FROM - _median_mttr -``` - -## How to improve? -- Use automated tools to quickly report failure -- Prioritize recovery when a failure happens -- Establish a go-to action plan to respond to failures immediately -- Reduce the deployment time for failure-fixing diff --git a/versioned_docs/version-v0.14/Metrics/MergeRate.md b/versioned_docs/version-v0.14/Metrics/MergeRate.md deleted file mode 100644 index c8c274338c9..00000000000 --- a/versioned_docs/version-v0.14/Metrics/MergeRate.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: "PR Merge Rate" -description: > - Pull Request Merge Rate -sidebar_position: 12 ---- - -## What is this metric? -The ratio of PRs/MRs that get merged. - -## Why is it important? -1. Code review metrics are process indicators to provide quick feedback on developers' code quality -2. Promote the team to establish a unified coding specification and standardize the code review criteria -3. Identify modules with low-quality risks in advance, optimize practices, and precipitate into reusable knowledge and tools to avoid technical debt accumulation - -## Which dashboard(s) does it exist in -- Jira -- GitHub -- GitLab -- Weekly Community Retro -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -The number of merged PRs divided by the number of all PRs in the given data range. - -Data Sources Required - -This metric relies on PRs/MRs collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - - -## How to improve? -1. From the developer dimension, we evaluate the code quality of developers by combining the task complexity with the metrics related to the number of review passes and review rounds. -2. From the reviewer dimension, we observe the reviewer's review style by taking into account the task complexity, the number of passes and the number of review rounds. -3. From the project/team dimension, we combine the project phase and team task complexity to aggregate the metrics related to the number of review passes and review rounds, and identify the modules with abnormal code review process and possible quality risks. diff --git a/versioned_docs/version-v0.14/Metrics/PRCount.md b/versioned_docs/version-v0.14/Metrics/PRCount.md deleted file mode 100644 index 4521e78617a..00000000000 --- a/versioned_docs/version-v0.14/Metrics/PRCount.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Pull Request Count" -description: > - Pull Request Count -sidebar_position: 11 ---- - -## What is this metric? -The number of pull requests created. - -## Why is it important? -1. 
Code review metrics are process indicators to provide quick feedback on developers' code quality -2. Promote the team to establish a unified coding specification and standardize the code review criteria -3. Identify modules with low-quality risks in advance, optimize practices, and precipitate into reusable knowledge and tools to avoid technical debt accumulation - -## Which dashboard(s) does it exist in -- Jira -- GitHub -- GitLab -- Weekly Community Retro -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -This metric is calculated by counting the number of PRs in the given data range. - -Data Sources Required - -This metric relies on PRs/MRs collected from GitHub, GitLab or BitBucket. - -Transformation Rules Required - -N/A - -## How to improve? -1. From the developer dimension, we evaluate the code quality of developers by combining the task complexity with the metrics related to the number of review passes and review rounds. -2. From the reviewer dimension, we observe the reviewer's review style by taking into account the task complexity, the number of passes and the number of review rounds. -3. From the project/team dimension, we combine the project phase and team task complexity to aggregate the metrics related to the number of review passes and review rounds, and identify the modules with abnormal code review process and possible quality risks. diff --git a/versioned_docs/version-v0.14/Metrics/PRSize.md b/versioned_docs/version-v0.14/Metrics/PRSize.md deleted file mode 100644 index bf6a87d82d9..00000000000 --- a/versioned_docs/version-v0.14/Metrics/PRSize.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "PR Size" -description: > - PR Size -sidebar_position: 2 ---- - -## What is this metric? -The average code changes (in Lines of Code) of PRs in the selected time range. - -## Why is it important? -Small PRs can reduce risks of introducing new bugs and increase code review quality, as problems may often be hidden in big chuncks of code and difficult to identify. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -This metric is calculated by counting the total number of code changes (in LOC) divided by the total number of PRs in the selected time range. - -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Divide coding tasks into workable and manageable pieces; -1. Encourage developers to submit small PRs and only keep related changes in the same PR. diff --git a/versioned_docs/version-v0.14/Metrics/PickupTime.md b/versioned_docs/version-v0.14/Metrics/PickupTime.md deleted file mode 100644 index 07242ae772b..00000000000 --- a/versioned_docs/version-v0.14/Metrics/PickupTime.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "PR Pickup Time" -description: > - PR Pickup Time -sidebar_position: 2 ---- - -## What is this metric? -The time it takes from when a PR is issued until the first comment is added to that PR. - -## Why is it important? -PR Pickup Time shows how engaged your team is in collaborative work by identifying the delay in picking up PRs. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? 
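The PR Pickup Time page leaves this section empty. As a hedged sketch: pickup time is the gap between a PR's creation and the first comment on it. The query below assumes the domain-layer `pull_requests` and `pull_request_comments` tables with the column names shown, which are illustrative rather than verified against the v0.14 schema.

```
with _first_comment as (
  SELECT
    pull_request_id,
    min(created_date) as first_comment_date
  FROM pull_request_comments
  GROUP BY 1
)

SELECT
  avg(timestampdiff(HOUR, pr.created_date, fc.first_comment_date)) as "Average PR Pickup Time in Hours"
FROM pull_requests pr
JOIN _first_comment fc ON fc.pull_request_id = pr.id
WHERE $__timeFilter(pr.created_date)
```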
-Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Use DevLake's dashboard to monitor your delivery progress; -2. Have a habit to check for hanging PRs regularly; -3. Set up alerts for your communication tools (e.g. Slack, Lark) when new PRs are issued. diff --git a/versioned_docs/version-v0.14/Metrics/RequirementCount.md b/versioned_docs/version-v0.14/Metrics/RequirementCount.md deleted file mode 100644 index e9a6bd32981..00000000000 --- a/versioned_docs/version-v0.14/Metrics/RequirementCount.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: "Requirement Count" -description: > - Requirement Count -sidebar_position: 2 ---- - -## What is this metric? -The number of delivered requirements or features. - -## Why is it important? -1. Based on historical data, establish a baseline of the delivery capacity of a single iteration to improve the organization and planning of R&D resources. -2. Evaluate whether the delivery capacity matches the business phase and demand scale. Identify key bottlenecks and reasonably allocate resources. - -## Which dashboard(s) does it exist in -- Jira -- GitHub - - -## How is it calculated? -This metric is calculated by counting the number of delivered issues in type "REQUIREMENT" in the given data range. - -Data Sources Required - -This metric relies on the issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-requirement' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Requirements`. - -SQL Queries - -If you want to see a single count, run the following SQL in Grafana -``` - select - count(*) as "Requirement Count" - from issues i - join board_issues bi on i.id = bi.issue_id - where - i.type = 'REQUIREMENT' - and i.status = 'DONE' - -- this is the default variable in Grafana - and $__timeFilter(i.created_date) - and bi.board_id in ($board_id) -``` - -If you want to see the monthly trend, run the following SQL -``` - SELECT - DATE_ADD(date(i.created_date), INTERVAL -DAYOFMONTH(date(i.created_date))+1 DAY) as time, - count(distinct case when status != 'DONE' then i.id else null end) as "Number of Open Issues", - count(distinct case when status = 'DONE' then i.id else null end) as "Number of Delivered Issues" - FROM issues i - join board_issues bi on i.id = bi.issue_id - join boards b on bi.board_id = b.id - WHERE - i.type = 'REQUIREMENT' - and i.status = 'DONE' - and $__timeFilter(i.created_date) - and bi.board_id in ($board_id) - GROUP by 1 -``` - -## How to improve? -1. Analyze the number of requirements and delivery rate of different time cycles to find the stability and trend of the development process. -2. Analyze and compare the number of requirements delivered and delivery rate of each project/team, and compare the scale of requirements of different projects. -3. Based on historical data, establish a baseline of the delivery capacity of a single iteration (optimistic, probable and pessimistic values) to provide a reference for iteration estimation. -4. Drill down to analyze the number and percentage of requirements in different phases of SDLC. Analyze rationality and identify the requirements stuck in the backlog. 
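Building on the Requirement Count queries above, here is a hedged sketch for the share of delivered requirements over the same tables and Grafana variables.

```
SELECT
  count(distinct case when i.status = 'DONE' then i.id else null end)
    / count(distinct i.id) as "Requirement Delivery Rate"
FROM issues i
JOIN board_issues bi ON i.id = bi.issue_id
WHERE
  i.type = 'REQUIREMENT'
  and $__timeFilter(i.created_date)
  and bi.board_id in ($board_id)
```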
diff --git a/versioned_docs/version-v0.14/Metrics/RequirementDeliveryRate.md b/versioned_docs/version-v0.14/Metrics/RequirementDeliveryRate.md deleted file mode 100644 index eb0a03133d5..00000000000 --- a/versioned_docs/version-v0.14/Metrics/RequirementDeliveryRate.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: "Requirement Delivery Rate" -description: > - Requirement Delivery Rate -sidebar_position: 3 ---- - -## What is this metric? -The ratio of delivered requirements to all requirements. - -## Why is it important? -1. Based on historical data, establish a baseline of the delivery capacity of a single iteration to improve the organization and planning of R&D resources. -2. Evaluate whether the delivery capacity matches the business phase and demand scale. Identify key bottlenecks and reasonably allocate resources. - -## Which dashboard(s) does it exist in -- Jira -- GitHub - - -## How is it calculated? -The number of delivered requirements divided by the total number of requirements in the given data range. - -Data Sources Required - -This metric relies on the issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-requirement' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Requirements`. - - -## How to improve? -1. Analyze the number of requirements and delivery rate of different time cycles to find the stability and trend of the development process. -2. Analyze and compare the number of requirements delivered and delivery rate of each project/team, and compare the scale of requirements of different projects. -3. Based on historical data, establish a baseline of the delivery capacity of a single iteration (optimistic, probable and pessimistic values) to provide a reference for iteration estimation. -4. Drill down to analyze the number and percentage of requirements in different phases of SDLC. Analyze rationality and identify the requirements stuck in the backlog. diff --git a/versioned_docs/version-v0.14/Metrics/RequirementGranularity.md b/versioned_docs/version-v0.14/Metrics/RequirementGranularity.md deleted file mode 100644 index 03bb91767f5..00000000000 --- a/versioned_docs/version-v0.14/Metrics/RequirementGranularity.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "Requirement Granularity" -description: > - Requirement Granularity -sidebar_position: 5 ---- - -## What is this metric? -The average number of story points per requirement. - -## Why is it important? -1. Promote product teams to split requirements carefully, improve requirements quality, help developers understand requirements clearly, deliver efficiently and with high quality, and improve the project management capability of the team. -2. Establish a data-supported workload estimation model to help R&D teams calibrate their estimation methods and more accurately assess the granularity of requirements, which is useful to achieve better issue planning in project management. - -## Which dashboard(s) does it exist in -- Jira -- GitHub - - -## How is it calculated? -The average story points of issues in type "REQUIREMENT" in the given data range. - -Data Sources Required - -This metric relies on issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-requirement' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Requirements`. - - -## How to improve? -1. 
Analyze the story points/requirement lead time of requirements to evaluate whether the ticket size, ie. requirement complexity is optimal. -2. Compare the estimated requirement granularity with the actual situation and evaluate whether the difference is reasonable by combining more microscopic workload metrics (e.g. lines of code/code equivalents) diff --git a/versioned_docs/version-v0.14/Metrics/RequirementLeadTime.md b/versioned_docs/version-v0.14/Metrics/RequirementLeadTime.md deleted file mode 100644 index 74061d63dec..00000000000 --- a/versioned_docs/version-v0.14/Metrics/RequirementLeadTime.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: "Requirement Lead Time" -description: > - Requirement Lead Time -sidebar_position: 4 ---- - -## What is this metric? -The amount of time it takes a requirement to deliver. - -## Why is it important? -1. Analyze key projects and critical points, identify good/to-be-improved practices that affect requirement lead time, and reduce the risk of delays -2. Focus on the end-to-end velocity of value delivery process; coordinate different parts of R&D to avoid efficiency shafts; make targeted improvements to bottlenecks. - -## Which dashboard(s) does it exist in -- Jira -- GitHub -- Community Experience - - -## How is it calculated? -This metric equals to `resolution_date` - `created_date` of issues in type "REQUIREMENT". - -Data Sources Required - -This metric relies on issues collected from Jira, GitHub, or TAPD. - -Transformation Rules Required - -This metric relies on the 'type-requirement' configuration in Jira, GitHub or TAPD transformation rules to let DevLake know what CI builds/jobs can be regarded as `Requirements`. - - -## How to improve? -1. Analyze the trend of requirement lead time to observe if it has improved over time. -2. Analyze and compare the requirement lead time of each project/team to identify key projects with abnormal lead time. -3. Drill down to analyze a requirement's staying time in different phases of SDLC. Analyze the bottleneck of delivery velocity and improve the workflow. \ No newline at end of file diff --git a/versioned_docs/version-v0.14/Metrics/ReviewDepth.md b/versioned_docs/version-v0.14/Metrics/ReviewDepth.md deleted file mode 100644 index 59bcfbe876c..00000000000 --- a/versioned_docs/version-v0.14/Metrics/ReviewDepth.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "PR Review Depth" -description: > - PR Review Depth -sidebar_position: 2 ---- - -## What is this metric? -The average number of comments of PRs in the selected time range. - -## Why is it important? -PR Review Depth (in Comments per RR) is related to the quality of code review, indicating how thorough your team reviews PRs. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - -## How is it calculated? -This metric is calculated by counting the total number of PR comments divided by the total number of PRs in the selected time range. - -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Encourage multiple reviewers to review a PR; -2. Review Depth is an indicator for generally how thorough your PRs are reviewed, but it does not mean the deeper the better. In some cases, spending an excessive amount of resources on reviewing PRs is also not recommended. 
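The `SQL Queries` section above is left blank. As a rough starting point, PR Review Depth could be queried in Grafana with the sketch below; it assumes the domain-layer tables `pull_requests` and `pull_request_comments` (joined on `pull_request_id`) and uses Grafana's `$__timeFilter` macro like the other metric pages, so verify the column names against your own instance.

```
  select
    count(prc.id) / count(distinct pr.id) as "PR Review Depth"
  from pull_requests pr
  left join pull_request_comments prc on prc.pull_request_id = pr.id
  where
    -- this is the default time-range variable in Grafana
    $__timeFilter(pr.created_date)
```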
\ No newline at end of file diff --git a/versioned_docs/version-v0.14/Metrics/ReviewTime.md b/versioned_docs/version-v0.14/Metrics/ReviewTime.md deleted file mode 100644 index 8cfe080b0cc..00000000000 --- a/versioned_docs/version-v0.14/Metrics/ReviewTime.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "PR Review Time" -description: > - PR Review Time -sidebar_position: 2 ---- - -## What is this metric? -The time it takes to complete a code review of a PR before it gets merged. - -## Why is it important? -Code review should be conducted almost in real-time and usually take less than two days. Abnormally long PR Review Time may indicate one or more of the following problems: -1. The PR size is too large that makes it difficult to review. -2. The team is too busy to review code. - -## Which dashboard(s) does it exist in? -- Engineering Throughput and Cycle Time -- Engineering Throughput and Cycle Time - Team View - - -## How is it calculated? -This metric is the time frame between when the first comment is added to a PR, to when the PR is merged. - -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Use DevLake's dashboards to monitor your delivery progress; -2. Use automated tests for the initial work; -3. Reduce PR size; -4. Analyze the causes for long reviews. \ No newline at end of file diff --git a/versioned_docs/version-v0.14/Metrics/TimeToMerge.md b/versioned_docs/version-v0.14/Metrics/TimeToMerge.md deleted file mode 100644 index 04a39225fe0..00000000000 --- a/versioned_docs/version-v0.14/Metrics/TimeToMerge.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: "PR Time To Merge" -description: > - PR Time To Merge -sidebar_position: 2 ---- - -## What is this metric? -The time it takes from when a PR is issued to when it is merged. Essentially, PR Time to Merge = PR Pickup Time + PR Review Time. - -## Why is it important? -The delay of reviewing and waiting to review PRs has large impact on delivery speed, while reasonably short PR Time to Merge can indicate frictionless teamwork. Improving on this metric is the key to reduce PR cycle time. - -## Which dashboard(s) does it exist in? -- GitHub Basic Metrics -- Bi-weekly Community Retro - - -## How is it calculated? -Data Sources Required - -This metric relies on PR/MRs collected from GitHub or GitLab. - -Transformation Rules Required - -N/A - -SQL Queries - - -## How to improve? -1. Use DevLake's dashboards to monitor your delivery progress; -2. Have a habit to check for hanging PRs regularly; -3. Set up alerts for your communication tools (e.g. Slack, Lark) when new PRs are issued; -4. Reduce PR size; -5. Analyze the causes for long reviews. diff --git a/versioned_docs/version-v0.14/Metrics/_category_.json b/versioned_docs/version-v0.14/Metrics/_category_.json deleted file mode 100644 index e944147d528..00000000000 --- a/versioned_docs/version-v0.14/Metrics/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Metrics", - "position": 5, - "link":{ - "type": "generated-index", - "slug": "Metrics" - } -} diff --git a/versioned_docs/version-v0.14/Overview/Architecture.md b/versioned_docs/version-v0.14/Overview/Architecture.md deleted file mode 100755 index 47bd4b48e03..00000000000 --- a/versioned_docs/version-v0.14/Overview/Architecture.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Architecture" -description: > - Understand the architecture of Apache DevLake -sidebar_position: 2 ---- - -## Overview - -

-*(Figure: DevLake Components)*
- -A DevLake installation typically consists of the following components: - -- Config UI: A handy user interface to create, trigger, and debug Blueprints. A Blueprint specifies the where (data connection), what (data scope), how (transformation rule), and when (sync frequency) of a data pipeline. -- API Server: The main programmatic interface of DevLake. -- Runner: The runner does all the heavy-lifting for executing tasks. In the default DevLake installation, it runs within the API Server, but DevLake provides a temporal-based runner (beta) for production environments. -- Database: The database stores both DevLake's metadata and user data collected by data pipelines. DevLake supports MySQL and PostgreSQL as of v0.11. -- Plugins: Plugins enable DevLake to collect and analyze dev data from any DevOps tools with an accessible API. DevLake community is actively adding plugins for popular DevOps tools, but if your preferred tool is not covered yet, feel free to open a GitHub issue to let us know or check out our doc on how to build a new plugin by yourself. -- Dashboards: Dashboards deliver data and insights to DevLake users. A dashboard is simply a collection of SQL queries along with corresponding visualization configurations. DevLake's official dashboard tool is Grafana and pre-built dashboards are shipped in Grafana's JSON format. Users are welcome to swap for their own choice of dashboard/BI tool if desired. - -## Dataflow - -

-*(Figure: DevLake Dataflow)*
- -A typical plugin's dataflow is illustrated below: - -1. The Raw layer stores the API responses from data sources (DevOps tools) in JSON. This saves developers' time if the raw data is to be transformed differently later on. Please note that communicating with data sources' APIs is usually the most time-consuming step. -2. The Tool layer extracts raw data from JSONs into a relational schema that's easier to consume by analytical tasks. Each DevOps tool would have a schema that's tailored to their data structure, hence the name, the Tool layer. -3. The Domain layer attempts to build a layer of abstraction on top of the Tool layer so that analytics logics can be re-used across different tools. For example, GitHub's Pull Request (PR) and GitLab's Merge Request (MR) are similar entities. They each have their own table name and schema in the Tool layer, but they're consolidated into a single entity in the Domain layer, so that developers only need to implement metrics like Cycle Time and Code Review Rounds once against the domain layer schema. - -## Principles - -1. Extensible: DevLake's plugin system allows users to integrate with any DevOps tool. DevLake also provides a dbt plugin that enables users to define their own data transformation and analysis workflows. -2. Portable: DevLake has a modular design and provides multiple options for each module. Users of different setups can freely choose the right configuration for themselves. -3. Robust: DevLake provides an SDK to help plugins efficiently and reliably collect data from data sources while respecting their API rate limits and constraints. - -
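To make the three layers concrete, the sketch below inspects the same kind of record at each stage. The raw table name follows the `_raw_<plugin>_api_*` convention used elsewhere in these docs; the tool-layer name `_tool_github_issues` is assumed here purely for illustration and may differ in your deployment, while `issues` is the domain-layer entity that dashboards query.

```
-- Raw layer: the original API response stored as JSON
select * from _raw_github_api_issues limit 1;

-- Tool layer: the same data extracted into a GitHub-specific relational table (name assumed)
select * from _tool_github_issues limit 1;

-- Domain layer: the tool-agnostic entity consumed by metrics and dashboards
select id, title, status from issues limit 1;
```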
diff --git a/versioned_docs/version-v0.14/Overview/Introduction.md b/versioned_docs/version-v0.14/Overview/Introduction.md deleted file mode 100755 index 6bb1194941f..00000000000 --- a/versioned_docs/version-v0.14/Overview/Introduction.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Introduction" -description: General introduction of Apache DevLake -sidebar_position: 1 ---- - -## What is Apache DevLake? -Apache DevLake (Incubating) is an open-source dev data platform that ingests, analyzes, and visualizes the fragmented data from DevOps tools to extract insights for engineering excellence, developer experience, and community growth. - -Apache DevLake is designed for developer teams looking to make better sense of their development process and to bring a more data-driven approach to their own practices. You can ask Apache DevLake many questions regarding your development process. Just connect and query. - -## What can be accomplished with DevLake? -1. Collect DevOps data across the entire Software Development Life Cycle (SDLC) and connect the siloed data with a standard [data model](../DataModels/DevLakeDomainLayerSchema.md). -2. Visualize out-of-the-box [engineering metrics](../Metrics) in a series of use-case driven dashboards -3. Easily extend DevLake to support your data sources, metrics, and dashboards with a flexible [framework](Architecture.md) for data collection and ETL (Extract, Transform, Load). - -## How do I use DevLake? -### 1. Set up DevLake -You can easily set up Apache DevLake by following our step-by step instructions for [Docker Compose setup](../GettingStarted/DockerComposeSetup.md) or [Kubernetes setup](../GettingStarted/KubernetesSetup.md). - -### 2. Create a Blueprint -The DevLake Configuration UI will guide you through the process (a Blueprint) to define the data connections, data scope, transformation and sync frequency of the data you wish to collect. - -![img](/img/Introduction/userflow1.svg) - -### 3. Track the Blueprint's progress -You can track the progress of the Blueprint you have just set up. - -![img](/img/Introduction/userflow2.svg) - -### 4. View the pre-built dashboards -Once the first run of the Blueprint is completed, you can view the corresponding dashboards. - -![img](/img/Introduction/userflow3.png) - -### 5. Customize the dashboards with SQL -If the pre-built dashboards are limited for your use cases, you can always customize or create your own metrics or dashboards with SQL. - -![img](/img/Introduction/userflow4.png) diff --git a/versioned_docs/version-v0.14/Overview/Roadmap.md b/versioned_docs/version-v0.14/Overview/Roadmap.md deleted file mode 100644 index fac85f876e0..00000000000 --- a/versioned_docs/version-v0.14/Overview/Roadmap.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "Roadmap" -description: > - The goals and roadmap for DevLake in 2022 -sidebar_position: 3 ---- - -## Goals - -DevLake has joined the Apache Incubator and is aiming to become a top-level project. To achieve this goal, the Apache DevLake (Incubating) community will continue to make efforts in helping development teams to analyze and improve their engineering productivity. In the 2022 Roadmap, we have summarized three major goals followed by the feature breakdown to invite the broader community to join us and grow together. - -1. As a dev data analysis application, discover and implement 3 (or even more!) 
usage scenarios: - - A collection of metrics to track the contribution, quality and growth of open-source projects - - DORA metrics for DevOps engineers - - To be decided ([let us know](https://join.slack.com/t/devlake-io/shared_invite/zt-17b6vuvps-x98pqseoUagM7EAmKC82xQ) if you have any suggestions!) -2. As dev data infrastructure, provide robust data collection modules, customizable data models, and data extensibility. -3. Design better user experience for end-users and contributors. - -## Feature Breakdown - -Apache DevLake is currently under rapid development. You are more than welcome to use the following table to explore your intereted features and make contributions. We deeply appreciate the collective effort of our community to make this project possible! - -| Category | Features | -| --------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| More data sources across different [DevOps domains](../DataModels/DevLakeDomainLayerSchema.md) (Goal No.1 & 2) | Features in **bold** are of higher priority

Issue/Task Management:
  • **Jira server** [#886 (closed)](https://github.com/apache/incubator-devlake/issues/886)
  • **Jira data center** [#1687 (closed)](https://github.com/apache/incubator-devlake/issues/1687)
  • GitLab Issues [#715 (closed)](https://github.com/apache/incubator-devlake/issues/715)
  • Trello [#1881 (closed)](https://github.com/apache/incubator-devlake/issues/1881)
  • **TAPD** [#560 (closed)](https://github.com/apache/incubator-devlake/issues/560)
  • Teambition [#1882 (open)](https://github.com/apache/incubator-devlake/issues/1882)
  • Ones [#1884 (closed)](https://github.com/apache/incubator-devlake/issues/1884)
Source Code Management:
  • BitBucket [#2100 (closed)](https://github.com/apache/incubator-devlake/issues/2100)
  • Gitee [#1883 (closed)](https://github.com/apache/incubator-devlake/issues/1883)
  • Coder [#3447 (open)](https://github.com/apache/incubator-devlake/issues/3447)
Code Review:
  • Gerrit
CI/CD:
  • GitHub Action
  • ArgoCI [#2585 (closed)](https://github.com/apache/incubator-devlake/issues/2585)
  • ArgoCD
  • TeamCity
Quality:
  • **SonarQube** [#2305 (open)](https://github.com/apache/incubator-devlake/issues/2305)
  • Coverity
QA:
  • Selenium
  • Junit
  • JMeter
  • Cucumber Test
Calendar:
  • Google Calendar
  • Zoom Calendar
  • Lark Calendar
  • Tencent Calendar
OSS Community Metrics:
  • GitHub stars, clones, watches
| -| Improved data collection, [data models](../DataModels/DevLakeDomainLayerSchema.md) and data extensibility (Goal No.2) | Data Collection:
  • Complete the logging system
  • Implement a good error handling mechanism during data collection
Data Models:
  • Introduce DBT to allow users to create and modify the domain layer schema. [#1479 (closed)](https://github.com/apache/incubator-devlake/issues/1479)
  • Design the data models for 5 new domains; please refer to the data models of the tools under each domain (see the cell above):
    • Quality
    • Testing
    • Calendar
    • Documentation
    • OSS Community Metrics
  • Polish the data models for [existing domains](../DataModels/DevLakeDomainLayerSchema.md): Issue/Task Management, Source Code Management, Code Review and CI/CD.
Data Extensibility:
  • Enhance the performance of data applications under large-scale usage scenarios
  • Support OLAP databases for more flexible data storage options
| -| Better user experience (Goal No.3) | For new users:
  • Iterate on a clearer step-by-step guide to improve the pre-configuration experience.
  • Provide a new Config UI to reduce frictions for data configuration [#1700 (in-progress)](https://github.com/apache/incubator-devlake/issues/1700)
  • Showcase dashboard live demos to let users explore and learn about the dashboards. [#1784 (open)](https://github.com/apache/incubator-devlake/issues/1784)
For returning users:
  • Provide detailed guides to help users customize Grafana dashboards.
  • Work on the documentation for advanced features in the Config UI, such as the usage of Advanced Mode and replacements of old auth tokens for data connections.
For contributors:
  • Add more guides to set up DevLake on different operating systems.
  • Provide clearer docs for contributors to get on board easier.
  • Add Swagger to document API [#292 closed](https://github.com/apache/incubator-devlake/issues/292)
  • More docs about raw/tool/domain data models
| - -## How to Influence the Roadmap - -A roadmap is only useful when it captures real user needs. We are glad to hear from you if you have specific use cases, feedback, or ideas. You can submit an issue to let us know! -Also, if you plan to work (or are already working) on a new or existing feature, tell us, so that we can update the roadmap accordingly. We are happy to share knowledge and context to help your feature land successfully. -


diff --git a/versioned_docs/version-v0.14/Overview/_category_.json b/versioned_docs/version-v0.14/Overview/_category_.json deleted file mode 100644 index 3e819ddc4ff..00000000000 --- a/versioned_docs/version-v0.14/Overview/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Overview", - "position": 1, - "link":{ - "type": "generated-index", - "slug": "Overview" - } -} diff --git a/versioned_docs/version-v0.14/Plugins/_category_.json b/versioned_docs/version-v0.14/Plugins/_category_.json deleted file mode 100644 index bbea8d5910c..00000000000 --- a/versioned_docs/version-v0.14/Plugins/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Plugins", - "position": 9, - "link":{ - "type": "generated-index", - "slug": "Plugins" - } -} diff --git a/versioned_docs/version-v0.14/Plugins/bitbucket.md b/versioned_docs/version-v0.14/Plugins/bitbucket.md deleted file mode 100644 index 3543b06dac9..00000000000 --- a/versioned_docs/version-v0.14/Plugins/bitbucket.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: "BitBucket (WIP)" -description: > - BitBucket Plugin ---- - - - -## Summary - -This plugin collects various entities from Bitbucket, including pull requests, issues, comments, pipelines, git commits, and etc. - -As of v0.14.2, `bitbucket` plugin can only be invoked through DevLake API. Its support in Config-UI is WIP. - - -## Usage via DevLake API - -> Note: Please replace the `http://localhost:8080` in the sample requests with your actual DevLake API endpoint. For how to view DevLake API's swagger documentation, please refer to the "Using DevLake API" section of [Developer Setup](../DeveloperManuals/DeveloperSetup.md). - - -1. Create a Bitbucket data connection: `POST /plugins/bitbucket/connections`. Please see a sample request below: - -``` -curl --location --request POST 'http://localhost:8080/plugins/bitbucket/connections' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "endpoint": "https://api.bitbucket.org/2.0/", - "username": "", - "password": "", - "name": "Bitbucket Cloud" -}' -``` - -2. Create a blueprint to collect data from Bitbucket: `POST /blueprints`. Please see a sample request below: - -``` -curl --location --request POST 'http://localhost:8080/blueprints' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "enable": true, - "mode": "NORMAL", - "name": "My Bitbucket Blueprint", - "cronConfig": "", - "isManual": false, - "plan": [[]], - "settings": { - "connections": [ - { - "plugin": "bitbucket", - "connectionId": 1, - "scope": [ - { - "entities": [ - "CODE", - "TICKET", - "CODEREVIEW", - "CROSS" - ], - "options": { - "owner": "", - "repo": "" - } - } - ] - } - ], - "version": "1.0.0" - } -}' -``` - -3. [Optional] Trigger the blueprint manually: `POST /blueprints/{blueprintId}/trigger`. Run this step if you want to trigger the newly created blueprint right away. See an example request below: - -``` -curl --location --request POST 'http://localhost:8080/blueprints//trigger' \ ---header 'Content-Type: application/json' -``` diff --git a/versioned_docs/version-v0.14/Plugins/customize.md b/versioned_docs/version-v0.14/Plugins/customize.md deleted file mode 100644 index 1516160b594..00000000000 --- a/versioned_docs/version-v0.14/Plugins/customize.md +++ /dev/null @@ -1,99 +0,0 @@ ---- -title: "Customize" -description: > - Customize Plugin ---- - - - -## Summary - -This plugin provides users the ability to create/delete columns and extract data from certain raw layer tables. 
-The columns created with this plugin must be start with the prefix `x_` - -**NOTE:** All columns created by this plugin are of the datatype `VARCHAR(255)` - -## Sample Request -To extract data, switch to `Advanced Mode` on the the first step of creating a Blueprint and paste a JSON config as the following: - -The example below demonstrates how to extract status name from the table `_raw_jira_api_issues` and assign it to the `x_test` column of the table `issues`. -We leverage the package `https://github.com/tidwall/gjson` to extract value from the JSON. For the extraction syntax, please refer to this [docs](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) - -- `table`: domain layer table name -- `rawDataTable`: raw layer table, from which we extract values by json path -- `rawDataParams`: the filter to select records from the raw layer table (**The value should be a string not an object**) -- `mapping` the extraction rule; the key is the extension field name; the value is json path - -```json -[ - [ - { - "plugin":"customize", - "options":{ - "transformationRules":[ - { - "table":"issues", - "rawDataTable":"_raw_jira_api_issues", - "rawDataParams":"{\"ConnectionId\":1,\"BoardId\":8}", - "mapping":{ - "x_test":"fields.status.name" - } - } - ] - } - } - ] -] -``` - -You can also trigger data extraction by making a POST request to `/pipelines`. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "extract fields", - "plan": [ - [ - { - "plugin": "customize", - "options": { - "transformationRules": [ - { - "table": "issues", - "rawDataTable": "_raw_jira_api_issues", - "rawDataParams": "{\"ConnectionId\":1,\"BoardId\":8}", - "mapping": { - "x_test": "fields.status.name" - } - } - ] - } - } - ] - ] -} -' -``` -Get all extension columns(start with `x_`) of the table `issues` -> GET /plugins/customize/issues/fields - -response -```json -[ - { - "columnName": "x_test", - "columnType": "VARCHAR(255)" - } -] -``` -Create extension column `x_test` for the table `issues` - -> POST /plugins/customize/issues/fields -```json -{ - "name": "x_test" -} -``` -Drop the column `x_text` for the table `issues` -> DELETE /plugins/customize/issues/fields/x_test diff --git a/versioned_docs/version-v0.14/Plugins/dbt.md b/versioned_docs/version-v0.14/Plugins/dbt.md deleted file mode 100644 index 059bf12c61d..00000000000 --- a/versioned_docs/version-v0.14/Plugins/dbt.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "DBT" -description: > - DBT Plugin ---- - - -## Summary - -dbt (data build tool) enables analytics engineers to transform data in their warehouses by simply writing select statements. dbt handles turning these select statements into tables and views. -dbt does the T in ELT (Extract, Load, Transform) processes – it doesn’t extract or load data, but it’s extremely good at transforming data that’s already loaded into your warehouse. - -## User setup -- If you plan to use this product, you need to install some environments first. - -#### Required Packages to Install -- [python3.7+](https://www.python.org/downloads/) -- [dbt-mysql](https://pypi.org/project/dbt-mysql/#configuring-your-profile) - -#### Commands to run or create in your terminal and the dbt project -1. pip install dbt-mysql -2. dbt init demoapp (demoapp is project name) -3. create your SQL transformations and data models - -## Convert Data By DBT - -Use the Raw JSON API to manually initiate a run using **cURL** or graphical API tool such as **Postman**. 
`POST` the following request to the DevLake API Endpoint. - -```json -[ - [ - { - "plugin": "dbt", - "options": { - "projectPath": "/Users/abeizn/demoapp", - "projectName": "demoapp", - "projectTarget": "dev", - "selectedModels": ["my_first_dbt_model","my_second_dbt_model"], - "projectVars": { - "demokey1": "demovalue1", - "demokey2": "demovalue2" - } - } - } - ] -] -``` - -- `projectPath`: the absolute path of the dbt project. (required) -- `projectName`: the name of the dbt project. (required) -- `projectTarget`: this is the default target your dbt project will use. (optional) -- `selectedModels`: a model is a select statement. Models are defined in .sql files, and typically in your models directory. (required) -And selectedModels accepts one or more arguments. Each argument can be one of: -1. a package name, runs all models in your project, example: example -2. a model name, runs a specific model, example: my_fisrt_dbt_model -3. a fully-qualified path to a directory of models. - -- `projectVars`: variables to parametrize dbt models. (optional) -example: -`select * from events where event_type = '{{ var("event_type") }}'` -To execute this SQL query in your model, you need set a value for `event_type`. - -### Resources: -- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) -- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers - -
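For reference, each name in `selectedModels` maps to a `.sql` file under the project's `models/` directory. Below is a minimal, illustrative model that consumes one of the `projectVars` shown above; the table and variable names are placeholders rather than anything DevLake requires.

```
-- models/my_first_dbt_model.sql (illustrative sketch)
-- "demokey1" is injected through projectVars in the pipeline options above
select *
from issues
where type = '{{ var("demokey1") }}'
```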


diff --git a/versioned_docs/version-v0.14/Plugins/feishu.md b/versioned_docs/version-v0.14/Plugins/feishu.md deleted file mode 100644 index 306f3bd9893..00000000000 --- a/versioned_docs/version-v0.14/Plugins/feishu.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: "Feishu" -description: > - Feishu Plugin ---- - -## Summary - -This plugin collects Feishu meeting data through [Feishu Openapi](https://open.feishu.cn/document/home/user-identity-introduction/introduction). - -## Configuration - -In order to fully use this plugin, you will need to get `app_id` and `app_secret` from a Feishu administrator (for help on App info, please see [official Feishu Docs](https://open.feishu.cn/document/ukTMukTMukTM/ukDNz4SO0MjL5QzM/auth-v3/auth/tenant_access_token_internal)), - -A connection should be created before you can collection any data. Currently, this plugin supports creating connection by requesting `connections` API: - -``` -curl 'http://localhost:8080/plugins/feishu/connections' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "feishu", - "endpoint": "https://open.feishu.cn/open-apis/vc/v1/", - "proxy": "http://localhost:1080", - "rateLimitPerHour": 20000, - "appId": "", - "appSecret": "" -} -' -``` - -## Collect data from Feishu - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - - -```json -[ - [ - { - "plugin": "feishu", - "options": { - "connectionId": 1, - "numOfDaysToCollect" : 80 - } - } - ] -] -``` - -> `numOfDaysToCollect`: The number of days you want to collect - -> `rateLimitPerSecond`: The number of requests to send(Maximum is 8) - -You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "feishu 20211126", - "plan": [[{ - "plugin": "feishu", - "options": { - "connectionId": 1, - "numOfDaysToCollect" : 80 - } - }]] -} -' -``` diff --git a/versioned_docs/version-v0.14/Plugins/gitee.md b/versioned_docs/version-v0.14/Plugins/gitee.md deleted file mode 100644 index 79c3c907dac..00000000000 --- a/versioned_docs/version-v0.14/Plugins/gitee.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: "Gitee(WIP)" -description: > - Gitee Plugin ---- - -## Summary - -This plugin collects `Gitee` data through [Gitee Openapi](https://gitee.com/api/v5/swagger). - -## Configuration - -In order to fully use this plugin, you will need to get `token` on the Gitee website. - -A connection should be created before you can collection any data. Currently, this plugin supports creating connection by requesting `connections` API: - -``` -curl 'http://localhost:8080/plugins/gitee/connections' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee", - "endpoint": "https://gitee.com/api/v5/", - "proxy": "http://localhost:1080", - "rateLimitPerHour": 20000, - "token": "" -} -' -``` - - - -## Collect data from Gitee - -In order to collect data, you have to compose a JSON looks like following one, and send it by selecting `Advanced Mode` on `Create Pipeline Run` page: - -1. Configure-UI Mode -```json -[ - [ - { - "plugin": "gitee", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` -and if you want to perform certain subtasks. 
-```json -[ - [ - { - "plugin": "gitee", - "subtasks": ["collectXXX", "extractXXX", "convertXXX"], - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` - -2. Curl Mode: - You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee 20211126", - "plan": [[{ - "plugin": "gitee", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` -and if you want to perform certain subtasks. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitee 20211126", - "plan": [[{ - "plugin": "gitee", - "subtasks": ["collectXXX", "extractXXX", "convertXXX"], - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` diff --git a/versioned_docs/version-v0.14/Plugins/gitextractor.md b/versioned_docs/version-v0.14/Plugins/gitextractor.md deleted file mode 100644 index d4c10ca7770..00000000000 --- a/versioned_docs/version-v0.14/Plugins/gitextractor.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: "GitExtractor" -description: > - GitExtractor Plugin ---- - -## Summary -This plugin extracts commits and references from a remote or local git repository. It then saves the data into the database or csv files. - -## Steps to make this plugin work - -1. Use the Git repo extractor to retrieve data about commits and branches from your repository. -2. Use the GitHub plugin to retrieve data about Github issues and PRs from your repository. -NOTE: you can run only one issue collection stage as described in the Github Plugin README. -3. Use the [RefDiff](./refdiff.md) plugin to calculate version diff, which will be stored in `refs_commits_diffs` table. - -## Sample Request - -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "git repo extractor", - "plan": [ - [ - { - "Plugin": "gitextractor", - "Options": { - "url": "https://github.com/merico-dev/lake.git", - "repoId": "github:GithubRepo:384111310" - } - } - ] - ] -} -' -``` -- `url`: the location of the git repository. It should start with `http`/`https` for a remote git repository and with `/` for a local one. -- `repoId`: column `id` of `repos`. - Note : For GitHub, to find the repo id run `$("meta[name=octolytics-dimension-repository_id]").getAttribute('content')` in browser console. -- `proxy`: optional, http proxy, e.g. `http://your-proxy-server.com:1080`. -- `user`: optional, for cloning private repository using HTTP/HTTPS -- `password`: optional, for cloning private repository using HTTP/HTTPS -- `privateKey`: optional, for SSH cloning, base64 encoded `PEM` file -- `passphrase`: optional, passphrase for the private key - - -## Standalone Mode - -You call also run this plugin in a standalone mode without any DevLake service running using the following command: - -``` -go run plugins/gitextractor/main.go -url https://github.com/merico-dev/lake.git -id github:GithubRepo:384111310 -db "merico:merico@tcp(127.0.0.1:3306)/lake?charset=utf8mb4&parseTime=True" -``` - -For more options (e.g., saving to a csv file instead of a db), please read `plugins/gitextractor/main.go`. - -## Development - -This plugin depends on `libgit2`, you need to install version 1.3.0 in order to run and debug this plugin on your local -machine. 
[Click here](./refdiff.md#Development) for a brief guide. - -
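Once a run completes, you can roughly confirm that commits and refs were saved by querying the domain layer, as in the sketch below. It assumes the `repo_commits` and `refs` tables and reuses the `repoId` from the sample request above; adjust the id to your own repository.

```
-- commits linked to the repo through the repo_commits relation table
select count(*) as commit_count
from repo_commits
where repo_id = 'github:GithubRepo:384111310';

-- branches/tags extracted for the repo; ref ids are prefixed with the repo id
select id, ref_type
from refs
where id like 'github:GithubRepo:384111310:%';
```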


diff --git a/versioned_docs/version-v0.14/Plugins/github-connection-in-config-ui.png b/versioned_docs/version-v0.14/Plugins/github-connection-in-config-ui.png deleted file mode 100644 index 5359fb1551b..00000000000 Binary files a/versioned_docs/version-v0.14/Plugins/github-connection-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.14/Plugins/github.md b/versioned_docs/version-v0.14/Plugins/github.md deleted file mode 100644 index fd804a14569..00000000000 --- a/versioned_docs/version-v0.14/Plugins/github.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "GitHub" -description: > - GitHub Plugin ---- - - - -## Summary - -This plugin gathers data from `GitHub` to display information to the user in `Grafana`. We can help tech leaders answer such questions as: - -- Is this month more productive than last? -- How fast do we respond to customer requirements? -- Was our quality improved or not? - -## Metrics - -Here are some examples metrics using `GitHub` data: -- Avg Requirement Lead Time By Assignee -- Bug Count per 1k Lines of Code -- Commit Count over Time - -## Screenshot - -![image](/img/Plugins/github-demo.png) - - -## Configuration -- Configuring GitHub via [config-ui](/UserManuals/ConfigUI/GitHub.md). - -## Sample Request -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -```json -[ - [ - { - "plugin": "github", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - } - ] -] -``` - -You can also trigger data collection by making a POST request to `/pipelines`. -``` -curl 'http://localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "github 20211126", - "plan": [[{ - "plugin": "github", - "options": { - "connectionId": 1, - "repo": "lake", - "owner": "merico-dev" - } - }]] -} -' -``` diff --git a/versioned_docs/version-v0.14/Plugins/gitlab-connection-in-config-ui.png b/versioned_docs/version-v0.14/Plugins/gitlab-connection-in-config-ui.png deleted file mode 100644 index 7aacee8d828..00000000000 Binary files a/versioned_docs/version-v0.14/Plugins/gitlab-connection-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.14/Plugins/gitlab.md b/versioned_docs/version-v0.14/Plugins/gitlab.md deleted file mode 100644 index 05f8ba5ec34..00000000000 --- a/versioned_docs/version-v0.14/Plugins/gitlab.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: "GitLab" -description: > - GitLab Plugin ---- - - -## Metrics - -| Metric Name | Description | -|:----------------------------|:-------------------------------------------------------------| -| Pull Request Count | Number of Pull/Merge Requests | -| Pull Request Pass Rate | Ratio of Pull/Merge Review requests to merged | -| Pull Request Reviewer Count | Number of Pull/Merge Reviewers | -| Pull Request Review Time | Time from Pull/Merge created time until merged | -| Commit Author Count | Number of Contributors | -| Commit Count | Number of Commits | -| Added Lines | Accumulated Number of New Lines | -| Deleted Lines | Accumulated Number of Removed Lines | -| Pull Request Review Rounds | Number of cycles of commits followed by comments/final merge | - -## Configuration -Configuring GitLab via [config-ui](/UserManuals/ConfigUI/GitLab.md). 
- -## Gathering Data with GitLab - -To collect data, you can make a POST request to `/pipelines` - -``` -curl --location --request POST 'localhost:8080/pipelines' \ ---header 'Content-Type: application/json' \ ---data-raw ' -{ - "name": "gitlab 20211126", - "plan": [[{ - "plugin": "gitlab", - "options": { - "projectId": - } - }]] -} -' -``` - -
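The metrics listed above are computed from domain-layer tables once the pipeline finishes. As a rough illustration only, a monthly Commit Count trend could be charted in Grafana with a query like the sketch below; it assumes the domain-layer `commits` table exposes an `authored_date` column.

```
  select
    DATE_FORMAT(c.authored_date, '%Y-%m') as month,
    count(*) as "Commit Count"
  from commits c
  where
    -- Grafana's built-in time-range filter
    $__timeFilter(c.authored_date)
  group by 1
  order by 1
```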


diff --git a/versioned_docs/version-v0.14/Plugins/jenkins.md b/versioned_docs/version-v0.14/Plugins/jenkins.md deleted file mode 100644 index 9bb0177d0b6..00000000000 --- a/versioned_docs/version-v0.14/Plugins/jenkins.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: "Jenkins" -description: > - Jenkins Plugin ---- - -## Summary - -This plugin collects Jenkins data through [Remote Access API](https://www.jenkins.io/doc/book/using/remote-access-api/). It then computes and visualizes various DevOps metrics from the Jenkins data. - -![image](https://user-images.githubusercontent.com/61080/141943122-dcb08c35-cb68-4967-9a7c-87b63c2d6988.png) - -## Metrics - -| Metric Name | Description | -|:-------------------|:------------------------------------| -| Build Count | The number of builds created | -| Build Success Rate | The percentage of successful builds | - -## Configuration - -In order to fully use this plugin, you will need to set various configurations via Dev Lake's `config-ui`. - -### By `config-ui` - -The connection section of the configuration screen requires the following key fields to connect to the Jenkins API. - -## Collect Data From Jenkins - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -```json -[ - [ - { - "plugin": "jenkins", - "options": { - "connectionId": 1 - } - } - ] -] -``` - -## Relationship between job and build - -Build is kind of a snapshot of job. Running job each time creates a build. diff --git a/versioned_docs/version-v0.14/Plugins/jira-connection-config-ui.png b/versioned_docs/version-v0.14/Plugins/jira-connection-config-ui.png deleted file mode 100644 index df2e8e39875..00000000000 Binary files a/versioned_docs/version-v0.14/Plugins/jira-connection-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.14/Plugins/jira-more-setting-in-config-ui.png b/versioned_docs/version-v0.14/Plugins/jira-more-setting-in-config-ui.png deleted file mode 100644 index dffb0c994d2..00000000000 Binary files a/versioned_docs/version-v0.14/Plugins/jira-more-setting-in-config-ui.png and /dev/null differ diff --git a/versioned_docs/version-v0.14/Plugins/jira.md b/versioned_docs/version-v0.14/Plugins/jira.md deleted file mode 100644 index 5b512b77bf3..00000000000 --- a/versioned_docs/version-v0.14/Plugins/jira.md +++ /dev/null @@ -1,295 +0,0 @@ ---- -title: "Jira" -description: > - Jira Plugin ---- - - -## Summary - -This plugin collects Jira data through Jira Cloud REST API. It then computes and visualizes various engineering metrics from the Jira data. - -jira metric display - -## Project Metrics This Covers - -| Metric Name | Description | -|:------------------------------------|:--------------------------------------------------------------------------------------------------| -| Requirement Count | Number of issues with type "Requirement" | -| Requirement Lead Time | Lead time of issues with type "Requirement" | -| Requirement Delivery Rate | Ratio of delivered requirements to all requirements | -| Requirement Granularity | Number of story points associated with an issue | -| Bug Count | Number of issues with type "Bug"
 bugs are found during testing | -| Bug Age | Lead time of issues with type "Bug" | -| Bugs Count per 1k Lines of Code | Amount of bugs per 1000 lines of code | -| Incident Count | Number of issues with type "Incident"
incidents are found when running in production | -| Incident Age | Lead time of issues with type "Incident" | -| Incident Count per 1k Lines of Code | Amount of incidents per 1000 lines of code | - -## Configuration -Configuring Jira via [config-ui](/UserManuals/ConfigUI/Jira.md). - -## Collect Data From JIRA - -To collect data, select `Advanced Mode` on the `Create Pipeline Run` page and paste a JSON config like the following: - -> Warning: Data collection only supports single-task execution, and the results of concurrent multi-task execution may not meet expectations. - -``` -[ - [ - { - "plugin": "jira", - "options": { - "connectionId": 1, - "boardId": 8, - "since": "2006-01-02T15:04:05Z" - } - } - ] -] -``` - -- `connectionId`: The `ID` field from **JIRA Integration** page. -- `boardId`: JIRA board id, see "Find Board Id" for details. -- `since`: optional, download data since a specified date only. - - -## API - -### Data Connections - -1. Get all data connection - -```GET /plugins/jira/connections -[ - { - "ID": 14, - "CreatedAt": "2021-10-11T11:49:19.029Z", - "UpdatedAt": "2021-10-11T11:49:19.029Z", - "name": "test-jira-connection", - "endpoint": "https://merico.atlassian.net/rest", - "basicAuthEncoded": "basicAuth", - "epicKeyField": "epicKeyField", - "storyPointField": "storyPointField" - } -] -``` - -2. Create a new data connection - -```POST /plugins/jira/connections -{ - "name": "jira data connection name", - "endpoint": "jira api endpoint, i.e. https://merico.atlassian.net/rest", - "basicAuthEncoded": "generated by `echo -n {jira login email}:{jira token} | base64`", - "epicKeyField": "name of customfield of epic key", - "storyPointField": "name of customfield of story point", - "typeMappings": { // optional, send empty object to delete all typeMappings of the data connection - "userType": { - "standardType": "devlake standard type" - } - } -} -``` - - -3. Update data connection - -```PUT /plugins/jira/connections/:connectionId -{ - "name": "jira data connection name", - "endpoint": "jira api endpoint, i.e. https://merico.atlassian.net/rest", - "basicAuthEncoded": "generated by `echo -n {jira login email}:{jira token} | base64`", - "epicKeyField": "name of customfield of epic key", - "storyPointField": "name of customfield of story point", - "typeMappings": { // optional, send empty object to delete all typeMappings of the data connection - "userType": { - "standardType": "devlake standard type", - } - } -} -``` - -4. Get data connection detail -```GET /plugins/jira/connections/:connectionId -{ - "name": "jira data connection name", - "endpoint": "jira api endpoint, i.e. https://merico.atlassian.net/rest", - "basicAuthEncoded": "generated by `echo -n {jira login email}:{jira token} | base64`", - "epicKeyField": "name of customfield of epic key", - "storyPointField": "name of customfield of story point", - "typeMappings": { // optional, send empty object to delete all typeMappings of the data connection - "userType": { - "standardType": "devlake standard type", - } - } -} -``` - -5. Delete data connection - -```DELETE /plugins/jira/connections/:connectionId -``` - - -### Type mappings setting - -1. 
mappings struct - -```json /blueprints/{blueprintId} -{ - "settings": { - "connections": - [{ - "scope": - [{ - "transformation": - { - "epicKeyField": "", - "storyPointField": "", - "remotelinkCommitShaPattern": "", - "typeMappings": - { - "": - { - "standardType": "", - "statusMappings": - { - "": { "standardStatus": "" }, - "": { "standardStatus": "" } - } - } - } - } - }] - }] - } -} -``` - -2. set mappings example: - -```json PATCH /blueprints/{blueprintId} -{ - "name": "jira-test", - "mode": "NORMAL", - "plan": - [ - [ - { - "plugin": "jira", - "subtasks": - [ - "collectStatus", - "extractStatus", - "collectProjects", - "extractProjects", - "collectBoard", - "extractBoard", - "collectIssueTypes", - "extractIssueType", - "collectIssues", - "extractIssues", - "collectIssueChangelogs", - "extractIssueChangelogs", - "collectAccounts", - "collectWorklogs", - "extractWorklogs", - "collectRemotelinks", - "extractRemotelinks", - "collectSprints", - "extractSprints", - "convertBoard", - "convertIssues", - "convertWorklogs", - "convertIssueChangelogs", - "convertSprints", - "convertSprintIssues", - "convertIssueCommits", - "extractAccounts", - "convertAccounts", - "collectEpics", - "extractEpics" - ] - } - ] - ], - "enable": true, - "cronConfig": "0 0 * * *", - "isManual": true, - "settings": { - "connections": - [{ - "connectionId": 1, - "plugin": "jira", - "scope": - [{ - "entities": - [ - "TICKET", - "CROSS" - ], - "options": { "boardId": 1 }, - "transformation": - { - "epicKeyField": "", - "storyPointField": "", - "remotelinkCommitShaPattern": "", - "typeMappings": - { - "Task1": - { - "standardType": "Task1", - "statusMappings": - { - "done": { "standardStatus": "hello world" }, - "new": { "standardStatus": "nice to meet you" } - } - }, - "Task2": - { - "standardType": "Task2", - "statusMappings": - { - "done": { "standardStatus": "hello world" }, - "new": { "standardStatus": "nice to meet you too" } - } - } - } - } - }] - }], - "version": "1.0.0" - }, - "id": 1, - "createdAt": "2022-08-30T11:25:10.699Z", - "updatedAt": "2022-08-30T11:28:22.891Z" -} -``` - -3. API forwarding -For example: -Requests to `http://your_devlake_host/plugins/jira/connections/1/proxy/rest/agile/1.0/board/8/sprint` -would be forwarded to `https://your_jira_host/rest/agile/1.0/board/8/sprint` - -```GET /plugins/jira/connections/:connectionId/proxy/rest/*path -{ - "maxResults": 1, - "startAt": 0, - "isLast": false, - "values": [ - { - "id": 7, - "self": "https://merico.atlassian.net/rest/agile/1.0/sprint/7", - "state": "closed", - "name": "EE Sprint 7", - "startDate": "2020-06-12T00:38:51.882Z", - "endDate": "2020-06-26T00:38:00.000Z", - "completeDate": "2020-06-22T05:59:58.980Z", - "originBoardId": 8, - "goal": "" - } - ] -} -``` diff --git a/versioned_docs/version-v0.14/Plugins/refdiff.md b/versioned_docs/version-v0.14/Plugins/refdiff.md deleted file mode 100644 index 6068aafa675..00000000000 --- a/versioned_docs/version-v0.14/Plugins/refdiff.md +++ /dev/null @@ -1,165 +0,0 @@ ---- -title: "RefDiff" -description: > - RefDiff Plugin ---- - - -## Summary - -For development workload analysis, we often need to know how many commits have been created between 2 releases. This plugin calculates which commits differ between 2 Ref (branch/tag), and the result will be stored back into database for further analysis. - -## Important Note - -You need to run gitextractor before the refdiff plugin. The gitextractor plugin should create records in the `refs` table in your DB before this plugin can be run. 
- -## Configuration - -This is an enrichment plugin based on Domain Layer data, no configuration needed - -## How to use - -In order to trigger the enrichment, you need to insert a new task into your pipeline. - -1. Make sure `commits` and `refs` are collected into your database, `refs` table should contain records like following: - ``` - id ref_type - github:GithubRepo:384111310:refs/tags/0.3.5 TAG - github:GithubRepo:384111310:refs/tags/0.3.6 TAG - github:GithubRepo:384111310:refs/tags/0.5.0 TAG - github:GithubRepo:384111310:refs/tags/v0.0.1 TAG - github:GithubRepo:384111310:refs/tags/v0.2.0 TAG - github:GithubRepo:384111310:refs/tags/v0.3.0 TAG - github:GithubRepo:384111310:refs/tags/v0.4.0 TAG - github:GithubRepo:384111310:refs/tags/v0.6.0 TAG - github:GithubRepo:384111310:refs/tags/v0.6.1 TAG - ``` -2. If you want to run calculatePrCherryPick, please configure GITHUB_PR_TITLE_PATTERN in .env, you can check the example in .env.example(we have a default value, please make sure your pattern is disclosed by single quotes '') -3. And then, trigger a pipeline like following, you can also define sub-tasks, calculateRefDiff will calculate commits between two ref, and creatRefBugStats will create a table to show bug list between two ref: -```shell -curl -v -XPOST http://localhost:8080/pipelines --data @- <<'JSON' -{ - "name": "test-refdiff", - "plan": [ - [ - { - "plugin": "refdiff", - "options": { - "repoId": "github:GithubRepo:384111310", - "pairs": [ - { "newRef": "refs/tags/v0.6.0", "oldRef": "refs/tags/0.5.0" }, - { "newRef": "refs/tags/0.5.0", "oldRef": "refs/tags/0.4.0" } - ], - "tasks": [ - "calculateCommitsDiff", - "calculateIssuesDiff", - "calculatePrCherryPick", - ] - } - } - ] - ] -} -JSON -``` -Or if you preferred calculating latest releases -```shell -curl -v -XPOST http://localhost:8080/pipelines --data @- <<'JSON' -{ - "name": "test-refdiff", - "plan": [ - [ - { - "plugin": "refdiff", - "options": { - "repoId": "github:GithubRepo:384111310", - "tagsPattern": "v\d+\.\d+.\d+", - "tagsLimit": 10, - "tagsOrder": "reverse semver", - "tasks": [ - "calculateCommitsDiff", - "calculateIssuesDiff", - "calculatePrCherryPick", - ] - } - } - ] - ] -} -JSON -``` - -## Development - -This plugin depends on `libgit2`, you need to install version 1.3.0 in order to run and debug this plugin on your local -machine. - -### Linux - -``` -1. require cmake -[ubuntu] -apt install cmake -y -[centos] -yum install cmake -y - -2. compiling -git clone -b v1.3.0 https://github.com/libgit2/libgit2.git && cd libgit2 -mkdir build && cd build && cmake .. -make && make install - -3.PKG_CONFIG and LD_LIBRARY_PATH -[centos] -export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/lib64:/usr/local/lib64/pkgconfig -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64 -[ubuntu] -export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/lib:/usr/local/lib/pkgconfig -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib -``` - -#### Troubleshooting (linux) - -> Q: # pkg-config --cflags -- libgit2 Package libgit2 was not found in the pkg-config search path. 
-Perhaps you should add the directory containing `libgit2.pc` to the PKG_CONFIG_PATH environment variable -No package 'libgit2' found pkg-config: exit status 1 - -> A: -> Make sure your pkg config path covers the installation: -> if your libgit2.pc in `/usr/local/lib64/pkgconfig`(like centos) -> -> `export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/lib64:/usr/local/lib64/pkgconfig` -> -> else if your libgit2.pc in `/usr/local/lib/pkgconfig`(like ubuntu) -> -> `export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/lib:/usr/local/lib/pkgconfig` -> -> else consider install pkgconfig or rebuild the libgit2 - -### MacOS - -NOTE:Do **NOT** install libgit2 via `MadPorts` or `homebrew`, install from source instead. -``` -brew install cmake -git clone https://github.com/libgit2/libgit2.git -cd libgit2 -git checkout v1.3.0 -mkdir build -cd build -cmake .. -make -make install -``` - -#### Troubleshooting (MacOS) - -> Q: I got an error saying: `pkg-config: exec: "pkg-config": executable file not found in $PATH` - -> A: -> 1. Make sure you have pkg-config installed: -> -> `brew install pkg-config` -> -> 2. Make sure your pkg config path covers the installation: -> `export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/lib:/usr/local/lib/pkgconfig` - -


diff --git a/versioned_docs/version-v0.14/Plugins/tapd.md b/versioned_docs/version-v0.14/Plugins/tapd.md deleted file mode 100644 index 47eaf991adb..00000000000 --- a/versioned_docs/version-v0.14/Plugins/tapd.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: "Tapd" -description: > - Tapd Plugin ---- - -## Summary - -This plugin collects Tapd data through Tapd REST API. - -## Configuration -Configuring Tapd via [config-ui](/UserManuals/ConfigUI/Tapd.md). - -## Collect Data From Tapd - -To collect data, select `Advanced Mode` on the `Create Blueprint` page and paste a JSON config like the following: - -> Warning: Data collection only supports single-task execution, and the results of concurrent multi-task execution may not meet expectations. - -``` -[ - [ - { - "plugin": "tapd", - "options": { - "workspaceId": 34***66, - "connectionId": 1 - } - } - ], - [ - { - "plugin": "tapd", - "options": { - "workspaceId": 6***14, - "connectionId": 1 - } - } - ] -] -``` - -- `connectionId`: The `ID` field from **TAPD Integration** page. -- `workspaceId`: TAPD workspace id, you can get it from two ways: - - url: ![tapd-workspace-id](/img/ConfigUI/tapd-find-workspace-id.png) - - db: you can check workspace info from db._tool_tapd_workspaces and get all workspaceId you want to collect after execution of the following json in `advanced mode` - ```json - [ - [ - { - "plugin": "tapd", - "options": { - "companyId": 55850509, - "workspaceId": 1, - "connectionId": 1 - }, - "subtasks": [ - "collectCompanies", - "extractCompanies" - ] - } - ] - ] - ``` - diff --git a/versioned_docs/version-v0.14/Plugins/webhook.md b/versioned_docs/version-v0.14/Plugins/webhook.md deleted file mode 100644 index 8d3f504ab21..00000000000 --- a/versioned_docs/version-v0.14/Plugins/webhook.md +++ /dev/null @@ -1,194 +0,0 @@ ---- -title: "Webhook" -description: > - Webhook Plugin ---- - -## Overview - -An Incoming Webhook allows users to actively push data to DevLake. It's particularly useful when DevLake is missing the plugin that pulls data from your DevOps tool. - -When you create an Incoming Webhook within DevLake, DevLake generates a unique URL. You can post JSON payloads to this URL to push data to DevLake. - -As of v0.14.0, users can push incidents and deployments required by DORA metrics to DevLake via Incoming Webhooks. - -## Creating webhooks in DevLake - -### Add a new webhook -To add a new webhook, go to the "Data Connections" page in config-ui and select "Issue/Deployment Incoming/Webhook". -![](https://i.imgur.com/jq6lzg1.png) - -We recommend that you give your webhook connection a unique name so that you can identify and manage where you have used it later. - -After clicking on the "Generate POST URL" button, you will find several webhook URLs. You can then post to these URLs from your CI/CD tool or issue tracking system to push data directly to DevLake. You can always come back to the webhook page to access the URLs later. - -![](https://i.imgur.com/jBMQnjt.png) - -### Put webhook on the internet - -For the new webhook to work, it needs to be accessible from the DevOps tools from which you would like to push data to DevLake. If DevLake is deployed in your private network and your DevOps tool (e.g. CircleCI) is a cloud service that lives outside of your private network, then you need to make DevLake's webhook accessible to the outside cloud service. 
-
-There are many tools for this:
-
-  - For testing and quick setup, [ngrok](https://ngrok.com/) is a useful utility that provides a publicly accessible web URL to any locally hosted application. You can put DevLake's webhook on the internet within 5 minutes by following ngrok's [Getting Started](https://ngrok.com/docs/getting-started) guide. Note that when posting to the webhook, you may need to replace the `localhost` part in the webhook URL with the forwarding URL that ngrok provides.
-  - If you prefer DIY, please check out open-source reverse proxies like [fatedier/frp](https://github.com/fatedier/frp) or go for the classic [nginx](https://www.nginx.com/).
-
-
-## Register a deployment
-
-You can copy the generated deployment curl commands to your CI/CD script to post deployments to Apache DevLake. Below is the detailed payload schema:
-
-| Key | Required | Notes |
-| :---------: | :------: | ------------------------------------------------------------ |
-| commit_sha | ✔️ Yes | the sha of the deployment commit |
-| repo_url | ✔️ Yes | the repo URL of the deployment commit |
-| environment | ✖️ No | the environment where this deployment happened. For example, `PRODUCTION` `STAGING` `TESTING` `DEVELOPMENT`.
The default value is `PRODUCTION` | -| start_time | ✖️ No | Time. Eg. 2020-01-01T12:00:00+00:00
No default value.| -| end_time | ✖️ No | Time. Eg. 2020-01-01T12:00:00+00:00
The default value is the time when DevLake receives the POST request.| - - -### Deployment - Sample API Calls - -Sample CURL to post deployments to DevLake. The following command should be replaced with the actual curl command copied from your Config UI: - -``` -curl https://sample-url.com/api/plugins/webhook/1/deployments -X 'POST' -d '{ - "commit_sha":"015e3d3b480e417aede5a1293bd61de9b0fd051d", - "repo_url":"https://github.com/apache/incubator-devlake/", - "environment":"PRODUCTION", - "start_time":"2020-01-01T12:00:00+00:00", - "end_time":"2020-01-02T12:00:00+00:00" - }' -``` - -If you have set a [username/password](https://devlake.apache.org/docs/next/GettingStarted/Authentication) for Config UI, you'll need to add them to the curl command to register a `deployment`: -``` -curl https://sample-url.com/api/plugins/webhook/1/deployments -X 'POST' -u 'username:password' -d '{ - "commit_sha":"015e3d3b480e417aede5a1293bd61de9b0fd051d", - "repo_url":"https://github.com/apache/incubator-devlake/", - "environment":"PRODUCTION", - "start_time":"2020-01-01T12:00:00+00:00", - "end_time":"2020-01-02T12:00:00+00:00" - }' -``` - -Read more in [Swagger](https://sample-url.com/api/swagger/index.html#/plugins%2Fwebhook/post_plugins_webhook__connectionId_deployments). - - - -#### Deployment - A real-world example in CircleCI - -The following demo shows how to post "deployments" to DevLake from CircleCI. In this example, the CircleCI job 'deploy' is used to do deployments. - - - ``` - version: 2.1 - - jobs: - build: - docker: - - image: cimg/base:stable - steps: - - checkout - - run: - name: "build" - command: | - echo Hello, World! - - deploy: - docker: - - image: cimg/base:stable - steps: - - checkout - - run: - name: "deploy" - command: | - # The time a deploy started - start_time=`date '+%Y-%m-%dT%H:%M:%S%z'` - - # Some deployment tasks here ... - echo Hello, World! - - # Send the request to DevLake after deploy - # The values start with a '$CIRCLE_' are CircleCI's built-in variables - curl https://sample-url.com/api/plugins/webhook/1/deployments -X 'POST' -d "{ - \"commit_sha\":\"$CIRCLE_SHA1\", - \"repo_url\":\"$CIRCLE_REPOSITORY_URL\", - \"start_time\":\"$start_time\" - }" - - workflows: - build_and_deploy_workflow: - jobs: - - build - - deploy - ``` - - - -## Incident / Issue - -If you want to collect issue or incident data from your system, you can use the two webhooks for issues. - -#### Update or Create Issues - -`POST https://sample-url.com/api/plugins/webhook/1/issues` - -needs to be called when an issue or incident is created. The body should be a JSON and include columns as follows: - -| Keyname | Required | Notes | -| :-----------------------: | :------: | ------------------------------------------------------------ | -| board_key | ✔️ Yes | issue belongs to which board/project | -| url | ✖️ No | issue's URL | -| issue_key | ✔️ Yes | issue's key, needs to be unique in a connection | -| title | ✔️ Yes | | -| description | ✖️ No | | -| epic_key | ✖️ No | in which epic. | -| type | ✖️ No | type, such as bug/incident/epic/... | -| status | ✔️ Yes | issue's status. Must be one of `TODO` `DONE` `IN_PROGRESS` | -| original_status | ✔️ Yes | status in your system, such as created/open/closed/... 
| -| story_point | ✖️ No | | -| resolution_date | ✖️ No | date, Format should be 2020-01-01T12:00:00+00:00 | -| created_date | ✔️ Yes | date, Format should be 2020-01-01T12:00:00+00:00 | -| updated_date | ✖️ No | date, Format should be 2020-01-01T12:00:00+00:00 | -| lead_time_minutes | ✖️ No | how long from this issue accepted to develop | -| parent_issue_key | ✖️ No | | -| priority | ✖️ No | | -| original_estimate_minutes | ✖️ No | | -| time_spent_minutes | ✖️ No | | -| time_remaining_minutes | ✖️ No | | -| creator_id | ✖️ No | the user id of the creator | -| creator_name | ✖️ No | the user name of the creator, it will just be used to display | -| assignee_id | ✖️ No | | -| assignee_name | ✖️ No | | -| severity | ✖️ No | | -| component | ✖️ No | which component is this issue in. | - -More information about these columns at [DomainLayerIssueTracking](https://devlake.apache.org/docs/DataModels/DevLakeDomainLayerSchema#domain-1---issue-tracking). - - - -#### Close Issues (Optional) - -`POST https://sample-url.com/api/plugins/webhook/1/issue/:boardKey/:issueId/close` - -needs to be called when an issue or incident is closed. Replace `:boardKey` and `:issueId` with specific strings and keep the body empty. - - - -### Issues Sample API Calls - -Sample CURL for Issue Creating : - -``` -curl https://sample-url.com/api/plugins/webhook/1/issues -X 'POST' -d '{"board_key":"DLK","url":"","issue_key":"DLK-1234","title":"a feature from DLK","description":"","epic_key":"","type":"BUG","status":"TODO","original_status":"created","story_point":0,"resolution_date":null,"created_date":"2020-01-01T12:00:00+00:00","updated_date":null,"lead_time_minutes":0,"parent_issue_key":"DLK-1200","priority":"","original_estimate_minutes":0,"time_spent_minutes":0,"time_remaining_minutes":0,"creator_id":"user1131","creator_name":"Nick name 1","assignee_id":"user1132","assignee_name":"Nick name 2","severity":"","component":""}' -``` - -Sample CURL for Issue Closing: - -``` -curl http://127.0.0.1:4000/api/plugins/webhook/1/issue/DLK/DLK-1234/close -X 'POST' -``` - -Read more in Swagger: https://sample-url.com/api/swagger/index.html#/plugins%2Fwebhook/post_plugins_webhook__connectionId_issues. - diff --git a/versioned_docs/version-v0.14/SupportedDataSources.md b/versioned_docs/version-v0.14/SupportedDataSources.md deleted file mode 100644 index b8132513259..00000000000 --- a/versioned_docs/version-v0.14/SupportedDataSources.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: "Supported Data Sources" -description: > - Data sources that DevLake supports -sidebar_position: 4 ---- - - -## Data Sources and Data Plugins -DevLake supports the following data sources. The data from each data source is collected with one or more plugins. 
- - -| Data Source | Domain(s) | Supported Versions | Triggered Plugins | Collection Mode | -|-------------------------------|------------------------------------------------------------|--------------------------------------|---------------------------- | --------------------- | -| GitHub (include GitHub Action)| Source Code Management, Code Review, Issue Tracking, CI/CD | Cloud |`github`, `gitextractor` | Full Refresh, Incremental Sync(for `issues`, `PRs`) | -| GitLab (include GitLabCI) | Source Code Management, Code Review, Issue Tracking, CI/CD | Cloud, Community Edition 13.x+ |`gitlab`, `gitextractor` | Full Refresh, Incremental Sync(for `issues`)| -| Gitee | Source Code Management, Code Review, Issue Tracking | Cloud |`gitee`, `gitextractor` | Incremental Sync | -| BitBucket | Source Code Management, Code Review | Cloud |`bitbucket`, `gitextractor` | Full Refresh | -| Jira | Issue Tracking | Cloud, Server 8.x+, Data Center 8.x+ |`jira` | Full Refresh, Incremental Sync(for `issues`, `changelogs`, `worklogs`) | -| TAPD | Issue Tracking | Cloud |`tapd` | Full Refresh, Incremental Sync(for `stories`, `bugs`, `tasks`) | -| Jenkins | CI/CD | 2.263.x+ |`jenkins` | Full Refresh | -| Feishu | Calendar | Cloud |`feishu` | Full Refresh | -| AE | Source Code Management | | `ae` | Full Refresh | - - - - -## Data Collection Scope By Each Plugin -This table shows the entities collected by each plugin. Domain layer entities in this table are consistent with the entities [here](./DataModels/DevLakeDomainLayerSchema.md). - -| Domain Layer Entities | ae | gitextractor | github | gitlab | jenkins | jira | refdiff | tapd | -| --------------------- | -------------- | ------------ | -------------- | ------- | ------- | ------- | ------- | ------- | -| commits | update commits | default | not-by-default | default | | | | | -| commit_parents | | default | | | | | | | -| commit_files | | default | | | | | | | -| pull_requests | | | default | default | | | | | -| pull_request_commits | | | default | default | | | | | -| pull_request_comments | | | default | default | | | | | -| pull_request_labels | | | default | | | | | | -| refs | | default | | | | | | | -| refs_commits_diffs | | | | | | | default | | -| refs_issues_diffs | | | | | | | default | | -| ref_pr_cherry_picks | | | | | | | default | | -| repos | | | default | default | | | | | -| repo_commits | | default | default | | | | | | -| board_repos | | | | | | | | | -| issue_commits | | | | | | | | | -| issue_repo_commits | | | | | | | | | -| pull_request_issues | | | | | | | | | -| refs_issues_diffs | | | | | | | | | -| boards | | | default | | | default | | default | -| board_issues | | | default | | | default | | default | -| issue_changelogs | | | | | | default | | default | -| issues | | | default | | | default | | default | -| issue_comments | | | | | | default | | default | -| issue_labels | | | default | | | | | | -| sprints | | | | | | default | | default | -| issue_worklogs | | | | | | default | | default | -| users o | | | default | | | default | | default | -| builds | | | | | default | | | | -| jobs | | | | | default | | | | - diff --git a/versioned_docs/version-v0.14/Troubleshooting.md b/versioned_docs/version-v0.14/Troubleshooting.md deleted file mode 100644 index dba0cbf1d5b..00000000000 --- a/versioned_docs/version-v0.14/Troubleshooting.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: "Troubleshooting" -sidebar_position: 10 -description: > - DevLake Troubleshooting ---- - -### Common Error Code while collecting/processing data - -| Error 
code | An example | Causes | Solutions |
-| ---------- | ----------------------------|--------|-----------|
-| 429 | subtask collectAPiPipelines ended unexpectedly caused: Error waiting for async Collector execution caused by: retry exceeded 3 times calling projects/{projectId}/pipelines {429} | This example error is returned by GitLab's Pipeline APIs. These APIs are served via Cloudflare, unlike other GitLab entities. | Two ways:
- Enable `fixed rate limit` in the GitLab connection and lower the API rate to 2,000. If it works, you can try increasing the rate to accelerate collection. This solution also applies to other plugins that return 429 while collecting data, such as GitHub, TAPD, etc.
- Upgrade to v0.15.x |
-| 403 | error: preparing task data for gitextractor caused by: unexpected http status code: 403 | This is usually caused by the permissions of your tokens. For example, you may be using an unsupported auth method, or a token without the permissions ticked for certain entities you want to collect. | Find the supported authentication methods and token permissions that should be selected in the corresponding plugin's Config UI manuals, for example, [configuring GitHub](UserManuals/ConfigUI/GitHub.md#auth-tokens) |
-| 1406 | subtask extractApiBuilds ended unexpectedly caused by: error adding the result to batch caused by: Error 1406: Data too long for column 'full_display_name' at row 138. See bug [#4053](https://github.com/apache/incubator-devlake/issues/4053) | This is usually thrown by MySQL because a certain value is too long | A workaround is to manually change the field length to varchar(255) or longer in MySQL. Also, please put up a [bug](https://github.com/apache/incubator-devlake/issues/new?assignees=&labels=type%2Fbug&template=bug-report.yml&title=%5BBug%5D%5BModule+Name%5D+Bug+title+) to let us know. |
-
-
-### Failed to collect data from the server with a self-signed certificate
-
-There might be two problems when trying to collect data from a private GitLab server with a self-signed certificate:
-
-1. "Test Connection" error. This can be solved by setting the environment variable `IN_SECURE_SKIP_VERIFY=true` for the `devlake` container.
-2. "GitExtractor" fails to clone the repository due to certificate verification. Unfortunately, neither gogit nor git2go, the libraries we use, supports insecure HTTPS.
-
-A better approach would be adding your root CA to the `devlake` container:
-
-1. Mount your `rootCA.crt` into the `devlake` container
-2. Add a `command` node to install the mounted certificate
-
-Here is an example for the `docker-compose` installation; the idea applies to other installation methods.
-```
-  devlake:
-    image: apache/devlake:v...
-    ...
-    volumes:
-      ...
-      - /path/to/your/rootCA.crt:/usr/local/share/ca-certificates/rootCA.crt
-    command: [ "sh", "-c", "update-ca-certificates; lake" ]
-    ...
-```
-
-### GitExtractor task failed in a GitHub/GitLab/BitBucket blueprint
-See bug [#3719](https://github.com/apache/incubator-devlake/issues/3719)
-
-This bug happens occasionally in v0.14.x and previous versions. It was fixed by changing the docker base image. Please upgrade to v0.15.x if you encounter it.
-
-
-### Pipeline failed with "The total number of locks exceeds the lock table size"
-
-We have had a couple of reports suggesting that MySQL InnoDB would fail with this message:
-
-- [Error 1206: The total number of locks exceeds the lock table size · Issue #3849 · apache/incubator-devlake](https://github.com/apache/incubator-devlake/issues/3849)
-- [[Bug][Gitlab] gitlab collectApiJobs task failed for mysql locks error · Issue #3653 · apache/incubator-devlake](https://github.com/apache/incubator-devlake/issues/3653)
-
-The cause of the problem is:
-
-- Before Apache DevLake data collection starts, it must purge expired data in the database.
-- The MySQL InnoDB engine creates in-memory locks for the records being deleted.
-- When deleting huge amounts of records, the memory bursts, hence the error.
-
-You are likely to see the error when dealing with a huge repository or board. For MySQL, you can solve it by increasing the `innodb_buffer_pool_size` to a higher value.
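-
-If you want to check the current value before changing it (a quick check from the command line; host and credentials are placeholders for your own MySQL settings):
-
-```
-mysql -h 127.0.0.1 -u <user> -p -e "SHOW VARIABLES LIKE 'innodb_buffer_pool_size';"
-```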
- -Here is an example of the `docker-compose` installation, the idea applies to other installation methods. -``` - mysql: - image: mysql:8..... - ... - # add the follow line to the mysql container - command: --innodb-buffer-pool-size=200M -``` - - -## None of them solve your problem? - -Sorry for the inconvenience, please help us improve by [creating an issue](https://github.com/apache/incubator-devlake/issues) diff --git a/versioned_docs/version-v0.14/UserManuals/Authentication.md b/versioned_docs/version-v0.14/UserManuals/Authentication.md deleted file mode 100644 index fe949858a24..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/Authentication.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: "Security and Authentication" -sidebar_position: 6 -description: How to secure your deployment and enable the Authentication ---- - -The document explains how you can set up Apache DevLake securely. - -First of all, there are 4 services included in the deployment: - -- database: `postgress` and `mysql` are supported, you may choose one of them or any other compatible DBS like cloud-based systems. You should follow the document from the database to make it secure. -- grafana: You are likely to use it most of the time, browsing built-in dashboards, and creating your own customized metric. grafana supports [User Management](https://grafana.com/docs/grafana/latest/administration/user-management/), please follow the official document to set it up based on your need. -- devlake: This is the core service for Data Collection and Metric Calculation, all collected/calculated data would be stored to the database, and accessed by the `grafana` service. `devlake` itself doesn't support User Management of any kind, so we don't recommend that you expose its port to the outside world. -- config-ui: A web interface to set up `devlake` to do the work. You may set up an automated `blueprint` to collect data. `config-ui` supports `Basic Authentication`, by simply set up the Environment Variable `ADMIN_USER` and `ADMIN_PASS` for the container. There are commented lines in `config-ui.environment` section in our `docker-compose.yml` file for your convenience. -In General, we suggest that you reduce the Attack Surface as small as possible. - - -### Internal Deployment (Recommended) - -- database: Remove the `ports` if you don't need to access the database directly -- devlake: Remove the `ports` section. If you want to call the API directly, do it via `config-ui/api` endpoint. -- grafana: We have no choice but to expose the `ports` for people to browse the dashboards. However, you may want to set up the User Management, and a read-only database account for `grafana` -- config-ui: Normally, exposing the `ports` with `Basic Authentication` is sufficient for Internal Deployment, you may choose to remove the `ports` and use techniques like `k8s port-forwarding` or `expose-port-when-needed` to enhance the security. Keep in mind config-ui is NOT designed to be used by many people, and it shouldn't be. Do NOT grant access if NOT necessary. - - -### Internet Deployment (NOT Recommended) - -THIS IS DANGEROUS, DON'T DO IT. If you insist, here are some suggestions you may follow, please consult Security Advisor before everything: - -- database: Same as above. -- grafana: Same as above. In addition, set up the `HTTPS` for the transportation. -- devlake: Same as above. -- config-ui: Same as above. In addition, use port-forward if you are using `k8s`, otherwise, set up `HTTPS` for the transportation. 
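-
-For reference, once `Basic Authentication` is enabled on config-ui, API calls made through the `config-ui/api` endpoint need to carry those credentials. A minimal sketch (host, port, endpoint and credentials below are placeholders, not a specific recommendation):
-
-```
-# <endpoint> stands for whichever DevLake API path you need;
-# ADMIN_USER / ADMIN_PASS are the values configured for the config-ui container.
-curl -u "$ADMIN_USER:$ADMIN_PASS" "http://<config-ui-host>:4000/api/<endpoint>"
-```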
- - -## Disclaimer - -Security is complicated, all suggestions listed above are based on what we learned so far. Apache Devlake makes no guarantee of any kind, please consult your Security Advisor before applying. diff --git a/versioned_docs/version-v0.14/UserManuals/ConfigUI/AdvancedMode.md b/versioned_docs/version-v0.14/UserManuals/ConfigUI/AdvancedMode.md deleted file mode 100644 index 7f984ed5ca1..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/ConfigUI/AdvancedMode.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: "Using Advanced Mode" -sidebar_position: 7 -description: > - Using the advanced mode of Config-UI ---- - - -## Why advanced mode? - -Advanced mode allows users to create any pipeline by writing JSON. This is useful for users who want to: - -1. Collect multiple GitHub/GitLab repos or Jira projects within a single pipeline -2. Have fine-grained control over what entities to collect or what subtasks to run for each plugin -3. Orchestrate a complex pipeline that consists of multiple stages of plugins. - -Advanced mode gives utmost flexibility to users by exposing the JSON API. - -## How to use advanced mode to create pipelines? - -1. Click on "+ New Blueprint" on the Blueprint page. - -![image](/img/AdvancedMode/AdvancedMode1.png) - -2. In step 1, click on the "Advanced Mode" link. - -![image](/img/AdvancedMode/AdvancedMode2.png) - -3. The pipeline editor expects a 2D array of plugins. The first dimension represents different stages of the pipeline and the second dimension describes the plugins in each stage. Stages run in sequential order and plugins within the same stage runs in parallel. We provide some templates for users to get started. Please also see the next section for some examples. - -![image](/img/AdvancedMode/AdvancedMode3.png) - -4. You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your preferred schedule. After setting up the Blueprint, you will be prompted to the Blueprint's activity detail page, where you can track the progress of the current run and wait for it to finish before the dashboards become available. You can also view all historical runs of previously created Blueprints from the list on the Blueprint page. - -## Examples - -1. Collect multiple GitLab repos sequentially. - ->When there're multiple collection tasks against a single data source, we recommend running these tasks sequentially since the collection speed is mostly limited by the API rate limit of the data source. ->Running multiple tasks against the same data source is unlikely to speed up the process and may overwhelm the data source. - - -Below is an example for collecting 2 GitLab repos sequentially. It has 2 stages, each contains a GitLab task. - - -``` -[ - [ - { - "Plugin": "gitlab", - "Options": { - "projectId": 15238074 - } - } - ], - [ - { - "Plugin": "gitlab", - "Options": { - "projectId": 11624398 - } - } - ] -] -``` - - -2. Collect a GitHub repo and a Jira board in parallel - -Below is an example for collecting a GitHub repo and a Jira board in parallel. It has a single stage with a GitHub task and a Jira task. Since users can configure multiple Jira connection, it's required to pass in a `connectionId` for Jira task to specify which connection to use. 
- -``` -[ - [ - { - "Plugin": "github", - "Options": { - "repo": "lake", - "owner": "merico-dev" - } - }, - { - "Plugin": "jira", - "Options": { - "connectionId": 1, - "boardId": 76 - } - } - ] -] -``` -## Editing a Blueprint (Advanced Mode) -This section is for editing a Blueprint in the Advanced Mode. To edit in the Normal mode, please refer to [this guide](Tutorial.md#editing-a-blueprint-normal-mode). - -To edit a Blueprint created in the Advanced mode, you can simply go the Settings page of that Blueprint and click on Edit JSON to edit its configuration. - -![img](/img/ConfigUI/BlueprintEditing/blueprint-edit2.png) diff --git a/versioned_docs/version-v0.14/UserManuals/ConfigUI/GitHub.md b/versioned_docs/version-v0.14/UserManuals/ConfigUI/GitHub.md deleted file mode 100644 index 6afbdabd72b..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/ConfigUI/GitHub.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -title: "Configuring GitHub" -sidebar_position: 2 -description: Config UI instruction for GitHub ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![github-add-data-connections](/img/ConfigUI/github-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint, eg. `https://api.github.com/`. The url should end with `/`. - -#### Auth Token(s) -You can use one of the following GitHub tokens: personal access tokens(PATs) or fine-grained personal access tokens. - -###### GitHub personal access tokens(PATs) -Learn about [how to create a GitHub personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token). The following permissions are required to collect data from repositories: - - `repo:status` - - `repo_deployment` - - `read:user` - - `read:org` - -###### Fine-grained personal access tokens(Fine-grained PATs) -If you're concerned with giving classic PATs full unrestricted access to your repositories, you can use fine-grained PATs announced by GitHub recently. With fine-grained PATs, GitHub users can create read-only PATs that only have access to repositories under certain GitHub orgs. But in order to do that, org admin needs to enroll that org with fine-grained PATs beta feature first. Please check [this doc](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token#creating-a-fine-grained-personal-access-token) for more details. -The token should be granted the read-only permission for the following entities. -- `Actions` -- `Contents` -- `Discussions` -- `Issues` -- `Metadata` -- `Pull requests` - -Token Rate Limit:
-The data collection speed is restricted by the **rate limit of [5,000 requests](https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting) per hour per token** (15,000 requests/hour if you pay for GitHub enterprise). You can accelerate data collection by configuring _multiple_ personal access tokens. Please note that multiple tokens should be created by different GitHub accounts. Tokens belonging to the same GitHub account share the rate limit. - - -#### Proxy URL (Optional) -If you are behind a corporate firewall or VPN you may need to utilize a proxy server. Enter a valid proxy server address on your network, e.g. `http://your-proxy-server.com:1080` - -#### Test and Save Connection -Click `Test Connection`, if the connection is successful, click `Save Connection` to add the connection. - - -### Step 2 - Setting Data Scope -![github-set-data-scope](/img/ConfigUI/github-set-data-scope.png) - -#### Projects -Enter the GitHub repos to collect. If you want to collect more than 1 repo, please separate repos with comma. For example, "apache/incubator-devlake,apache/incubator-devlake-website". - -#### Data Entities -Usually, you don't have to modify this part. However, if you don't want to collect certain GitHub entities, you can unselect some entities to accelerate the collection speed. -- Issue Tracking: GitHub issues, issue comments, issue labels, etc. -- Source Code Management: GitHub repos, refs, commits, etc. -- Code Review: GitHub PRs, PR comments and reviews, etc. -- Cross Domain: GitHub accounts, etc. - -### Step 3 - Adding Transformation Rules (Optional) -![github-add-transformation-rules-list](/img/ConfigUI/github-add-transformation-rules-list.png) -![github-add-transformation-rules](/img/ConfigUI/github-add-transformation-rules.png) - -Without adding transformation rules, you can still view the "[GitHub Metrics](/livedemo/DataSources/GitHub)" dashboard. However, if you want to view "[Weekly Bug Retro](/livedemo/QAEngineers/WeeklyBugRetro)", "[Weekly Community Retro](/livedemo/OSSMaintainers/WeeklyCommunityRetro)" or other pre-built dashboards, the following transformation rules, especially "Type/Bug", should be added.
- -Each GitHub repo has at most ONE set of transformation rules. - -#### Issue Tracking - -- Severity: Parse the value of `severity` from issue labels. - - when your issue labels for severity level are like 'severity/p0', 'severity/p1', 'severity/p2', then input 'severity/(.*)$' - - when your issue labels for severity level are like 'p0', 'p1', 'p2', then input '(p0|p1|p2)$' - -- Component: Same as "Severity". - -- Priority: Same as "Severity". - -- Type/Requirement: The `type` of issues with labels that match the given regular expression will be set to "REQUIREMENT". Unlike "PR.type", submatch does nothing, because for issue management analysis, users tend to focus on 3 kinds of types (Requirement/Bug/Incident), however, the concrete naming varies from repo to repo, from time to time, so we decided to standardize them to help analysts metrics. - -- Type/Bug: Same as "Type/Requirement", with `type` set to "BUG". - -- Type/Incident: Same as "Type/Requirement", with `type` set to "INCIDENT". - -#### CI/CD -![image](https://user-images.githubusercontent.com/14050754/208100921-abc28c75-6001-493d-b307-3fd9879db552.png) - -This set of configurations is used for calculating [DORA metrics](../DORA.md). - -If you're using GitHub Action to conduct `deployments`, please select "Detect Deployment from Jobs in GitHub Action", and input the RegEx in the following fields: -- Deployment: A GitHub Action job with a name that matches the given regEx will be considered as a deployment. -- Production: A GitHub Action job with a name that matches the given regEx will be considered a job in the production environment. - -By the above two fields, DevLake can identify a production deployment among massive CI jobs. - -You can also select "Not using Jobs in GitHub Action as Deployments" if you're not using GitHub action for deployments. - - -#### Code Review - -- Type: The `type` of pull requests will be parsed from PR labels by given regular expression. For example: - - when your labels for PR types are like 'type/feature-development', 'type/bug-fixing' and 'type/docs', please input 'type/(.*)$' - - when your labels for PR types are like 'feature-development', 'bug-fixing' and 'docs', please input '(feature-development|bug-fixing|docs)$' - -- Component: The `component` of pull requests will be parsed from PR labels by given regular expression. - - -#### Additional Settings (Optional) - -- Tags Limit: It'll compare the last N pairs of tags to get the "commit diff', "issue diff" between tags. N defaults to 10. - - commit diff: new commits for a tag relative to the previous one - - issue diff: issues solved by the new commits for a tag relative to the previous one - -- Tags Pattern: Only tags that meet given regular expression will be counted. - -- Tags Order: Only "reverse semver" order is supported for now. - -Please click `Save` to save the transformation rules for the repo. In the data scope list, click `Next Step` to continue configuring. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. 
diff --git a/versioned_docs/version-v0.14/UserManuals/ConfigUI/GitLab.md b/versioned_docs/version-v0.14/UserManuals/ConfigUI/GitLab.md deleted file mode 100644 index feef84a8ebd..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/ConfigUI/GitLab.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: "Configuring GitLab" -sidebar_position: 3 -description: Config UI instruction for GitLab ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![gitlab-add-data-connections](/img/ConfigUI/gitlab-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint. - - If you are using gitlab.com, the endpoint will be `https://gitlab.com/api/v4/` - - If you are self-hosting GitLab, the endpoint will look like `https://gitlab.example.com/api/v4/` -The endpoint url should end with `/`. - -#### Auth Token(s) -GitLab personal access tokens are required to add a connection. Learn about [how to create a GitLab personal access token](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html). - -###### Rate Limit -The data collection speed is restricted by the **rate limit of [120,000 requests/hour](https://docs.gitlab.com/ee/user/gitlab_com/index.html#gitlabcom-specific-rate-limits)** for GitLab Cloud. Tokens under the same IP address share the rate limit, so the actual rate limit for your token will be lower than this number. - -#### Proxy URL (Optional) -If you are behind a corporate firewall or VPN you may need to utilize a proxy server. Enter a valid proxy server address on your network, e.g. `http://your-proxy-server.com:1080` - -#### Test and Save Connection -Click `Test Connection`, if the connection is successful, click `Save Connection` to add the connection. - - -### Step 2 - Setting Data Scope - -![image](https://user-images.githubusercontent.com/3294100/199533780-f506b308-6808-499c-90db-b39fcda27888.png) - -#### Projects -Choose the Gitlab projects to collect. Limited by GitLab API, You need to type more than 2 characters to search. - -* If you want to collect public repositories in GitLab, please uncheck "Only search my repositories" to search all repositories. - -#### Data Entities -Usually, you don't have to modify this part. However, if you don't want to collect certain GitLab entities, you can unselect some entities to accerlerate the collection speed. -- Issue Tracking: GitLab issues, issue comments, issue labels, etc. -- Source Code Management: GitLab repos, refs, commits, etc. -- Code Review: GitLab MRs, MR comments and reviews, etc. -- Cross Domain: GitLab accounts, etc. - -### Step 3 - Adding Transformation Rules (Optional) - -#### CI/CD -This set of configurations is used for calculating [DORA metrics](../DORA.md). - -If you're using GitLab CI to conduct `deployments`, please select "Detect Deployment from Jobs in GitLab CI", and input the RegEx in the following fields: -- Deployment: A GitLab CI job with a name that matches the given regEx will be considered as a deployment. -- Production: A GitLab CI job with a name that matches the given regEx will be considered a job in the production environment. - -By the above two fields, DevLake can identify a production deployment among massive CI jobs. - -You can also select "Not using Jobs in GitLab CI as Deployments" if you're not using GitLab CI to conduct deployments. 
- -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. diff --git a/versioned_docs/version-v0.14/UserManuals/ConfigUI/Jenkins.md b/versioned_docs/version-v0.14/UserManuals/ConfigUI/Jenkins.md deleted file mode 100644 index cedf832b6c2..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/ConfigUI/Jenkins.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: "Configuring Jenkins" -sidebar_position: 5 -description: Config UI instruction for Jenkins ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![jenkins-add-data-connections](/img/ConfigUI/jenkins-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint. Eg. `https://ci.jenkins.io/`. The endpoint url should end with `/`. - -#### Username (E-mail) -Your User ID for the Jenkins Instance. - -#### Password -For help on Username and Password, please see Jenkins docs on [using credentials](https://www.jenkins.io/doc/book/using/using-credentials/). You can also use "API Access Token" for this field, which can be generated at `User` -> `Configure` -> `API Token` section on Jenkins. - -#### Test and Save Connection -Click `Test Connection`, if the connection is successful, click `Save Connection` to add the connection. - -### Step 2 - Setting Data Scope -There is no data cope setting for Jenkins. - -### Step 3 - Adding Transformation Rules (Optional) -This set of configurations is used for calculating [DORA metrics](../DORA.md). - -If you're using Jenkins builds to conduct `deployments`, please select "Detect Deployment from Jenkins Builds", and input the RegEx in the following fields: -- Deployment: A Jenkins build with a name that matches the given regEx will be considered as a deployment. -- Production: A Jenkins build with a name that matches the given regEx will be considered a build in the production environment. - -By the above two fields, DevLake can identify a production deployment among massive CI jobs. - -You can also select "Not using Jenkins builds as Deployments" if you're not using Jenkins to conduct deployments. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. diff --git a/versioned_docs/version-v0.14/UserManuals/ConfigUI/Jira.md b/versioned_docs/version-v0.14/UserManuals/ConfigUI/Jira.md deleted file mode 100644 index 952ecddea6a..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/ConfigUI/Jira.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "Configuring Jira" -sidebar_position: 4 -description: Config UI instruction for Jira ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![jira-add-data-connections](/img/ConfigUI/jira-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint - - If you are using Jira Cloud, the endpoint will be `https://.atlassian.net/rest/` - - If you are self-hosting Jira v8+, the endpoint will look like `https://jira..com/rest/` -The endpoint url should end with `/`. - -#### Username / Email -Input the username or email of your Jira account. 
-
-
-#### Password
-- If you are using Jira Cloud, please input the [Jira personal access token](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html).
-- If you are using Jira Server v8+, please input the password of your Jira account.
-
-#### Proxy URL (Optional)
-If you are behind a corporate firewall or VPN, you may need to utilize a proxy server. Enter a valid proxy server address on your network, e.g. `http://your-proxy-server.com:1080`
-
-#### Test and Save Connection
-Click `Test Connection`; if the connection is successful, click `Save Connection` to add the connection.
-
-
-### Step 2 - Setting Data Scope
-![jira-set-data-scope](/img/ConfigUI/jira-set-data-scope.png)
-
-#### Projects
-Choose the Jira boards to collect.
-
-#### Data Entities
-Usually, you don't have to modify this part. However, if you don't want to collect certain Jira entities, you can unselect some entities to accelerate the collection speed.
-- Issue Tracking: Jira issues, issue comments, issue labels, etc.
-- Cross Domain: Jira accounts, etc.
-
-### Step 3 - Adding Transformation Rules (Optional)
-![jira-add-transformation-rules-list](/img/ConfigUI/jira-add-transformation-rules-list.png)
-
-Without adding transformation rules, you cannot view all charts in the "Jira" or "Engineering Throughput and Cycle Time" dashboards.
- -Each Jira board has at most ONE set of transformation rules. - -![jira-add-transformation-rules](/img/ConfigUI/jira-add-transformation-rules.png) - -#### Issue Tracking - -- Requirement: choose the issue types to be transformed to "REQUIREMENT". -- Bug: choose the issue types to be transformed to "BUG". -- Incident: choose the issue types to be transformed to "INCIDENT". -- Epic Key: choose the custom field that represents Epic key. In most cases, it is "Epic Link". -- Story Point: choose the custom field that represents story points. In most cases, it is "Story Points". - -#### Additional Settings -- Remotelink Commit SHA: parse the commits from an issue's remote links by the given regular expression so that the relationship between `issues` and `commits` can be created. You can directly use the regular expression `/commit/([0-9a-f]{40})$`. - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. diff --git a/versioned_docs/version-v0.14/UserManuals/ConfigUI/Tapd.md b/versioned_docs/version-v0.14/UserManuals/ConfigUI/Tapd.md deleted file mode 100644 index ed11cf181ce..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/ConfigUI/Tapd.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "Configuring Tapd(WIP)" -sidebar_position: 6 -description: Config UI instruction for Tapd ---- - -Visit config-ui: `http://localhost:4000`. -### Step 1 - Add Data Connections -![tapd-add-data-connections](/img/ConfigUI/tapd-add-data-connections.png) - -#### Connection Name -Name your connection. - -#### Endpoint URL -This should be a valid REST API endpoint - - `https://api.tapd.cn/` -The endpoint url should end with `/`. - -#### Username / Password -Input the username and password of your Tapd account, you can follow the steps as below. -![tapd-account](/img/ConfigUI/tapd-account.png) - -#### Proxy URL (Optional) -If you are behind a corporate firewall or VPN you may need to utilize a proxy server. Enter a valid proxy server address on your network, e.g. `http://your-proxy-server.com:1080` - -#### Ralte Limit (Optional) -For tapd, we suggest you to set rate limit to 3500 - -#### Test and Save Connection -Click `Test Connection`, if the connection is successful, click `Save Connection` to add the connection. - -### \ No newline at end of file diff --git a/versioned_docs/version-v0.14/UserManuals/ConfigUI/Tutorial.md b/versioned_docs/version-v0.14/UserManuals/ConfigUI/Tutorial.md deleted file mode 100644 index 5c61e930c79..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/ConfigUI/Tutorial.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: "Tutorial" -sidebar_position: 1 -description: Config UI instruction ---- - -## Overview -The Apache DevLake Config UI allows you to configure the data you wish to collect through a graphical user interface. Visit config-ui at `http://localhost:4000`. - -## Creating a Blueprint - -### Introduction -A Blueprint is the plan that covers all the work to get your raw data ready for query and metric computaion in the dashboards. We have designed the Blueprint to help you with data collection within only one workflow. Creating a Blueprint consists of four steps: - -1. Adding Data Connections: Add new or select from existing data connections for the data you wish to collect -2. Setting Data Scope: Select the scope of data (e.g. GitHub projects or Jira boards) for your data connections -3. 
Adding Transformation (Optional): Add transformation rules for the data scope you have selected in order to view corresponding metrics -4. Setting Sync Frequency: Set up a schedule for how often you wish your data to be synced - -### Step 1 - Adding Data Connections -There are two ways to add data connections to your Blueprint: adding them during the creation of a Blueprint and adding them separately on the Data Integrations page. There is no difference between these two ways. - -When adding data connections from the Blueprint, you can either create a new or select from an exisitng data connections. - -![img](/img/ConfigUI/BlueprintCreation/step1.png) - -### Step 2 - Setting Data Scope -After adding data connections, click on "Next Step" and you will be prompted to select the data scope of each data connections. For instance, for a GitHub connection, you will need to enter the projects you wish to sync and for Jira, you will need to select the boards. - -![img](/img/ConfigUI/BlueprintCreation/step2.png) - -### Step 3 - Adding Transformation (Optional) -This step is only required for viewing certain metrics in the pre-built dashboards that require data transformation. Without adding transformation rules, you can still view the basic metrics. - -Currently, DevLake only supports transformation for GitHub and Jira connections. - -![img](/img/ConfigUI/BlueprintCreation/step3.png) - -### Step 4 - Setting Sync Frequency -You can choose how often you would like to sync your data in this step by selecting a sync frequency option or enter a cron code to specify your prefered schedule. - -After setting up the Blueprint, you will be prompted to the Blueprint's activity detail page, where you can track the progress of the current run and wait for it to finish before the dashboards become available. You can also view all historical runs of previously created Blueprints from the list on the Blueprint page. - -![img](/img/ConfigUI/BlueprintCreation/step4.png) - -## Editing a Blueprint (Normal Mode) -On the Blueprint list page, clicking on any Blueprint will lead you to the detail page of the blueprint. If you switch to the Settings tab on the detail page, you can see the settings of your Blueprint and edit parts of it seperately. - -In the current version, the Blueprint editing feature **allows** editing: -- The Blueprint's name -- The sync frequency -- The data scope of a connection -- The data entities of the data scope -- The transformation rules of any data scope - -and does **NOT allow**: -- Adding or deleting connections to an existing blueprint (will be available in the future) -- Editing any connections - -Please note: -1. The connections of some data sources, such as Jenkins, do not have an editing button, because their configuration do not contain data scope, data entities and/or transformation. -2. If you have created the Blueprint in the Normal mode, you will only be able to edit it in the Normal Mode; if you have created it in the Advanced Mode, please refer to [this guide](AdvancedMode.md#editing-a-blueprint-advanced-mode) for editing. - -The Settings page for editing Blueprints: -![img](/img/ConfigUI/BlueprintEditing/blueprint-edit1.png) - -## Creating and Managing Data Connections -The Data Connections page allows you to view, create and manage all your data connections at one place. 
diff --git a/versioned_docs/version-v0.14/UserManuals/ConfigUI/_category_.json b/versioned_docs/version-v0.14/UserManuals/ConfigUI/_category_.json deleted file mode 100644 index 62f99d484f6..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/ConfigUI/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Config UI", - "position": 4 -} diff --git a/versioned_docs/version-v0.14/UserManuals/ConfigUI/webhook.md b/versioned_docs/version-v0.14/UserManuals/ConfigUI/webhook.md deleted file mode 100644 index 7b47e90131e..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/ConfigUI/webhook.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: "Configuring Webhook" -sidebar_position: 7 -description: Config UI instruction for Webhook ---- - -Visit config-ui via the Domain Name or IP Address and Port, - -### Add a new webhook -![image](https://user-images.githubusercontent.com/3294100/191309840-460fbc9c-15a1-4b12-a510-9ed5ccd8f2b0.png) - -#### Webhook name -We recommand that you give your webhook connection a unique name so that you can identify and manage where you have used it later. - -### Use Webhooks -After clicking on "Generate POST URL", you will find four webhook URLs. Copy the ones that suits your usage into your CI or issue tracking systems. You can always come back to the webhook page to copy the URLs later on. - -![image](https://user-images.githubusercontent.com/3294100/191400110-327c153f-b236-47e3-88cc-85bf8fcae310.png) - -For more usage: [plugins/webhook](/Plugins/webhook.md). diff --git a/versioned_docs/version-v0.14/UserManuals/DORA.md b/versioned_docs/version-v0.14/UserManuals/DORA.md deleted file mode 100644 index 2270af25a03..00000000000 --- a/versioned_docs/version-v0.14/UserManuals/DORA.md +++ /dev/null @@ -1,182 +0,0 @@ ---- -title: "DORA" -sidebar_position: 7 -description: > - DORA Metrics ---- - - -This document describes everything you need to know about DORA, and implementing this powerful and practical framework in DevLake. - -## What are DORA metrics? -Created six years ago by a team of researchers, DORA stands for "DevOps Research & Assessment" and is the answer to years of research, having examined thousands of teams, seeking a reliable and actionable approach to understanding the performance of software development teams. - -DORA has since become a standardized framework focused on the stability and velocity of development processes, one that avoids the more controversial aspects of productivity and individual performance measures. - -There are two key clusters of data inside DORA: Velocity and Stability. The DORA framework is focused on keeping them in context with each other, as a whole, rather than as independent variables, making the data more challenging to misinterpret or abuse. - -Within velocity are two core metrics: -- [Deployment Frequency](https://devlake.apache.org/docs/Metrics/DeploymentFrequency): Number of successful deployments to production, how rapidly is your team releasing to users? -- [Lead Time for Changes](https://devlake.apache.org/docs/Metrics/LeadTimeForChanges): How long does it take from commit to the code running in production? This is important, as it reflects how quickly your team can respond to user requirements. - -Stability is composed of two core metrics: -- [Median Time to Restore Service](https://devlake.apache.org/docs/Metrics/MTTR): How long does it take the team to properly recover from a failure once it is identified? 
-- [Change Failure Rate](https://devlake.apache.org/docs/Metrics/CFR): How often are your deployments causing a failure? - -![](https://i.imgur.com/71EUflb.png) - -To make DORA even more actionable, there are well-established benchmarks to determine if you are performing at "Elite", "High", "Medium", or "Low" levels. Inside DevLake, you will find the benchmarking table available to assess and compare your own projects. - -## Why is DORA important? -DORA metrics help teams and projects measure and improve software development practices to consistently deliver reliable products, and thus happy users! - - -## How to implement DORA metrics with Apache DevLake? - -You can set up DORA metrics in DevLake in a few steps: -- **Install**: [Getting Started](https://devlake.apache.org/docs/GettingStarted) -- **Collect**: Collect data via blueprint - - In the blueprint, select the data you wish to collect, and make sure you have selected the data required for DORA metrics - - Configure DORA-related transformation rules to define `deployments` and `incidents` - - Select a sync frequency for your data, save and run the blueprint. -- **Report**: DevLake provides a built-in DORA dashboard. See an example screenshot below or check out our [live demo](https://grafana-lake.demo.devlake.io/grafana/d/qNo8_0M4z/dora?orgId=1). -![DORA Dashboard](https://i.imgur.com/y1pUIsk.png) - -DevLake now supports Jenkins, GitHub Action and GitLabCI as data sources for `deployments` data; Jira, GitHub issues, and TAPD as the sources for `incidents` data; Github PRs, GitLab MRs as the sources for `changes` data. - -If your CI/CD tools are not listed on the [Supported Data Sources](https://devlake.apache.org/docs/next/Overview/SupportedDataSources) page, have no fear! DevLake provides incoming webhooks to push your `deployments` data to DevLake. The webhook configuration doc can be found [here](https://devlake.apache.org/docs/Configuration/webhook). - - -## A real-world example - -Let's walk through the DORA implementation process for a team with the following toolchain - -- Code Hosting: GitHub -- CI/CD: GitHub Actions + CircleCI -- Issue Tracking: Jira - -Calculating DORA metrics requires three key entities: **changes**, **deployments**, and **incidents**. Their exact definitions of course depend on a team's DevOps practice and varies team by team. For the team in this example, let's assume the following definition: - -- Changes: All pull requests in GitHub. -- Deployments: GitHub action jobs that have "deploy" in their names and CircleCI's deployment jobs. -- Incidents: Jira issues whose types are `Crash` or `Incident` - -In the next section, we'll demonstrate how to configure DevLake to implement DORA metrics for the aforementioned example team. - -### Collect GitHub & Jira data via `blueprint` -1. Visit the config-ui at `http://localhost:4000` -2. Create a `blueprint`, let's name it "Blueprint for DORA", add a Jira and a GitHub connection. Click `Next Step` -![](https://i.imgur.com/lpPRZ6v.png) - -3. Select Jira boards and GitHub repos to collect, click `Next Step` -![](https://i.imgur.com/Ko38n6J.png) - -4. Click `Add Transformation` to configure for DORA metrics -![](https://i.imgur.com/Lhcu2DE.png) - -5. To make it simple, fields with a ![](https://i.imgur.com/rrLopFx.png) label are DORA-related configurations for every data source. Via these fields, you can define what are "incidents" and "deployments" for each data source. 
After all data connections have been configured, click `Next Step` - - This team uses Jira issue types `Crash` and `Incident` as "incident", so choose the two types in field "incident". Jira issues in these two types will be transformed to "incidents" in DevLake. - - This team uses the GitHub action jobs named `deploy` and `build-and-deploy` to deploy, so type in `(?i)deploy` to match these jobs. These jobs will be transformed to "deployments" in DevLake. - ![](https://i.imgur.com/1JZA2xn.png) - - Note: The following example shows where to find GitHub action jobs. It's easy to mix them up with GitHub workflows. - ![](https://i.imgur.com/Y2hchEh.png) - - -6. Choose sync frequency, click 'Save and Run Now' to start data collection. The time to completion varies by data source and depends on the volume of data. -![](https://i.imgur.com/zPkfzGr.png) - -For more details, please refer to our [blueprint manuals](https://devlake.apache.org/docs/Configuration/Tutorial). - -### Collect CircleCI data via `webhook` - -Using CircleCI as an example, we demonstrate how to actively push data to DevLake using the Webhook approach, in cases where DevLake doesn't have a plugin specific to that tool to pull data from your data source. - -7. Visit "Data Connections" page in config-ui and select "Issue/Deployment Incoming Webhook". - -8. Click "Add Incoming Webhook", give it a name, and click "Generate POST URL". DevLake will generate URLs that you can send JSON payloads to push `deployments` and `incidents` to Devlake. Copy the `Deployment` curl command. -![](https://i.imgur.com/jq6lzg1.png) -![](https://i.imgur.com/jBMQnjt.png) - -9. Now head to your CircleCI's pipelines page in a new tab. Find your deployment pipeline and click `Configuration File` -![](https://i.imgur.com/XwPzmyk.png) - -10. Paste the curl command copied in step 8 to the `config.yml`, change the key-values in the payload. See full payload schema [here](https://devlake.apache.org/docs/Plugins/webhook/##register-a-deployment). - ``` - version: 2.1 - - jobs: - build: - docker: - - image: cimg/base:stable - steps: - - checkout - - run: - name: "build" - command: | - echo Hello, World! - - deploy: - docker: - - image: cimg/base:stable - steps: - - checkout - - run: - name: "deploy" - command: | - # The time a deploy started - start_time=`date '+%Y-%m-%dT%H:%M:%S%z'` - - # Some deployment tasks here ... - echo Hello, World! - - # Send the request to DevLake after deploy - # The values start with a '$CIRCLE_' are CircleCI's built-in variables - curl https://sample-url.com/api/plugins/webhook/1/deployments -X 'POST' -d "{ - \"commit_sha\":\"$CIRCLE_SHA1\", - \"repo_url\":\"$CIRCLE_REPOSITORY_URL\", - \"start_time\":\"$start_time\" - }" - - workflows: - build_and_deploy_workflow: - jobs: - - build - - deploy - ``` - If you have set a [username/password](https://devlake.apache.org/docs/next/GettingStarted/Authentication) for Config UI, you need to add them to the curl to register a deployment: - - ``` - curl https://sample-url.com/api/plugins/webhook/1/deployments -X 'POST' -u 'username:password' -d '{ - \"commit_sha\":\"$CIRCLE_SHA1\", - \"repo_url\":\"$CIRCLE_REPOSITORY_URL\", - \"start_time\":\"$start_time\" - }' - ``` - -11. Run the modified CircleCI pipeline. Check to verify that the request has been successfully sent. -![](https://i.imgur.com/IyneAMn.png) - -12. You will find the corresponding `deployments` in table.cicd_tasks in DevLake's database. 
-![](https://i.imgur.com/6hguCYK.png) - -### View and customize DevLake's DORA dashboard - -With all the data collected, DevLake's DORA dashboard is ready to deliver your DORA metrics and benchmarks. You can find the DORA dashboard within the Grafana instance shipped with DevLake, ready for you to put into action. - -You can customize the DORA dashboard by editing the underlying SQL query of each panel. - -For a breakdown of each metric's SQL query, please refer to the corresponding metric docs: - - [Deployment Frequency](https://devlake.apache.org/docs/Metrics/DeploymentFrequency) - - [Lead Time for Changes](https://devlake.apache.org/docs/Metrics/LeadTimeForChanges) - - [Median Time to Restore Service](https://devlake.apache.org/docs/Metrics/MTTR) - - [Change Failure Rate](https://devlake.apache.org/docs/Metrics/CFR) - -If you aren't familiar with Grafana, please refer to our [Grafana doc](./Dashboards/GrafanaUserGuide.md), or jump into Slack for help. - -
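-As a final sanity check before digging into the panels, you can confirm from the command line that the collected deployments are present in the `cicd_tasks` table mentioned in step 12 (a minimal sketch; host, credentials and database name are placeholders for your own settings):
-
-```
-mysql -h 127.0.0.1 -u <user> -p <database> -e "SELECT * FROM cicd_tasks LIMIT 5;"
-```
-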
- -:tada::tada::tada: Congratulations! You are now a DevOps Hero, with your own DORA dashboard! - -


-
diff --git a/versioned_docs/version-v0.14/UserManuals/Dashboards/GrafanaUserGuide.md b/versioned_docs/version-v0.14/UserManuals/Dashboards/GrafanaUserGuide.md
deleted file mode 100644
index 41a8e37f78f..00000000000
--- a/versioned_docs/version-v0.14/UserManuals/Dashboards/GrafanaUserGuide.md
+++ /dev/null
@@ -1,120 +0,0 @@
----
-title: "Grafana User Guide"
-sidebar_position: 2
-description: >
-  Grafana User Guide
----
-
-
-# Grafana
-
-
-
-When first visiting Grafana, you will be provided with a sample dashboard with some basic charts set up from the database.
-
-## Contents
-
-Section | Link
-:------------ | :-------------
-Logging In | [View Section](#logging-in)
-Viewing All Dashboards | [View Section](#viewing-all-dashboards)
-Customizing a Dashboard | [View Section](#customizing-a-dashboard)
-Dashboard Settings | [View Section](#dashboard-settings)
-Provisioning a Dashboard | [View Section](#provisioning-a-dashboard)
-Troubleshooting DB Connection | [View Section](#troubleshooting-db-connection)
-
-## Logging In
-
-Once the app is up and running, visit `http://localhost:3002` to view the Grafana dashboard.
-
-Default login credentials are:
-
-- Username: `admin`
-- Password: `admin`
-
-## Viewing All Dashboards
-
-To see all dashboards created in Grafana, visit `/dashboards`.
-
-Or, use the sidebar and click on **Manage**:
-
-![Screen Shot 2021-08-06 at 11 27 08 AM](https://user-images.githubusercontent.com/3789273/128534617-1992c080-9385-49d5-b30f-be5c96d5142a.png)
-
-
-## Customizing a Dashboard
-
-When viewing a dashboard, click the top bar of a panel and go to **edit**.
-
-![Screen Shot 2021-08-06 at 11 35 36 AM](https://user-images.githubusercontent.com/3789273/128535505-a56162e0-72ad-46ac-8a94-70f1c7a910ed.png)
-
-**Edit Dashboard Panel Page:**
-
-![grafana-sections](https://user-images.githubusercontent.com/3789273/128540136-ba36ee2f-a544-4558-8282-84a7cb9df27a.png)
-
-### 1. Preview Area
-- **Top Left** is the variable select area (custom dashboard variables, used for switching projects or grouping data)
-- **Top Right** is a toolbar with buttons that control how the data is displayed:
-  - View data results in a table
-  - Time range selector
-  - Refresh data button
-- **The Main Area** displays the chart and should update in real time
-
-> Note: Data should refresh automatically, but may require a refresh using the button in some cases
-
-### 2. Query Builder
-Here we write the SQL query that pulls data from the database into the chart.
-- Ensure the **Data Source** is the correct database
-
-  ![Screen Shot 2021-08-06 at 10 14 22 AM](https://user-images.githubusercontent.com/3789273/128545278-be4846e0-852d-4bc8-8994-e99b79831d8c.png)
-
-- Use the **Format as Table** and **Edit SQL** buttons to write or edit queries as SQL
-
-  ![Screen Shot 2021-08-06 at 10 17 52 AM](https://user-images.githubusercontent.com/3789273/128545197-a9ff9cb3-f12d-4331-bf6a-39035043667a.png)
-
-- The **Main Area** is where the queries are written, and in the top right is the **Query Inspector** button (to inspect returned data)
-
-  ![Screen Shot 2021-08-06 at 10 18 23 AM](https://user-images.githubusercontent.com/3789273/128545557-ead5312a-e835-4c59-b9ca-dd5c08f2a38b.png)
-
-### 3. Main Panel Toolbar
-In the top right of the window are buttons for:
-- Dashboard settings (for the entire dashboard)
-- Save/apply changes (for the specific panel)
-
-### 4. Grafana Parameter Sidebar
-- Change the chart style (bar/line/pie chart, etc.)
-- Edit legends and chart parameters
-- Modify chart styling
-- Other Grafana-specific settings
-
-## Dashboard Settings
-
-When viewing a dashboard, click the settings icon to open the dashboard settings. Two sections are especially useful:
-
-![Screen Shot 2021-08-06 at 1 51 14 PM](https://user-images.githubusercontent.com/3789273/128555763-4d0370c2-bd4d-4462-ae7e-4b140c4e8c34.png)
-
-- Variables
-  - Create variables, themselves built on SQL queries, to use throughout the dashboard panels
-
-  ![Screen Shot 2021-08-06 at 2 02 40 PM](https://user-images.githubusercontent.com/3789273/128553157-a8e33042-faba-4db4-97db-02a29036e27c.png)
-
-- JSON Model
-  - Copy the `json` code here and save it to a new file with a unique name in `/grafana/dashboards/` in the `lake` repo. This allows dashboards to persist when the app is reloaded.
-
-  ![Screen Shot 2021-08-06 at 2 02 52 PM](https://user-images.githubusercontent.com/3789273/128553176-65a5ae43-742f-4abf-9c60-04722033339e.png)
-
-## Provisioning a Dashboard
-
-To save a dashboard in the `lake` repo and load it:
-
-1. Create a dashboard in the browser (visit `/dashboard/new`, or use the sidebar)
-2. Save the dashboard (in the top right of the screen)
-3. Go to dashboard settings (in the top right of the screen)
-4. Click on _JSON Model_ in the sidebar
-5. Copy the code into a new `.json` file in `/grafana/dashboards`
-
-## Troubleshooting DB Connection
-
-To ensure the database is properly connected to the data source in Grafana, check the database settings in `./grafana/datasources/datasource.yml`, specifically:
-- `database`
-- `user`
-- `secureJsonData/password`
diff --git a/versioned_docs/version-v0.14/UserManuals/Dashboards/_category_.json b/versioned_docs/version-v0.14/UserManuals/Dashboards/_category_.json
deleted file mode 100644
index 0db83c6e9b8..00000000000
--- a/versioned_docs/version-v0.14/UserManuals/Dashboards/_category_.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-  "label": "Dashboards",
-  "position": 5
-}
diff --git a/versioned_docs/version-v0.14/UserManuals/TeamConfiguration.md b/versioned_docs/version-v0.14/UserManuals/TeamConfiguration.md
deleted file mode 100644
index 84c9c6ac08b..00000000000
--- a/versioned_docs/version-v0.14/UserManuals/TeamConfiguration.md
+++ /dev/null
@@ -1,188 +0,0 @@
----
-title: "Team Configuration"
-sidebar_position: 9
-description: >
-  Team Configuration
----
-## What is 'Team Configuration' and how does it work?
-
-To organize and display metrics by `team`, Apache DevLake needs to know about the team configuration in an organization, specifically:
-
-1. What are the teams?
-2. Who are the users (unified identities)?
-3. Which users belong to a team?
-4. Which accounts (identities in specific tools) belong to the same user?
-
-Each of the questions above corresponds to a table in DevLake's schema, illustrated below:
-
-![image](/img/Team/teamflow0.png)
-
-1. The `teams` table stores all the teams in the organization.
-2. The `users` table stores the organization's roster. An entry in the `users` table corresponds to a person in the org.
-3. The `team_users` table stores which users belong to a team.
-4. The `user_accounts` table stores which accounts belong to a user. An `account` refers to an identity in a DevOps tool and is automatically created when importing data from that tool. For example, a `user` may have a GitHub `account` as well as a Jira `account`.
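To illustrate how these tables fit together, here is a rough sketch that joins them into a single roster view. It assumes `teams` has `id` and `name` columns, `team_users` has `team_id` and `user_id`, and `user_accounts` has `user_id` and `account_id`; verify the actual column names in your database before relying on it.

```
-- Illustrative only: each team member and the tool accounts mapped to them.
-- Column names on teams/team_users are assumptions; adjust to your schema.
SELECT
  t.name AS team_name,
  u.name AS member_name,
  a.id AS account_id,
  a.user_name AS account_user_name
FROM teams t
JOIN team_users tu ON tu.team_id = t.id
JOIN users u ON u.id = tu.user_id
LEFT JOIN user_accounts ua ON ua.user_id = u.id
LEFT JOIN accounts a ON a.id = ua.account_id
ORDER BY t.name, u.name;
```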
-
-Apache DevLake uses a simple heuristic algorithm based on emails and names to automatically map accounts to users and populate the `user_accounts` table.
-When Apache DevLake cannot confidently map an `account` to a `user` due to insufficient information, it allows you to manually configure the mapping to ensure accuracy and integrity.
-
-## A step-by-step guide
-
-In the following sections, we'll walk through how to configure teams and create the five aforementioned tables (`teams`, `users`, `team_users`, `accounts`, and `user_accounts`).
-The overall workflow is:
-
-1. Create the `teams` table
-2. Create the `users` and `team_users` tables
-3. Populate the `accounts` table via data collection
-4. Run a heuristic algorithm to populate the `user_accounts` table
-5. Manually update `user_accounts` when the algorithm can't catch everything
-
-Note:
-
-1. Please replace `/path/to/*.csv` with the absolute path of the CSV file you'd like to upload.
-2. Please replace `127.0.0.1:4000` with your actual Apache DevLake ConfigUI service IP and port number.
-
-## Step 1 - Create the `teams` table
-
-You can create the `teams` table by sending a PUT request to `/plugins/org/teams.csv` with a `teams.csv` file. To jumpstart the process, you can download a template `teams.csv` from `/plugins/org/teams.csv?fake_data=true`. Below are the detailed instructions:
-
-a. Download the template `teams.csv` file
-
-   i. GET http://127.0.0.1:4000/api/plugins/org/teams.csv?fake_data=true (pasting the URL into your browser will download the template)
-
-   ii. If you prefer using curl:
-   curl --location --request GET 'http://127.0.0.1:4000/api/plugins/org/teams.csv?fake_data=true'
-
-
-b. Fill out the `teams.csv` file and upload it to DevLake
-
-   i. Fill out `teams.csv` with your org data. Please don't modify the column headers or the file suffix.
-
-   ii. Upload `teams.csv` to DevLake with the following curl command:
-   curl --location --request PUT 'http://127.0.0.1:4000/api/plugins/org/teams.csv' --form 'file=@"/path/to/teams.csv"'
-
-   iii. The PUT request will populate the `teams` table with data from the `teams.csv` file.
-   You can connect to the database and verify the data in the `teams` table.
-   See Appendix A for how to connect to the database.
-
-![image](/img/Team/teamflow3.png)
-
-
-## Step 2 - Create the `users` and `team_users` tables
-
-You can create the `users` and `team_users` tables by sending a single PUT request to `/plugins/org/users.csv` with a `users.csv` file. To jumpstart the process, you can download a template `users.csv` from `/plugins/org/users.csv?fake_data=true`. Below are the detailed instructions:
-
-a. Download the template `users.csv` file
-
-   i. GET http://127.0.0.1:4000/api/plugins/org/users.csv?fake_data=true (pasting the URL into your browser will download the template)
-
-   ii. If you prefer using curl:
-   curl --location --request GET 'http://127.0.0.1:4000/api/plugins/org/users.csv?fake_data=true'
-
-
-b. Fill out `users.csv` and upload it to DevLake
-
-   i. Fill out `users.csv` with your org data. Please don't modify the column headers or the file suffix.
-
-   ii. Upload `users.csv` to DevLake with the following curl command:
-   curl --location --request PUT 'http://127.0.0.1:4000/api/plugins/org/users.csv' --form 'file=@"/path/to/users.csv"'
-
-   iii. The PUT request will populate the `users` and `team_users` tables with data from the `users.csv` file.
-   You can connect to the database and verify these two tables (see the example query below).
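For example, a quick sanity check such as the sketch below lists every user with the team ID they were assigned (assuming `team_users` has `user_id` and `team_id` columns); a NULL `team_id` means the user was not placed in any team.

```
-- Sanity check after uploading users.csv: every user should have a team_id.
SELECT u.id, u.name, tu.team_id
FROM users u
LEFT JOIN team_users tu ON tu.user_id = u.id
ORDER BY u.id;
```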
-
-![image](/img/Team/teamflow1.png)
-
-![image](/img/Team/teamflow2.png)
-
-c. If you ever want to update the `team_users` or `users` tables, simply upload the updated `users.csv` to DevLake again following step b.
-
-## Step 3 - Populate the `accounts` table via data collection
-
-The `accounts` table is automatically populated when you collect data from data sources like GitHub and Jira through DevLake.
-
-For example, the GitHub plugin creates one entry in the `accounts` table for each GitHub user involved in your repository.
-For demo purposes, we'll insert some mock data into the `accounts` table using SQL:
-
-```
-INSERT INTO `accounts` (`id`, `created_at`, `updated_at`, `_raw_data_params`, `_raw_data_table`, `_raw_data_id`, `_raw_data_remark`, `email`, `full_name`, `user_name`, `avatar_url`, `organization`, `created_date`, `status`)
-VALUES
-  ('github:GithubAccount:1:1234', '2022-07-12 10:54:09.632', '2022-07-12 10:54:09.632', '{\"ConnectionId\":1,\"Owner\":\"apache\",\"Repo\":\"incubator-devlake\"}', '_raw_github_api_pull_request_reviews', 28, '', 'TyroneKCummings@teleworm.us', '', 'Tyrone K. Cummings', 'https://avatars.githubusercontent.com/u/101256042?u=a6e460fbaffce7514cbd65ac739a985f5158dabc&v=4', '', NULL, 0),
-  ('jira:JiraAccount:1:629cdf', '2022-07-12 10:54:09.632', '2022-07-12 10:54:09.632', '{\"ConnectionId\":1,\"BoardId\":\"76\"}', '_raw_jira_api_users', 5, '', 'DorothyRUpdegraff@dayrep.com', '', 'Dorothy R. Updegraff', 'https://avatars.jiraxxxx158dabc&v=4', '', NULL, 0);
-
-```
-
-![image](/img/Team/teamflow4.png)
-
-## Step 4 - Run a heuristic algorithm to populate the `user_accounts` table
-
-Now that we have data in both the `users` and `accounts` tables, we can tell DevLake to infer the mappings between `users` and `accounts` with a simple heuristic algorithm based on names and emails.
-
-a. Send an API request to DevLake to run the mapping algorithm:
-
-```
-curl --location --request POST '127.0.0.1:4000/api/pipelines' \
---header 'Content-Type: application/json' \
---data-raw '{
-  "name": "test",
-  "plan":[
-    [
-      {
-        "plugin": "org",
-        "subtasks":["connectUserAccountsExact"],
-        "options":{
-          "connectionId":1
-        }
-      }
-    ]
-  ]
-}'
-```
-
-b. After successful execution, you can verify the data in `user_accounts` in the database.
-
-![image](/img/Team/teamflow5.png)
-
-## Step 5 - Manually update `user_accounts` when the algorithm can't catch everything
-
-It is recommended to examine the generated `user_accounts` table after running the algorithm.
-In this section, we'll demonstrate how to manually update `user_accounts` when the mapping is inaccurate or incomplete.
-To make manual verification easier, DevLake provides an API for users to download `user_accounts` as a CSV file.
-Alternatively, you can verify and modify `user_accounts` entirely via SQL; see Appendix B for more info.
-
-a. GET http://127.0.0.1:4000/api/plugins/org/user_account_mapping.csv (pasting the URL into your browser will download the file). If you prefer using curl:
-```
-curl --location --request GET 'http://127.0.0.1:4000/api/plugins/org/user_account_mapping.csv'
-```
-
-![image](/img/Team/teamflow6.png)
-
-b. If you find the mapping inaccurate or incomplete, you can modify the `user_account_mapping.csv` file and then upload it to DevLake.
-For example, here we change the `UserId` of the row with 'Id=github:GithubAccount:1:1234' in the `user_account_mapping.csv` file to 2.
-Then we upload the updated `user_account_mapping.csv` file with the following curl command:
-
-```
-curl --location --request PUT 'http://127.0.0.1:4000/api/plugins/org/user_account_mapping.csv' --form 'file=@"/path/to/user_account_mapping.csv"'
-```
-
-c. You can verify that the data in the `user_accounts` table has been updated.
-
-![image](/img/Team/teamflow7.png)
-
-## Appendix A: how to connect to the database
-
-Here we use MySQL as an example. You can install a database management tool like Sequel Ace, DataGrip, MySQL Workbench, etc.
-
-
-Or connect through the command line, for example:
-
-```
-mysql -h <host> -u <username> -p -P <port>
-```
-
-## Appendix B: how to examine `user_accounts` via SQL
-
-```
-SELECT a.id AS account_id, a.email, a.user_name AS account_user_name, u.id AS user_id, u.name AS real_name
-FROM accounts a
-  JOIN user_accounts ua ON a.id = ua.account_id
-  JOIN users u ON ua.user_id = u.id
-```
diff --git a/versioned_docs/version-v0.14/UserManuals/_category_.json b/versioned_docs/version-v0.14/UserManuals/_category_.json
deleted file mode 100644
index 23ce768a59c..00000000000
--- a/versioned_docs/version-v0.14/UserManuals/_category_.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "label": "User Manuals",
-  "position": 3,
-  "link":{
-    "type": "generated-index",
-    "slug": "UserManuals"
-  }
-}
diff --git a/versioned_sidebars/version-v0.11-sidebars.json b/versioned_sidebars/version-v0.11-sidebars.json
deleted file mode 100644
index 39332bfe752..00000000000
--- a/versioned_sidebars/version-v0.11-sidebars.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "docsSidebar": [
-    {
-      "type": "autogenerated",
-      "dirName": "."
-    }
-  ]
-}
diff --git a/versioned_sidebars/version-v0.12-sidebars.json b/versioned_sidebars/version-v0.12-sidebars.json
deleted file mode 100644
index 39332bfe752..00000000000
--- a/versioned_sidebars/version-v0.12-sidebars.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "docsSidebar": [
-    {
-      "type": "autogenerated",
-      "dirName": "."
-    }
-  ]
-}
diff --git a/versioned_sidebars/version-v0.13-sidebars.json b/versioned_sidebars/version-v0.13-sidebars.json
deleted file mode 100644
index 39332bfe752..00000000000
--- a/versioned_sidebars/version-v0.13-sidebars.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "docsSidebar": [
-    {
-      "type": "autogenerated",
-      "dirName": "."
-    }
-  ]
-}
diff --git a/versioned_sidebars/version-v0.14-sidebars.json b/versioned_sidebars/version-v0.14-sidebars.json
deleted file mode 100644
index 39332bfe752..00000000000
--- a/versioned_sidebars/version-v0.14-sidebars.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "docsSidebar": [
-    {
-      "type": "autogenerated",
-      "dirName": "."
-    }
-  ]
-}
diff --git a/versions.json b/versions.json
index 49b69a9fff2..edcdf565c28 100644
--- a/versions.json
+++ b/versions.json
@@ -4,9 +4,5 @@
   "v0.18",
   "v0.17",
   "v0.16",
-  "v0.15",
-  "v0.14",
-  "v0.13",
-  "v0.12",
-  "v0.11"
+  "v0.15"
 ]