Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: add copyright notices to COPYRIGHT.md #4

Closed
2 changes: 2 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1552,6 +1552,8 @@ dependencies = [
"anyhow",
"serde",
"serde_json",
"tempfile",
"thiserror",
]

[[package]]
Expand Down
5 changes: 4 additions & 1 deletion src/bootstrap/src/core/build_steps/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,14 +209,17 @@ impl Step for GenerateCopyright {
}

fn run(self, builder: &Builder<'_>) -> Self::Output {
let license_metadata = builder.ensure(CollectLicenseMetadata);
// let license_metadata = builder.ensure(CollectLicenseMetadata);
let license_metadata = builder.out.join("license-metadata.json");

// Temporary location, it will be moved to the proper one once it's accurate.
let dest = builder.out.join("COPYRIGHT.md");

let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
cmd.env("LICENSE_METADATA", &license_metadata);
cmd.env("DEST", &dest);
cmd.env("OUT_DIR", &builder.out);
cmd.env("CARGO", &builder.initial_cargo);
cmd.run(builder);

dest
Expand Down
2 changes: 2 additions & 0 deletions src/tools/collect-license-metadata/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
name = "collect-license-metadata"
version = "0.1.0"
edition = "2021"
description = "Runs the reuse tool and caches the output, so rust toolchain devs don't need to have reuse installed"
license = "MIT OR Apache-2.0"

[dependencies]
anyhow = "1.0.65"
Expand Down
5 changes: 5 additions & 0 deletions src/tools/collect-license-metadata/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ use std::path::PathBuf;
const CONDENSED_DIRECTORIES: &[(&str, &str)] =
&[("./src/llvm-project/", "./src/llvm-project/README.md")];

/// The entry point to the binary.
///
/// You should probably let `bootstrap` execute this program instead of running it directly.
///
/// Run `x.py run collect-license-metadata`
fn main() -> Result<(), Error> {
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();
Expand Down
3 changes: 3 additions & 0 deletions src/tools/generate-copyright/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
name = "generate-copyright"
version = "0.1.0"
edition = "2021"
description = "Produces a manifest of all the copyrighted materials in the Rust Toolchain"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.65"
serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.85"
thiserror = "1"
tempfile = "3"
222 changes: 222 additions & 0 deletions src/tools/generate-copyright/src/cargo_metadata.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
//! Gets metadata about a workspace from Cargo

use std::collections::{BTreeMap, BTreeSet};
use std::ffi::{OsStr, OsString};
use std::path::Path;

/// Describes how this module can fail
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("Failed to run cargo metadata: {0:?}")]
LaunchingMetadata(#[from] std::io::Error),
#[error("Failed get output from cargo metadata: {0:?}")]
GettingMetadata(String),
#[error("Failed parse JSON output from cargo metadata: {0:?}")]
ParsingJson(#[from] serde_json::Error),
#[error("Failed find expected JSON element {0} in output from cargo metadata")]
MissingJsonElement(&'static str),
#[error("Failed find expected JSON element {0} in output from cargo metadata for package {1}")]
MissingJsonElementForPackage(String, String),
#[error("Failed to run cargo vendor: {0:?}")]
LaunchingVendor(std::io::Error),
#[error("Failed to complete cargo vendor")]
RunningVendor,
}

/// Describes one of our dependencies
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Dependency {
/// The name of the package
pub name: String,
/// The version number
pub version: String,
/// The license it is under
pub license: String,
/// The list of authors from the package metadata
pub authors: Vec<String>,
/// A list of important files from the package, with their contents.
///
/// This includes *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, case-insensitive.
pub notices: BTreeMap<OsString, String>,
}

/// Use `cargo` to get a list of dependencies and their license data.
///
/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can
/// grab the license files.
///
/// Any dependency with a path beginning with `root_path` is ignored, as we
/// assume `reuse` has covered it already.
pub fn get(
cargo: &Path,
dest: &Path,
root_path: &Path,
manifest_paths: &[&Path],
) -> Result<BTreeSet<Dependency>, Error> {
let mut temp_set = BTreeSet::new();
// Look at the metadata for each manifest
for manifest_path in manifest_paths {
if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {
panic!("cargo_manifest::get requires a path to a Cargo.toml file");
}
let metadata_json = get_metadata_json(cargo, manifest_path)?;
let packages = metadata_json["packages"]
.as_array()
.ok_or_else(|| Error::MissingJsonElement("packages array"))?;
for package in packages {
let package =
package.as_object().ok_or_else(|| Error::MissingJsonElement("package object"))?;
let manifest_path = package
.get("manifest_path")
.and_then(|v| v.as_str())
.map(Path::new)
.ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?;
if manifest_path.starts_with(&root_path) {
// it's an in-tree dependency and reuse covers it
continue;
}
// otherwise it's an out-of-tree dependency
let get_string = |field_name: &str, package_name: &str| {
package.get(field_name).and_then(|v| v.as_str()).ok_or_else(|| {
Error::MissingJsonElementForPackage(
format!("package.{field_name}"),
package_name.to_owned(),
)
})
};
let name = get_string("name", "unknown")?;
let license = get_string("license", name)?;
let version = get_string("version", name)?;
let authors_list = package
.get("authors")
.and_then(|v| v.as_array())
.ok_or_else(|| Error::MissingJsonElement("package.authors"))?;
let authors: Vec<String> =
authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect();
temp_set.insert(Dependency {
name: name.to_owned(),
version: version.to_owned(),
license: license.to_owned(),
authors,
notices: BTreeMap::new(),
});
}
}

// Now do a cargo-vendor and grab everything
let vendor_path = dest.join("vendor");
println!("Vendoring deps into {}...", vendor_path.display());
run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;

// Now for each dependency we found, go and grab any important looking files
let mut output = BTreeSet::new();
for mut dep in temp_set {
load_important_files(&mut dep, &vendor_path)?;
output.insert(dep);
}

Ok(output)
}

/// Get cargo-metdata for a package, as JSON
fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result<serde_json::Value, Error> {
let metadata_output = std::process::Command::new(cargo)
.arg("metadata")
.arg("--format-version=1")
.arg("--all-features")
.arg("--manifest-path")
.arg(manifest_path)
.env("RUSTC_BOOTSTRAP", "1")
.output()
.map_err(|e| Error::LaunchingMetadata(e))?;
if !metadata_output.status.success() {
return Err(Error::GettingMetadata(
String::from_utf8(metadata_output.stderr).expect("UTF-8 output from cargo"),
));
}
let json = serde_json::from_slice(&metadata_output.stdout)?;
Ok(json)
}

/// Run cargo-vendor, fetching into the given dir
fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> {
let mut vendor_command = std::process::Command::new(cargo);
vendor_command.env("RUSTC_BOOTSTRAP", "1");
vendor_command.arg("vendor");
vendor_command.arg("--quiet");
vendor_command.arg("--versioned-dirs");
for manifest_path in manifest_paths {
vendor_command.arg("-s");
vendor_command.arg(manifest_path);
}
vendor_command.arg(dest);

let vendor_status = vendor_command.status().map_err(|e| Error::LaunchingVendor(e))?;

if !vendor_status.success() {
return Err(Error::RunningVendor);
}

Ok(())
}

/// Add important files off disk into this dependency.
///
/// Maybe one-day Cargo.toml will contain enough information that we don't need
/// to do this manual scraping.
fn load_important_files(dep: &mut Dependency, vendor_root: &Path) -> Result<(), Error> {
let name_version = format!("{}-{}", dep.name, dep.version);
println!("Scraping notices for {}...", name_version);
let dep_vendor_path = vendor_root.join(name_version);
for entry in std::fs::read_dir(dep_vendor_path)? {
let entry = entry?;
let metadata = entry.metadata()?;
let path = entry.path();
if let Some(filename) = path.file_name() {
let lc_filename = filename.to_ascii_lowercase();
let lc_filename_str = lc_filename.to_string_lossy();
let mut keep = false;
for m in ["copyright", "licence", "license", "author", "notice"] {
if lc_filename_str.contains(m) {
keep = true;
break;
}
}
if keep {
if metadata.is_dir() {
// scoop up whole directory
} else if metadata.is_file() {
println!("Scraping {}", filename.to_string_lossy());
dep.notices.insert(filename.to_owned(), std::fs::read_to_string(path)?);
}
}
}
}
Ok(())
}

// /// Fetch any important notices for this package
// fn fetch_notices(cargo: &Path, package_name: &str, package_version: &str) -> Result<BTreeMap<OsString, String>, Error> {
// println!("Fetching for {package_name} {package_version}");
// let mut output = BTreeMap::new();
// // 1. Make a temp directory
// let workarea = tempfile::tempdir()?;
// // 2. Run `cargo download` into it
// let name_version = format!("{package_name}={package_version}");
// let download_output = std::process::Command::new(cargo)
// .arg("download")
// .arg("-o")
// .arg(workarea.path())
// .arg("-x")
// .arg(&name_version)
// .env("RUSTC_BOOTSTRAP", "1")
// .output()
// .map_err(|e| Error::LaunchingDownload(e, name_version.clone()))?;
// if !download_output.status.success() {
// return Err(Error::GettingDownload(
// String::from_utf8(download_output.stderr).expect("UTF-8 output from cargo"),
// name_version.clone()
// ));
// }
// Ok(output)
// }
Loading