From 11dfd38d1f02cdb6fa4a4f1dade119db5fdaf2fa Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sat, 3 Apr 2021 18:59:59 -0400 Subject: [PATCH 1/3] Use `--emit=unshared-files` instead of `--print=unversioned-files` This avoids false positives for the essential files. Previously, docs.rs would copy any files with a resource suffix as an essential file, even if it couldn't be shared between crates (such as search-index.js). --- src/docbuilder/rustwide_builder.rs | 72 ++++++++---------------------- src/utils/copy.rs | 43 +++++++++--------- src/utils/mod.rs | 2 +- 3 files changed, 42 insertions(+), 75 deletions(-) diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 574830c7f..9bbee8d75 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -14,13 +14,13 @@ use docsrs_metadata::{Metadata, DEFAULT_TARGETS, HOST_TARGET}; use failure::ResultExt; use log::{debug, info, warn, LevelFilter}; use postgres::Client; -use rustwide::cmd::{Binary, Command, SandboxBuilder, SandboxImage}; +use rustwide::cmd::{Command, SandboxBuilder, SandboxImage}; use rustwide::logging::{self, LogStorage}; use rustwide::toolchain::ToolchainError; use rustwide::{Build, Crate, Toolchain, Workspace, WorkspaceBuilder}; use serde_json::Value; use std::collections::{HashMap, HashSet}; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::sync::Arc; const USER_AGENT: &str = "docs.rs builder (https://github.com/rust-lang/docs.rs)"; @@ -182,26 +182,22 @@ impl RustwideBuilder { let krate = Crate::crates_io(DUMMY_CRATE_NAME, DUMMY_CRATE_VERSION); krate.fetch(&self.workspace)?; - // TODO: remove this when https://github.com/rust-lang/rustwide/pull/53 lands. - struct Rustdoc<'a> { - toolchain_version: &'a str, - } - impl rustwide::cmd::Runnable for Rustdoc<'_> { - fn name(&self) -> Binary { - Binary::ManagedByRustwide(PathBuf::from("rustdoc")) - } - - fn prepare_command<'w, 'pl>(&self, cmd: Command<'w, 'pl>) -> Command<'w, 'pl> { - cmd.args(&[format!("+{}", self.toolchain_version)]) - } - } - build_dir .build(&self.toolchain, &krate, self.prepare_sandbox(&limits)) .run(|build| { let metadata = Metadata::from_crate_root(&build.host_source_dir())?; - let res = self.execute_build(HOST_TARGET, true, build, &limits, &metadata)?; + let rustdoc_flags = vec![ + "--emit=unversioned-shared-resources,toolchain-shared-resources".to_string(), + ]; + let res = self.execute_build( + HOST_TARGET, + true, + build, + &limits, + &metadata, + rustdoc_flags, + )?; if !res.result.successful { failure::bail!("failed to build dummy crate for {}", self.rustc_version); } @@ -211,39 +207,7 @@ impl RustwideBuilder { let dest = tempfile::Builder::new() .prefix("essential-files") .tempdir()?; - - let toolchain_version = self.toolchain.as_dist().unwrap().name(); - let output = build.cmd(Rustdoc { toolchain_version }) - .args(&["-Zunstable-options", "--print=unversioned-files"]) - .run_capture() - .context("failed to learn about unversioned files - make sure you have nightly-2021-03-07 or later")?; - let essential_files_unversioned = output - .stdout_lines() - .iter() - .map(PathBuf::from); - let resource_suffix = format!("-{}", parse_rustc_version(&self.rustc_version)?); - let essential_files_versioned: Vec<_> = source.read_dir()? - .collect::, _>>()? - .into_iter() - .filter_map(|entry| { - entry.file_name().to_str().and_then(|name| if name.contains(&resource_suffix) { - Some(entry.file_name().into()) - } else { None }) - }) - .collect(); - for file_name in essential_files_unversioned.chain(essential_files_versioned) { - let source_path = source.join(&file_name); - let dest_path = dest.path().join(&file_name); - debug!("copying {} to {}", source_path.display(), dest_path.display()); - ::std::fs::copy(&source_path, &dest_path).with_context(|_| { - format!( - "couldn't copy '{}' to '{}'", - source_path.display(), - dest_path.display() - ) - })?; - } - + crate::utils::copy_dir_all(source, &dest, |_| true)?; add_path_into_database(&self.storage, "", &dest)?; conn.query( "INSERT INTO config (name, value) VALUES ('rustc_version', $1) \ @@ -352,7 +316,8 @@ impl RustwideBuilder { } = metadata.targets(self.config.include_default_targets); // Perform an initial build - let res = self.execute_build(default_target, true, &build, &limits, &metadata)?; + let res = + self.execute_build(default_target, true, &build, &limits, &metadata, vec![])?; if res.result.successful { if let Some(name) = res.cargo_metadata.root().library_name() { let host_target = build.host_target_dir(); @@ -458,7 +423,7 @@ impl RustwideBuilder { successful_targets: &mut Vec, metadata: &Metadata, ) -> Result<()> { - let target_res = self.execute_build(target, false, build, limits, metadata)?; + let target_res = self.execute_build(target, false, build, limits, metadata, Vec::new())?; if target_res.result.successful { // Cargo is not giving any error and not generating documentation of some crates // when we use a target compile options. Check documentation exists before @@ -534,12 +499,11 @@ impl RustwideBuilder { build: &Build, limits: &Limits, metadata: &Metadata, + mut rustdoc_flags: Vec, ) -> Result { let cargo_metadata = CargoMetadata::load(&self.workspace, &self.toolchain, &build.host_source_dir())?; - let mut rustdoc_flags = Vec::new(); - rustdoc_flags.extend(vec![ "--resource-suffix".to_string(), format!("-{}", parse_rustc_version(&self.rustc_version)?), diff --git a/src/utils/copy.rs b/src/utils/copy.rs index 9e30efcd0..bffea4053 100644 --- a/src/utils/copy.rs +++ b/src/utils/copy.rs @@ -1,7 +1,9 @@ -use crate::error::Result; +use std::ffi::OsStr; use std::fs; +use std::io; use std::path::Path; +use crate::error::Result; use regex::Regex; /// Copies documentation from a crate's target directory to destination. @@ -9,31 +11,32 @@ use regex::Regex; /// Target directory must have doc directory. /// /// This function is designed to avoid file duplications. -pub fn copy_doc_dir, Q: AsRef>(source: P, destination: Q) -> Result<()> { - let destination = destination.as_ref(); - - // Make sure destination directory exists - if !destination.exists() { - fs::create_dir_all(destination)?; - } - +pub(crate) fn copy_doc_dir(source: impl AsRef, destination: impl AsRef) -> Result<()> { // Avoid copying common files let dup_regex = Regex::new( r"(\.lock|\.txt|\.woff|\.svg|\.css|main-.*\.css|main-.*\.js|normalize-.*\.js|rustdoc-.*\.css|storage-.*\.js|theme-.*\.js)$") .unwrap(); - for file in source.as_ref().read_dir()? { - let file = file?; - let destination_full_path = destination.join(file.file_name()); - - let metadata = file.metadata()?; + copy_dir_all(source, destination, |filename| { + dup_regex.is_match(filename.to_str().unwrap()) + }) + .map_err(Into::into) +} - if metadata.is_dir() { - copy_doc_dir(file.path(), destination_full_path)? - } else if dup_regex.is_match(&file.file_name().into_string().unwrap()[..]) { - continue; - } else { - fs::copy(&file.path(), &destination_full_path)?; +pub(crate) fn copy_dir_all( + src: impl AsRef, + dst: impl AsRef, + should_copy: impl Copy + Fn(&OsStr) -> bool, +) -> io::Result<()> { + let dst = dst.as_ref(); + fs::create_dir_all(dst)?; + for entry in fs::read_dir(src)? { + let entry = entry?; + let filename = entry.file_name(); + if entry.file_type()?.is_dir() { + copy_dir_all(entry.path(), dst.join(filename), should_copy)?; + } else if should_copy(&filename) { + fs::copy(entry.path(), dst.join(filename))?; } } Ok(()) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index eccc03461..b9bacc00d 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,7 +1,7 @@ //! Various utilities for docs.rs pub(crate) use self::cargo_metadata::{CargoMetadata, Package as MetadataPackage}; -pub(crate) use self::copy::copy_doc_dir; +pub(crate) use self::copy::{copy_dir_all, copy_doc_dir}; pub use self::daemon::start_daemon; pub use self::github_updater::GithubUpdater; pub(crate) use self::html::rewrite_lol; From a68f2c2d2521592240e8e3400a150785e786bcb9 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sat, 3 Apr 2021 19:05:59 -0400 Subject: [PATCH 2/3] Pass --emit=invocation-specific for crate builds This avoids copying shared files for crates other than the empty library. This a) Allows removing `copy_doc_dir`, which allows crates to add their own custom CSS (with `--theme` or otherwise), and b) Avoids unnecessary disk IO. Here are the files generated for a build of regex 1.4.1: ``` cratesfyi=# select path from files where path like '%regex/1.4.1%' and path not like '%regex/1.4.1/%/%'; path ----------------------------------------------------------------------- sources/regex/1.4.1/PERFORMANCE.md rustdoc/regex/1.4.1/.lock sources/regex/1.4.1/Cargo.toml sources/regex/1.4.1/.cargo_vcs_info.json sources/regex/1.4.1/LICENSE-APACHE rustdoc/regex/1.4.1/source-files-20210402-1.53.0-nightly-138fd56cf.js rustdoc/regex/1.4.1/settings.html rustdoc/regex/1.4.1/search-index-20210402-1.53.0-nightly-138fd56cf.js rustdoc/regex/1.4.1/crates-20210402-1.53.0-nightly-138fd56cf.js sources/regex/1.4.1/CHANGELOG.md sources/regex/1.4.1/LICENSE-MIT sources/regex/1.4.1/rustfmt.toml sources/regex/1.4.1/HACKING.md sources/regex/1.4.1/test sources/regex/1.4.1/Cargo.toml.orig sources/regex/1.4.1/UNICODE.md sources/regex/1.4.1/.gitignore sources/regex/1.4.1/Cargo.lock sources/regex/1.4.1/README.md (19 rows) ``` Note in particular that search-index.js and crates.js are included, but not shared files like `storage.js` or `rustdoc.css`. --- src/docbuilder/rustwide_builder.rs | 30 +++++++++----------- src/utils/copy.rs | 44 ++++-------------------------- src/utils/mod.rs | 2 +- 3 files changed, 19 insertions(+), 57 deletions(-) diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 9bbee8d75..e155b2694 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -7,7 +7,7 @@ use crate::docbuilder::{crates::crates_from_path, Limits}; use crate::error::Result; use crate::index::api::ReleaseData; use crate::storage::CompressionAlgorithms; -use crate::utils::{copy_doc_dir, parse_rustc_version, CargoMetadata, GithubUpdater}; +use crate::utils::{copy_dir_all, parse_rustc_version, CargoMetadata, GithubUpdater}; use crate::{db::blacklist::is_blacklisted, utils::MetadataPackage}; use crate::{Config, Context, Index, Metrics, Storage}; use docsrs_metadata::{Metadata, DEFAULT_TARGETS, HOST_TARGET}; @@ -187,17 +187,7 @@ impl RustwideBuilder { .run(|build| { let metadata = Metadata::from_crate_root(&build.host_source_dir())?; - let rustdoc_flags = vec![ - "--emit=unversioned-shared-resources,toolchain-shared-resources".to_string(), - ]; - let res = self.execute_build( - HOST_TARGET, - true, - build, - &limits, - &metadata, - rustdoc_flags, - )?; + let res = self.execute_build(HOST_TARGET, true, build, &limits, &metadata, true)?; if !res.result.successful { failure::bail!("failed to build dummy crate for {}", self.rustc_version); } @@ -207,7 +197,7 @@ impl RustwideBuilder { let dest = tempfile::Builder::new() .prefix("essential-files") .tempdir()?; - crate::utils::copy_dir_all(source, &dest, |_| true)?; + copy_dir_all(source, &dest)?; add_path_into_database(&self.storage, "", &dest)?; conn.query( "INSERT INTO config (name, value) VALUES ('rustc_version', $1) \ @@ -317,7 +307,7 @@ impl RustwideBuilder { // Perform an initial build let res = - self.execute_build(default_target, true, &build, &limits, &metadata, vec![])?; + self.execute_build(default_target, true, &build, &limits, &metadata, false)?; if res.result.successful { if let Some(name) = res.cargo_metadata.root().library_name() { let host_target = build.host_target_dir(); @@ -423,7 +413,7 @@ impl RustwideBuilder { successful_targets: &mut Vec, metadata: &Metadata, ) -> Result<()> { - let target_res = self.execute_build(target, false, build, limits, metadata, Vec::new())?; + let target_res = self.execute_build(target, false, build, limits, metadata, false)?; if target_res.result.successful { // Cargo is not giving any error and not generating documentation of some crates // when we use a target compile options. Check documentation exists before @@ -499,11 +489,17 @@ impl RustwideBuilder { build: &Build, limits: &Limits, metadata: &Metadata, - mut rustdoc_flags: Vec, + create_essential_files: bool, ) -> Result { let cargo_metadata = CargoMetadata::load(&self.workspace, &self.toolchain, &build.host_source_dir())?; + let mut rustdoc_flags = vec![if create_essential_files { + "--emit=unversioned-shared-resources,toolchain-shared-resources" + } else { + "--emit=invocation-specific" + } + .to_string()]; rustdoc_flags.extend(vec![ "--resource-suffix".to_string(), format!("-{}", parse_rustc_version(&self.rustc_version)?), @@ -620,7 +616,7 @@ impl RustwideBuilder { } info!("{} {}", source.display(), dest.display()); - copy_doc_dir(source, dest) + copy_dir_all(source, dest).map_err(Into::into) } fn upload_docs( diff --git a/src/utils/copy.rs b/src/utils/copy.rs index bffea4053..c690267b7 100644 --- a/src/utils/copy.rs +++ b/src/utils/copy.rs @@ -1,41 +1,17 @@ -use std::ffi::OsStr; use std::fs; use std::io; use std::path::Path; -use crate::error::Result; -use regex::Regex; - -/// Copies documentation from a crate's target directory to destination. -/// -/// Target directory must have doc directory. -/// -/// This function is designed to avoid file duplications. -pub(crate) fn copy_doc_dir(source: impl AsRef, destination: impl AsRef) -> Result<()> { - // Avoid copying common files - let dup_regex = Regex::new( - r"(\.lock|\.txt|\.woff|\.svg|\.css|main-.*\.css|main-.*\.js|normalize-.*\.js|rustdoc-.*\.css|storage-.*\.js|theme-.*\.js)$") - .unwrap(); - - copy_dir_all(source, destination, |filename| { - dup_regex.is_match(filename.to_str().unwrap()) - }) - .map_err(Into::into) -} - -pub(crate) fn copy_dir_all( - src: impl AsRef, - dst: impl AsRef, - should_copy: impl Copy + Fn(&OsStr) -> bool, -) -> io::Result<()> { +/// cp -r src dst +pub(crate) fn copy_dir_all(src: impl AsRef, dst: impl AsRef) -> io::Result<()> { let dst = dst.as_ref(); fs::create_dir_all(dst)?; for entry in fs::read_dir(src)? { let entry = entry?; let filename = entry.file_name(); if entry.file_type()?.is_dir() { - copy_dir_all(entry.path(), dst.join(filename), should_copy)?; - } else if should_copy(&filename) { + copy_dir_all(entry.path(), dst.join(filename))?; + } else { fs::copy(entry.path(), dst.join(filename))?; } } @@ -62,21 +38,11 @@ mod test { fs::create_dir(doc.join("inner")).unwrap(); fs::write(doc.join("index.html"), "spooky").unwrap(); - fs::write(doc.join("index.txt"), "spooky").unwrap(); fs::write(doc.join("inner").join("index.html"), "spooky").unwrap(); - fs::write(doc.join("inner").join("index.txt"), "spooky").unwrap(); - fs::write(doc.join("inner").join("important.svg"), "").unwrap(); // lets try to copy a src directory to tempdir - copy_doc_dir(source.path().join("doc"), destination.path()).unwrap(); + copy_dir_all(source.path().join("doc"), destination.path()).unwrap(); assert!(destination.path().join("index.html").exists()); - assert!(!destination.path().join("index.txt").exists()); assert!(destination.path().join("inner").join("index.html").exists()); - assert!(!destination.path().join("inner").join("index.txt").exists()); - assert!(!destination - .path() - .join("inner") - .join("important.svg") - .exists()); } } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index b9bacc00d..aa3dd8f16 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,7 +1,7 @@ //! Various utilities for docs.rs pub(crate) use self::cargo_metadata::{CargoMetadata, Package as MetadataPackage}; -pub(crate) use self::copy::{copy_dir_all, copy_doc_dir}; +pub(crate) use self::copy::copy_dir_all; pub use self::daemon::start_daemon; pub use self::github_updater::GithubUpdater; pub(crate) use self::html::rewrite_lol; From d9a14bcc6e777b728bf27d6f18dd152231973a2d Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sat, 3 Apr 2021 19:34:18 -0400 Subject: [PATCH 3/3] Give precedence to global shared files over local ones This avoids broken docs for crates build with an old version of rustdoc that didn't respect the shared static root. --- src/web/mod.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/web/mod.rs b/src/web/mod.rs index 8a254bb60..ccc51bedc 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -157,9 +157,9 @@ impl Handler for CratesfyiHandler { handle: impl FnOnce() -> IronResult, ) -> IronResult { if e.response.status == Some(status::NotFound) { - // the routes are ordered from most specific to least; give precedence to the - // original error message. - handle().or(Err(e)) + // the routes are ordered from least specific to most; give precedence to the + // new error message. + handle() } else { Err(e) } @@ -176,11 +176,11 @@ impl Handler for CratesfyiHandler { // specific path means that buggy docs from 2018 will have missing CSS (#1181) so until // that's fixed, we need to keep the current (buggy) behavior. // - // It's important that `router_handler` comes first so that local rustdoc files take - // precedence over global ones (see #1324). - self.router_handler + // It's important that `shared_resource_handler` comes first so that global rustdoc files take + // precedence over local ones (see #1327). + self.shared_resource_handler .handle(req) - .or_else(|e| if_404(e, || self.shared_resource_handler.handle(req))) + .or_else(|e| if_404(e, || self.router_handler.handle(req))) .or_else(|e| { let err = if let Some(err) = e.error.downcast_ref::() { *err