feat(core): use rust utilities for caching operations (#17638)

This commit is contained in:
Jonathan Cammisuli 2023-06-27 11:08:22 -04:00 committed by GitHub
parent 77ca8d7c33
commit d3272108c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 307 additions and 123 deletions

18
Cargo.lock generated
View File

@ -355,6 +355,12 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "fs_extra"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsevent-sys"
version = "4.1.0"
@ -1159,6 +1165,7 @@ dependencies = [
"assert_fs",
"colored",
"crossbeam-channel",
"fs_extra",
"globset",
"hashbrown",
"ignore",
@ -1172,8 +1179,10 @@ dependencies = [
"serde",
"serde_json",
"thiserror",
"tokio",
"tracing",
"tracing-subscriber",
"walkdir",
"watchexec",
"watchexec-events",
"watchexec-filterer-ignore",
@ -1661,9 +1670,9 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.28.1"
version = "1.28.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0aa32867d44e6f2ce3385e89dceb990188b8bb0fb25b0cf576647a6f98ac5105"
checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2"
dependencies = [
"autocfg",
"bytes",
@ -1799,12 +1808,11 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.3.2"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]

View File

@ -4,32 +4,31 @@ version = '0.1.0'
edition = '2021'
[dependencies]
xxhash-rust = { version = '0.8.5', features = ['xxh3', 'xxh64'] }
anyhow = "1.0.71"
colored = "2"
crossbeam-channel = '0.5'
fs_extra = "1.3.0"
globset = "0.4.10"
hashbrown = { version = "0.14.0", features = ["rayon"] }
ignore = '0.4'
ignore-files = "1.3.0"
itertools = "0.10.5"
jsonc-parser = { version = "0.21.1", features = ["serde"] }
napi = { version = '2.12.6', default-features = false, features = ['anyhow', 'napi4', 'tokio_rt'] }
napi-derive = '2.9.3'
ignore = '0.4'
crossbeam-channel = '0.5'
globset = "0.4.10"
ignore-files = "1.3.0"
watchexec = "2.3.0"
watchexec-filterer-ignore = "1.2.1"
watchexec-events = "1.0.0"
watchexec-signals = "1.0.0"
tracing = "0.1.37"
tracing-subscriber = { version = "0.3.17", features = ["env-filter"]}
anyhow = "1.0.71"
thiserror = "1.0.40"
itertools = "0.10.5"
rayon = "1.7.0"
hashbrown = {version = "0.14.0", features = ["rayon"]}
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
jsonc-parser = {version = "0.21.1", features = ["serde"] }
colored = "2"
thiserror = "1.0.40"
tokio = { version = "1.28.2", features = ["fs"] }
tracing = "0.1.37"
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
walkdir = '2.3.3'
watchexec = "2.3.0"
watchexec-events = "1.0.0"
watchexec-filterer-ignore = "1.2.1"
watchexec-signals = "1.0.0"
xxhash-rust = { version = '0.8.5', features = ['xxh3', 'xxh64'] }
[lib]
crate-type = ['cdylib']

View File

@ -0,0 +1,83 @@
use std::path::PathBuf;
use crate::native::utils::glob::build_glob_set;
use crate::native::utils::path::Normalize;
use crate::native::walker::nx_walker_sync;
#[napi]
/// Expands the given entries into a list of existing files.
/// First checks if the entry exists, if not, it will glob the working directory to find the file.
pub fn expand_outputs(directory: String, entries: Vec<String>) -> anyhow::Result<Vec<String>> {
    let root = PathBuf::from(directory);

    // Split entries into ones that already exist on disk and ones that must be
    // treated as glob patterns.
    let mut existing: Vec<String> = Vec::new();
    let mut patterns: Vec<String> = Vec::new();
    for entry in entries {
        if root.join(&entry).exists() {
            existing.push(entry);
        } else {
            patterns.push(entry);
        }
    }

    // Fast path: every entry resolved to a real file or directory.
    if patterns.is_empty() {
        return Ok(existing);
    }

    // Walk the directory once and keep everything matching the leftover patterns.
    let glob_set = build_glob_set(patterns)?;
    let mut results: Vec<String> = nx_walker_sync(root)
        .filter(|path| glob_set.is_match(path))
        .map(|path| path.to_normalized_string())
        .collect();
    results.extend(existing);
    Ok(results)
}
#[cfg(test)]
mod test {
    use super::*;
    use assert_fs::prelude::*;
    use assert_fs::TempDir;
    use std::{assert_eq, vec};

    /// Builds a throwaway directory tree mixing plain files, nested files, and
    /// `.node` binaries at different depths so both the exists-check and the
    /// glob fallback paths of `expand_outputs` are exercised.
    fn setup_fs() -> TempDir {
        let temp = TempDir::new().unwrap();
        temp.child("test.txt").touch().unwrap();
        temp.child("foo.txt").touch().unwrap();
        temp.child("bar.txt").touch().unwrap();
        temp.child("baz").child("qux.txt").touch().unwrap();
        temp.child("nested")
            .child("deeply")
            .child("nx.darwin-arm64.node")
            .touch()
            .unwrap();
        temp.child("folder").child("nested-folder").touch().unwrap();
        temp.child("packages")
            .child("nx")
            .child("src")
            .child("native")
            .child("nx.darwin-arm64.node")
            .touch()
            .unwrap();
        temp
    }

    #[test]
    fn should_expand_outputs() {
        let temp = setup_fs();
        // Mix of a glob pattern (must be found by walking), an existing
        // directory path, and an existing file path.
        let entries = vec![
            "packages/nx/src/native/*.node".to_string(),
            "folder/nested-folder".to_string(),
            "test.txt".to_string(),
        ];
        let mut result = expand_outputs(temp.display().to_string(), entries).unwrap();
        // Sort because globbed results and pass-through entries have no
        // guaranteed relative order.
        result.sort();
        assert_eq!(
            result,
            vec![
                "folder/nested-folder",
                "packages/nx/src/native/nx.darwin-arm64.node",
                "test.txt"
            ]
        );
    }
}

View File

@ -0,0 +1,65 @@
use std::fs;
use std::path::PathBuf;
#[napi]
/// Recursively deletes `src` (a file or a directory) from disk.
pub fn remove(src: String) -> anyhow::Result<()> {
    let targets = [src];
    fs_extra::remove_items(&targets)?;
    Ok(())
}
#[napi]
/// Copies `src` (a file or directory) to `dest` by copying it *into* the
/// parent directory of `dest`, creating that parent chain first if needed.
pub fn copy(src: String, dest: String) -> anyhow::Result<()> {
    // Overwrite anything already at the destination instead of failing or skipping.
    let copy_options = fs_extra::dir::CopyOptions::new()
        .overwrite(true)
        .skip_exist(false);
    let dest: PathBuf = dest.into();
    // A root-like path has no parent; fall back to `dest` itself in that case.
    let dest_parent = dest.parent().unwrap_or(&dest);
    if !dest_parent.exists() {
        fs::create_dir_all(dest_parent)?;
    }
    // NOTE(review): `copy_items` places `src` *inside* `dest_parent`, so the
    // result only equals `dest` when the final path components of `src` and
    // `dest` match — looks like callers always pass matching names; confirm.
    fs_extra::copy_items(&[src], dest_parent, &copy_options)?;
    Ok(())
}
#[cfg(test)]
mod test {
    use super::*;
    use assert_fs::prelude::*;
    use assert_fs::TempDir;

    #[test]
    fn should_copy_directories() {
        // Deeply nested dot-directory: copy must recreate the whole destination
        // parent chain before copying the directory and its contents.
        let temp = TempDir::new().unwrap();
        temp.child("parent")
            .child("child")
            .child("grand-child")
            .child(".config")
            .child("file.txt")
            .touch()
            .unwrap();
        let src = temp.join("parent/child/grand-child/.config");
        let dest = temp.join("new-parent/child/grand-child/.config");
        copy(src.to_string_lossy().into(), dest.to_string_lossy().into()).unwrap();
        // The file inside the copied directory must exist at the new location.
        assert!(temp
            .child("new-parent/child/grand-child/.config/file.txt")
            .exists());
    }

    #[test]
    fn should_copy_single_files() {
        // A single file copied into a not-yet-existing destination directory.
        let temp = TempDir::new().unwrap();
        temp.child("parent")
            .child("file.txt")
            .write_str("content")
            .unwrap();
        let src = temp.join("parent/file.txt");
        let dest = temp.join("new-parent/file.txt");
        copy(src.to_string_lossy().into(), dest.to_string_lossy().into()).unwrap();
        assert!(temp.child("new-parent/file.txt").exists());
    }
}

2
packages/nx/src/native/cache/mod.rs vendored Normal file
View File

@ -0,0 +1,2 @@
pub mod expand_outputs;
pub mod file_ops;

View File

@ -1,31 +1,20 @@
#![allow(unused)]
use crate::native::parallel_walker::nx_walker;
use crate::native::types::FileData;
use crate::native::utils::glob::build_glob_set;
use crate::native::utils::path::Normalize;
use anyhow::anyhow;
use crossbeam_channel::unbounded;
use globset::{Glob, GlobSetBuilder};
use ignore::WalkBuilder;
use itertools::Itertools;
use std::cmp::Ordering;
use crate::native::walker::nx_walker;
use rayon::prelude::*;
use std::collections::HashMap;
use std::path::Path;
use std::thread::available_parallelism;
use xxhash_rust::xxh3;
type FileHashes = HashMap<String, String>;
#[napi]
fn hash_array(input: Vec<String>) -> String {
pub fn hash_array(input: Vec<String>) -> String {
let joined = input.join(",");
let content = joined.as_bytes();
xxh3::xxh3_64(content).to_string()
}
#[napi]
fn hash_file(file: String) -> Option<FileData> {
pub fn hash_file(file: String) -> Option<FileData> {
let Ok(content) = std::fs::read(&file) else {
return None;
};
@ -36,7 +25,7 @@ fn hash_file(file: String) -> Option<FileData> {
}
#[napi]
fn hash_files(workspace_root: String) -> HashMap<String, String> {
pub fn hash_files(workspace_root: String) -> HashMap<String, String> {
nx_walker(workspace_root, |rec| {
let mut collection: HashMap<String, String> = HashMap::new();
for (path, content) in rec {
@ -74,7 +63,7 @@ fn hash_files_matching_globs(
}
// Sort the file data so that its in deterministically ordered by file path
hashes.sort();
hashes.par_sort();
let sorted_file_hashes: Vec<String> =
hashes.into_iter().map(|file_data| file_data.hash).collect();

View File

@ -3,6 +3,13 @@
/* auto-generated by NAPI-RS */
/**
* Expands the given entries into a list of existing files.
* First checks if the entry exists, if not, it will glob the working directory to find the file.
*/
export function expandOutputs(directory: string, entries: Array<string>): Array<string>
export function remove(src: string): void
export function copy(src: string, dest: string): void
export function hashArray(input: Array<string>): string
export function hashFile(file: string): FileData | null
export function hashFiles(workspaceRoot: string): Record<string, string>

View File

@ -246,8 +246,11 @@ if (!nativeBinding) {
throw new Error(`Failed to load native binding`)
}
const { hashArray, hashFile, hashFiles, hashFilesMatchingGlobs, EventType, Watcher, WorkspaceErrors, getConfigFiles, getWorkspaceFilesNative } = nativeBinding
const { expandOutputs, remove, copy, hashArray, hashFile, hashFiles, hashFilesMatchingGlobs, EventType, Watcher, WorkspaceErrors, getConfigFiles, getWorkspaceFilesNative } = nativeBinding
module.exports.expandOutputs = expandOutputs
module.exports.remove = remove
module.exports.copy = copy
module.exports.hashArray = hashArray
module.exports.hashFile = hashFile
module.exports.hashFiles = hashFiles

View File

@ -1,7 +1,8 @@
pub mod cache;
pub mod hasher;
mod logger;
mod parallel_walker;
mod types;
mod utils;
mod walker;
pub mod watch;
pub mod workspace;

View File

@ -1,14 +1,29 @@
use std::path::Path;
/// Renders a filesystem path as a string with forward-slash separators, the
/// only form the JS side of the bindings accepts on every platform.
pub trait Normalize {
    fn to_normalized_string(&self) -> String;
}
impl Normalize for std::path::Path {
fn to_normalized_string(&self) -> String {
// convert back-slashes in Windows paths, since the js expects only forward-slash path separators
if cfg!(windows) {
self.display().to_string().replace('\\', "/")
} else {
self.display().to_string()
}
normalize_path(self)
}
}
impl Normalize for std::path::PathBuf {
    fn to_normalized_string(&self) -> String {
        // Delegate to the shared helper so Path and PathBuf normalize identically.
        normalize_path(self)
    }
}
/// Renders `path` as a display string, rewriting Windows back-slashes to
/// forward slashes since the JS layer only understands `/` separators.
fn normalize_path<P>(path: P) -> String
where
    P: AsRef<Path>,
{
    let rendered = path.as_ref().display().to_string();
    match cfg!(windows) {
        true => rendered.replace('\\', "/"),
        false => rendered,
    }
}

View File

@ -7,6 +7,37 @@ use ignore::WalkBuilder;
use crate::native::utils::glob::build_glob_set;
use walkdir::WalkDir;
/// Walks the directory in a single thread and does not ignore any files
/// Should only be used for small directories, and not traversing the whole workspace
///
/// Yields paths *relative* to `directory`. Note the walk root itself is also
/// yielded, as an empty relative path.
pub fn nx_walker_sync<'a, P>(directory: P) -> impl Iterator<Item = PathBuf>
where
    P: AsRef<Path> + 'a,
{
    let base_dir: PathBuf = directory.as_ref().into();
    // Always skip node_modules and .git wherever they appear in the tree.
    let ignore_glob_set = build_glob_set(vec![
        String::from("**/node_modules"),
        String::from("**/.git"),
    ])
    .expect("These static ignores always build");
    // Use WalkDir instead of ignore::WalkBuilder because it's faster
    WalkDir::new(&base_dir)
        .into_iter()
        // filter_entry prunes whole subtrees: a matched directory is never
        // descended into. The match runs against the absolute path string,
        // which the `**/`-prefixed globs above accommodate.
        .filter_entry(move |entry| {
            let path = entry.path().to_string_lossy();
            !ignore_glob_set.is_match(path.as_ref())
        })
        // Drop unreadable entries and rebase the survivors onto the walk root.
        .filter_map(move |entry| {
            entry
                .ok()
                .and_then(|e| e.path().strip_prefix(&base_dir).ok().map(|p| p.to_owned()))
        })
}
/// Walk the directory and ignore files from .gitignore and .nxignore
pub fn nx_walker<P, Fn, Re>(directory: P, f: Fn) -> Re
where
P: AsRef<Path>,
@ -43,8 +74,7 @@ where
Box::new(move |entry| {
use ignore::WalkState::*;
#[rustfmt::skip]
let Ok(dir_entry) = entry else {
let Ok(dir_entry) = entry else {
return Continue;
};
@ -68,11 +98,15 @@ where
#[cfg(test)]
mod test {
use super::*;
use crate::native::utils::path::Normalize;
use std::collections::HashMap;
use std::{assert_eq, vec};
use assert_fs::prelude::*;
use assert_fs::TempDir;
use std::collections::HashMap;
use crate::native::utils::path::Normalize;
use super::*;
///
/// Setup a temporary directory to do testing in

View File

@ -43,11 +43,7 @@ impl Watcher {
let watch_exec = Watchexec::new(InitConfig::default(), RuntimeConfig::default())
.map_err(anyhow::Error::from)?;
let mut globs = if let Some(globs) = additional_globs {
globs
} else {
vec![]
};
let mut globs = additional_globs.unwrap_or_default();
// always ignore the .git and node_modules folder
globs.push(".git/".into());

View File

@ -1,6 +1,6 @@
use crate::native::parallel_walker::nx_walker;
use crate::native::utils::glob::build_glob_set;
use crate::native::utils::path::Normalize;
use crate::native::walker::nx_walker;
use globset::GlobSet;
use std::collections::hash_map::Entry;
use std::collections::HashMap;

View File

@ -7,10 +7,10 @@ use tracing::trace;
use xxhash_rust::xxh3;
use crate::native::logger::enable_logger;
use crate::native::parallel_walker::nx_walker;
use crate::native::types::FileData;
use crate::native::utils::glob::build_glob_set;
use crate::native::utils::path::Normalize;
use crate::native::walker::nx_walker;
use crate::native::workspace::errors::{InternalWorkspaceErrors, WorkspaceErrors};
use crate::native::workspace::get_config_files::insert_config_file_into_map;
use crate::native::workspace::types::{FileLocation, ProjectConfiguration};
@ -40,7 +40,7 @@ pub fn get_workspace_files_native(
trace!(?root_map);
// Files need to be sorted each time because when we do hashArray in the TaskHasher.js, the order of the files should be deterministic
file_data.sort();
file_data.par_sort();
let file_locations = file_data
.into_par_iter()

View File

@ -1,21 +1,10 @@
import { workspaceRoot } from '../utils/workspace-root';
import {
copy,
lstat,
mkdir,
mkdirSync,
pathExists,
readFile,
remove,
writeFile,
} from 'fs-extra';
import { dirname, join, resolve } from 'path';
import { mkdir, mkdirSync, pathExists, readFile, writeFile } from 'fs-extra';
import { join } from 'path';
import { DefaultTasksRunnerOptions } from './default-tasks-runner';
import { execFile, spawn } from 'child_process';
import { spawn } from 'child_process';
import { cacheDir } from '../utils/cache-directory';
import { platform } from 'os';
import { Task } from '../config/task-graph';
import * as fastGlob from 'fast-glob';
export type CachedResult = {
terminalOutput: string;
@ -29,7 +18,6 @@ export class Cache {
root = workspaceRoot;
cachePath = this.createCacheDir();
terminalOutputsDir = this.createTerminalOutputsDir();
useFsExtraToCopyAndRemove = platform() === 'win32';
constructor(private readonly options: DefaultTasksRunnerOptions) {}
@ -83,7 +71,7 @@ export class Cache {
const tdCommit = join(this.cachePath, `${task.hash}.commit`);
// might be left overs from partially-completed cache invocations
await remove(tdCommit);
await this.remove(tdCommit);
await this.remove(td);
await mkdir(td);
@ -99,11 +87,7 @@ export class Cache {
expandedOutputs.map(async (f) => {
const src = join(this.root, f);
if (await pathExists(src)) {
const isFile = (await lstat(src)).isFile();
const cached = join(td, 'outputs', f);
const directory = isFile ? dirname(cached) : cached;
await mkdir(directory, { recursive: true });
await this.copy(src, cached);
}
})
@ -140,12 +124,8 @@ export class Cache {
expandedOutputs.map(async (f) => {
const cached = join(cachedResult.outputsPath, f);
if (await pathExists(cached)) {
const isFile = (await lstat(cached)).isFile();
const src = join(this.root, f);
await this.remove(src);
// Ensure parent directory is created if src is a file
const directory = isFile ? resolve(src, '..') : src;
await mkdir(directory, { recursive: true });
await this.copy(cached, src);
}
})
@ -168,54 +148,49 @@ export class Cache {
return this._expandOutputs(outputs, cachedResult.outputsPath);
}
private async _expandOutputs(outputs: string[], cwd: string) {
return (
await Promise.all(
outputs.map(async (entry) => {
if (await pathExists(join(cwd, entry))) {
return entry;
}
return fastGlob(entry, { cwd, dot: true });
})
)
).flat();
/**
 * Resolves output definitions (literal paths or glob patterns) into the list
 * of entries that actually exist under `cwd`, delegating to the native
 * `expandOutputs` binding. Wrapped in performance marks so the expansion cost
 * shows up in profiling traces.
 */
private async _expandOutputs(
  outputs: string[],
  cwd: string
): Promise<string[]> {
  // Lazy require so the native module is only loaded when caching is used.
  const { expandOutputs } = require('../native');
  performance.mark('expandOutputs:start');
  const results = expandOutputs(cwd, outputs);
  performance.mark('expandOutputs:end');
  performance.measure(
    'expandOutputs',
    'expandOutputs:start',
    'expandOutputs:end'
  );
  return results;
}
private async copy(src: string, destination: string): Promise<void> {
const { copy } = require('../native');
// 'cp -a /path/dir/ dest/' operates differently to 'cp -a /path/dir dest/'
// --> which means actual build works but subsequent populate from cache (using cp -a) does not
// --> the fix is to remove trailing slashes to ensure consistent & expected behaviour
src = src.replace(/[\/\\]$/, '');
if (this.useFsExtraToCopyAndRemove) {
return copy(src, destination);
}
return new Promise((res, rej) => {
execFile('cp', ['-a', src, dirname(destination)], (error) => {
if (!error) {
res();
} else {
this.useFsExtraToCopyAndRemove = true;
copy(src, destination).then(res, rej);
}
});
try {
copy(src, destination);
res();
} catch (e) {
rej(e);
}
});
}
private async remove(path: string): Promise<void> {
if (this.useFsExtraToCopyAndRemove) {
return remove(path);
}
return new Promise<void>((res, rej) => {
execFile('rm', ['-rf', path], (error) => {
if (!error) {
res();
} else {
this.useFsExtraToCopyAndRemove = true;
remove(path).then(res, rej);
}
});
const { remove } = require('../native');
return new Promise((res, rej) => {
try {
remove(path);
res();
} catch (e) {
rej(e);
}
});
}

View File

@ -349,6 +349,7 @@ export class TaskOrchestrator {
if (doNotSkipCache) {
// cache the results
performance.mark('cache-results-start');
await Promise.all(
results
.filter(
@ -375,6 +376,12 @@ export class TaskOrchestrator {
this.cache.put(task, terminalOutput, outputs, code)
)
);
performance.mark('cache-results-end');
performance.measure(
'cache-results',
'cache-results-start',
'cache-results-end'
);
}
this.options.lifeCycle.endTasks(
results.map((result) => {