Reputation: 4450
This Bash script searches for directories named node_modules
(or a folder specified with the --target option) within the current working directory and lists them with their size, last modification date, and path.
The problem is that the sorting is not working, especially sorting by size. Sorting by size should be in decreasing order, from largest to smallest.
Bash version:
#!/bin/bash

start_time=$(date +%s.%N)
find_dir="node_modules"
sort_by="path"

while [[ "$1" =~ ^- ]]; do
    case $1 in
        -t|--target)
            find_dir="$2"
            shift 2
            ;;
        -s|--sort)
            sort_by="$2"
            shift 2
            ;;
        *)
            echo "Invalid option: $1"
            exit 1
            ;;
    esac
done

dirs=$(find $(pwd) -type d -name "$find_dir" 2>/dev/null)
json="\"paths\": ["
total_size_kb=0
declare -a results

for dir in $dirs; do
    parent_dir=$(dirname "$dir")
    if [[ ! "$parent_dir" =~ /$find_dir/ ]]; then
        last_mod=$(stat -f "%Sm" -t "%d/%m/%Y %H:%M:%S" "$dir")
        size_kb=$(du -sk "$dir" | awk '{print $1}')
        total_size_kb=$((total_size_kb + size_kb))
        size_mb=$(echo "scale=2; $size_kb/1024" | bc)
        if (( $(echo "$size_mb < 1" | bc -l) )); then
            size=$(echo "scale=2; $size_kb" | bc)
            size="${size} KB"
        elif (( $(echo "$size_mb >= 1024" | bc -l) )); then
            size=$(echo "scale=2; $size_mb/1024" | bc)
            size="${size} GB"
        else
            size="${size_mb} MB"
        fi
        results+=("{\"path\": \"$dir\", \"last_mod\": \"$(date -r "$dir" -u +%dd)\", \"size\": \"$size\"}")
    fi
done

if [[ "$sort_by" == "size" ]]; then
    results=$(for r in "${results[@]}"; do echo "$r"; done | sort -t '"' -k 10 -n -r)
elif [[ "$sort_by" == "path" ]]; then
    results=$(for r in "${results[@]}"; do echo "$r"; done | sort -t '"' -k 4)
elif [[ "$sort_by" == "last-mod" ]]; then
    results=$(for r in "${results[@]}"; do echo "$r"; done | sort -t '"' -k 8)
fi

json="${json}$(echo "$results" | tr '\n' ',' | sed 's/,$//')"
json="${json}]"

end_time=$(date +%s.%N)
elapsed_time=$(echo "$end_time - $start_time" | bc)
total_size_mb=$(echo "scale=2; $total_size_kb/1024" | bc)

json="{
\"releasable_space\": \"${total_size_mb} MB\",
\"search_completed\": \"$(echo $elapsed_time | cut -d'.' -f1)s\",
${json}
}"

echo "$json"
JS version:
const fs = require('fs').promises;
const path = require('path');
const { execSync } = require('child_process');

const startTime = process.hrtime.bigint();

let targetDir = 'node_modules';
let sortBy = 'path';
let searchPath = '.';
let maxDepth = 5;
let saveFile = null;
let noMaxDepth = false;

const args = process.argv.slice(2);
args.forEach((arg, i) => {
  if (arg === '-t' || arg === '--target') targetDir = args[i + 1];
  if (arg === '-s' || arg === '--sort') sortBy = args[i + 1];
  if (arg === '-p' || arg === '--path') searchPath = args[i + 1];
  if (arg === '--save') saveFile = args[i + 1];
  if (arg === '--maxDepth') maxDepth = parseInt(args[i + 1], 10);
  if (arg === '--noMaxDepth') noMaxDepth = true;
});

(async () => {
  if (isNaN(maxDepth) || maxDepth < 0) {
    console.error("Error: Invalid value for --maxDepth. It should be a positive integer.");
    process.exit(1);
  }

  const validSortOptions = ['path', 'size', 'last-mod'];
  if (!validSortOptions.includes(sortBy)) {
    console.error(`Error: Invalid sort option. Valid options are: ${validSortOptions.join(", ")}`);
    process.exit(1);
  }

  try {
    await fs.access(searchPath);
  } catch (error) {
    console.error(`Error: The search path '${searchPath}' does not exist or is not accessible.`);
    process.exit(1);
  }

  if (noMaxDepth) maxDepth = Infinity;

  const formatDate = (date) => {
    const day = String(date.getDate()).padStart(2, '0');
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const year = date.getFullYear();
    const hours = String(date.getHours()).padStart(2, '0');
    const minutes = String(date.getMinutes()).padStart(2, '0');
    const seconds = String(date.getSeconds()).padStart(2, '0');
    return `${day}/${month}/${year} ${hours}:${minutes}:${seconds}`;
  };

  const formatSize = (sizeInKB) => {
    if (sizeInKB < 1024) {
      return `${sizeInKB} KB`;
    }
    const sizeInMB = sizeInKB / 1024;
    if (sizeInMB < 1024) {
      return `${sizeInMB.toFixed(2)} MB`;
    }
    const sizeInGB = sizeInMB / 1024;
    return `${sizeInGB.toFixed(2)} GB`;
  };

  const formatTime = (elapsedTime) => {
    const seconds = Number(elapsedTime) / 1e9;
    if (seconds < 60) {
      return `${seconds.toFixed(2)}s`;
    }
    const minutes = seconds / 60;
    if (minutes < 60) {
      return `${minutes.toFixed(2)}m`;
    }
    const hours = minutes / 60;
    return `${hours.toFixed(2)}h`;
  };

  const getDaysDifference = (lastModDate) => {
    const today = new Date();
    const diffTime = today - new Date(lastModDate);
    const diffDays = Math.floor(diffTime / (1000 * 3600 * 24));
    return diffDays;
  };

  const getDirectories = async (dirPath, depth = 0) => {
    if (maxDepth !== Infinity && depth > maxDepth) return [];
    const dirs = [];
    try {
      const items = await fs.readdir(dirPath);
      for (const item of items) {
        const fullPath = path.resolve(dirPath, item);
        try {
          const stat = await fs.stat(fullPath);
          if (stat.isDirectory()) {
            dirs.push(fullPath);
            if (!fullPath.includes(targetDir)) {
              dirs.push(...await getDirectories(fullPath, depth + 1));
            }
          }
        } catch (e) {
          //console.error(`Error reading directory ${fullPath}:`, e);
        }
      }
    } catch (err) {
      //console.error(`Error reading path ${dirPath}:`, err);
    }
    return dirs;
  };

  const getDirInfo = async (dir) => {
    let size = 0;
    const isWindows = process.platform === 'win32';
    if (isWindows) {
      try {
        const result = execSync(`dir /s /a "${dir}"`).toString();
        const match = result.match(/bytes free/g);
        size = match ? parseInt(match[0].split(" ")[0]) : 0;
      } catch (e) {
        console.error(`Error calculating size for ${dir}:`, e);
      }
    } else {
      try {
        const result = execSync(`du -sk "${dir}"`).toString();
        size = parseInt(result.split("\t")[0]);
      } catch (e) {
        console.error(`Error calculating size for ${dir}:`, e);
      }
    }
    const lastMod = await fs.stat(dir);
    const lastModDate = lastMod.mtime;
    const lastModDay = getDaysDifference(lastModDate);
    return { path: dir, size, lastMod: formatDate(lastModDate), lastModDay };
  };

  const spinner = ["⠙", "⠘", "⠰", "⠴", "⠤", "⠦", "⠆", "⠃", "⠋", "⠉"];
  let spinIndex = 0;
  const displaySpinner = () => {
    process.stdout.write(`\rLoading ${spinner[spinIndex]} `);
    spinIndex = (spinIndex + 1) % spinner.length;
  };
  const spinnerInterval = setInterval(displaySpinner, 100);

  const dirs = (await getDirectories(searchPath))
    .filter((dir) => dir.includes(targetDir))
    .map(async (dir) => await getDirInfo(dir));
  const resolvedDirs = await Promise.all(dirs);

  const totalSizeKB = resolvedDirs.reduce((acc, dir) => acc + dir.size, 0);

  const sortedDirs = resolvedDirs.sort((a, b) => {
    if (sortBy === 'size') return b.size - a.size;
    if (sortBy === 'path') return a.path.localeCompare(b.path);
    if (sortBy === 'last-mod') return new Date(b.lastMod) - new Date(a.lastMod);
    return 0;
  });

  const resultJson = {
    releasable_space: formatSize(totalSizeKB),
    search_completed: formatTime(process.hrtime.bigint() - startTime),
    num_paths: sortedDirs.length,
    paths: sortedDirs.map(dir => ({
      path: dir.path,
      last_mod: dir.lastMod,
      last_mod_day: dir.lastModDay,
      size: formatSize(dir.size)
    }))
  };

  clearInterval(spinnerInterval);
  console.log();
  console.clear();

  if (saveFile) {
    try {
      await fs.writeFile(saveFile, JSON.stringify(resultJson, null, 2), 'utf-8');
      console.log(`Results saved to ${saveFile}`);
      console.log({
        releasable_space: resultJson.releasable_space,
        search_completed: resultJson.search_completed,
        num_paths: resultJson.num_paths
      });
    } catch (error) {
      console.error(`Failed to save results to ${saveFile}`, error);
    }
  } else {
    console.log(resultJson);
  }
})();
Use:
node main.js --sort size --path . --save jsonFile.json
Upvotes: 0
Views: 126
Reputation: 17216
Here's an attempt at refactoring your code in Python, compatible with both Python 2 and 3. The dependencies are part of the Standard Library, so they're available with any Python installation:
import os, sys, fnmatch, time, json, argparse
The downside of not using any external libraries (on top of staying compatible with Python 2 and 3) is that you have to reinvent the wheel, for example "humanizing" a size in bytes or recursively "finding" the files in a directory:
def humanize_date(timestamp):
    return time.strftime("%d/%m/%Y %T", time.localtime(timestamp))

def humanize_size(size):
    size = float(size)
    for unit in ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"):
        if size < 1024.0:
            return ("%d %s" if size.is_integer() else "%.2f %s") % (size, unit)
        size /= 1024.0

def find(path, name="*"):
    if os.path.lexists(path):
        if fnmatch.fnmatch(os.path.basename(path), name):
            yield path
        if not os.path.islink(path) or path.endswith(os.sep):
            for (rootpath, dirnames, filenames) in os.walk(path):
                for direntry in (dirnames + filenames):
                    if fnmatch.fnmatch(direntry, name):
                        yield os.path.join(rootpath, direntry)
Then comes the most important function for implementing the logic; it takes a directory as argument and returns a dict inspired by os.stat_result, with its st_size and st_mtime keys changed to "the sum of the sizes of all the files in the directory" and "the modification time of the most recently modified file", respectively:
def dstat(path):
    result = None
    for direntry in find(path):
        stats = os.lstat(direntry)
        if result is None:
            result = {k: getattr(stats, k) for k in dir(stats) if k.startswith("st_")}
            continue
        result["st_size"] += stats.st_size
        if stats.st_mtime > result["st_mtime"]:
            result["st_mtime"] = stats.st_mtime
    return result
Note: dstat stands for "directory stat" and also "dict stat".
Now the "main program" just needs to parse the command-line, sort the results and output a JSON:
cli = argparse.ArgumentParser(description='Dummy npkill implementation that outputs JSON')
cli.add_argument('-d', '--directory', default='.', help='Set the directory from which to begin searching (defaults to ".")')
cli.add_argument('-s', '--sort', required=False, choices=['size', 'path', 'last-mod'], help='Sort results by: "size", "path" or "last-mod"')
cli.add_argument('-t', '--target', default='node_modules', help='Specify the name of the directories you want to search (defaults to "node_modules")')
args = cli.parse_args()

results = [(p, dstat(p)) for p in find(args.directory, name=args.target)]

if args.sort is not None:
    sort_key = (
        (lambda path_dstat: path_dstat[0])             if args.sort == 'path' else
        (lambda path_dstat: path_dstat[1]["st_size"])  if args.sort == 'size' else
        (lambda path_dstat: path_dstat[1]["st_mtime"])
    )
    results = sorted(results, key=sort_key)

results = [
    {
        "path": path,
        "last_mod": humanize_date(stats["st_mtime"]),
        "size": humanize_size(stats["st_size"]),
    }
    for path, stats in results
]

print(json.JSONEncoder().encode(results))
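Use (assuming you save the script as, say, npkill.py; the file name is arbitrary):
python npkill.py --directory . --sort size --target node_modules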
The problem you have with the sorting of the dates is that you're comparing strings whose order does not reflect chronological order; e.g., why would "21/01/2003" sort before "20/12/2024"? You need to use numbers (seconds since the epoch) for the comparisons and convert them to your date format after the sorting.
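To make that concrete, here is a small standalone snippet (separate from the script above, purely for illustration) that compares the two approaches:

import time

dates = ["21/01/2003", "20/12/2024"]

# Comparing the strings gives lexical order, not chronological order
print(sorted(dates))    # ['20/12/2024', '21/01/2003']  -> wrong

# Convert to seconds since the epoch, sort, then format back
epochs = sorted(time.mktime(time.strptime(d, "%d/%m/%Y")) for d in dates)
print([time.strftime("%d/%m/%Y", time.localtime(t)) for t in epochs])
# ['21/01/2003', '20/12/2024']  -> correct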
A difference I can see between du -sb and dstat_result["st_size"] is that my dstat will sum the size of hard-linked files while du won't.
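If you wanted dstat to match du on that point, a rough sketch (untested; the dstat_dedup name and the seen set are my own, just for illustration) would be to count each (st_dev, st_ino) pair only once:

def dstat_dedup(path):
    result, seen = None, set()
    for direntry in find(path):
        stats = os.lstat(direntry)
        # skip inodes that were already counted, which is what `du` does
        if stats.st_nlink > 1:
            key = (stats.st_dev, stats.st_ino)
            if key in seen:
                continue
            seen.add(key)
        if result is None:
            result = {k: getattr(stats, k) for k in dir(stats) if k.startswith("st_")}
            continue
        result["st_size"] += stats.st_size
        if stats.st_mtime > result["st_mtime"]:
            result["st_mtime"] = stats.st_mtime
    return result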
I didn't implement the elapsed time or the recoverable size, as they aren't part of the main logic required by the program; though I still added the argument parsing ;-)
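If you do want them anyway, here is a rough, untested sketch of how they could be bolted onto the main program above (the output keys mirror your Bash version):

start = time.time()                     # right before the search
results = [(p, dstat(p)) for p in find(args.directory, name=args.target)]
elapsed = time.time() - start           # right after the search

# ...sort as above, then include both values in the final JSON:
print(json.JSONEncoder().encode({
    "releasable_space": humanize_size(sum(stats["st_size"] for _, stats in results)),
    "search_completed": "%.2fs" % elapsed,
    "paths": [
        {
            "path": path,
            "last_mod": humanize_date(stats["st_mtime"]),
            "size": humanize_size(stats["st_size"]),
        }
        for path, stats in results
    ],
}))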
Upvotes: 4