* Copyright 2008, Axel Dörfler, axeld@pinc-software.de.
* Distributed under the terms of the MIT License.
*/
#include <algorithm>
#include <string>
#include <vector>
#include <dirent.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <OS.h>
#include <Path.h>
#include <SHA256.h>
#include "AdaptiveBuffering.h"
// Debug tracing is compiled out: every TRACE(...) use expands to an empty
// statement.
#define TRACE(x...) ;
// Program name used as prefix in the usage and error messages below.
// NOTE(review): __progname is only declared here; presumably it is provided
// by the C runtime -- confirm.
extern const char *__progname;
static const char *kProgramName = __progname;
// Initial and maximum buffer sizes that SHAProcessor hands to the
// AdaptiveBuffering constructor (1 * 1024 * 1024 and 10 * 1024 * 1024;
// presumably byte counts -- confirm against AdaptiveBuffering.h).
const size_t kInitialBufferSize = 1 * 1024 * 1024;
const size_t kMaxBufferSize = 10 * 1024 * 1024;
/*!	Computes the SHA-256 digest of the contents of a file descriptor.

	Read() pulls data from the descriptor handed to Process(), and Write()
	folds each buffer it receives into the running digest. Process() starts
	the work by calling Run(), which is not defined in this class.
	NOTE(review): Read()/Write() presumably override hooks that
	AdaptiveBuffering::Run() invokes -- confirm against AdaptiveBuffering.h.
*/
class SHAProcessor : public AdaptiveBuffering {
public:
	SHAProcessor()
		:
		// NOTE(review): the meaning of the third argument (3) is defined by
		// AdaptiveBuffering -- presumably a buffer count; confirm.
		AdaptiveBuffering(kInitialBufferSize, kMaxBufferSize, 3),
		fFile(-1)
	{
	}

	virtual ~SHAProcessor()
	{
		Unset();
	}

	/*!	Closes the file descriptor currently owned by this object, if any.
		Calling it repeatedly is harmless.
	*/
	void Unset()
	{
		if (fFile >= 0) {
			close(fFile);
			// Forget the descriptor: its number may be reused by a later
			// open(), and closing it a second time (another Unset() or the
			// destructor) would then close an unrelated file.
			fFile = -1;
		}
	}

	/*!	Hashes the contents of \a file.

		Takes over ownership of \a file: it is closed by the next Process()
		or Unset() call, or by the destructor.

		\return The result of Run().
	*/
	status_t Process(int file)
	{
		Unset();
		fSHA.Init();
		fFile = file;
		return Run();
	}

	/*!	Reads up to \a *_length bytes from the current file into \a buffer.

		\param buffer Destination buffer.
		\param _length In: capacity of \a buffer; out: number of bytes
			actually read (as returned by read()).
		\return \c B_OK on success, \c errno if read() failed.
	*/
	virtual status_t Read(uint8* buffer, size_t* _length)
	{
		ssize_t bytes = read(fFile, buffer, *_length);
		if (bytes < B_OK)
			return errno;

		*_length = bytes;
		return B_OK;
	}

	/*!	Adds \a length bytes from \a buffer to the running digest.
		\return Always \c B_OK.
	*/
	virtual status_t Write(uint8* buffer, size_t length)
	{
		fSHA.Update(buffer, length);
		return B_OK;
	}

	//!	Digest of the data fed in so far, as provided by SHA256::Digest().
	const uint8* Digest() { return fSHA.Digest(); }

	//!	Digest size as reported by SHA256::DigestLength().
	size_t DigestLength() const { return fSHA.DigestLength(); }

private:
	SHA256	fSHA;
	int		fFile;
		// descriptor being hashed, -1 if none is owned
};
/*!	One hashed file: its digest, its inode number, and its path. */
struct file_entry {
	uint8		hash[SHA_DIGEST_LENGTH];
	ino_t		node;
	std::string	path;

	//!	Orders entries by path only; hash and node are not considered.
	bool operator<(const struct file_entry& other) const
	{
		return path.compare(other.path) < 0;
	}

	/*!	Returns the digest as a lowercase hexadecimal string, two characters
		per digest byte.
	*/
	std::string HashString() const
	{
		char hex[128];
		char* cursor = hex;

		// Every sprintf() call emits two digits plus a terminating NUL; the
		// next call overwrites that NUL, so only the final one remains.
		for (int index = 0; index < SHA_DIGEST_LENGTH; index++, cursor += 2)
			sprintf(cursor, "%02x", hash[index]);

		return std::string(hex);
	}
};
// List type holding one file_entry per hashed file.
typedef std::vector<file_entry> FileList;
// Forward declaration: process_directory() calls process_file() before the
// latter is defined.
void process_file(const char* path);
// Hasher shared by all process_file() calls; initialized via Init() in main().
SHAProcessor gSHA;
// Entries appended by process_file(), sorted in main(), and serialized by
// write_hash_file().
FileList gFiles;
/*!	Hands every entry of the directory \a path (except "." and "..") to
	process_file(), which in turn recurses into subdirectories.

	A directory that cannot be opened is reported on stderr and skipped.

	\param path Path of the directory to scan.
*/
void
process_directory(const char* path)
{
	DIR* dir = opendir(path);
	if (dir == NULL) {
		fprintf(stderr, "Could not open directory \"%s\": %s\n", path,
			strerror(errno));
		return;
	}

	// Build the "<path>/" prefix once. Using std::string instead of a fixed
	// size buffer means long paths are never silently truncated (a truncated
	// path would make process_file() look at the wrong file).
	std::string prefix(path);
	if (prefix.empty() || prefix[prefix.size() - 1] != '/')
		prefix += '/';

	while (struct dirent* entry = readdir(dir)) {
		if (strcmp(entry->d_name, ".") == 0
			|| strcmp(entry->d_name, "..") == 0)
			continue;

		const std::string fullPath = prefix + entry->d_name;
		process_file(fullPath.c_str());
	}

	closedir(dir);
}
void
process_file(const char* path)
{
struct stat stat;
if (::lstat(path, &stat) != 0) {
fprintf(stderr, "Could not stat file \"%s\": %s\n", path,
strerror(errno));
return;
}
if (S_ISDIR(stat.st_mode)) {
process_directory(path);
return;
}
if (S_ISLNK(stat.st_mode))
return;
int file = open(path, O_RDONLY);
if (file < 0) {
fprintf(stderr, "Could not open file \"%s\": %s\n", path,
strerror(errno));
return;
}
status_t status = gSHA.Process(file);
if (status != B_OK) {
fprintf(stderr, "Computing SHA failed \"%s\": %s\n", path,
strerror(status));
return;
}
file_entry entry;
memcpy(entry.hash, gSHA.Digest(), SHA_DIGEST_LENGTH);
entry.node = stat.st_ino;
entry.path = path;
gFiles.push_back(entry);
static bigtime_t sLastUpdate = -1;
if (system_time() - sLastUpdate > 500000) {
printf("%ld files scanned\33[1A\n", gFiles.size());
sLastUpdate = system_time();
}
}
void
write_hash_file(const char* name, int fileCount, char** files)
{
int file = open(name, O_WRONLY | O_TRUNC | O_CREAT);
if (file < 0) {
fprintf(stderr, "%s: Could not write hash file \"%s\": %s\n",
kProgramName, name, strerror(errno));
return;
}
write(file, "HASH", 4);
write(file, &fileCount, sizeof(int));
for (int i = 0; i < fileCount; i++) {
int length = strlen(files[i]);
write(file, &length, sizeof(int));
write(file, files[i], length + 1);
}
fileCount = gFiles.size();
write(file, &fileCount, sizeof(int));
for (int i = 0; i < fileCount; i++) {
file_entry& entry = gFiles[i];
write(file, entry.hash, SHA_DIGEST_LENGTH);
write(file, &entry.node, sizeof(ino_t));
int length = entry.path.size();
write(file, &length, sizeof(int));
write(file, entry.path.c_str(), length + 1);
}
close(file);
}
/*!	Reads exactly \a size bytes from \a fd into \a buffer, retrying after
	short reads and interrupted calls.

	\return \c true if all bytes were read, \c false on a read error or if
		the end of the file was reached first.
*/
static bool
read_exactly(int fd, void* buffer, size_t size)
{
	char* target = static_cast<char*>(buffer);

	while (size > 0) {
		ssize_t bytesRead = read(fd, target, size);
		if (bytesRead < 0) {
			if (errno == EINTR)
				continue;
			return false;
		}
		if (bytesRead == 0)
			return false;

		target += bytesRead;
		size -= bytesRead;
	}

	return true;
}


/*!	Entry point: <tt>program <hash-file> [<files> ...]</tt>.

	With file arguments, the given paths are normalized, scanned, and the
	result is written to the hash file. Without file arguments, the list of
	root paths is taken from the existing hash file, which is then
	regenerated.

	\return 0 on success, 1 on usage errors, initialization/allocation
		failures, or an unreadable/invalid hash file.
*/
int
main(int argc, char** argv)
{
	if (argc < 2) {
		fprintf(stderr, "usage: %s <hash-file> [<files> ...]\n"
			"\tWhen invoked without files, the hash-file is updated only.\n",
			kProgramName);
		return 1;
	}

	const char* hashFileName = argv[1];

	status_t status = gSHA.Init();
	if (status != B_OK) {
		fprintf(stderr, "%s: Could not initialize SHA processor: %s\n",
			kProgramName, strerror(status));
		return 1;
	}

	int fileCount = argc - 2;
	char** files = argv + 2;

	if (argc == 2) {
		// Update mode: take the root paths from the existing hash file.
		int file = open(hashFileName, O_RDONLY);
		if (file < 0) {
			// open() reports its error via errno; "status" is B_OK here.
			fprintf(stderr, "%s: Could not open hash file \"%s\": %s\n",
				kProgramName, hashFileName, strerror(errno));
			return 1;
		}

		char magic[4];
		if (!read_exactly(file, magic, sizeof(magic))
			|| memcmp(magic, "HASH", sizeof(magic)) != 0) {
			fprintf(stderr, "%s: \"%s\" is not a hash file\n",
				kProgramName, hashFileName);
			close(file);
			return 1;
		}

		// The counts and lengths come straight from the file, so they are
		// validated before being used for allocations.
		if (!read_exactly(file, &fileCount, sizeof(int)) || fileCount < 0
			|| static_cast<size_t>(fileCount)
				> static_cast<size_t>(-1) / sizeof(char*)) {
			fprintf(stderr, "%s: \"%s\" is truncated or corrupt\n",
				kProgramName, hashFileName);
			close(file);
			return 1;
		}

		TRACE("Found %d path(s):\n", fileCount);

		// malloc(0) may legitimately return NULL; always allocate at least
		// one slot so that an empty path list is not mistaken for a failure.
		const size_t listSize = (fileCount > 0 ? fileCount : 1)
			* sizeof(char*);
		files = (char**)malloc(listSize);
		if (files == NULL) {
			fprintf(stderr, "%s: Could not allocate %lu bytes\n",
				kProgramName, static_cast<unsigned long>(listSize));
			close(file);
			return 1;
		}

		for (int i = 0; i < fileCount; i++) {
			int length;
			if (!read_exactly(file, &length, sizeof(int)) || length < 0) {
				fprintf(stderr, "%s: \"%s\" is truncated or corrupt\n",
					kProgramName, hashFileName);
				close(file);
				return 1;
			}

			// The stored path is followed by its terminating NUL.
			const size_t size = static_cast<size_t>(length) + 1;
			files[i] = (char*)malloc(size);
			if (files[i] == NULL) {
				fprintf(stderr, "%s: Could not allocate %lu bytes\n",
					kProgramName, static_cast<unsigned long>(size));
				close(file);
				return 1;
			}

			if (!read_exactly(file, files[i], size)) {
				fprintf(stderr, "%s: \"%s\" is truncated or corrupt\n",
					kProgramName, hashFileName);
				close(file);
				return 1;
			}
			// Do not rely on the file for the terminator.
			files[i][length] = '\0';

			TRACE("\t%s\n", files[i]);
		}

		close(file);
	} else {
		// Normalize the paths given on the command line.
		char** normalizedFiles = (char**)malloc(fileCount * sizeof(char*));
		if (normalizedFiles == NULL) {
			fprintf(stderr, "%s: Could not allocate %ld bytes\n",
				kProgramName, static_cast<long>(fileCount * sizeof(char*)));
			return 1;
		}

		for (int i = 0; i < fileCount; i++) {
			BPath path(files[i], NULL, true);

			// If normalization failed there is no path to copy (Path() is
			// documented to return NULL for an uninitialized BPath). Fall
			// back to the argument as given; process_file() reports the
			// problem if the path cannot be accessed.
			const char* normalized = path.InitCheck() == B_OK
				? path.Path() : NULL;
			if (normalized == NULL)
				normalized = files[i];

			normalizedFiles[i] = strdup(normalized);
			if (normalizedFiles[i] == NULL) {
				fprintf(stderr, "%s: Could not allocate %ld bytes\n",
					kProgramName,
					static_cast<long>(strlen(normalized) + 1));
				return 1;
			}
		}

		files = normalizedFiles;
	}

	bigtime_t start = system_time();

	for (int i = 0; i < fileCount; i++) {
		process_file(files[i]);
	}

	std::sort(gFiles.begin(), gFiles.end());

	bigtime_t runtime = system_time() - start;

	write_hash_file(hashFileName, fileCount, files);

	if (gFiles.size() > 0) {
		// size_t does not necessarily match "%ld", hence the explicit cast.
		printf("Generated hashes for %ld files in %g seconds, %g msec per "
			"file.\n", static_cast<long>(gFiles.size()),
			runtime / 1000000.0, runtime / 1000.0 / gFiles.size());
	}

	// In both modes "files" and its elements were heap-allocated above.
	for (int i = 0; i < fileCount; i++) {
		free(files[i]);
	}
	free(files);

	return 0;
}