376 lines
14 KiB
C
376 lines
14 KiB
C
#include <errno.h>
|
|
#include <sys/types.h>
|
|
#include <dirent.h>
|
|
#include <stdio.h>
|
|
#include <fcntl.h>
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
// strlen
|
|
#include <string.h>
|
|
// malloc
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
#include <string.h>
|
|
#include "sqlite3.h"
|
|
#include "xxhash.h"
|
|
#include "main.setup.db.sqlite.sql.h"
|
|
|
|
|
|
// needed improvements
// 1. add more file metadata to the database (e.g. file ownership and permissions,
//    including device and inode numbers) in a sensible way
// 2. (not yet specified)
|
|
|
|
|
|
time_t flush_last=0;
|
|
sqlite3 * db;
|
|
sqlite3_stmt * stmt;
|
|
sqlite3_stmt * stmt_insert_source;
|
|
sqlite3_stmt * stmt_insert_file;
|
|
sqlite3_stmt * stmt_select_source_id;
|
|
sqlite3_stmt * stmt_select_file_id;
|
|
sqlite3_stmt * stmt_update_file_hash;
|
|
sqlite_int64 timestamp;
|
|
|
|
// only needed if we have more than one SQL command (i.e. statements separated by ";")
// in the input to a sqlite3_prepare... function, in which case the remaining text
// is stored in the "tail"
//const char * tail = 0;
|
|
|
|
XXH128_hash_t hashFile(int fd)
|
|
{
|
|
// Allocate a state struct. Do not just use malloc() or new.
|
|
XXH3_state_t* state = XXH3_createState();
|
|
// Reset the state to start a new hashing session.
|
|
XXH3_128bits_reset(state);
|
|
char buffer[4096];
|
|
size_t count;
|
|
// Read the file in chunks
|
|
while ((count = read(fd,buffer,sizeof(buffer))) != 0) {
|
|
// Run update() as many times as necessary to process the data
|
|
XXH3_128bits_update(state, buffer, count);
|
|
}
|
|
// Retrieve the finalized hash. This will not change the state.
|
|
XXH128_hash_t result = XXH3_128bits_digest(state);
|
|
// Free the state. Do not use free().
|
|
XXH3_freeState(state);
|
|
return result;
|
|
}
|
|
|
|
|
|
// at present this function encompasses
|
|
// 1. the opening of the sqlite database
|
|
// 2. (in case "not exists") the creation of databases and indexes
|
|
// 3. the starting of a _transaction_ (for speed, and I reckon -to be tested- also race condition (i.e file locking))
|
|
//
|
|
int sqlite_my_start(){
|
|
|
|
|
|
if(SQLITE_OK != sqlite3_open("sources.sqlite3.db", &db)) {
|
|
return -1;
|
|
}
|
|
|
|
int fd_initsql;
|
|
struct stat statbuf;
|
|
char* buffer_sqlite = NULL;
|
|
ssize_t readresult = 0;
|
|
size_t toread;
|
|
char* errmsg = NULL;
|
|
int sqlresult;
|
|
fd_initsql = open("main.setup.db.sqlite.sql",O_RDONLY);
|
|
if(fd_initsql!=-1){
|
|
if( -1 == fstat(fd_initsql,&statbuf)){
|
|
close(fd_initsql);
|
|
return -2;
|
|
}
|
|
buffer_sqlite = malloc(statbuf.st_size);
|
|
toread = statbuf.st_size;
|
|
if( buffer_sqlite == NULL)
|
|
{
|
|
close(fd_initsql);
|
|
return -3;
|
|
}
|
|
while(toread > 0){
|
|
readresult = read(fd_initsql,buffer_sqlite+(statbuf.st_size-toread),toread);
|
|
if(readresult == -1){
|
|
free(buffer_sqlite);
|
|
close(fd_initsql);
|
|
return -4;
|
|
}
|
|
toread -= readresult;
|
|
}
|
|
close(fd_initsql);
|
|
}
|
|
if(SQLITE_OK != sqlite3_exec(db,buffer_sqlite == NULL ? MAIN_SETUP_DB_SQLITE_SQL : buffer_sqlite,
|
|
NULL,NULL,&errmsg)){
|
|
return -5;
|
|
}
|
|
if(buffer_sqlite!=NULL)
|
|
{
|
|
free(buffer_sqlite);
|
|
}
|
|
//
|
|
//
|
|
// // those execs appear to return SQLITE_OK (0) value. probably should hence be checked against that
|
|
if(SQLITE_OK != sqlite3_exec(db, "BEGIN TRANSACTION", NULL, NULL, NULL)){
|
|
return -6;
|
|
}
|
|
if(SQLITE_OK != sqlite3_prepare_v2(db, "INSERT OR IGNORE INTO sources (parent_id, "
|
|
"name, timestamp) VALUES (?,?,?)", -1, &stmt_insert_source, NULL)) {
|
|
return -7;
|
|
}
|
|
if(SQLITE_OK != sqlite3_prepare_v2(db, "INSERT OR IGNORE INTO files (source_id, "
|
|
"name, size, mtime, timestamp) VALUES (?,?,?,?,?)", -1, &stmt_insert_file, NULL)) {
|
|
return -8;
|
|
}
|
|
if(SQLITE_OK != sqlite3_prepare_v2(db, "SELECT id FROM sources WHERE parent_id = ? AND name = ?",
|
|
-1, &stmt_select_source_id, NULL)) {
|
|
return -9;
|
|
}
|
|
if(SQLITE_OK != sqlite3_prepare_v2(db, "SELECT id FROM files WHERE source_id = ? AND name = ? AND timestamp = ? ",
|
|
-1, &stmt_select_file_id, NULL)) {
|
|
return -10;
|
|
}
|
|
if(SQLITE_OK != sqlite3_prepare_v2(db, "UPDATE files SET hash = ? WHERE id = ?",
|
|
-1, &stmt_update_file_hash, NULL)) {
|
|
return -11;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int sqlite_my_end(){
|
|
sqlite3_exec(db, "END TRANSACTION", NULL, NULL, NULL);
|
|
sqlite3_close(db);
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Forward declaration: the subdirectory helper below recurses into list().
int list(int dirfd, char* curdir, int length, sqlite3_int64 source_id);

// Records the subdirectory `name` of the directory open at dirfd in the
// `sources` table (parent = source_id), looks up its id and scans it recursively.
// Returns 0 when the caller should go on with the next entry (this includes a
// subdirectory that could not be opened), or -8 if the id lookup failed.
static int list_enter_subdir(int dirfd, const char *curdir, int length,
                             sqlite3_int64 source_id, const char *name)
{
    sqlite3_bind_int64(stmt_insert_source, 1, source_id);
    sqlite3_bind_text(stmt_insert_source, 2, name, -1, SQLITE_TRANSIENT);
    sqlite3_bind_int64(stmt_insert_source, 3, (sqlite3_int64) timestamp);
    sqlite3_step(stmt_insert_source);
    sqlite3_clear_bindings(stmt_insert_source);
    sqlite3_reset(stmt_insert_source);

    int subdirfd = openat(dirfd, name, O_RDONLY);
    if (-1 == subdirfd) {
        perror("subdirfd");
        return 0;
    }

    sqlite3_int64 next_source_id = -1;
    sqlite3_bind_int64(stmt_select_source_id, 1, source_id);
    sqlite3_bind_text(stmt_select_source_id, 2, name, -1, SQLITE_TRANSIENT);
    int sqlite_result = sqlite3_step(stmt_select_source_id);
    if (SQLITE_ROW == sqlite_result) {
        next_source_id = sqlite3_column_int64(stmt_select_source_id, 0);
    }
    sqlite3_clear_bindings(stmt_select_source_id);
    sqlite3_reset(stmt_select_source_id);
    if (SQLITE_ROW != sqlite_result) {
        fprintf(stderr,"error getting source_id %d %s\n",sqlite_result,name);
        close(subdirfd);
        return -8;
    }

    // `length` already counts the terminating NUL of curdir, so one extra byte
    // for the '/' plus the name length is exactly enough for "curdir/name".
    int next_length = (int)(length + 1 + strlen(name));
    char *next_curdir = malloc((size_t)next_length);
    if (next_curdir == NULL) {
        perror("malloc");
        close(subdirfd);
        return 0;
    }
    snprintf(next_curdir, (size_t)next_length, "%s/%s", curdir, name);

    // list() takes ownership of subdirfd and closes it on every path, so it must
    // not be closed again here. Failures inside a subdirectory are not propagated:
    // the scan of the parent directory continues.
    list(subdirfd, next_curdir, next_length, next_source_id);
    free(next_curdir);
    return 0;
}

// Records the regular file `name` (described by *st) of the directory open at
// dirfd in the `files` table. If a row carrying this run's timestamp exists
// afterwards, the file content is hashed and the hash stored.
// Returns 0 when the caller should go on with the next entry, or -9 if looking
// up the file id failed.
static int list_record_file(int dirfd, sqlite3_int64 source_id,
                            const char *name, const struct stat *st)
{
    sqlite3_bind_int64(stmt_insert_file, 1, source_id);
    sqlite3_bind_text(stmt_insert_file, 2, name, -1, SQLITE_TRANSIENT);
    sqlite3_bind_int64(stmt_insert_file, 3, (sqlite3_int64) st->st_size);
    sqlite3_bind_int64(stmt_insert_file, 4, (sqlite3_int64) st->st_mtim.tv_sec);
    sqlite3_bind_int64(stmt_insert_file, 5, (sqlite3_int64) timestamp);
    sqlite3_step(stmt_insert_file);
    sqlite3_clear_bindings(stmt_insert_file);
    sqlite3_reset(stmt_insert_file);

    sqlite3_int64 file_id = -1;
    sqlite3_bind_int64(stmt_select_file_id, 1, source_id);
    sqlite3_bind_text(stmt_select_file_id, 2, name, -1, SQLITE_TRANSIENT);
    sqlite3_bind_int64(stmt_select_file_id, 3, (sqlite3_int64) timestamp);
    int sqlite_result = sqlite3_step(stmt_select_file_id);
    if (sqlite_result == SQLITE_ROW) {
        file_id = sqlite3_column_int64(stmt_select_file_id, 0);
    }
    // Reset on every outcome; otherwise the next bind on this statement is
    // rejected because the statement is still active.
    sqlite3_clear_bindings(stmt_select_file_id);
    sqlite3_reset(stmt_select_file_id);

    if (sqlite_result == SQLITE_DONE) {
        // no insertion -> assumption is no change -> no hash
        return 0;
    }
    if (sqlite_result != SQLITE_ROW) {
        // general error
        return -9;
    }

    // insertion merits hashing
    int filefd = openat(dirfd, name, O_RDONLY);
    if (filefd < 0) {
        perror("open");
        // TODO: handle more gracefully
        return 0;
    }
    XXH128_hash_t hash = hashFile(filefd);
    close(filefd);

    // NOTE(review): the hash struct is stored as raw bytes, so the blob layout
    // depends on the host byte order -- confirm this is intended.
    sqlite3_bind_blob(stmt_update_file_hash, 1, (const void*)&hash, sizeof(XXH128_hash_t), SQLITE_TRANSIENT);
    sqlite3_bind_int64(stmt_update_file_hash, 2, file_id);
    sqlite3_step(stmt_update_file_hash);
    sqlite3_clear_bindings(stmt_update_file_hash);
    sqlite3_reset(stmt_update_file_hash);
    return 0;
}

// Scans the directory open at dirfd, prints "curdir/name" for every entry
// except "." and "..", and records subdirectories and regular files in the
// database; subdirectories are scanned recursively. Other file types
// (including symbolic links, which are not followed) are only printed.
//
// dirfd     open directory descriptor; list() takes ownership and closes it on
//           every path, the caller must not close it
// curdir    path of the directory, used for printing and for building child paths
// length    strlen(curdir) + 1
// source_id id of this directory in the `sources` table (parent id of its entries)
//
// About every 10 seconds the running transaction is committed and a new one begun.
//
// Returns 0 on success, 1 if the directory stream could not be opened or read,
// 2 if fstatat failed on an entry, -8 / -9 on database lookup errors.
int list(int dirfd, char* curdir, int length, sqlite3_int64 source_id){
    if (time(NULL) - flush_last > 10) {
        flush_last = time(NULL);
        sqlite3_exec(db, "END TRANSACTION", NULL, NULL, NULL);
        fprintf(stderr, " NEW TA\n");
        sqlite3_exec(db, "BEGIN TRANSACTION", NULL, NULL, NULL);
    }

    DIR *pdir = fdopendir(dirfd);
    if (pdir == NULL) {
        perror("fehler fopendir");
        close(dirfd);
        return 1;
    }

    int rc = 0;
    while (rc == 0) {
        // readdir() returns NULL both at the end and on error; errno tells them apart.
        errno = 0;
        struct dirent *pdirent = readdir(pdir);
        if (pdirent == NULL) {
            if (errno != 0) {
                fprintf(stderr," readdir errno = %d\n",errno);
                rc = 1;
            }
            break;
        }

        const char *name = pdirent->d_name;
        // ignore "." (self) and ".." (parent)
        if (name[0] == '.' && (name[1] == 0 || (name[1] == '.' && name[2] == 0))) {
            continue;
        }

        // TODO: determine whether openat + fstat is the better option than fstatat here.
        // Reasons for fstatat: it already works relative to the directory descriptor,
        // and maybe not every entry needs to be opened.
        struct stat statbuf;
        if (0 != fstatat(dirfd, name, &statbuf, AT_SYMLINK_NOFOLLOW)) {
            fprintf(stderr,"fstatat errno = %d\n",errno);
            rc = 2;
            break;
        }

        printf("%s/%s\n", curdir, name);
        if (S_ISDIR(statbuf.st_mode)) {
            rc = list_enter_subdir(dirfd, curdir, length, source_id, name);
        } else if (S_ISREG(statbuf.st_mode)) {
            rc = list_record_file(dirfd, source_id, name, &statbuf);
        }
    }

    // Also closes dirfd.
    closedir(pdir);
    return rc;
}
|
|
|
|
|
|
int main(int argc, char* argv[]){
|
|
// DIR current;
|
|
int dirfd;
|
|
char* curdir= ".";
|
|
int result = 0;
|
|
timestamp = (sqlite_uint64) time(NULL);
|
|
result = sqlite_my_start();
|
|
if(0!=result)
|
|
{
|
|
fprintf(stderr,"error sqlite_my_start() %d\n",result);
|
|
return -1;
|
|
}
|
|
puts(".");
|
|
dirfd = openat(AT_FDCWD,".",O_RDONLY);
|
|
if (dirfd > -1){
|
|
list(dirfd,curdir,1+strlen(curdir),0);
|
|
}
|
|
result = sqlite_my_end();
|
|
if(0!=result)
|
|
{
|
|
fprintf(stderr,"error sqlite_my_end() %d\n",result);
|
|
return -2;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|