#include #include #include #include #include #include #include // strlen #include // malloc #include #include #include #include "sqlite3.h" #include "xxhash.h" #include "main.setup.db.sqlite.sql.h" // needed improvements // 1. add more metadata file infos (i.e. file ownership and permissions to the database) // (including dev, and inodes) in a sensible way // 2. time_t flush_last=0; sqlite3 * db; sqlite3_stmt * stmt; sqlite3_stmt * stmt_insert_source; sqlite3_stmt * stmt_insert_file; sqlite3_stmt * stmt_select_source_id; sqlite3_stmt * stmt_select_file_id; sqlite3_stmt * stmt_update_file_hash; sqlite_int64 timestamp; // only needed if we have more than one SQL command (i.e what is separated by ";" in // in an input to sqlite3_prepare... function in which case the remaining stuff // is stored in the "tail" //const char * tail = 0; XXH128_hash_t hashFile(int fd) { // Allocate a state struct. Do not just use malloc() or new. XXH3_state_t* state = XXH3_createState(); // Reset the state to start a new hashing session. XXH3_128bits_reset(state); char buffer[4096]; size_t count; // Read the file in chunks while ((count = read(fd,buffer,sizeof(buffer))) != 0) { // Run update() as many times as necessary to process the data XXH3_128bits_update(state, buffer, count); } // Retrieve the finalized hash. This will not change the state. XXH128_hash_t result = XXH3_128bits_digest(state); // Free the state. Do not use free(). XXH3_freeState(state); return result; } // at present this function encompasses // 1. the opening of the sqlite database // 2. (in case "not exists") the creation of databases and indexes // 3. the starting of a _transaction_ (for speed, and I reckon -to be tested- also race condition (i.e file locking)) // int sqlite_my_start(){ if(SQLITE_OK != sqlite3_open("sources.sqlite3.db", &db)) { return -1; } int fd_initsql; struct stat statbuf; char* buffer_sqlite = NULL; ssize_t readresult = 0; size_t toread; char* errmsg = NULL; int sqlresult; fd_initsql = open("main.setup.db.sqlite.sql",O_RDONLY); if(fd_initsql!=-1){ if( -1 == fstat(fd_initsql,&statbuf)){ close(fd_initsql); return -2; } buffer_sqlite = malloc(statbuf.st_size); toread = statbuf.st_size; if( buffer_sqlite == NULL) { close(fd_initsql); return -3; } while(toread > 0){ readresult = read(fd_initsql,buffer_sqlite+(statbuf.st_size-toread),toread); if(readresult == -1){ free(buffer_sqlite); close(fd_initsql); return -4; } toread -= readresult; } close(fd_initsql); } if(SQLITE_OK != sqlite3_exec(db,buffer_sqlite == NULL ? MAIN_SETUP_DB_SQLITE_SQL : buffer_sqlite, NULL,NULL,&errmsg)){ return -5; } if(buffer_sqlite!=NULL) { free(buffer_sqlite); } // // // // those execs appear to return SQLITE_OK (0) value. probably should hence be checked against that if(SQLITE_OK != sqlite3_exec(db, "BEGIN TRANSACTION", NULL, NULL, NULL)){ return -6; } if(SQLITE_OK != sqlite3_prepare_v2(db, "INSERT OR IGNORE INTO sources (parent_id, " "name, timestamp) VALUES (?,?,?)", -1, &stmt_insert_source, NULL)) { return -7; } if(SQLITE_OK != sqlite3_prepare_v2(db, "INSERT OR IGNORE INTO files (source_id, " "name, size, mtime, timestamp) VALUES (?,?,?,?,?)", -1, &stmt_insert_file, NULL)) { return -8; } if(SQLITE_OK != sqlite3_prepare_v2(db, "SELECT id FROM sources WHERE parent_id = ? AND name = ?", -1, &stmt_select_source_id, NULL)) { return -9; } if(SQLITE_OK != sqlite3_prepare_v2(db, "SELECT id FROM files WHERE source_id = ? AND name = ? AND timestamp = ? ", -1, &stmt_select_file_id, NULL)) { return -10; } if(SQLITE_OK != sqlite3_prepare_v2(db, "UPDATE files SET hash = ? WHERE id = ?", -1, &stmt_update_file_hash, NULL)) { return -11; } return 0; } int sqlite_my_end(){ sqlite3_exec(db, "END TRANSACTION", NULL, NULL, NULL); sqlite3_close(db); return 0; } int list(int dirfd, char* curdir, int length, sqlite3_int64 source_id){ if(time(NULL)-flush_last > 10){ flush_last=time(NULL); sqlite3_exec(db, "END TRANSACTION", NULL, NULL, NULL); fprintf(stderr, " NEW TA\n"); sqlite3_exec(db, "BEGIN TRANSACTION", NULL, NULL, NULL); } char* next_curdirnext = NULL; int next_length = 0; int sqlite_result; int filefd; sqlite3_int64 next_source_id = -1; sqlite3_int64 file_id = -1; // putchar DIR* pdir = fdopendir(dirfd); if(pdir==NULL){ perror("fehler fopendir"); return 1; } struct dirent * pdirent = NULL; struct stat statbuf; while(1) { errno = 0; pdirent = readdir(pdir); if(pdirent==NULL){ if(errno==0) { // printf("end reached\n"); closedir(pdir); break; } else { fprintf(stderr," readdir errno = %d\n",errno); return 1; } } else { if(pdirent->d_name[0]=='.'){ if(pdirent->d_name[1]==0){ //printf("ignore SELF\n"); continue; } if(pdirent->d_name[1]=='.'){ if(pdirent->d_name[2]==0){ // printf("ignore PARENT\n"); continue; } } } // ALEXTODO: determine if openat + fstat is the better option than fstatat here // reason: the fstat-at already works (within a folder - using fd file discriptor) // reason: not all opened files need to be opened maybe... if( 0== fstatat(dirfd,pdirent->d_name,&statbuf,AT_SYMLINK_NOFOLLOW) ) { printf("%s/%s\n",curdir,pdirent->d_name); if(S_ISDIR(statbuf.st_mode)){ //sqlite3_bind_null(stmt_insert, 1); sqlite3_bind_int64(stmt_insert_source, 1,(sqlite3_int64) source_id);// pdirent->d_ino); sqlite3_bind_text(stmt_insert_source, 2, pdirent->d_name, -1, SQLITE_TRANSIENT); sqlite3_bind_int64(stmt_insert_source, 3,(sqlite3_int64) timestamp); sqlite3_step(stmt_insert_source); sqlite3_clear_bindings(stmt_insert_source); sqlite3_reset(stmt_insert_source); // printf("%s\n",pdirent->d_name); int subdirfd = openat(dirfd,pdirent->d_name,O_RDONLY); if(-1 == subdirfd) { perror("subdirfd"); } else { sqlite3_bind_int64(stmt_select_source_id, 1,(sqlite3_int64) source_id);// pdirent->d_ino); sqlite3_bind_text(stmt_select_source_id, 2, pdirent->d_name, -1, SQLITE_TRANSIENT); sqlite_result = sqlite3_step(stmt_select_source_id); if(SQLITE_ROW == sqlite_result){ next_source_id = sqlite3_column_int64(stmt_select_source_id, 0); } else { fprintf(stderr,"error getting source_id %d %s\n",sqlite_result,pdirent->d_name); sqlite3_clear_bindings(stmt_select_source_id); sqlite3_reset(stmt_select_source_id); return -8; } sqlite3_clear_bindings(stmt_select_source_id); sqlite3_reset(stmt_select_source_id); next_length=(length+1+strlen(pdirent->d_name)); next_curdirnext = malloc(next_length); if(next_curdirnext==NULL) { perror("malloc"); } else { sprintf(next_curdirnext,"%s/%s",curdir,pdirent->d_name); list(subdirfd,next_curdirnext,next_length,next_source_id); free(next_curdirnext); close(subdirfd); } } } else if(S_ISREG(statbuf.st_mode)){ sqlite3_bind_int64(stmt_insert_file, 1,(sqlite3_int64) source_id); sqlite3_bind_text(stmt_insert_file, 2, pdirent->d_name, -1, SQLITE_TRANSIENT); sqlite3_bind_int64(stmt_insert_file, 3,(sqlite3_int64) statbuf.st_size); sqlite3_bind_int64(stmt_insert_file, 4,(sqlite3_int64) statbuf.st_mtim.tv_sec); sqlite3_bind_int64(stmt_insert_file, 5,(sqlite3_int64) timestamp); sqlite3_step(stmt_insert_file); sqlite3_clear_bindings(stmt_insert_file); sqlite3_reset(stmt_insert_file); sqlite3_bind_int64(stmt_select_file_id, 1,(sqlite3_int64) source_id); sqlite3_bind_text(stmt_select_file_id, 2, pdirent->d_name, -1, SQLITE_TRANSIENT); sqlite3_bind_int64(stmt_select_file_id, 3,(sqlite3_int64) timestamp); sqlite_result = sqlite3_step(stmt_select_file_id); if(sqlite_result == SQLITE_ROW){ // insertion merits hashing file_id = sqlite3_column_int64(stmt_select_file_id, 0); sqlite3_clear_bindings(stmt_select_file_id); sqlite3_reset(stmt_select_file_id); //fprintf(stderr,"file %s was inserted\n",pdirent->d_name); filefd = openat(dirfd,pdirent->d_name,O_RDONLY); if( filefd < 0){ perror("open"); //ALEXTODO: handle more gracefully continue; } XXH128_hash_t hash = hashFile(filefd); close(filefd); // fprintf(stderr,"%016llx%016llx %s sizeof=%d\n",(unsigned long long ) hash.high64,(unsigned long long)hash.low64,pdirent->d_name,sizeof(XXH128_hash_t)); sqlite3_bind_int64(stmt_update_file_hash, 2,(sqlite3_int64) file_id); sqlite3_bind_blob(stmt_update_file_hash,1,(const void*)&hash,sizeof(XXH128_hash_t),SQLITE_TRANSIENT); sqlite3_step(stmt_update_file_hash); sqlite3_clear_bindings(stmt_update_file_hash); sqlite3_reset(stmt_update_file_hash); } else if(sqlite_result == SQLITE_DONE) { // no insertion -> assumpotion is no change -> no hash //fprintf(stderr,"file %s EXISTED\n",pdirent->d_name); } else { // general error // fprintf(stderr,"file %s result was %d\n",sqlite_result); sqlite3_clear_bindings(stmt_select_source_id); sqlite3_reset(stmt_select_source_id); return -9; } // sqlite3_clear_bindings(stmt_select_file_id); // sqlite3_reset(stmt_select_file_id); } //else //{ //// printf("%s/%s\n",curdir,pdirent->d_name); //} //struct stat { // dev_t st_dev; /* ID of device containing file */ // ino_t st_ino; /* Inode number */ // mode_t st_mode; /* File type and mode */ // nlink_t st_nlink; /* Number of hard links */ // uid_t st_uid; /* User ID of owner */ // gid_t st_gid; /* Group ID of owner */ // dev_t st_rdev; /* Device ID (if special file) */ // off_t st_size; /* Total size, in bytes */ // blksize_t st_blksize; /* Block size for filesystem I/O */ // blkcnt_t st_blocks; /* Number of 512 B blocks allocated */ // /* Since POSIX.1-2008, this structure supports nanosecond // precision for the following timestamp fields. // For the details before POSIX.1-2008, see VERSIONS. */ // struct timespec st_atim; /* Time of last access */ // struct timespec st_mtim; /* Time of last modification */ // struct timespec st_ctim; /* Time of last status change */ //#define st_atime st_atim.tv_sec /* Backward compatibility */ //#define st_mtime st_mtim.tv_sec //#define st_ctime st_ctim.tv_sec //}; } else { fprintf(stderr,"fstatat errno = %d\n",errno); return 2; } //printf("%d %s\n",pdirent->d_type ,pdirent->d_name); } } // printf("dirfd = %d %p\n",dirfd,pdir); return 0; } int main(int argc, char* argv[]){ // DIR current; int dirfd; char* curdir= "."; int result = 0; timestamp = (sqlite_uint64) time(NULL); result = sqlite_my_start(); if(0!=result) { fprintf(stderr,"error sqlite_my_start() %d\n",result); return -1; } puts("."); dirfd = openat(AT_FDCWD,".",O_RDONLY); if (dirfd > -1){ list(dirfd,curdir,1+strlen(curdir),0); } result = sqlite_my_end(); if(0!=result) { fprintf(stderr,"error sqlite_my_end() %d\n",result); return -2; } return 0; }