Skip to content

Commit 16a558f

Browse files
committed
xxhash impl prototype
1 parent 356beb7 commit 16a558f

File tree

17 files changed

+249
-42
lines changed

17 files changed

+249
-42
lines changed

.github/workflows/cmake.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
make -j $(nproc)
2626
working-directory: ${{github.workspace}}/dep/jemalloc
2727
- name: Configure CMake
28-
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=${{matrix.compiler}}
28+
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=${{matrix.compiler}} -DCMAKE_POLICY_VERSION_MINIMUM=3.5
2929
- name: Build
3030
run: cmake --build ${{github.workspace}}/build
3131
- name: CTest

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,6 @@
77
[submodule "dep/jemalloc"]
88
path = dep/jemalloc
99
url = https://github.com/jemalloc/jemalloc
10+
[submodule "dep/xxHash"]
11+
path = dep/xxHash
12+
url = https://github.com/Cyan4973/xxHash

CMakeLists.txt

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ set(ENABLE_CJSON_TEST OFF)
99
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
1010
add_subdirectory(dep/cJSON)
1111

12+
# link xxHash statically
13+
set(XXHASH_BUILD_XXHSUM OFF)
14+
option(BUILD_SHARED_LIBS OFF)
15+
add_subdirectory(dep/xxHash/build/cmake xxhash_build EXCLUDE_FROM_ALL)
16+
1217
# use strict C11
1318
set(CMAKE_C_STANDARD 11)
1419
set(CMAKE_C_STANDARD_REQUIRED ON)
@@ -29,16 +34,16 @@ add_executable(fsautoproc ${SOURCES} ${HEADERS})
2934
add_compile_definitions(JEMALLOC_NO_DEMANGLE)
3035

3136
target_link_directories(fsautoproc PRIVATE dep dep/jemalloc/lib)
32-
target_link_libraries(fsautoproc PRIVATE cjson pthread libjemalloc.a)
37+
target_link_libraries(fsautoproc PRIVATE cjson pthread libjemalloc.a xxhash)
3338
target_include_directories(fsautoproc PRIVATE include dep dep/jemalloc/include)
3439

3540
install(TARGETS fsautoproc DESTINATION bin)
3641

3742
# libdeng static library for unit tests
38-
add_library(deng STATIC src/deng.c src/index.c src/fs.c src/je.c)
43+
add_library(deng STATIC src/deng.c src/index.c src/fs.c src/je.c src/xx.c)
3944

4045
target_link_directories(deng PUBLIC dep/jemalloc/lib)
41-
target_link_libraries(deng PUBLIC libjemalloc.a)
46+
target_link_libraries(deng PUBLIC libjemalloc.a xxhash)
4247
target_include_directories(deng PUBLIC include dep dep/jemalloc/include)
4348

4449
# unit tests
@@ -47,3 +52,8 @@ enable_testing()
4752
add_executable(test_deng test/test_deng.c)
4853
target_link_libraries(test_deng PRIVATE deng)
4954
add_test(NAME deng COMMAND test_deng)
55+
56+
add_executable(test_xx test/test_xx.c src/xx.c)
57+
target_link_libraries(test_xx PRIVATE xxhash)
58+
target_include_directories(test_xx PRIVATE include dep)
59+
add_test(NAME xx COMMAND test_xx)

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ C11 is used for basic atomic boolean operations when scheduling work across thre
3535
Git submodules provide:
3636

3737
- [cJSON](https://github.com/DaveGamble/cJSON)
38+
- [xxHash](https://github.com/Cyan4973/xxHash)
3839
- [jemalloc](https://github.com/jemalloc/jemalloc) (requires autoconf during build)
3940

4041
### Usage
@@ -57,6 +58,8 @@ Options:
5758
-x <file> Exclusive lock file path
5859
```
5960

61+
When first indexing a new file structure, you should use the `-u` flag to avoid applying any newly configured rules to the existing file structure (unless desired, e.g. an initial "clean up" mutation of an unmaintained directory).
62+
6063
### Basic Configuration
6164

6265
`example.fsautoproc.json` provides a basic example configuration file. The configuration file is a JSON array of objects, each object representing a desired "action" at a grouping level of your choosing. Each action object has the following properties:

dep/xxHash

Submodule xxHash added at c961fbe

include/index.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ struct inode_s {
1414
char* fp; ///< File path (string duplicated)
1515
uint64_t fphash; ///< File path hash value
1616
struct fsstat_s st; ///< File stat info structure
17+
uint64_t xx; ///< xxHash64 hash value
1718
struct inode_s* next;///< Next node in the index map
1819
};
1920

@@ -78,10 +79,11 @@ int indexread(struct index_s* idx, FILE* s);
7879
/// @param fp The file path to use for the new node, duplicated internally
7980
/// @param fphash The file path hash value to use for the new node
8081
/// @param st The file stat info to use for the new node
82+
/// @param xx The xxHash64 hash value to use for the new node
8183
/// @return The pointer to the new node in the index map, otherwise NULL is
8284
/// returned and `errno` is set.
8385
struct inode_s* indexput(struct index_s* idx, const char* fp, uint64_t fphash,
84-
const struct fsstat_s* st);
86+
const struct fsstat_s* st, uint64_t xx);
8587

8688
/// @brief Frees all nodes in the index map.
8789
/// @param idx The index to free

include/xx.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/// @file xx.h
2+
/// @brief xxHash64 file hashing functions.
3+
#ifndef FSAUTOPROC_XX_H
4+
#define FSAUTOPROC_XX_H
5+
6+
#include <stddef.h>
7+
#include <stdint.h>
8+
9+
/// @def XXBUFSZE
10+
/// @brief Size of the buffer used for reading files to hash.
11+
#define XXBUFSZE 65536
12+
13+
/// @struct xxreq_s
14+
/// @brief File hash request structure for processing file hash changes.
15+
struct xxreq_s {
16+
uint8_t* b; ///< Buffer for reading file data
17+
const char* fp; ///< Filepath to hash
18+
const struct fsstat_s* pst;///< Previous file stat info, may be NULL
19+
const struct fsstat_s* cst;///< Current file stat info
20+
uint64_t pxx; ///< Previous xxHash64 value, or 0 if unknown
21+
const char* cause; ///< Optional cause string for logging, may be NULL
22+
};
23+
24+
/// @brief Processes a file hash request. If the file's last modified time or
25+
/// size differs from the previous stat info \p pst, the file is re-hashed
26+
/// and the current hash value is returned. Otherwise the previous \p pxx hash
27+
/// value is returned indicating no change.
28+
/// @param req The file hash request structure containing the filepath, previous
29+
/// and current stat info, and previous hash value (if known).
30+
/// @return 0 if an error occurred, otherwise the current hash value.
31+
uint64_t xxupdate(const struct xxreq_s* req);
32+
33+
#endif//FSAUTOPROC_XX_H

src/deng.c

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@
1010
#include "fs.h"
1111
#include "index.h"
1212
#include "log.h"
13+
#include "xx.h"
14+
15+
/// @def XXBUFSZE
16+
/// @brief Size of the buffer used for reading files to hash.
17+
#define XXBUFSZE 65536
18+
19+
static uint8_t xxbuf[XXBUFSZE];///< Shared buffer for reading files to hash
1320

1421
/// @struct deng_state_s
1522
/// @brief Search state context provided to the diff engine as user data which
@@ -58,15 +65,25 @@ static int stagepre(const char* fp, const struct fsstat_s* st, void* udata) {
5865
// attempt to match file in previous index
5966
struct inode_s* prev = indexfind(mach->lastmap, fp, fphash);
6067

61-
// lookup from previous iteration or insert new record and lookup
62-
struct inode_s* curr = indexfind(mach->thismap, fp, fphash);
63-
if (curr == NULL)
64-
if ((curr = indexput(mach->thismap, fp, fphash, st)) == NULL) return -1;
68+
// insert node for current index
69+
struct inode_s* curr = indexput(mach->thismap, fp, fphash, st, 0);
70+
if (!curr) return -1;
71+
72+
struct xxreq_s xx = {0};
73+
xx.b = xxbuf;
74+
xx.fp = fp;
75+
xx.cst = st;
6576

6677
if (prev != NULL) {
67-
if (prev->st.lmod != st->lmod || prev->st.fsze != st->fsze)
68-
callevent(mach, DENG_FEVENT_MOD, curr);
78+
// file existed in previous index, check if modified
79+
xx.pst = &prev->st;
80+
xx.pxx = prev->xx;
81+
xx.cause = "stage=pre,node=existing";
82+
curr->xx = xxupdate(&xx);
83+
if (xx.pxx != curr->xx) callevent(mach, DENG_FEVENT_MOD, curr);
6984
} else {
85+
xx.cause = "stage=pre,node=new";
86+
curr->xx = xxupdate(&xx);// hash new record
7087
callevent(mach, DENG_FEVENT_NEW, curr);
7188
}
7289

@@ -86,16 +103,27 @@ static int stagepost(const char* fp, const struct fsstat_s* st, void* udata) {
86103

87104
if (mach->ffn != NULL && mach->ffn(fp)) return 0;// skip filtered files
88105

89-
const uint64_t fphash = indexhash(fp);
106+
struct xxreq_s xx = {0};
107+
xx.b = xxbuf;
108+
xx.fp = fp;
109+
xx.cst = st;
90110

111+
const uint64_t fphash = indexhash(fp);
91112
struct inode_s* curr = indexfind(mach->thismap, fp, fphash);
92113
if (curr != NULL) {
93-
curr->st = *st;// update the file info in the current index
94-
return 0;
95-
}
114+
xx.pst = &curr->st;
115+
xx.pxx = curr->xx;
116+
xx.cause = "stage=post,node=existing";
117+
curr->xx = xxupdate(&xx);// update hash of existing record
118+
} else {
119+
xx.cause = "stage=post,node=new";
120+
const uint64_t cxx = xxupdate(&xx);// hash new record
96121

97-
if ((curr = indexput(mach->thismap, fp, fphash, st)) == NULL) return -1;
98-
callevent(mach, DENG_FEVENT_NEW, curr);
122+
// file was not found in previous stage, insert new record
123+
if ((curr = indexput(mach->thismap, fp, fphash, st, cxx)) == NULL)
124+
return -1;
125+
callevent(mach, DENG_FEVENT_NEW, curr);
126+
}
99127

100128
return 0;
101129
}
@@ -151,6 +179,7 @@ int dengsearch(const char* sd, deng_filter_t filter,
151179
if ((err = execstage(&mach, sd, stagepre))) goto ret;
152180
if ((err = checkremoved(&mach))) goto ret;
153181
if ((err = execstage(&mach, sd, stagepost))) goto ret;
182+
if ((err = checkremoved(&mach))) goto ret;
154183
ret:
155184
return err;
156185
}

src/index.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,9 @@ int indexwrite(struct index_s* idx, FILE* s) {
6464
int err = 0;
6565
for (long i = 0; i < idx->size; i++) {
6666
struct inode_s* node = fl[i];
67-
const int n = snprintf(lbuf, sizeof(lbuf), "%s,%" PRIu64 ",%" PRIu64 "\n",
68-
node->fp, node->st.lmod, node->st.fsze);
67+
const int n = snprintf(lbuf, sizeof(lbuf),
68+
"%s,%" PRIu64 ",%" PRIu64 ",%" PRIu64 "\n", node->fp,
69+
node->st.lmod, node->st.fsze, node->xx);
6970
if (fwrite(lbuf, n, 1, s) != 1) {
7071
err = -1;
7172
break;
@@ -78,10 +79,11 @@ int indexwrite(struct index_s* idx, FILE* s) {
7879
int indexread(struct index_s* idx, FILE* s) {
7980
char fp[INDEXMAXFP] = {0}; /* fscanf filepath string buffer */
8081
struct fsstat_s st = {0}; /* fscanf file stat structure */
81-
while (fscanf(s, "%[^,],%" PRIu64 ",%" PRIu64 "\n", fp, &st.lmod, &st.fsze) ==
82-
3) {
82+
uint64_t xx = 0; /* fscanf xxHash64 value */
83+
while (fscanf(s, "%[^,],%" PRIu64 ",%" PRIu64 ",%" PRIu64 "\n", fp, &st.lmod,
84+
&st.fsze, &xx) == 4) {
8385
const uint64_t fphash = indexhash(fp);
84-
if (indexput(idx, fp, fphash, &st) == NULL) return -1;
86+
if (indexput(idx, fp, fphash, &st, xx) == NULL) return -1;
8587
}
8688
return 0;
8789
}
@@ -101,14 +103,15 @@ static void indexappend(struct ibucket_s* bucket, struct inode_s* node) {
101103
}
102104

103105
struct inode_s* indexput(struct index_s* idx, const char* fp,
104-
const uint64_t fphash, const struct fsstat_s* st) {
106+
const uint64_t fphash, const struct fsstat_s* st,
107+
const uint64_t xx) {
105108
struct inode_s* node = je_malloc(sizeof(struct inode_s));
106109
if (node == NULL) return NULL;
107110
if ((fp = je_strdup(fp)) == NULL) {// duplicate filepath string
108111
je_free(node);
109112
return NULL;
110113
}
111-
*node = (struct inode_s) {(char*) fp, fphash, *st, NULL};
114+
*node = (struct inode_s) {(char*) fp, fphash, *st, xx, NULL};
112115
struct ibucket_s* bucket = &idx->buckets[indexbucket(node->fphash)];
113116
indexappend(bucket, node);
114117
idx->size++;

src/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ static bool filterjunk(const char* fp) {
252252
if (initargs.verbose) log_info("[j] %s", fp);
253253
} else {
254254
const struct fsstat_s st = {0};
255-
indexput(&goodmap, fp, fphash, &st);// mark as known good
255+
indexput(&goodmap, fp, fphash, &st, 0);// mark as known good
256256
}
257257
return junk;
258258
}

0 commit comments

Comments
 (0)