C-Menu 0.2.9
A User Interface Toolkit
Loading...
Searching...
No Matches
lf.c
Go to the documentation of this file.
1/** NOTICE: This file is part of the lf project, which is currently under
2 * development. There are two reasons not to use it at this time. First,
3 * it is not yet fully functional and may contain bugs or incomplete,
4 * unoptimized code. Second, the API and features are still being finalized,
5 * so using it now may lead to compatibility issues in the future as changes
6 * are made. Once the project is more mature and stable, this file will be
7 * ready for use. In the meantime, it serves as a work in progress and may
8 * be subject to significant changes as development continues.
9 *
10 * Thank you for your patience and understanding.
11 */
12
13/** @file lf.c
14 @brief list files matching a regular expression
15 @author Bill Waller
16 Copyright (c) 2025
17 MIT License
18 billxwaller@gmail.com
19 @date 2026-02-09
20 */
21#define _GNU_SOURCE
22#include <argp.h>
23#include <cm.h>
24#include <dirent.h>
25#include <errno.h>
26#include <fcntl.h>
27#include <grp.h>
28#include <linux/limits.h>
29#include <pthread.h>
30#include <pwd.h>
31#include <regex.h>
32#include <stdatomic.h>
33#include <stdbool.h>
34#include <stdint.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <sys/stat.h>
39#include <sys/sysinfo.h>
40#include <sys/types.h>
41#include <sys/wait.h>
42#include <time.h>
43#include <unistd.h>
44
45#define print_file_type(mask, lf_type, dt_type, name)
46 {
47 fprintf(stderr, "%c %08b (%3d) %08b (%2d) %s\n",
48 (mask & lf_type) ? '*' : ' ', lf_type, lf_type,
49 dt_type, dt_type, name);
50 }
51
52struct tm tm_info;
54const char *argp_program_bug_address = "billxwaller@gmail.com";
55const char doc[] = "lf list files\vIf specified, DIRECTORY is the top-level "
56 "directory to search. REGULAR_EXPRESSION is a properly "
57 "formatted regular expression for which matching files "
58 "will be listed.";
59static char args_doc[] = "[DIRECTORY] [REGULAR_EXPRESSION]";
61
62// Search Filters
63typedef struct {
64 uintmax_t user_id;
65 off_t file_size_min;
66 long flags;
67 time_t after;
68 time_t before;
69 int max_depth;
70 int reg_flags;
71 char *base_path;
72 char *re;
73 char *ere;
74 char *user_name;
75 regex_t compiled_re;
76 regex_t compiled_ere;
77 unsigned char include_perms;
78 unsigned char include_types;
79 unsigned char suppress_types;
80 bool ignore_case;
81 bool sort;
82 bool sort_reverse;
83 bool include_hidden;
84 bool follow_links;
85 bool debug;
86 bool report_config;
87 bool report_info;
88 bool report_warnings;
89 bool report_errors;
90 bool report_badlinks;
91 bool report_trace;
92 bool report_all;
93 bool only_errors;
94} SearchFilters;
95
96#define DT_LNK_DIR 14
97
98typedef struct {
99 dev_t dev;
100 ino_t ino;
101} History;
102
103typedef struct TaskNode TaskNode;
104struct TaskNode {
105 History *history; /**< Array of dev/ino pairs for cycle detection */
106 TaskNode *next_task; /**< Pointer to the next node in the queue */
107 int depth; /**< Current depth in the directory tree */
108 char *dir_path; /**< Directory path to process */
109}; /**< Queue TaskNode (for work-stealing) */
110
111// typedef struct { /** not used yet */
112// TaskNode *qhead;
113// TaskNode *qtail;
114// } TaskQueue;
115// ---------------------------------------------------------------
116// ╭───────────╮
117// ╭───────────╮ ╭──────────╯ dir_path ╰───────────╮
118// │ TaskQueue ├─────┤ TaskNode history dev/inode │
119// ╰───────────╯ ╰──────────╮ depth ╭───────────╯
120// │ next_task │
121// ╰───────────╯
122// ---------------------------------------------------------------
123TaskNode *qhead = NULL;
124TaskNode *qtail = NULL;
125pthread_mutex_t queue_mutex = PTHREAD_MUTEX_INITIALIZER;
126pthread_cond_t cond_var = PTHREAD_COND_INITIALIZER;
127atomic_int active_tasks = 0;
128int shutdown = 0;
129int termination_status = EXIT_SUCCESS;
131char *lfargs[3];
132char *exec;
136void debug_out(SearchFilters *, int, char **, int);
137bool init_find(SearchFilters *, int, char **);
138void sort_lf_output(SearchFilters *, int, char **);
139void enqueue_dir(TaskNode *);
140TaskNode *dequeue_dir();
141void *finder(void *);
142int scan_file(char *, const SearchFilters *, const unsigned char);
143// ---------------------------------------------------------------
144
145static struct argp_option options[] = {
146 {"after", 'a', "time", 0, "Modified after YYYY-MM-DDTHH:MM:SS", 0},
147 {"before", 'b', "time", 0, "Modified before YYYY-MM-DDTHH:MM:SS", 0},
148 {"max_depth", 'd', "number", 0, "Depth into directory tree", 0},
149 {"ere", 'e', "regex", 0, "Exclude regular expression", 0},
150 {"ignore_case", 'i', 0, 0, "Search ignore case", 0},
151 {"include_perms", 'p', "sgrwx", 0,
152 "s-setuid, g-setgid, r-read, w-write, x-execute", 0},
153 {"re", 'r', "regex", 0, "Regular expression to search for", 0},
154 {"include_types", 't', "bcdplrsu", 0,
155 "b-block, c-character, d-directory, p-pipe, l-link, r-regular, s-"
156 "socket, u-unknown",
157 0},
158 {"file_size_min", 's', "size", 0,
159 "No Suffix-bytes, K-kilobytes, M-Megabytes, or G-Gigabytes", 0},
160 {"user", 'u', "user name", 0, "User Name of file owner ", 0},
161#ifdef EXPERIMENTAL
162 {"exec", 'x', "command", 0, "execute external command", 0},
163#endif
164 {"debug", 'D', "12345678", 0,
165 "1-config, 2-info, 3-warnings, 4-errors, 5-badlinks, 6-trace, 7-all, "
166 "8-only_errors",
167 0},
168 {"include_hidden", 'H', 0, 0, "Include hidden files", 0},
169 {"follow_links", 'L', 0, 0, "Follow symbolic links", 0},
170 {"sort_reverse", 'R', 0, 0, "Sort in Reverse order", 0},
171 {"sort", 'S', 0, 0, "Sort in Ascending order", 0},
172 {"nthreads", 'T', "threads", 0, "Number of nthreads", 0},
173 {0}};
174
175/** @brief Parse a single option. */
176static error_t parse_opt(int key, char *arg, struct argp_state *state) {
177 SearchFilters *f = state->input;
178 int i = 0;
179 bool a_toi_error = false;
180
181 switch (key) {
182 case 'a':
183 parse_local_timestamp(arg, &f->after);
184 if (f->after && f->before && f->before < f->after) {
185 fprintf(stderr, "-b time must be greater than -a time.\n");
186 f->after = 0;
187 }
188 break;
189 case 'b':
190 parse_local_timestamp(arg, &f->before);
191 if (f->after && f->before && f->before < f->after) {
192 fprintf(stderr, "-b time must be greater than -a time.\n");
193 f->before = 0;
194 }
195 break;
196 case 'd':
197 if (arg && arg[0] != '\0')
198 f->max_depth = a_toi(arg, &a_toi_error);
199 break;
200 case 'D':
201 f->debug = true;
202 if (arg && arg[0] != '\0') {
203 debug_p = strdup(arg);
204 i = 0;
205 while (debug_p[i]) {
206 switch (debug_p[i]) {
207 case '1':
208 f->report_config = true;
209 break;
210 case '2':
211 f->report_info = true;
212 break;
213 case '3':
214 f->report_warnings = true;
215 break;
216 case '4':
217 f->report_errors = true;
218 break;
219 case '5':
220 f->report_badlinks = true;
221 break;
222 case '6':
223 f->report_trace = true;
224 break;
225 case '7':
226 f->report_config = true;
227 f->report_info = true;
228 f->report_warnings = true;
229 f->report_errors = true;
230 f->report_badlinks = true;
231 f->report_all = true;
232 break;
233 case '8':
234 f->only_errors = true;
235 break;
236 default:
237 break;
238 }
239 i++;
240 }
241 free(debug_p);
242 }
243 break;
244 case 'e':
245 f->ere = strdup(arg);
246 f->flags |= LF_EXC_REGEX;
247 break;
248 case 'H':
249 f->include_hidden = true; // Include hidden files
250 f->flags &= ~(LF_HIDE); // Turn hide flag off
251 // LF_HIDE = 0 - include hidden files,
252 // LF_HIDE = 1 - suppress hidden files
253 break;
254 case 'i':
255 f->flags |= LF_ICASE;
256 break;
257 case 'L':
258 f->follow_links = true; // follow symbolic links
259 break;
260 case 'p':
261 perms_p = arg;
262 while (perms_p[i]) {
263 switch (perms_p[i++]) {
264 case 'g':
265 f->include_perms |= LF_ISGID;
266 break;
267 case 'r':
268 f->include_perms |= LF_IRUSR;
269 break;
270 case 's':
271 f->include_perms |= LF_ISUID;
272 break;
273 case 'w':
274 f->include_perms |= LF_IWUSR;
275 break;
276 case 'x':
277 f->include_perms |= LF_IXUSR;
278 break;
279 default:
280 break;
281 }
282 }
283 break;
284 case 'R':
285 f->sort_reverse = true;
286 break;
287 case 'r':
288 f->re = strdup(arg);
289 f->flags |= LF_REGEX;
290 break;
291 case 'S':
292 f->sort = true;
293 break;
294 case 's':
295 f->file_size_min = (intmax_t)(a_to_ul(arg));
296 break;
297 case 'T':
298 if (arg && arg[0] != '\0')
299 nthreads = a_toi(arg, &a_toi_error);
300 break;
301 case 't':
302 file_types_p = arg;
303 while (file_types_p[i]) {
304 switch (file_types_p[i++]) {
305 case 'b':
306 f->include_types |= LF_BLK;
307 break;
308 case 'c':
309 f->include_types |= LF_CHR;
310 break;
311 case 'd':
312 f->include_types |= LF_DIR;
313 break;
314 case 'p':
315 f->include_types |= LF_FIFO;
316 break;
317 case 'l':
318 f->include_types |= LF_LNK;
319 break;
320 case 'f': // for regular files, 'f' is more intuitive than 'r'
321 case 'r': // regular files are the most common type, so 'r' is also
322 // accepted
323 f->include_types |= LF_REG;
324 break;
325 case 's':
326 f->include_types |= LF_SOCK;
327 break;
328 case 'u':
329 f->include_types |= LF_UNKNOWN;
330 break;
331 default:
332 break;
333 }
334 }
335 break;
336 case 'u':
337 f->user_name = strdup(arg);
338 struct passwd *pwd = getpwnam(arg);
339 if (pwd) {
340 f->user_id = (uintmax_t)pwd->pw_uid;
341 f->flags |= LF_USER;
342 } else {
343 fprintf(stderr, "User '%s' not found.\n", arg);
344 exit(EXIT_FAILURE);
345 }
346 break;
347#ifdef EXPERIMENTAL
348 case 'x':
349 f->exec = strdup(arg);
350 break;
351#endif
352 case ARGP_KEY_ARG:
353 if (state->arg_num == 0 || state->arg_num == 1) {
354 lfargs[state->arg_num] = arg;
355 lfargc = state->arg_num + 1;
356 } else {
357 argp_usage(state);
358 }
359 break;
360 case ARGP_KEY_END:
361 break;
362 default:
363 return ARGP_ERR_UNKNOWN;
364 }
365 return 0;
366}
367static struct argp argp = {options, parse_opt, args_doc, doc,
369
370int main(int argc, char **argv) {
371
372 SearchFilters *f = (SearchFilters *)calloc(1, sizeof(SearchFilters));
373 f->ignore_case = false;
374 f->sort = false;
375 f->sort_reverse = false;
376 f->include_hidden = false; // By default, hidden files are suppressed. Use
377 // -H to include them.
378 // LF_HIDE = 0 - include hidden files,
379 // LF_HIDE = 1 - suppress hidden files
380 f->flags |= LF_HIDE; // Turn hide flag on
381 f->follow_links = false; // By default, symbolic links are not followed. Use
382 // -L to follow them.
383 char tmp_str[PATH_MAX];
384 argp_parse(&argp, argc, argv, 0, 0, f);
385 if (lfargc > 0) {
386 strnz__cpy(tmp_str, lfargs[0], MAXLEN - 1);
387 expand_tilde(tmp_str, MAXLEN - 1);
388 if (is_directory(tmp_str) || is_symlink_to_dir(tmp_str)) {
389 f->base_path = strdup(tmp_str);
390 } else if (is_valid_regex(lfargs[0])) {
391 f->re = strdup(lfargs[0]);
392 f->flags |= LF_REGEX;
393 } else {
394 fprintf(
395 stderr,
396 "lf: arg1: '%s' is neither a directory nor a valid regex.\n",
397 lfargs[0]);
398 exit(EXIT_FAILURE);
399 }
400 }
401 if (lfargc > 1) {
402 if (!f->base_path || f->base_path[0] == '\0') {
403 strnz__cpy(tmp_str, lfargs[1], MAXLEN - 1);
404 expand_tilde(tmp_str, MAXLEN - 1);
405 if (is_directory(tmp_str) || is_symlink_to_dir(tmp_str))
406 f->base_path = strdup(tmp_str);
407 }
408 if ((!(f->flags & LF_REGEX)) && is_valid_regex(lfargs[1])) {
409 f->re = strdup(lfargs[1]);
410 f->flags |= LF_REGEX;
411 } else {
412 fprintf(stderr,
413 "lf: '%s' is neither a directory nor a valid regular "
414 "expression.\n",
415 lfargs[1]);
416 exit(EXIT_FAILURE);
417 }
418 }
419 if (f->base_path == nullptr || f->base_path[0] == '\0')
420 f->base_path = strdup(".");
421 if (!f->sort) {
422 init_find(f, argc, argv);
423 } else
424 sort_lf_output(f, argc, argv);
425 exit(termination_status);
426}
427// Initialize and transfer control to the finder
428/** If sorting is requested, execute the finder and pipe its output
429 * to the sort command. We can achieve this by creating a child
430 * process that runs the sort command, and redirecting the output of
431 * the file finder to the input of the sort command using a pipe.
432 * The parent process will run the finder and write its output to
433 * the pipe, while the child process will read from the pipe and
434 * execute the sort command. */
435void sort_lf_output(SearchFilters *f, int argc, char **argv) {
436 char *eargv[MAXARGS];
437 int eargc = 0;
438 eargv[eargc++] = strdup("sort");
439 if (f->sort_reverse)
440 eargv[eargc++] = strdup("-r");
441 eargv[eargc] = nullptr;
442 int wstatus;
443 int save_fd = dup(STDOUT_FILENO); // save current STDOUT
444 int fds[2];
445 pipe(fds); // Create the pipes
446 pid_t pid1 = fork();
447 if (pid1 == 0) { // Child process
448 close(fds[1]); // Close child write pipe
449 dup2(fds[0],
450 STDIN_FILENO); // Clone child read pipe to STDIN_FILENO
451 close(fds[0]); // Close child read pipe after cloning
452 execvp(eargv[0], eargv); // Execute the external command
453 }
454 // Parent process
455 close(fds[0]); // Close read end of pipe
456 dup2(fds[1], STDOUT_FILENO); // Clone write pipe to STDOUT_FILENO
457 close(fds[1]); // Close duplicated write pipe
458 setvbuf(stdout, NULL, _IOLBF, 0);
459 // line buffering is essential to ensure that output is flushed to
460 // the sort process in a timely manner, preventing deadlocks and
461 // ensuring that the sort process can start processing input as soon
462 // as it is available. Without line buffering, the output may be
463 // buffered until the buffer is full, which could lead to delays in
464 // processing and potential deadlocks if the sort process is waiting
465 // for input that hasn't been flushed yet.
466 dup2(save_fd, STDOUT_FILENO); // restore STDOUT
467 waitpid(pid1, &wstatus, 0);
468 init_find(f, argc, argv); // Initialize and transfer control to the finder
469}
470/** @brief Initialize the file search based on the provided SearchFilters
471 and start finder threads.
472 @param f A pointer to a SearchFilters struct containing the options and
473 flags for filtering.
474 @param argc The number of command-line arguments.
475 @param argv The array of command-line argument strings.
476 @return true if the search was initialized successfully, false if an
477 error occurred during initialization (e.g., regex compilation failure).
478 @details This function processes the flags and options specified in the
479 SearchFilters struct to set up the search criteria. It compiles any
480 regular expressions provided by the user and initializes the queue with
481 the base directory. It then creates a number of finder threads equal to
482 the number of CPU cores to perform the directory traversal and file
483 scanning concurrently. The function waits for all finder threads to
484 complete before cleaning up resources and returning.
485 */
486bool init_find(SearchFilters *f, int argc, char **argv) {
487 /** suppress file types that aren't included */
488 if (!f->include_types)
489 f->include_types = 0xff;
490 if (f->include_types)
491 f->suppress_types = f->include_types ^ 0xff;
492 // LF_HIDE = 0 - include hidden files,
493 // LF_HIDE = 1 - suppress hidden files
494 f->include_hidden = !(f->flags & LF_HIDE);
495 int reti = 0;
496 f->reg_flags = REG_EXTENDED;
497 if (f->flags & LF_ICASE)
498 f->reg_flags |= REG_ICASE;
499 if (f->flags & LF_REGEX) {
500 reti = regcomp(&f->compiled_re, f->re, f->reg_flags);
501 if (reti) {
502 fprintf(stderr, "lf: '%s' Invalid pattern\n", f->re);
503 regfree(&f->compiled_re);
504 return false;
505 }
506 }
507 if (f->flags & LF_EXC_REGEX) {
508 reti = regcomp(&f->compiled_ere, f->ere, f->reg_flags);
509 if (reti) {
510 fprintf(stderr, "lf: '%s' Invalid exclude pattern\n", f->ere);
511 regfree(&f->compiled_ere);
512 return false;
513 }
514 }
515 int n = get_nprocs();
516 if (nthreads < 0) {
517 if (n + nthreads > 1)
518 n += nthreads;
519 } else if (nthreads == 0)
520 nthreads = max(1, n / 2 - 1);
521 else if (nthreads > n)
522 nthreads = n - 1;
523 debug_out(f, argc, argv, nthreads);
524 //--------------------------------------------------------------------
525 // Create and enqueue the first TaskNode
526 struct stat st;
527 if (stat(f->base_path, &st) == 0) {
528 if (S_ISDIR(st.st_mode)) {
529 TaskNode *child_task = malloc(sizeof(TaskNode));
530 child_task->dir_path = strdup(f->base_path);
531 child_task->depth = 0;
532 child_task->next_task = NULL;
533 child_task->history = malloc(sizeof(History));
534 child_task->history[0].dev = st.st_dev;
535 child_task->history[0].ino = st.st_ino;
536 enqueue_dir(child_task);
537 pthread_t threads[nthreads];
538 for (int i = 0; i < nthreads; i++) {
539 pthread_create(&threads[i], NULL, finder, f);
540 }
541 pthread_mutex_lock(&queue_mutex);
542 while (!shutdown) {
543 pthread_cond_wait(&cond_var, &queue_mutex);
544 }
545 pthread_mutex_unlock(&queue_mutex);
546 for (int i = 0; i < nthreads; i++)
547 pthread_join(threads[i], NULL);
548 } else {
549 fprintf(stderr,
550 "Warning: Base path '%s' is not a directory. No "
551 "files will be found.\n",
552 f->base_path);
553 }
554 }
555 //--------------------------------------------------------------------
556 // End Program
557 if (f->flags & LF_REGEX) {
558 regfree(&f->compiled_re);
559 }
560 if (f->flags & LF_EXC_REGEX) {
561 regfree(&f->compiled_ere);
562 }
563 free(f->base_path);
564 free(f->user_name);
565 free(f->re);
566 free(f->ere);
567 free(f);
568 if (reti)
569 return false;
570 return true;
571}
572/** @brief Output debug information about the search filters and configuration.
573 @param f A pointer to a SearchFilters struct containing the options and
574 flags for filtering.
575 @param argc The number of command-line arguments.
576 @param argv The array of command-line argument strings.
577 @param nthreads The number of threads being used for the search.
578 @details This function prints detailed information about the configuration and search filters as well as each of the options and arguments used on the command line invoking lf. The output is sent to the standard error stream, which can be redirected to standard output making it suitable as documentation for an audit trail.
579 */
580
581void debug_out(SearchFilters *f, int argc, char **argv, int nthreads) {
582 char user_str[100];
583 char ip_str[MAXLEN];
584 int len;
585 int i;
586 bool addspace_before = false;
587 if (f->debug && (f->report_config || f->report_info || f->report_all)) {
588 fprintf(stderr, "%s,%s,%s,", get_local_timestamp(), get_user_str(user_str, 100), get_ip_addresses(ip_str, MAXLEN));
589 for (i = 0; i < argc; i++) {
590 len = len + strlen(argv[i]);
591 if (len > 72) {
592 fprintf(stderr, "\n");
593 len = strlen(argv[i]);
594 addspace_before = false;
595 }
596 if (addspace_before) {
597 fprintf(stderr, " ");
598 len++;
599 }
600 fprintf(stderr, "%s", argv[i]);
601 addspace_before = true;
602 }
603 fprintf(stderr, "\n\n");
604 fprintf(stderr, "%s\n\n", CM_VERSION);
605 fprintf(stderr, "lf debug %s\n",
606 f->debug ? "true" : " false");
607 fprintf(stderr, " 1-config %s\n",
608 f->report_config ? "true" : "| false");
609 fprintf(stderr, " 2-info %s\n",
610 f->report_info ? "true" : "| false");
611 fprintf(stderr, " 3-warnings %s\n",
612 f->report_warnings ? "true" : "| false");
613 fprintf(stderr, " 4-errors %s\n",
614 f->report_errors ? "true" : "| false");
615 fprintf(stderr, " 5-badlinks %s\n",
616 f->report_trace ? "true" : "| false");
617 fprintf(stderr, " 6-trace %s\n",
618 f->report_trace ? "true" : "| false");
619 fprintf(stderr, " 7-all %s\n",
620 f->report_all ? "true" : "| false");
621 fprintf(stderr, " 8-only_errors %s\n",
622 f->only_errors ? "true" : "| false");
623 fprintf(stderr, "\n");
624 fprintf(stderr, "Search directory: %s\n\n", f->base_path);
625 fprintf(stderr, "Using %d threads\n\n", nthreads);
626 fprintf(stderr, "File types preceeded by an asterisk (\"*\") will be included:\n\n");
627 fprintf(stderr, " LF type DT type\n");
628 print_file_type(f->include_types, LF_FIFO, DT_FIFO, "FIFO named pipe");
629 print_file_type(f->include_types, LF_CHR, DT_CHR, "CHR character device");
630 print_file_type(f->include_types, LF_DIR, DT_DIR, "DIR directory");
631 print_file_type(f->include_types, LF_BLK, DT_BLK, "BLK block device");
632 print_file_type(f->include_types, LF_REG, DT_REG, "REG regular file");
633 print_file_type(f->include_types, LF_LNK, DT_LNK, "LINK symbolic link");
634 print_file_type(f->include_types, LF_SOCK, DT_SOCK, "SOCK socket");
635 print_file_type(f->include_types, LF_UNKNOWN, DT_UNKNOWN, "UNKNOWN unknown");
636 fprintf(stderr, "\n");
637 if (f->flags & LF_USER)
638 fprintf(stderr, "User: %s (%ju)\n", f->user_name, f->user_id);
639 if (f->include_perms) {
640 if (f->include_perms & LF_IXUSR)
641 fprintf(stderr, " %08b Execute\n", LF_IXUSR);
642 if (f->include_perms & LF_IWUSR)
643 fprintf(stderr, " %08b Write\n", LF_IWUSR);
644 if (f->include_perms & LF_IRUSR)
645 fprintf(stderr, " %08b Read\n", LF_IRUSR);
646 if (f->include_perms & LF_ISUID)
647 fprintf(stderr, " %08b SETUID\n", LF_ISUID);
648 if (f->include_perms & LF_ISGID)
649 fprintf(stderr, " %08b SETGID\n", LF_ISGID);
650 }
651 fprintf(stderr, "\n");
652 if (f->flags & LF_REGEX)
653 fprintf(stderr, "Include regex: %s\n\n", f->re);
654 if (f->flags & LF_EXC_REGEX)
655 fprintf(stderr, "Exclude regex: %s\n\n", f->ere);
656
657 if (f->after) {
658 char buf[32];
659 format_local_timestamp(f->after, buf, sizeof(buf));
660 fprintf(stderr, "Modified after: %s\n\n", buf);
661 }
662
663 if (f->before) {
664 char buf[32];
665 format_local_timestamp(f->before, buf, sizeof(buf));
666 fprintf(stderr, "Modified before: %s\n\n", buf);
667 }
668
669 if (f->file_size_min) {
670 const char *units[] = {"b", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb"};
671 off_t size = f->file_size_min;
672 int i = 0;
673 while (size >= 1024 && i < 6) {
674 size /= 1024;
675 i++;
676 }
677 char buffer[32];
678 ssnprintf(buffer, 32, "%ld %s", size, units[i]);
679 fprintf(stderr, "Minimum file size: %s\n\n", buffer);
680 }
681 if (f->max_depth)
682 fprintf(stderr, "Max depth: %d\n\n", f->max_depth);
683 if (f->ignore_case)
684 fprintf(stderr, "Ignore case in regex matching.\n\n");
685 if (f->include_hidden)
686 fprintf(stderr, "Include hidden files.\n\n");
687 if (f->follow_links)
688 fprintf(stderr, "Follow symbolic links.\n\n");
689 if (f->sort)
690 fprintf(stderr, "Sort output in ascending order.\n\n");
691 if (f->sort_reverse)
692 fprintf(stderr, "Sort output in reverse order.\n\n");
693 if (f->report_config && !f->report_all)
694 exit(EXIT_SUCCESS);
695 }
696 return;
697}
698/** @brief Enqueue a directory dir_path for processing by finder threads.
699 @param new_task A pointer to a TaskNode containing the directory path
700 and depth to be enqueued for processing by finder threads.
701 @details This function adds a new TaskNode to the global queue of
702 directories to be processed by finder threads.
703 // It uses a mutex to ensure thread-safe access to the queue and signals
704 one waiting thread that a new task is available.
705 // If shutdown has been initiated, the signal will wake up any waiting
706 threads so they can check the shutdown condition and exit gracefully.
707 */
708void enqueue_dir(TaskNode *new_task) {
709 pthread_mutex_lock(&queue_mutex);
710 if (qtail)
711 // Add the new task to the end of the queue. If qtail is not NULL,
712 // it means there are already tasks in the queue, so we set the
713 // next_task pointer of the current tail to point to the new task.
714 // This effectively adds the new task to the end of the queue.
715 qtail->next_task = new_task;
716 else
717 // If qtail is NULL, it means the queue is currently empty, so we
718 // set qhead to point to the new task, making it the first and only
719 // task in the queue
720 qhead = new_task;
721 // Finally, we update qtail to point to the new task, ensuring that it
722 // always points to the last task in the queue.
723 qtail = new_task;
724 // Signal one waiting thread that a new task is available. If shutdown
725 // hasn't been initiated, this will wake up a finder thread to process
726 // the new task. If shutdown has been initiated, the signal will wake up
727 // any waiting threads so they can check the shutdown condition and exit
728 // gracefully.
729 pthread_cond_signal(&cond_var);
730 pthread_mutex_unlock(&queue_mutex);
731}
732/** @brief Dequeue a directory dir_path for processing by finder threads.
733 @return A pointer to a TaskNode containing the directory dir_path and
734 depth, or NULL if the queue is empty and shutdown has been initiated.
735 @details This function removes and returns the next TaskNode from the
736 global queue. It uses a mutex to ensure thread-safe access to the queue,
737 and waits on a condition variable if the queue is empty. The function
738 also checks for shutdown conditions to allow finder threads to exit
739 gracefully when there is no more work to process.
740 */
741TaskNode *dequeue_dir() {
742 pthread_mutex_lock(&queue_mutex);
743
744 // Wait until there is a task in the queue or shutdown has been
745 // initiated. The loop condition checks if the queue is empty (qhead ==
746 // NULL) and if shutdown has not been initiated (!shutdown). If both
747 // conditions are true, it means there are no tasks to process and the
748 // thread should wait. The thread will be woken up when a new task is
749 // enqueued (via pthread_cond_signal in enqueue_dir) or when shutdown is
750 // initiated (via pthread_cond_broadcast in enqueue_dir or when
751 // active_tasks count reaches zero). This ensures that threads do not
752 // wait indefinitely when there are no tasks left to process and allows
753 // for a graceful shutdown of the program.
754 while (qhead == NULL && !shutdown) {
755 // If there are no active tasks and the queue is empty, we can
756 // safely initiate shutdown. This check is necessary to prevent a
757 // potential race condition where a thread could be waiting
758 // indefinitely on the condition variable if all tasks have been
759 // completed and no new tasks will be enqueued. By checking the
760 // active_tasks count, we can determine when it's safe to signal
761 // shutdown
762 if (atomic_load(&active_tasks) == 0) {
763 shutdown = 1;
764 // and wake up any waiting threads so they can exit gracefully.
765 pthread_cond_broadcast(&cond_var);
766 break;
767 }
768 // Wait for a task to be enqueued or shutdown initiated. The thread
769 // will be woken up when a new task is added to the queue (via
770 // pthread_cond_signal in enqueue_dir) or when shutdown is initiated
771 // (via pthread_cond_broadcast in enqueue_dir or when active_tasks
772 // count reaches zero). This allows the thread to check the
773 // conditions again and either process a new task or exit if
774 // shutdown has been initiated.
775 pthread_cond_wait(&cond_var, &queue_mutex);
776 }
777 if (shutdown && qhead == NULL) {
778 pthread_mutex_unlock(&queue_mutex);
779 return NULL;
780 }
781 TaskNode *temp = qhead;
782 // Move the head pointer to the next task in the queue. If the queue
783 // becomes empty after this operation (qhead becomes NULL), we also set
784 // qtail to NULL to indicate that the queue is empty. This ensures that
785 // both qhead and qtail accurately reflect the state of the queue,
786 // preventing potential issues with enqueuing new tasks or checking for
787 // an empty queue in future operations.
788 qhead = qhead->next_task;
789 if (!qhead)
790 qtail = NULL;
791 atomic_fetch_add(&active_tasks, 1);
792 pthread_mutex_unlock(&queue_mutex);
793 return temp;
794}
795/** @brief Worker thread function to process directories from the queue.
796 @param arg Pointer to the SearchFilters struct containing the options
797 and flags for filtering.
798 @return NULL
799 @details This function continuously dequeues directory dir_path from the
800 global queue and processes them. For each directory, it lists its
801 contents and applies the specified filters to each file. If a
802 subdirectory is found and it meets the criteria for further searching
803 (e.g., not hidden if hidden files are suppressed, and within max depth),
804 it is enqueued for processing. The function uses atomic operations to
805 track active tasks and condition variables to manage thread
806 synchronization and shutdown when all work is complete.
807 */
808void *finder(void *arg) {
809 SearchFilters *f = (SearchFilters *)arg;
810 char lnk_path[PATH_MAX] = {'\0'};
811 // regmatch_t pmatch;
812
813 while (1) {
814 TaskNode *current_task = dequeue_dir();
815 if (!current_task)
816 break;
817
818 //-------------------------------------------------------------
819 // Open the directory for reading. We use openat with AT_FDCWD to
820 // open the directory specified by current_task->dir_path, and then
821 // use fdopendir to get a DIR* stream for reading the directory
822 // entries. This approach allows us to handle directories with
823 // special characters in their names more robustly, as it avoids
824 // issues that can arise with functions like opendir that take a
825 // path string directly. If openat or fdopendir fails, we log the
826 // error (if debugging is enabled), clean up resources for the
827 // current task, and continue to the next iteration of the loop to
828 // process another task.
829 int dir_fd =
830 openat(AT_FDCWD, current_task->dir_path, O_RDONLY | O_DIRECTORY);
831 if (dir_fd == -1) {
832 if (f->debug && (f->report_warnings || f->report_errors ||
833 f->report_badlinks || f->report_all))
834 fprintf(stderr, "OPEN_FAIL,%s,%s\n", current_task->dir_path,
835 strerror(errno));
836 termination_status = EXIT_FAILURE;
837 free(current_task->dir_path);
838 free(current_task->history);
839 free(current_task);
840 atomic_fetch_sub(&active_tasks, 1);
841 pthread_cond_broadcast(&cond_var);
842 continue;
843 }
844 DIR *dir = fdopendir(dir_fd);
845 if (dir == NULL) {
846 if (f->debug && (f->report_warnings || f->report_errors ||
847 f->report_badlinks || f->report_all))
848 fprintf(stderr, "\nFDOPENDIR_FAIL,%s,%s\n",
849 current_task->dir_path, strerror(errno));
850 close(dir_fd);
851 termination_status = EXIT_FAILURE;
852 free(current_task->dir_path);
853 free(current_task->history);
854 free(current_task);
855 atomic_fetch_sub(&active_tasks, 1);
856 pthread_cond_broadcast(&cond_var);
857 continue;
858 }
859 // unsigned char real_type;
860 unsigned char effective_type;
861 //-------------------------------------------------------------
862 struct dirent *entry;
863 while ((entry = readdir(dir)) != NULL) {
864 struct stat st;
865 // real_type = '\0';
866 effective_type = '\0';
867 if (entry->d_name[0] == '.') {
868 if (entry->d_name[1] == '\0')
869 continue;
870 if (entry->d_name[1] == '.' && entry->d_name[2] == '\0')
871 continue;
872 if (!f->include_hidden)
873 continue;
874 }
875 char full_path[PATH_MAX] = {'\0'};
876 stpcpy(stpcpy(stpcpy(full_path, current_task->dir_path), "/"),
877 entry->d_name);
878 // Get link's metadata
879 int rc;
880 // We use fstatat with AT_SYMLINK_NOFOLLOW to get the metadata
881 // of the symbolic link itself, rather than the target it points
882 // to. This allows us to determine if the entry is a symbolic
883 // link and handle it according to the user's options (e.g.,
884 // whether to follow links or not). If fstatat fails, we log the
885 // error (if debugging is enabled) and continue to the next
886 // entry without processing this one further.
887 rc = fstatat(AT_FDCWD, full_path, &st, AT_SYMLINK_NOFOLLOW);
888 if (rc == -1) {
889 if (f->debug && (f->report_errors || f->report_warnings ||
890 f->report_badlinks || f->report_all))
891 fprintf(stderr, "LSTAT_FAIL,%s,%s\n", full_path,
892 strerror(errno));
893 termination_status = EXIT_FAILURE;
894 continue;
895 }
896 effective_type = (st.st_mode & S_IFMT) >> 12;
897 // Determine the real type of the entry. If the entry is a
898 // symbolic link, we set real_type to DT_LNK and then attempt to
899 // get the metadata of the target it points to using fstatat
900 // without AT_SYMLINK_NOFOLLOW. This allows us to determine the
901 // effective type of the entry based on the target's metadata,
902 // which is important for deciding how to process it (e.g.,
903 // whether it's a directory that we should enqueue for further
904 // searching). If fstatat fails when trying to get the target's
905 // metadata, we log the error (if debugging is enabled) but
906 // continue processing the entry based on its symbolic link
907 // metadata.
908 if (S_ISLNK(st.st_mode)) {
909 // Get the target's metadata
910 rc = fstatat(AT_FDCWD, full_path, &st, 0);
911 if (rc == -1) {
912 if (f->debug && (f->report_all || f->report_warnings ||
913 f->report_errors || f->report_badlinks)) {
914 fprintf(stderr, "STAT_FAIL,%s,%s\n", full_path,
915 strerror(errno));
916 }
917 termination_status = EXIT_FAILURE;
918 continue;
919 }
920 if (f->follow_links)
921 effective_type = (st.st_mode & S_IFMT) >> 12;
922 }
923 // Determine the effective type of the entry. We use the st_mode
924 // field from the stat struct to determine the file type by
925 // applying the S_IFMT mask and shifting it to get a value that
926 // corresponds to the DT_* constants. If the entry is a symbolic
927 // link and the user has chosen not to follow links, we treat it
928 // as a directory for the purpose of deciding whether to enqueue
929 // it for further searching. This allows us to handle symbolic
930 // links that point to directories in a way that respects the
931 // user's options while still allowing for traversal of linked
932 // directories if desired.
933 if (effective_type == DT_DIR) {
934 if (f->max_depth != 0 && current_task->depth == f->max_depth)
935 continue;
936 // Check for cycles by comparing the current
937 // directory's dev/inode with the history of dev/inode pairs
938 // from parent directories. If a match is found, it
939 // indicates a cycle and we skip processing this directory.
940 bool cycle_found = false;
941 if (f->debug && (f->report_trace || f->report_all))
942 fprintf(stderr, "Checking for cycles in: %s\n", full_path);
943 for (int i = 0; i < current_task->depth; i++) {
944 if (f->debug && (f->report_trace || f->report_all)) {
945
946 if (current_task->history[i].ino == st.st_ino)
947 fprintf(stderr, "%3d %ju %ju<===========\n", i,
948 current_task->history[i].ino, st.st_ino);
949 else
950 fprintf(stderr, "%3d %ju %ju\n", i,
951 current_task->history[i].ino, st.st_ino);
952 }
953 if (current_task->history[i].dev == st.st_dev &&
954 current_task->history[i].ino == st.st_ino) {
955 cycle_found = true;
956 break;
957 }
958 }
959 if (cycle_found) {
960 if (f->debug && (f->report_warnings || f->report_errors ||
961 f->report_trace || f->report_badlinks ||
962 f->report_all)) {
963 ssize_t len =
964 readlink(full_path, lnk_path, sizeof(lnk_path) - 1);
965 if (len != -1) {
966 lnk_path[len] = '\0';
967 fprintf(stderr, "CYCLIC_LINK,%s,%s\n", full_path,
968 lnk_path);
969 } else
970 fprintf(stderr, "CYCLIC_LINK,%s\n", full_path);
971 }
972 termination_status = EXIT_FAILURE;
973 continue;
974 }
975 //-------------------------------------------------------
976 // Create a new TaskNode for the subdirectory and enqueue it
977 // for processing by finder threads. We duplicate the
978 // current history of dev/inode pairs and add the current
979 // directory's dev/inode to the new history for the child
980 // task. This allows us to maintain a record of the
981 // directories we've visited in the current path, which is
982 // essential for cycle detection. By checking this history
983 // for each new directory we encounter, we can effectively
984 // prevent infinite loops caused by symbolic links or hard
985 // links that create cycles in the directory structure.
986 //
987 // What about using realloc? NO! All the directories in this
988 // subdirectory must share the same history array.
989 TaskNode *child_task = malloc(sizeof(TaskNode));
990 child_task->dir_path = strdup(full_path);
991 child_task->depth = current_task->depth + 1;
992 child_task->next_task = NULL;
993 child_task->history =
994 malloc((child_task->depth) * sizeof(History));
995 if (current_task->depth > 0) {
996 memcpy(child_task->history, current_task->history,
997 sizeof(History) * current_task->depth);
998 }
999 child_task->history[current_task->depth].dev = st.st_dev;
1000 child_task->history[current_task->depth].ino = st.st_ino;
1001 enqueue_dir(child_task);
1002 }
1003 scan_file(full_path, f, effective_type);
1004 }
1005 closedir(dir);
1006 free(current_task->dir_path);
1007 free(current_task->history);
1008 free(current_task);
1009 atomic_fetch_sub(&active_tasks, 1);
1010 }
1011 return NULL;
1012}
1013/** @brief scan a single file against search filters
1014 * @param file_spec specification of file being scanned
1015 * @param f SearchFilters struct
1016 * @param effective_type type of file being scanned
1017 * @return true if file selected, false otherwise
1018 */
1019int scan_file(char *file_spec, const SearchFilters *f,
1020 const unsigned char effective_type) {
1021 regmatch_t pmatch[2];
1022 bool stat_cached = false;
1023
1024 while (1) {
1025 if (f->suppress_types & effective_type)
1026 break;
1027 // Exclude non-matching files
1028 if (f->flags & LF_REGEX) {
1029 int reti =
1030 regexec(&f->compiled_re, file_spec, f->compiled_re.re_nsub + 1,
1031 pmatch, f->reg_flags);
1032 if (reti == REG_NOMATCH) {
1033 if (f->debug && (f->report_info || f->report_all))
1034 fprintf(stderr, "Regex no match: %s\n", file_spec);
1035 break;
1036 } else if (reti) {
1037 char errbuf[MAXLEN];
1038 regerror(reti, &f->compiled_re, errbuf, sizeof(errbuf));
1039 if (f->debug && (f->report_errors || f->report_all))
1040 fprintf(stderr, "regex error: %s\n", errbuf);
1041 termination_status = EXIT_FAILURE;
1042 return 0;
1043 }
1044 }
1045 // Exclude matching files
1046 if (f->flags & LF_EXC_REGEX) {
1047 int reti =
1048 regexec(&f->compiled_ere, file_spec, f->compiled_re.re_nsub + 1,
1049 pmatch, f->reg_flags);
1050 if (reti == 0) // Match
1051 break;
1052 if (reti != REG_NOMATCH) {
1053 if (reti) {
1054 char errbuf[MAXLEN];
1055 regerror(reti, &f->compiled_ere, errbuf, sizeof(errbuf));
1056 if (f->debug && (f->report_errors || f->report_all))
1057 fprintf(stderr, "Exclude regex error: %s\n", errbuf);
1058 termination_status = EXIT_FAILURE;
1059 return 0;
1060 }
1061 }
1062 }
1063 stat_cached = false;
1064 struct stat sb;
1065 // Exclude files not owned by specified user
1066 if ((f->flags & LF_USER) && stat(file_spec, &sb) == 0) {
1067 stat_cached = true;
1068 if (sb.st_uid != f->user_id)
1069 break;
1070 }
1071 if (f->include_perms) {
1072 if (!stat_cached) {
1073 if (stat(file_spec, &sb) == 0)
1074 stat_cached = true;
1075 }
1076 if ((f->include_perms & LF_IRUSR) && !(sb.st_mode & S_IRUSR))
1077 break;
1078 else if ((f->include_perms & LF_IWUSR) && !(sb.st_mode & S_IWUSR))
1079 break;
1080 else if ((f->include_perms & LF_IXUSR) && !(sb.st_mode & S_IXUSR))
1081 break;
1082 else if ((f->include_perms & LF_ISUID) && !(sb.st_mode & S_ISUID))
1083 break;
1084 else if ((f->include_perms & LF_ISGID) && !(sb.st_mode & S_ISGID))
1085 break;
1086 }
1087 if (f->before) {
1088 if (!stat_cached) {
1089 if (stat(file_spec, &sb) == 0)
1090 stat_cached = true;
1091 }
1092 if (stat_cached && sb.st_mtime > f->before)
1093 break;
1094 }
1095 if (f->after) {
1096 if (!stat_cached) {
1097 if (stat(file_spec, &sb) == 0)
1098 stat_cached = true;
1099 }
1100 if (stat_cached && sb.st_mtime < f->after)
1101 break;
1102 }
1103 if (f->file_size_min) {
1104 if (!stat_cached) {
1105 if (stat(file_spec, &sb) == 0)
1106 stat_cached = true;
1107 }
1108 if (stat_cached && sb.st_size < f->file_size_min)
1109 break;
1110 }
1111 if (effective_type == DT_DIR) {
1112 char *file_p = file_spec;
1113 while (*file_p++ != '\0')
1114 ;
1115 *file_p = '/';
1116 }
1117 if (f->only_errors)
1118 break;
1119 if (file_spec[0] == '.' && file_spec[1] == '/')
1120 printf("%s\n", &file_spec[2]);
1121 else
1122 printf("%s\n", file_spec);
1123 break;
1124 }
1125 return true;
1126}
#define MAXARGS
Definition cm.h:30
@ LF_EXC_REGEX
Definition cm.h:161
@ LF_HIDE
Definition cm.h:159
@ LF_ISUID
Definition cm.h:171
@ LF_IRUSR
Definition cm.h:169
@ LF_ICASE
Definition cm.h:160
@ LF_ISGID
Definition cm.h:170
@ LF_REGEX
Definition cm.h:162
@ LF_USER
Definition cm.h:164
@ LF_IXUSR
Definition cm.h:167
@ LF_IWUSR
Definition cm.h:168
@ LF_FIFO
Definition cm.h:176
@ LF_DIR
Definition cm.h:178
@ LF_UNKNOWN
Definition cm.h:183
@ LF_SOCK
Definition cm.h:182
@ LF_BLK
Definition cm.h:179
@ LF_LNK
Definition cm.h:181
@ LF_REG
Definition cm.h:180
@ LF_CHR
Definition cm.h:177
#define nullptr
Definition cm.h:25
#define max(a, b)
max macro evaluates two expressions, returning greatest result.
Definition cm.h:49
#define CM_VERSION
Definition version.h:7
#define MAXLEN
Definition curskeys.c:15
int termination_status
Definition lf.c:129
void sort_lf_output(SearchFilters *, int, char **)
Definition lf.c:435
void * finder(void *)
Worker thread function to process directories from the queue.
Definition lf.c:808
TaskNode * dequeue_dir()
Dequeue a directory dir_path for processing by finder threads.
Definition lf.c:741
int nthreads
Definition lf.c:60
char * exec
Definition lf.c:132
int shutdown
Definition lf.c:128
atomic_int active_tasks
Definition lf.c:127
TaskNode * qtail
Definition lf.c:124
char * debug_p
Definition lf.c:135
void debug_out(SearchFilters *, int, char **, int)
Output debug information about the search filters and configuration.
Definition lf.c:581
TaskNode * qhead
Definition lf.c:123
pthread_cond_t cond_var
Definition lf.c:126
const char doc[]
Definition lf.c:55
#define print_file_type(mask, lf_type, dt_type, name)
Definition lf.c:45
int lfargc
Definition lf.c:130
void enqueue_dir(TaskNode *)
Enqueue a directory dir_path for processing by finder threads.
Definition lf.c:708
struct tm tm_info
Definition lf.c:52
pthread_mutex_t queue_mutex
Definition lf.c:125
char * lfargs[3]
Definition lf.c:131
int scan_file(char *, const SearchFilters *, const unsigned char)
scan a single file against search filters
Definition lf.c:1019
char * perms_p
Definition lf.c:134
bool init_find(SearchFilters *, int, char **)
Initialize the file search based on the provided SearchFilters and start finder threads.
Definition lf.c:486
char * file_types_p
Definition lf.c:133
const char * argp_program_version
Definition init.c:84
const char * argp_program_bug_address
Definition init.c:85
int main(int argc, char **argv)
Definition amort.c:34
size_t strnz__cpy(char *, const char *, size_t)
safer alternative to strncpy
Definition futil.c:435
bool parse_local_timestamp(const char *, time_t *)
Parses an ISO 8601 timestamp string in local time and converts it to time_t.
Definition futil.c:171
char * get_local_timestamp()
Returns the current local time as an ISO 8601 formatted string.
Definition futil.c:205
bool is_directory(const char *)
Checks if the given path is a directory.
Definition futil.c:1233
size_t ssnprintf(char *, size_t, const char *,...)
ssnprintf was designed to be a safer alternative to snprintf.
Definition futil.c:311
bool expand_tilde(char *, int)
Replaces "~/" in string with the user's home directory.
Definition futil.c:904
bool is_valid_regex(const char *)
Checks if the given regular expression pattern is valid.
Definition futil.c:1261
bool is_symlink_to_dir(const char *)
Checks if the given path is a symbolic link to a directory.
Definition futil.c:1246
int a_toi(char *, bool *)
a safer alternative to atoi() for converting ASCII strings to integers.
Definition futil.c:671
char * get_ip_addresses(char *, int)
Retrieves the IP addresses of the local machine and formats them into a string.
Definition futil.c:245
char * format_local_timestamp(time_t, char *, size_t)
Formats a time_t as an ISO 8601 string in local time.
Definition futil.c:194
char * get_user_str(char *, size_t)
Retrieves the current user's name and UID, and formats it into a string.
Definition futil.c:218
unsigned long a_to_ul(const char *)
Converts a string to an unsigned long long integer, with support for suffixes K, M,...
Definition futil.c:695