Index: support/htcacheclean.c =================================================================== --- support/htcacheclean.c (revision 1022280) +++ support/htcacheclean.c (working copy) @@ -32,8 +32,10 @@ #include "apr_thread_proc.h" #include "apr_signal.h" #include "apr_getopt.h" +#include "apr_md5.h" #include "apr_ring.h" #include "apr_date.h" +#include "apr_buckets.h" #include "../modules/cache/mod_disk_cache.h" #if APR_HAVE_UNISTD_H @@ -97,10 +99,13 @@ static int dryrun; /* flag: true means dry run, don't actually delete anything */ static int deldirs; /* flag: true means directories should be deleted */ +static int listurls; /* flag: true means list cached urls */ +static int listextended;/* flag: true means list cached urls with their attributes */ static int baselen; /* string length of the path to the proxy directory */ static apr_time_t now; /* start time of this processing run */ static apr_file_t *errfile; /* stderr file handle */ +static apr_file_t *outfile; /* stdout file handle */ static apr_off_t unsolicited; /* file size summary for deleted unsolicited files */ static APR_RING_ENTRY(_entry) root; /* ENTRY ring anchor */ @@ -108,6 +113,22 @@ /* short program name as called */ static const char *shortname = "htcacheclean"; +/* what did we clean? 
*/ +struct stats { + apr_off_t total; + apr_off_t sum; + apr_off_t max; + apr_off_t ntotal; + apr_off_t nodes; + apr_off_t inodes; + apr_off_t etotal; + apr_off_t entries; + apr_off_t dfuture; + apr_off_t dexpired; + apr_off_t dfresh; +}; + + #ifdef DEBUG /* * fake delete for debug purposes @@ -153,8 +174,7 @@ /* * print purge statistics */ -static void printstats(apr_off_t total, apr_off_t sum, apr_off_t max, - apr_off_t etotal, apr_off_t entries) +static void printstats(char *path, struct stats *s) { char ttype, stype, mtype, utype; apr_off_t tfrag, sfrag, ufrag; @@ -164,31 +184,31 @@ } ttype = 'K'; - tfrag = ((total * 10) / KBYTE) % 10; - total /= KBYTE; - if (total >= KBYTE) { + tfrag = ((s->total * 10) / KBYTE) % 10; + s->total /= KBYTE; + if (s->total >= KBYTE) { ttype = 'M'; - tfrag = ((total * 10) / KBYTE) % 10; - total /= KBYTE; + tfrag = ((s->total * 10) / KBYTE) % 10; + s->total /= KBYTE; } stype = 'K'; - sfrag = ((sum * 10) / KBYTE) % 10; - sum /= KBYTE; - if (sum >= KBYTE) { + sfrag = ((s->sum * 10) / KBYTE) % 10; + s->sum /= KBYTE; + if (s->sum >= KBYTE) { stype = 'M'; - sfrag = ((sum * 10) / KBYTE) % 10; - sum /= KBYTE; + sfrag = ((s->sum * 10) / KBYTE) % 10; + s->sum /= KBYTE; } mtype = 'K'; - max /= KBYTE; - if (max >= KBYTE) { + s->max /= KBYTE; + if (s->max >= KBYTE) { mtype = 'M'; - max /= KBYTE; + s->max /= KBYTE; } - apr_file_printf(errfile, "Statistics:" APR_EOL_STR); + apr_file_printf(errfile, "Cleaned %s. 
Statistics:" APR_EOL_STR, path); if (unsolicited) { utype = 'K'; ufrag = ((unsolicited * 10) / KBYTE) % 10; @@ -203,33 +223,120 @@ } apr_file_printf(errfile, "unsolicited size %d.%d%c" APR_EOL_STR, (int)(unsolicited), (int)(ufrag), utype); - } - apr_file_printf(errfile, "size limit %d.0%c" APR_EOL_STR, - (int)(max), mtype); - apr_file_printf(errfile, "total size was %d.%d%c, total size now " - "%d.%d%c" APR_EOL_STR, - (int)(total), (int)(tfrag), ttype, (int)(sum), - (int)(sfrag), stype); - apr_file_printf(errfile, "total entries was %d, total entries now %d" - APR_EOL_STR, (int)(etotal), (int)(entries)); + } + apr_file_printf(errfile, "size limit %" APR_OFF_T_FMT ".0%c" APR_EOL_STR, + s->max, mtype); + apr_file_printf(errfile, "inodes limit %" APR_OFF_T_FMT APR_EOL_STR, + s->inodes); + apr_file_printf( + errfile, + "total size was %" APR_OFF_T_FMT ".%" APR_OFF_T_FMT "%c, total size now " + "%" APR_OFF_T_FMT ".%" APR_OFF_T_FMT "%c" APR_EOL_STR, s->total, + tfrag, ttype, s->sum, sfrag, stype); + apr_file_printf(errfile, "total inodes was %" APR_OFF_T_FMT + ", total %sinodes now " + "%" APR_OFF_T_FMT APR_EOL_STR, s->ntotal, dryrun && deldirs ? "estimated " + : "", s->nodes); + apr_file_printf( + errfile, + "total entries was %" APR_OFF_T_FMT ", total entries now %" APR_OFF_T_FMT + APR_EOL_STR, s->etotal, s->entries); + apr_file_printf( + errfile, + "%" APR_OFF_T_FMT " entries deleted (%" APR_OFF_T_FMT " from future, %" + APR_OFF_T_FMT " expired, %" APR_OFF_T_FMT " fresh)" APR_EOL_STR, + (s->etotal - s->entries), s->dfuture, s->dexpired, s->dfresh); } +/** + * Round the value up to the given threshold. 
+ */ +static apr_size_t round_up(apr_size_t val, apr_off_t round) { + if (round > 1) { + return ((val + round - 1) / round) * round; + } + return val; +} + /* + * delete parent directories + */ +static void delete_parent(const char *path, const char *basename, + apr_off_t *nodes, apr_pool_t *pool) +{ + char *nextpath, *name; + apr_pool_t *p; + + /* temp pool, otherwise lots of memory could be allocated */ + apr_pool_create(&p, pool); + name = apr_pstrdup(p, basename); + + /* If asked to delete dirs, do so now. We don't care if it fails. + * If it fails, it likely means there was something else there. + */ + if (deldirs && !dryrun) { + const char *vary; + char *end = strrchr(name, '/'); + while (end) { + *end = 0; + + /* remove the directory */ + nextpath = apr_pstrcat(p, path, "/", name, NULL); + if (!apr_dir_remove(nextpath, p)) { + (*nodes)--; + + /* vary directory found? */ + vary = strstr(name, CACHE_VDIR_SUFFIX); + if (vary && !vary[sizeof(CACHE_VDIR_SUFFIX) - 1]) { + nextpath = apr_pstrcat(p, path, "/", apr_pstrndup(p, name, vary + - name), NULL); + if (!apr_file_remove(nextpath, p)) { + (*nodes)--; + } + } + + } + else { + break; + } + end = strrchr(name, '/'); + } + } + + apr_pool_destroy(p); + + if (benice) { + if (++delcount >= DELETE_NICE) { + apr_sleep(NICE_DELAY); + delcount = 0; + } + } + +} + +/* * delete a single file */ -static void delete_file(char *path, char *basename, apr_pool_t *pool) +static void delete_file(char *path, char *basename, apr_off_t *nodes, + apr_pool_t *pool) { char *nextpath; apr_pool_t *p; + /* temp pool, otherwise lots of memory could be allocated */ + apr_pool_create(&p, pool); + nextpath = apr_pstrcat(p, path, "/", basename, NULL); + if (dryrun) { - return; + apr_finfo_t finfo; + if (!apr_stat(&finfo, nextpath, APR_FINFO_NLINK, p)) { + (*nodes)--; + } } + else if (!apr_file_remove(nextpath, p)) { + (*nodes)--; + } - /* temp pool, otherwise lots of memory could be allocated */ - apr_pool_create(&p, pool); - nextpath = 
apr_pstrcat(p, path, "/", basename, NULL); - apr_file_remove(nextpath, p); apr_pool_destroy(p); if (benice) { @@ -238,28 +345,44 @@ delcount = 0; } } + + delete_parent(path, basename, nodes, pool); + } /* * delete cache file set */ -static void delete_entry(char *path, char *basename, apr_pool_t *pool) +static void delete_entry(char *path, char *basename, apr_off_t *nodes, + apr_pool_t *pool) { char *nextpath; apr_pool_t *p; - if (dryrun) { - return; - } - /* temp pool, otherwise lots of memory could be allocated */ apr_pool_create(&p, pool); nextpath = apr_pstrcat(p, path, "/", basename, CACHE_HEADER_SUFFIX, NULL); - apr_file_remove(nextpath, p); + if (dryrun) { + apr_finfo_t finfo; + if (!apr_stat(&finfo, nextpath, APR_FINFO_NLINK, p)) { + (*nodes)--; + } + } + else if (!apr_file_remove(nextpath, p)) { + (*nodes)--; + } nextpath = apr_pstrcat(p, path, "/", basename, CACHE_DATA_SUFFIX, NULL); - apr_file_remove(nextpath, p); + if (dryrun) { + apr_finfo_t finfo; + if (!apr_stat(&finfo, nextpath, APR_FINFO_NLINK, p)) { + (*nodes)--; + } + } + else if (!apr_file_remove(nextpath, p)) { + (*nodes)--; + } apr_pool_destroy(p); @@ -270,12 +393,204 @@ delcount = 0; } } + + delete_parent(path, basename, nodes, pool); + } /* + * list the cache directory tree + */ +static int list_urls(char *path, apr_pool_t *pool, apr_off_t round) +{ + apr_dir_t *dir; + apr_finfo_t info; + apr_size_t len; + apr_pool_t *p; + apr_file_t *fd; + const char *ext, *nextpath; + char *url; + apr_uint32_t format; + disk_cache_info_t disk_info; + + apr_pool_create(&p, pool); + + if (apr_dir_open(&dir, path, p) != APR_SUCCESS) { + return 1; + } + + while (apr_dir_read(&info, 0, dir) == APR_SUCCESS && !interrupted) { + + if (info.filetype == APR_DIR) { + if (!strcmp(info.name, ".") || !strcmp(info.name, "..")) { + continue; + } + + if (list_urls(apr_pstrcat(p, path, "/", info.name, NULL), pool, round)) { + return 1; + } + } + + else if (info.filetype == APR_REG) { + + ext = strchr(info.name, '.'); + + if 
(ext && !strcasecmp(ext, CACHE_HEADER_SUFFIX)) { + + nextpath = apr_pstrcat(p, path, "/", info.name, NULL); + + if (apr_file_open(&fd, nextpath, APR_FOPEN_READ + | APR_FOPEN_BINARY, APR_OS_DEFAULT, p) == APR_SUCCESS) { + len = sizeof(format); + if (apr_file_read_full(fd, &format, len, &len) + == APR_SUCCESS) { + if (format == DISK_FORMAT_VERSION) { + apr_off_t offset = 0; + + apr_file_seek(fd, APR_SET, &offset); + + len = sizeof(disk_cache_info_t); + + if (apr_file_read_full(fd, &disk_info, len, &len) + == APR_SUCCESS) { + len = disk_info.name_len; + url = apr_palloc(p, len + 1); + url[len] = 0; + + if (apr_file_read_full(fd, url, len, &len) + == APR_SUCCESS) { + + if (listextended) { + apr_finfo_t hinfo, dinfo; + + /* stat the header file */ + if (APR_SUCCESS != apr_file_info_get( + &hinfo, APR_FINFO_SIZE, fd)) { + /* ignore the file */ + } + else if (disk_info.has_body && APR_SUCCESS + != apr_stat( + &dinfo, + apr_pstrcat( + p, + path, + "/", + apr_pstrndup( + p, + info.name, + ext + - info.name), + CACHE_DATA_SUFFIX, + NULL), + APR_FINFO_SIZE + | APR_FINFO_IDENT, + p)) { + /* ignore the file */ + } + else if (disk_info.has_body && (dinfo.device + != disk_info.device + || dinfo.inode + != disk_info.inode)) { + /* ignore the file */ + } + else { + + apr_file_printf( + outfile, + "%s %" APR_SIZE_T_FMT + " %" APR_SIZE_T_FMT + " %d %" APR_SIZE_T_FMT + " %" APR_TIME_T_FMT + " %" APR_TIME_T_FMT + " %" APR_TIME_T_FMT + " %" APR_TIME_T_FMT + " %d %d\n", + url, + round_up(hinfo.size, round), + round_up( + disk_info.has_body ? 
dinfo.size + : 0, round), + disk_info.status, + disk_info.entity_version, + disk_info.date, + disk_info.expire, + disk_info.request_time, + disk_info.response_time, + disk_info.has_body, + disk_info.header_only); + } + } + else { + apr_finfo_t dinfo; + + /* stat the data file */ + if (disk_info.has_body && APR_SUCCESS + != apr_stat( + &dinfo, + apr_pstrcat( + p, + path, + "/", + apr_pstrndup( + p, + info.name, + ext + - info.name), + CACHE_DATA_SUFFIX, + NULL), + APR_FINFO_SIZE + | APR_FINFO_IDENT, + p)) { + /* ignore the file */ + } + else if (disk_info.has_body && (dinfo.device + != disk_info.device + || dinfo.inode + != disk_info.inode)) { + /* ignore the file */ + } + else { + apr_file_printf(outfile, "%s\n", + url); + } + } + } + + /* no break: keep reading this directory; fd is closed below */ + } + } + } + apr_file_close(fd); + + } + } + } + + } + + apr_dir_close(dir); + + if (interrupted) { + return 1; + } + + apr_pool_destroy(p); + + if (benice) { + apr_sleep(NICE_DELAY); + } + + if (interrupted) { + return 1; + } + + return 0; +} + +/* * walk the cache directory tree */ -static int process_dir(char *path, apr_pool_t *pool) +static int process_dir(char *path, apr_pool_t *pool, apr_off_t *nodes) { apr_dir_t *dir; apr_pool_t *p; @@ -286,7 +601,7 @@ apr_finfo_t info; apr_size_t len; apr_time_t current, deviation; - char *nextpath, *base, *ext, *orig_basename; + char *nextpath, *base, *ext; APR_RING_ENTRY(_direntry) anchor; DIRENTRY *d, *t, *n; ENTRY *e; @@ -311,6 +626,7 @@ d = apr_pcalloc(p, sizeof(DIRENTRY)); d->basename = apr_pstrcat(p, path, "/", info.name, NULL); APR_RING_INSERT_TAIL(&anchor, d, _direntry, link); + (*nodes)++; } apr_dir_close(dir); @@ -341,7 +657,7 @@ } } - /* this may look strange but apr_stat() may return errno which + /* this may look strange but apr_stat() may return an error which * is system dependent and there may be transient failures, * so just blindly retry for a short while */ @@ -367,18 +683,9 @@ } if (info.filetype == APR_DIR) { - /* Make a copy of the basename, as process_dir 
modifies it */ - orig_basename = apr_pstrdup(pool, d->basename); - if (process_dir(d->basename, pool)) { + if (process_dir(d->basename, pool, nodes)) { return 1; } - - /* If asked to delete dirs, do so now. We don't care if it fails. - * If it fails, it likely means there was something else there. - */ - if (deldirs && !dryrun) { - apr_dir_remove(orig_basename, pool); - } continue; } @@ -474,6 +781,11 @@ e->hsize = d->hsize; e->dsize = d->dsize; e->basename = apr_pstrdup(pool, d->basename); + if (!disk_info.has_body) { + delete_file(path, apr_pstrcat(p, path, "/", + d->basename, CACHE_DATA_SUFFIX, NULL), + nodes, p); + } break; } else { @@ -481,14 +793,31 @@ } } else if (format == VARY_FORMAT_VERSION) { + apr_finfo_t finfo; + /* This must be a URL that added Vary headers later, * so kill the orphaned .data file */ apr_file_close(fd); - apr_file_remove(apr_pstrcat(p, path, "/", d->basename, - CACHE_DATA_SUFFIX, NULL), - p); + + if (apr_stat(&finfo, apr_pstrcat(p, nextpath, + CACHE_VDIR_SUFFIX, NULL), APR_FINFO_TYPE, p) + || finfo.filetype != APR_DIR) { + delete_entry(path, d->basename, nodes, p); + } + else { + delete_file(path, apr_pstrcat(p, path, "/", + d->basename, CACHE_DATA_SUFFIX, NULL), + nodes, p); + } + break; } + else { + /* We didn't recognise the format, kill the files */ + apr_file_close(fd); + delete_entry(path, d->basename, nodes, p); + break; + } } else { apr_file_close(fd); @@ -505,7 +834,7 @@ current = apr_time_now(); if (realclean || d->htime < current - deviation || d->htime > current + deviation) { - delete_entry(path, d->basename, p); + delete_entry(path, d->basename, nodes, p); unsolicited += d->hsize; unsolicited += d->dsize; } @@ -531,22 +860,63 @@ len = sizeof(expires); - apr_file_read_full(fd, &expires, len, &len); + if (apr_file_read_full(fd, &expires, len, + &len) == APR_SUCCESS) { + apr_finfo_t finfo; - apr_file_close(fd); + apr_file_close(fd); - if (expires < current) { - delete_entry(path, d->basename, p); + if (apr_stat(&finfo, 
apr_pstrcat(p, nextpath, + CACHE_VDIR_SUFFIX, NULL), APR_FINFO_TYPE, p) + || finfo.filetype != APR_DIR) { + delete_entry(path, d->basename, nodes, p); + } + else if (expires < current) { + delete_entry(path, d->basename, nodes, p); + } + + break; } + } + else if (format == DISK_FORMAT_VERSION) { + apr_off_t offset = 0; + + apr_file_seek(fd, APR_SET, &offset); + + len = sizeof(disk_cache_info_t); + + if (apr_file_read_full(fd, &disk_info, len, + &len) == APR_SUCCESS) { + apr_file_close(fd); + e = apr_palloc(pool, sizeof(ENTRY)); + APR_RING_INSERT_TAIL(&root, e, _entry, link); + e->expire = disk_info.expire; + e->response_time = disk_info.response_time; + e->htime = d->htime; + e->dtime = d->dtime; + e->hsize = d->hsize; + e->dsize = d->dsize; + e->basename = apr_pstrdup(pool, d->basename); + break; + } + else { + apr_file_close(fd); + } + } + else { + apr_file_close(fd); + delete_entry(path, d->basename, nodes, p); break; } } - apr_file_close(fd); + else { + apr_file_close(fd); + } } if (realclean || d->htime < current - deviation || d->htime > current + deviation) { - delete_entry(path, d->basename, p); + delete_entry(path, d->basename, nodes, p); unsolicited += d->hsize; } break; @@ -555,7 +925,7 @@ current = apr_time_now(); if (realclean || d->dtime < current - deviation || d->dtime > current + deviation) { - delete_entry(path, d->basename, p); + delete_entry(path, d->basename, nodes, p); unsolicited += d->dsize; } break; @@ -564,7 +934,7 @@ * is asserted above if a tempfile is in the hash array */ case TEMP: - delete_file(path, d->basename, p); + delete_file(path, d->basename, nodes, p); unsolicited += d->dsize; break; } @@ -590,27 +960,35 @@ /* * purge cache entries */ -static void purge(char *path, apr_pool_t *pool, apr_off_t max) +static void purge(char *path, apr_pool_t *pool, apr_off_t max, + apr_off_t inodes, apr_off_t nodes, apr_off_t round) { - apr_off_t sum, total, entries, etotal; ENTRY *e, *n, *oldest; - sum = 0; - entries = 0; + struct stats s; + 
s.sum = 0; + s.entries = 0; + s.dfuture = 0; + s.dexpired = 0; + s.dfresh = 0; + s.max = max; + s.nodes = nodes; + s.inodes = inodes; + s.ntotal = nodes; for (e = APR_RING_FIRST(&root); e != APR_RING_SENTINEL(&root, _entry, link); e = APR_RING_NEXT(e, link)) { - sum += e->hsize; - sum += e->dsize; - entries++; + s.sum += round_up(e->hsize, round); + s.sum += round_up(e->dsize, round); + s.entries++; } - total = sum; - etotal = entries; + s.total = s.sum; + s.etotal = s.entries; - if (sum <= max) { - printstats(total, sum, max, etotal, entries); + if ((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes)) { + printstats(path, &s); return; } @@ -622,14 +1000,15 @@ e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) { n = APR_RING_NEXT(e, link); if (e->response_time > now || e->htime > now || e->dtime > now) { - delete_entry(path, e->basename, pool); - sum -= e->hsize; - sum -= e->dsize; - entries--; + delete_entry(path, e->basename, &s.nodes, pool); + s.sum -= round_up(e->hsize, round); + s.sum -= round_up(e->dsize, round); + s.entries--; + s.dfuture++; APR_RING_REMOVE(e, link); - if (sum <= max) { + if ((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes)) { if (!interrupted) { - printstats(total, sum, max, etotal, entries); + printstats(path, &s); } return; } @@ -646,14 +1025,15 @@ e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) { n = APR_RING_NEXT(e, link); if (e->expire != APR_DATE_BAD && e->expire < now) { - delete_entry(path, e->basename, pool); - sum -= e->hsize; - sum -= e->dsize; - entries--; + delete_entry(path, e->basename, &s.nodes, pool); + s.sum -= round_up(e->hsize, round); + s.sum -= round_up(e->dsize, round); + s.entries--; + s.dexpired++; APR_RING_REMOVE(e, link); - if (sum <= max) { + if ((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes)) { if (!interrupted) { - printstats(total, sum, max, etotal, entries); + printstats(path, &s); } return; } @@ -670,7 +1050,8 @@ * corrupt 64bit 
arithmetics which happend to me once, so better safe * than sorry */ - while (sum > max && !interrupted && !APR_RING_EMPTY(&root, _entry, link)) { + while (!((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes)) + && !interrupted && !APR_RING_EMPTY(&root, _entry, link)) { oldest = APR_RING_FIRST(&root); for (e = APR_RING_NEXT(oldest, link); @@ -681,18 +1062,212 @@ } } - delete_entry(path, oldest->basename, pool); - sum -= oldest->hsize; - sum -= oldest->dsize; - entries--; + delete_entry(path, oldest->basename, &s.nodes, pool); + s.sum -= round_up(oldest->hsize, round); + s.sum -= round_up(oldest->dsize, round); + s.entries--; + s.dfresh++; APR_RING_REMOVE(oldest, link); } if (!interrupted) { - printstats(total, sum, max, etotal, entries); + printstats(path, &s); } } +static apr_status_t remove_directory(apr_pool_t *pool, const char *dir) +{ + apr_status_t rv; + apr_dir_t *dirp; + apr_finfo_t dirent; + + rv = apr_dir_open(&dirp, dir, pool); + if (rv == APR_ENOENT) { + return rv; + } + if (rv != APR_SUCCESS) { + char errmsg[120]; + apr_file_printf(errfile, "Could not open directory %s: %s" APR_EOL_STR, + dir, apr_strerror(rv, errmsg, sizeof errmsg)); + return rv; + } + + while (apr_dir_read(&dirent, APR_FINFO_DIRENT | APR_FINFO_TYPE, dirp) + == APR_SUCCESS) { + if (dirent.filetype == APR_DIR) { + if (strcmp(dirent.name, ".") && strcmp(dirent.name, "..")) { + rv = remove_directory(pool, apr_pstrcat(pool, dir, "/", + dirent.name, NULL)); + /* tolerate the directory not being empty, the cache may have + * attempted to recreate the directory in the mean time. 
+ */ + if (APR_SUCCESS != rv && APR_ENOTEMPTY != rv) { + break; + } + } + } else { + const char *file = apr_pstrcat(pool, dir, "/", dirent.name, NULL); + rv = apr_file_remove(file, pool); + if (APR_SUCCESS != rv) { + char errmsg[120]; + apr_file_printf(errfile, + "Could not remove file '%s': %s" APR_EOL_STR, file, + apr_strerror(rv, errmsg, sizeof errmsg)); + break; + } + } + } + + apr_dir_close(dirp); + + if (rv == APR_SUCCESS) { + rv = apr_dir_remove(dir, pool); + if (APR_ENOTEMPTY == rv) { + rv = APR_SUCCESS; + } + if (rv != APR_SUCCESS) { + char errmsg[120]; + apr_file_printf(errfile, "Could not remove directory %s: %s" APR_EOL_STR, + dir, apr_strerror(rv, errmsg, sizeof errmsg)); + } + } + + return rv; +} + +static apr_status_t find_directory(apr_pool_t *pool, const char *base, + const char *rest) +{ + apr_status_t rv; + apr_dir_t *dirp; + apr_finfo_t dirent; + int found = 0, files = 0; + const char *header = apr_pstrcat(pool, rest, CACHE_HEADER_SUFFIX, NULL); + const char *data = apr_pstrcat(pool, rest, CACHE_DATA_SUFFIX, NULL); + const char *vdir = apr_pstrcat(pool, rest, CACHE_HEADER_SUFFIX, + CACHE_VDIR_SUFFIX, NULL); + const char *dirname = NULL; + + rv = apr_dir_open(&dirp, base, pool); + if (rv != APR_SUCCESS) { + char errmsg[120]; + apr_file_printf(errfile, "Could not open directory %s: %s" APR_EOL_STR, + base, apr_strerror(rv, errmsg, sizeof errmsg)); + return rv; + } + + rv = APR_ENOENT; + + while (apr_dir_read(&dirent, APR_FINFO_DIRENT | APR_FINFO_TYPE, dirp) + == APR_SUCCESS) { + int len = strlen(dirent.name); + int restlen = strlen(rest); + if (dirent.filetype == APR_DIR && !strncmp(rest, dirent.name, len)) { + dirname = apr_pstrcat(pool, base, "/", dirent.name, NULL); + rv = find_directory(pool, dirname, rest + (len < restlen ? 
len + : restlen)); + if (APR_SUCCESS == rv) { + found = 1; + } + } + if (dirent.filetype == APR_DIR) { + if (!strcmp(dirent.name, vdir)) { + files = 1; + } + } + if (dirent.filetype == APR_REG) { + if (!strcmp(dirent.name, header) || !strcmp(dirent.name, data)) { + files = 1; + } + } + } + + apr_dir_close(dirp); + + if (files) { + rv = APR_SUCCESS; + if (!dryrun) { + const char *remove; + apr_status_t status; + + remove = apr_pstrcat(pool, base, "/", header, NULL); + status = apr_file_remove(remove, pool); + if (status != APR_SUCCESS && status != APR_ENOENT) { + char errmsg[120]; + apr_file_printf(errfile, "Could not remove file %s: %s" APR_EOL_STR, + remove, apr_strerror(status, errmsg, sizeof errmsg)); + rv = status; + } + + remove = apr_pstrcat(pool, base, "/", data, NULL); + status = apr_file_remove(remove, pool); + if (status != APR_SUCCESS && status != APR_ENOENT) { + char errmsg[120]; + apr_file_printf(errfile, "Could not remove file %s: %s" APR_EOL_STR, + remove, apr_strerror(status, errmsg, sizeof errmsg)); + rv = status; + } + + status = remove_directory(pool, apr_pstrcat(pool, base, "/", vdir, NULL)); + if (status != APR_SUCCESS && status != APR_ENOENT) { + rv = status; + } + } + } + + /* If asked to delete dirs, do so now. We don't care if it fails. + * If it fails, it likely means there was something else there. + */ + if (dirname && deldirs && !dryrun) { + apr_dir_remove(dirname, pool); + } + + if (found) { + return APR_SUCCESS; + } + + return rv; +} + +/** + * Delete a specific URL from the cache. 
+ */ +static apr_status_t delete_url(apr_pool_t *pool, const char *proxypath, const char *url) +{ + apr_md5_ctx_t context; + unsigned char digest[16]; + char tmp[23]; + int i, k; + unsigned int x; + static const char enc_table[64] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@"; + + apr_md5_init(&context); + apr_md5_update(&context, (const unsigned char *) url, strlen(url)); + apr_md5_final(digest, &context); + + /* encode 128 bits as 22 characters, using a modified uuencoding + * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is + * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters + */ + for (i = 0, k = 0; i < 15; i += 3) { + x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2]; + tmp[k++] = enc_table[x >> 18]; + tmp[k++] = enc_table[(x >> 12) & 0x3f]; + tmp[k++] = enc_table[(x >> 6) & 0x3f]; + tmp[k++] = enc_table[x & 0x3f]; + } + + /* one byte left */ + x = digest[15]; + tmp[k++] = enc_table[x >> 2]; /* use up 6 bits */ + tmp[k++] = enc_table[(x << 4) & 0x3f]; + tmp[k] = 0; + + /* automatically find the directory levels */ + return find_directory(pool, proxypath, tmp); +} + /* * usage info */ @@ -700,12 +1275,13 @@ static void usage(const char *error) { if (error) { - apr_file_printf(errfile, "%s error: %s\n", shortname, error); + apr_file_printf(errfile, "%s error: %s\n", shortname, error); } - apr_file_printf(errfile, + apr_file_printf(errfile, "%s -- program for cleaning the disk cache." NL - "Usage: %s [-Dvtrn] -pPATH -lLIMIT [-PPIDFILE]" NL - " %s [-nti] -dINTERVAL -pPATH -lLIMIT [-PPIDFILE]" NL + "Usage: %s [-Dvtrn] -pPATH [-lLIMIT|-LLIMIT] [-PPIDFILE]" NL + " %s [-nti] -dINTERVAL -pPATH [-lLIMIT|-LLIMIT] [-PPIDFILE]" NL + " %s [-Dvt] -pPATH URL ..." NL NL "Options:" NL " -d Daemonize and repeat cache cleaning every INTERVAL minutes." NL @@ -713,7 +1289,10 @@ " options." NL NL " -D Do a dry run and don't delete anything. This option is mutually" NL - " exclusive with the -d option." 
NL + " exclusive with the -d option. When doing a dry run and deleting" NL + " directories with -t, the inodes reported deleted in the stats" NL + " cannot take into account the directories deleted, and will be" NL + " marked as an estimate." NL NL " -v Be verbose and print statistics. This option is mutually" NL " exclusive with the -d option." NL @@ -733,14 +1312,34 @@ NL " -P Specify PIDFILE as the file to write the pid to." NL NL + " -R Specify amount to round sizes up to." NL + NL " -l Specify LIMIT as the total disk cache size limit. Attach 'K'" NL " or 'M' to the number for specifying KBytes or MBytes." NL NL + " -L Specify LIMIT as the total disk cache inode limit." NL + NL " -i Be intelligent and run only when there was a modification of" NL " the disk cache. This option is only possible together with the" NL - " -d option." NL, + " -d option." NL + NL + " -a List the URLs currently stored in the cache. Variants of the" NL + " same URL will be listed once for each variant." NL + NL + " -A List the URLs currently stored in the cache, along with their" NL + " attributes in the following order: url, header size, body size," NL + " status, entity version, date, expiry, request time," NL + " response time, body present, head request." NL + NL + "Should an URL be provided on the command line, the URL will be" NL + "deleted from the cache. A reverse proxied URL is made up as follows:" NL + "http://<hostname>:<port><path>?[query]. So, for the path \"/\" on the" NL + "host \"localhost\" and port 80, the URL to delete becomes" NL + "\"http://localhost:80/?\". Note the '?' in the URL must always be" NL + "specified explicitly, whether a query string is present or not." 
NL, shortname, shortname, + shortname, shortname ); @@ -748,6 +1347,12 @@ } #undef NL +static void usage_repeated_arg(apr_pool_t *pool, char option) { + usage(apr_psprintf(pool, + "The option '%c' cannot be specified more than once", + option)); +} + static void log_pid(apr_pool_t *pool, const char *pidfilename, apr_file_t **pidfile) { apr_status_t status; @@ -776,14 +1381,14 @@ */ int main(int argc, const char * const argv[]) { - apr_off_t max; + apr_off_t max, inodes, round; apr_time_t current, repeat, delay, previous; apr_status_t status; apr_pool_t *pool, *instance; apr_getopt_t *o; apr_finfo_t info; apr_file_t *pidfile; - int retries, isdaemon, limit_found, intelligent, dowork; + int retries, isdaemon, limit_found, inodes_found, intelligent, dowork; char opt; const char *arg; char *proxypath, *path, *pidfilename; @@ -794,7 +1399,10 @@ isdaemon = 0; dryrun = 0; limit_found = 0; + inodes_found = 0; max = 0; + inodes = 0; + round = 0; verbose = 0; realclean = 0; benice = 0; @@ -818,13 +1426,14 @@ } apr_pool_abort_set(oom, pool); apr_file_open_stderr(&errfile, pool); + apr_file_open_stdout(&outfile, pool); apr_signal(SIGINT, setterm); apr_signal(SIGTERM, setterm); apr_getopt_init(&o, pool, argc, argv); while (1) { - status = apr_getopt(o, "iDnvrtd:l:L:p:P:", &opt, &arg); + status = apr_getopt(o, "iDnvrtd:l:L:p:P:R:aA", &opt, &arg); if (status == APR_EOF) { break; } @@ -832,45 +1441,47 @@ usage(NULL); } else { + char *end; + apr_status_t rv; switch (opt) { case 'i': if (intelligent) { - usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt)); + usage_repeated_arg(pool, opt); } intelligent = 1; break; case 'D': if (dryrun) { - usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt)); + usage_repeated_arg(pool, opt); } dryrun = 1; break; case 'n': if (benice) { - usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt)); + usage_repeated_arg(pool, opt); } benice = 1; 
break; case 't': if (deldirs) { - usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt)); + usage_repeated_arg(pool, opt); } deldirs = 1; break; case 'v': if (verbose) { - usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt)); + usage_repeated_arg(pool, opt); } verbose = 1; break; case 'r': if (realclean) { - usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt)); + usage_repeated_arg(pool, opt); } realclean = 1; deldirs = 1; @@ -878,7 +1489,7 @@ case 'd': if (isdaemon) { - usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt)); + usage_repeated_arg(pool, opt); } isdaemon = 1; repeat = apr_atoi64(arg); @@ -888,14 +1499,11 @@ case 'l': if (limit_found) { - usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt)); + usage_repeated_arg(pool, opt); } limit_found = 1; do { - apr_status_t rv; - char *end; - rv = apr_strtoff(&max, arg, &end, 10); if (rv == APR_SUCCESS) { if ((*end == 'K' || *end == 'k') && !end[1]) { @@ -919,9 +1527,54 @@ } while(0); break; + case 'L': + if (inodes_found) { + usage_repeated_arg(pool, opt); + } + inodes_found = 1; + + do { + rv = apr_strtoff(&inodes, arg, &end, 10); + if (rv == APR_SUCCESS) { + if ((*end == 'K' || *end == 'k') && !end[1]) { + inodes *= KBYTE; + } + else if ((*end == 'M' || *end == 'm') && !end[1]) { + inodes *= MBYTE; + } + else if ((*end == 'G' || *end == 'g') && !end[1]) { + inodes *= GBYTE; + } + else if (*end && /* neither empty nor [Bb] */ + ((*end != 'B' && *end != 'b') || end[1])) { + rv = APR_EGENERAL; + } + } + if (rv != APR_SUCCESS) { + usage(apr_psprintf(pool, "Invalid limit: %s" + APR_EOL_STR APR_EOL_STR, arg)); + } + } while(0); + break; + + case 'a': + if (listurls) { + usage_repeated_arg(pool, opt); + } + listurls = 1; + break; + + case 'A': + if (listurls) { + usage_repeated_arg(pool, opt); + } + listurls = 1; + listextended = 1; + 
break; + case 'p': if (proxypath) { - usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt)); + usage_repeated_arg(pool, opt); } proxypath = apr_pstrdup(pool, arg); if ((status = apr_filepath_set(proxypath, pool)) != APR_SUCCESS) { @@ -932,11 +1585,32 @@ case 'P': if (pidfilename) { - usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt)); + usage_repeated_arg(pool, opt); } pidfilename = apr_pstrdup(pool, arg); break; + case 'R': + if (round) { + usage_repeated_arg(pool, opt); + } + rv = apr_strtoff(&round, arg, &end, 10); + if (rv == APR_SUCCESS) { + if (*end) { + usage(apr_psprintf(pool, "Invalid round value: %s" + APR_EOL_STR APR_EOL_STR, arg)); + } + else if (round < 0) { + usage(apr_psprintf(pool, "Round value must be positive: %s" + APR_EOL_STR APR_EOL_STR, arg)); + } + } + if (rv != APR_SUCCESS) { + usage(apr_psprintf(pool, "Invalid round value: %s" + APR_EOL_STR APR_EOL_STR, arg)); + } + break; + } /* switch */ } /* else */ } /* while */ @@ -945,16 +1619,57 @@ usage(NULL); } - if (o->ind != argc) { - usage("Additional parameters specified on the command line, aborting"); + if (o->ind < argc) { + int deleted = 0; + int error = 0; + /* URL deletion needs the cache root: validate -p here, because + * proxypath is handed to delete_url() in the loop below. + */ + if (!proxypath) { + usage("Option -p must be specified"); + } + if (isdaemon) { + usage("Option -d cannot be used with URL arguments, aborting"); + } + if (intelligent) { + usage("Option -i cannot be used with URL arguments, aborting"); + } + if (limit_found) { + usage("Option -l cannot be used with URL arguments, aborting"); + } + while (o->ind < argc) { + status = delete_url(pool, proxypath, argv[o->ind]); + if (APR_SUCCESS == status) { + if (verbose) { + apr_file_printf(errfile, "Removed: %s" APR_EOL_STR, + argv[o->ind]); + } + deleted = 1; + } + else if (APR_ENOENT == status) { + if (verbose) { + apr_file_printf(errfile, "Not cached: %s" APR_EOL_STR, + argv[o->ind]); + } + } + else { + if (verbose) { + apr_file_printf(errfile, "Error while removing: %s" APR_EOL_STR, + argv[o->ind]); + } + error = 1; + } + o->ind++; + } + 
return error ? 1 : deleted ? 0 : 2; } if (isdaemon && repeat <= 0) { usage("Option -d must be greater than zero"); } - if (isdaemon && (verbose || realclean || dryrun)) { - usage("Option -d cannot be used with -v, -r or -D"); + if (isdaemon && (verbose || realclean || dryrun || listurls)) { + usage("Option -d cannot be used with -v, -r, -a, -A or -D"); } if (!isdaemon && intelligent) { @@ -965,8 +1674,8 @@ usage("Option -p must be specified"); } - if (max <= 0) { - usage("Option -l must be greater than zero"); + if (!listurls && max <= 0 && inodes <= 0) { + usage("At least one of option -l or -L must be greater than zero"); } if (apr_filepath_get(&path, 0, pool) != APR_SUCCESS) { @@ -981,6 +1690,11 @@ */ } + if (listurls) { + list_urls(path, pool, round); + return (interrupted != 0); + } + #ifndef DEBUG if (isdaemon) { apr_file_close(errfile); @@ -1051,8 +1765,9 @@ } if (dowork && !interrupted) { - if (!process_dir(path, instance) && !interrupted) { - purge(path, instance, max); + apr_off_t nodes = 0; + if (!process_dir(path, instance, &nodes) && !interrupted) { + purge(path, instance, max, inodes, nodes, round); } else if (!isdaemon && !interrupted) { apr_file_printf(errfile, "An error occurred, cache cleaning "