From 5774fe14b7203c06873e29d13c9520c9a32321cb Mon Sep 17 00:00:00 2001 From: Nathan Wagner Date: Fri, 12 Oct 2018 07:15:52 +0000 Subject: [PATCH] add program to generate a hash of package contents renamed checksum to hash in the package table --- Makefile | 7 +- db.sql | 12 +-- lib/integ.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++ zpm-packagehash.c | 60 +++++++++++++++ zpm.h | 2 + 5 files changed, 260 insertions(+), 12 deletions(-) create mode 100644 lib/integ.c create mode 100644 zpm-packagehash.c diff --git a/Makefile b/Makefile index ac3a332..9cfc921 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ lib/jsw/jsw_rbtree.c JSWOBJ=$(JSWSRC:%.c=%.o) LIBZPMSRC=sha256.c db.c compress.c uncompress.c zpm.c zpm_hash.c \ foreach_path.c vercmp.c findpkg.c quote.c dbquery.c script_hash.c \ - parse.c + parse.c integ.c LIBZPMOBJ=$(addprefix lib/, $(LIBZPMSRC:%.c=%.o)) @@ -28,7 +28,7 @@ curdir=$(shell pwd) ZPKGBIN=zpm-addfile zpm-extract zpm-init zpm-vercmp zpm-stat zpm-hash \ zpm-findpkg zpm-shell zpm-soneed zpm-foreach-path zpm-parse \ - zpm-runscript zpm-soname zpm-syncfs + zpm-runscript zpm-soname zpm-syncfs zpm-packagehash SCRIPTS=zpm zpm-install zpm-merge zpm-list zpm-preserve zpm-test zpm-log \ zpm-contents zpm-uninstall zpm-pathmod @@ -126,6 +126,9 @@ zpm-extract: zpm-extract.o libzpm.a zpm-runscript: zpm-runscript.o libzpm.a $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< -lzpm -lelf +zpm-packagehash: zpm-packagehash.o libzpm.a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< -lzpm -lelf + zpm-foreach-path.o: CFLAGS+=-Wno-unused-parameter zpm-foreach-path: zpm-foreach-path.o libzpm.a sqlite/sqlite3.h diff --git a/db.sql b/db.sql index 041c06e..d74803d 100644 --- a/db.sql +++ b/db.sql @@ -33,7 +33,7 @@ create table packages ( packager text, build_time integer default (strftime('%s', 'now')), install_time integer, - checksum text, -- checksum of package contents. 
null for incompleted packages + hash text, -- see integ.c for package hash primary key (package,version,release), check (typeof(package) = 'text'), check (typeof(version) = 'text'), @@ -74,7 +74,7 @@ begin packager = NEW.packager, build_time = NEW.build_time, install_time = NEW.install_time, - checksum = NEW.checksum + hash = NEW.hash where package = OLD.package and version = OLD.version and release = OLD.release @@ -100,14 +100,6 @@ create table packagetags ( foreign key (package,version,release) references packages (package,version,release) on delete cascade on update cascade ); --- packagefile hash is columns as text, joined with null bytes, then --- sha256 sum of that --- package checksum is package columns as text, joined with null bytes, --- other than the checksum and install_time column --- then that hashed. finally, that hash, plus the ascii sorted --- hashes of the package files all joined with newlines, hashed. --- really don't like this. - -- files contained in a package create table packagefiles ( -- package id triple
diff --git a/lib/integ.c b/lib/integ.c
new file mode 100644
index 0000000..016302e
--- /dev/null
+++ b/lib/integ.c
@@ -0,0 +1,245 @@
+#define _POSIX_C_SOURCE 200809L
+
+#include <stdint.h>
+#include <string.h>
+
+#include "zpm.h"
+#include "sqlite3.h"
+#include "sha256.h"
+
+/* Feed a single byte (the low 8 bits of ch) into the running digest. */
+static void hash_byte(struct sha256_state *h, int ch) {
+	unsigned char buf[1];
+
+	buf[0] = ch & 0xff;
+	sha256_process(h, buf, 1);
+}
+
+/*
+ * Feed a column size into the digest as sizeof(int) bytes, least
+ * significant byte first.  i will be positive, we are hashing column
+ * sizes.
+ */
+static void hash_int(struct sha256_state *h, int i) {
+	uint64_t z = (uint64_t)i;
+	size_t n;
+
+	for (n = 0; n < sizeof i; n++) {
+		hash_byte(h, (int)(z & 0xff));
+		z >>= 8;
+	}
+}
+
+/*
+ * Feed a one byte type tag followed by the 8 bytes of u, most
+ * significant byte first, into the digest as a single 9 byte chunk.
+ */
+static void hash_tagged64(struct sha256_state *h, int tag, uint64_t u) {
+	unsigned char x[9];
+	int j;
+
+	x[0] = tag & 0xff;
+	for (j = 8; j >= 1; j--) {
+		x[j] = u & 0xff;
+		u >>= 8;
+	}
+	sha256_process(h, x, 9);
+}
+
+/*
+ * Feed column i of the current row of pStmt into the digest.
+ *
+ * The format of the byte stream that is hashed is summarized as follows:
+ *
+ *   R                 start of a row (written by hash_query)
+ *   N                 NULL value
+ *   I<int>            integer: 8 bytes, most significant byte first
+ *   F<double>         float: the 8 bytes of the C double copied into a
+ *                     64 bit integer, most significant byte first
+ *   B<size><bytes>    blob of <size> bytes
+ *   T<size><bytes>    text of <size> bytes
+ *
+ * <size> is written by hash_int().  Segments are concatenated directly
+ * with no delimiters of any kind.  The layout is adapted from
+ * sha3_query(SQL,SIZE), except that the digest is always SHA-256 and the
+ * SQL text itself is not hashed.
+ *
+ * A column of an unrecognized type contributes nothing.
+ */
+static void hash_column(struct sha256_state *h, sqlite3_stmt *pStmt, int i) {
+	const unsigned char *data = NULL;
+	sqlite3_int64 v;
+	uint64_t u;
+	double r;
+	int bytes;
+	int tag;
+
+	switch (sqlite3_column_type(pStmt, i)) {
+	case SQLITE_NULL:
+		hash_byte(h, 'N');
+		return;
+	case SQLITE_INTEGER:
+		v = sqlite3_column_int64(pStmt, i);
+		memcpy(&u, &v, 8);
+		hash_tagged64(h, 'I', u);
+		return;
+	case SQLITE_FLOAT:
+		r = sqlite3_column_double(pStmt, i);
+		memcpy(&u, &r, 8);
+		hash_tagged64(h, 'F', u);
+		return;
+	case SQLITE_TEXT:
+		data = sqlite3_column_text(pStmt, i);
+		tag = 'T';
+		break;
+	case SQLITE_BLOB:
+		data = sqlite3_column_blob(pStmt, i);
+		tag = 'B';
+		break;
+	default:
+		return;
+	}
+
+	/* fetch the size after the value, as the sqlite documentation advises */
+	bytes = sqlite3_column_bytes(pStmt, i);
+	hash_byte(h, tag);
+	hash_int(h, bytes);
+	/* sqlite returns a NULL pointer for a zero length blob */
+	if (data && bytes > 0) {
+		sha256_process(h, data, bytes);
+	}
+}
+
+/*
+ * Run the single SQL statement zSql and feed every row of its result set
+ * into the digest h: an 'R' byte, then each column via hash_column().
+ *
+ * Returns the number of rows hashed, or -1 if zpm or zSql is NULL or the
+ * statement could not be prepared or stepped to completion.  On an sqlite
+ * failure a copy of the error message is stored in zpm->dberrmsg.
+ */
+static int hash_query(struct zpm *zpm, const char *zSql, struct sha256_state *h) {
+	sqlite3 *db;
+	sqlite3_stmt *pStmt = NULL;
+	int nCol;	/* Number of columns in the result set */
+	int rows = 0;
+	int i, rc;
+
+	if (!zSql || !zpm) {
+		return -1;
+	}
+
+	db = zpm->db;
+
+	rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, NULL);
+	if (rc != SQLITE_OK) {
+		zpm->dberrmsg = strdup(sqlite3_errmsg(db));
+		sqlite3_finalize(pStmt);
+		return -1;
+	}
+
+	nCol = sqlite3_column_count(pStmt);
+
+	while ((rc = sqlite3_step(pStmt)) == SQLITE_ROW) {
+		rows++;
+		hash_byte(h, 'R');
+		for (i = 0; i < nCol; i++) {
+			hash_column(h, pStmt, i);
+		}
+	}
+	if (rc != SQLITE_DONE) {
+		zpm->dberrmsg = strdup(sqlite3_errmsg(db));
+		rows = -1;
+	}
+	sqlite3_finalize(pStmt);
+
+	return rows;
+}
+
+/*
+ * Compute the hash of a package: the SHA-256 of its row in packages_pkgid
+ * followed by its rows in packagefiles_pkgid ordered by path, both
+ * serialized by hash_query().
+ *
+ * hash must have room for 65 bytes; it receives 64 lower case hex digits
+ * and a terminating nul.
+ *
+ * Returns 1 on success.  Returns 0, leaving hash untouched, if any
+ * argument is NULL, if no package matches pkgid, or if a query fails.
+ */
+int zpm_package_hash(struct zpm *zpm, char *pkgid, char *hash) {
+	static const char hexdigit[] = "0123456789abcdef";
+	struct sha256_state d;
+	unsigned char tmp[32];
+	char *sql;
+	int rows;
+	int i;
+
+	if (!zpm || !pkgid || !hash) {
+		return 0;
+	}
+
+	sha256_init(&d);
+
+	/* find package */
+	sql = sqlite3_mprintf("select package,version,release,description,architecture,url,licenses,packager,build_time from packages_pkgid where pkgid = %Q", pkgid);
+	rows = hash_query(zpm, sql, &d);
+	sqlite3_free(sql);
+	if (rows < 1) {
+		return 0;
+	}
+
+	/* hash package files */
+	sql = sqlite3_mprintf("select path, mode, username, groupname, configuration, "
+			"filetype, target, devmajor, devminor, mtime, hash "
+			"from packagefiles_pkgid where pkgid = %Q order by path",
+			pkgid);
+	rows = hash_query(zpm, sql, &d);
+	sqlite3_free(sql);
+	if (rows < 0) {
+		return 0;
+	}
+
+	sha256_done(&d, tmp);
+	for (i = 0; i < 32; i++) {
+		hash[i * 2] = hexdigit[tmp[i] >> 4];
+		hash[i * 2 + 1] = hexdigit[tmp[i] & 0x0f];
+	}
+	hash[64] = 0;
+
+	return 1;
+}
+
+/*
+ * Compute the hash of package pkgid and store it in the hash column of
+ * its packages_pkgid row.  If hash is not NULL it also receives the
+ * result, as for zpm_package_hash().
+ *
+ * Returns 1 once the update has been issued, or 0 if the hash could not
+ * be computed or the update statement could not be built; nothing is
+ * written to the database in that case.
+ */
+int zpm_package_sethash(struct zpm *zpm, char *pkgid, char *hash) {
+	/* NOTE(review): assumes ZPM_HASH_STRLEN is at least 64 - confirm */
+	char buf[ZPM_HASH_STRLEN + 1];
+	char *sql;
+
+	if (!hash) {
+		hash = buf;
+	}
+
+	if (!zpm_package_hash(zpm, pkgid, hash)) {
+		return 0;
+	}
+
+	sql = sqlite3_mprintf("update packages_pkgid set hash = %Q where pkgid = %Q", hash, pkgid);
+	if (!sql) {
+		return 0;
+	}
+
+	/* zpm_exec's return convention is not visible here; left unchecked */
+	zpm_exec(zpm, sql, NULL, NULL, NULL);
+	sqlite3_free(sql);
+
+	return 1;
+}
diff --git a/zpm-packagehash.c b/zpm-packagehash.c
new file mode 100644
index 0000000..b158780
--- /dev/null
+++ b/zpm-packagehash.c
@@ -0,0 +1,71 @@
+#define _POSIX_C_SOURCE 2
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "zpm.h"
+
+/* Print a one line synopsis to stderr. */
+void usage(void) {
+	fprintf(stderr, "usage: zpm-packagehash [-f dbfile] [-s] pkgid\n");
+}
+
+/*
+ * Print the hash of a package, and with -s also store it in the database.
+ *
+ * The database is the -f argument if given, otherwise $ZPMDB, otherwise
+ * /var/lib/zpm/local.db.  Exits 0 if a hash was printed, non-zero otherwise.
+ */
+int main(int ac, char **av) {
+	struct zpm pkg;
+	char hash[ZPM_HASH_STRLEN + 1];
+	char *dbfile;
+	char *pkgid;
+	int found = 0;
+	int set = 0;
+	int opt;
+
+	dbfile = getenv("ZPMDB");
+	if (!dbfile) {
+		dbfile = "/var/lib/zpm/local.db";
+	}
+
+	while ((opt = getopt(ac, av, "f:s")) != -1) {
+		switch (opt) {
+		case 'f':
+			dbfile = optarg;
+			break;
+		case 's':
+			set = 1;
+			break;
+		default:
+			usage();
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	/* the package id is required; av[optind] is NULL without it */
+	if (optind >= ac) {
+		usage();
+		exit(EXIT_FAILURE);
+	}
+	pkgid = av[optind];
+
+	if (!zpm_open(&pkg, dbfile)) {
+		fprintf(stderr, "unable to open %s\n", dbfile);
+		return 1;
+	}
+
+	if (set) {
+		found = zpm_package_sethash(&pkg, pkgid, hash);
+	} else {
+		found = zpm_package_hash(&pkg, pkgid, hash);
+	}
+	zpm_close(&pkg);
+
+	if (found) {
+		printf("%s\n", hash);
+	}
+	return found ? 0 : 1;
+}
diff --git a/zpm.h b/zpm.h index 08ae23b..f3e3734 100644 --- a/zpm.h +++ b/zpm.h @@ -176,6 +176,8 @@ int (*callback)(void *f, int ncols, char **vals, char **cols), void *data, char **errmsg); int zpm_script_hash(struct zpm *zpm, char *pkgstr, char *phase, char *hash); +int zpm_package_hash(struct zpm *zpm, char *pkgid, char *hash); +int zpm_package_sethash(struct zpm *zpm, char *pkgid, char *hash); sqlite3_stmt *zpm_dbquery(struct zpm *zpm, char *query, ...); struct zpm *zpm_clearmem(struct zpm *zpm); -- 2.40.0