const std = @import("std");
const sqlite = @import("sqlite");
const clap = @import("clap");
const curl = @cImport({
    @cInclude("curl/curl.h");
});
const uri = @import("uri");
const json = @import("json");

const log = std.log.scoped(.derploader);

/// Command line parameters.
/// A `<VALUE>` placeholder marks a parameter that takes an argument; those are
/// the ones read below with `args.option()` / `args.options()`. A trailing
/// `...` marks a parameter that may be given several times.
const params = [_]clap.Param(clap.Help){
    clap.parseParam("-h, --help Display this help and exit.") catch unreachable,
    clap.parseParam("-c, --create <PATH> Create new database at PATH.") catch unreachable,
    clap.parseParam("-i <ID>... Operate on ID.") catch unreachable,
    clap.parseParam("-a Operate on all IDs in the database.") catch unreachable,
    clap.parseParam("-l <FILE>... Operate on IDs from a file.") catch unreachable,
    clap.parseParam("-m Download metadata for ID image.") catch unreachable,
    clap.parseParam("-d Download image data of ID.") catch unreachable,
    clap.parseParam("-e Extract image to local subfolder.") catch unreachable,
    clap.parseParam("-t <TIME> Time between requests.") catch unreachable,
    clap.parseParam("-r Register ID.") catch unreachable,
    clap.parseParam("-k Include key in searches from environment variable DERPI_KEY.") catch unreachable,
    clap.parseParam("-s <SEARCH>... Iterate over the results of searches.") catch unreachable,
    clap.parseParam("-o <ORDER> Order searches by ORDER, descending.") catch unreachable,
    clap.parseParam("-O <ORDER> Order searches by ORDER, ascending.") catch unreachable,
    clap.parseParam("-p <PAGE> Start from page PAGE.") catch unreachable,
    clap.parseParam("-P <PAGES> Stop after PAGES pages.") catch unreachable,
    clap.parseParam("--migrate Update an old database.") catch unreachable,
    clap.parseParam("--yolo PRAGMA synchronous = OFF") catch unreachable,
};

/// Writes the program name, the one-line usage summary and the full
/// parameter help to `w`.
fn printFullUsage(w: anytype) !void {
    try w.print("{s} ", .{std.os.argv[0]});
    try clap.usage(w, &params);
    try w.writeByte('\n');
    try clap.help(w, &params);
}

/// Logs `str` followed by the detailed error currently reported by `db`.
fn sqliteErrorReport(str: []const u8, db: *sqlite.Db) void {
    log.err("{s}: {}", .{ str, db.getDetailedError() });
}

/// Logs `str`, libcurl's description of `code`, and the contents of the
/// global error buffer registered with CURLOPT_ERRORBUFFER in `main`.
fn curlErrorReport(str: []const u8, code: curl.CURLcode) void {
    log.err("{s}: {s} {s}", .{ str, curl.curl_easy_strerror(code), curlerr[0.. :0] });
}

// NOTE(review): the statements elsewhere in this file reference columns
// `id`, `image`, `thumb`, `hash_full`, `hash_thumb` and a table `blob`, none
// of which are created here. Presumably they come from an older schema that
// `--migrate` is meant to convert -- confirm before relying on a fresh database.
const create =
    \\CREATE TABLE IF NOT EXISTS image(
    \\  iid INTEGER PRIMARY KEY,
    \\  eid INTEGER UNIQUE,
    \\  metadata TEXT,
    \\  full_url TEXT GENERATED ALWAYS AS
    \\    (json_extract(metadata, '$.image.representations.full')) VIRTUAL,
    \\  thumb_url TEXT GENERATED ALWAYS AS
    \\    (json_extract(metadata, '$.image.representations.thumb')) VIRTUAL,
    \\  extension TEXT GENERATED ALWAYS AS
    \\    (json_extract(metadata, '$.image.format')) VIRTUAL,
    \\  hash_meta TEXT,
    \\  image_id INTEGER,
    \\  thumb_id INTEGER
    \\);
;

const metatable =
    \\CREATE TABLE IF NOT EXISTS derpiloader(
    \\  name TEXT,
    \\  value
    \\);
;

/// Inserts a row with the given `id` and `meta` into the `image` table.
pub fn insertMeta(db: *sqlite.Db, id: u64, meta: []const u8) !void {
    const q =
        \\INSERT OR ROLLBACK INTO image (id, metadata) VALUES (?, ?);
    ;
    try db.exec(q, .{}, .{ .id = id, .metadata = meta });
}

const api_base = "https://derpibooru.org/api/v1/json";

/// Scratch buffer for NUL-terminated request URLs.
var urlbuf = [_:0]u8{0} ** 512;
/// Error buffer handed to libcurl via CURLOPT_ERRORBUFFER.
var curlerr = [_:0]u8{0} ** (curl.CURL_ERROR_SIZE);

const hash_prefix = "blake3-";
/// Raw Blake3 digest of the last `hashit` input.
var hash_buf = [_]u8{0} ** (std.crypto.hash.Blake3.digest_length);
/// `hash_prefix` followed by the lowercase hex form of `hash_buf`.
var hash_buf2 = [_]u8{0} ** (std.crypto.hash.Blake3.digest_length * 2 + hash_prefix.len);

/// Hashes `input` with Blake3 and leaves the prefixed hex digest in the
/// global `hash_buf2` (the raw digest stays in `hash_buf`).
fn hashit(input: []const u8) !void {
    std.crypto.hash.Blake3.hash(input, hash_buf[0..], .{});
    _ = try std.fmt.bufPrint(
        hash_buf2[0..],
        hash_prefix ++ "{s}",
        .{std.fmt.fmtSliceHexLower(hash_buf[0..])},
    );
}

/// Zeroes both global hash buffers.
fn resetHashBuffers() void {
    std.mem.set(u8, hash_buf[0..], 0);
    std.mem.set(u8, hash_buf2[0..], 0);
}

var fetch_timer: ?std.time.Timer = null;
/// Minimum number of milliseconds between two requests (0 = no waiting).
var fetch_wait: u64 = 0;
var first_fetch = true;

pub fn main() anyerror!void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    const alloc = &gpa.allocator;

    var diag = clap.Diagnostic{};
    var args = clap.parse(
        clap.Help,
        &params,
        .{ .diagnostic = &diag, .allocator = alloc },
    ) catch |err| {
        // Report useful error and exit
        diag.report(std.io.getStdErr().writer(), err) catch {};
        return;
    };
    defer args.deinit();

    if (args.flag("-h")) {
        const w = std.io.getStdOut().writer();
        try printFullUsage(w);
        return;
    }

    const filename = "test.db3";
    var db = try sqlite.Db.init(.{
        .mode = sqlite.Db.Mode{ .File = filename },
        .open_flags = .{
            .write = true,
            .create = true,
        },
        .threading_mode = .SingleThread,
    });
    // runActions() closes the database itself before calling exit(), and
    // exit() skips defers, so this never closes the handle twice.
    defer db.deinit();

    // The statement has no placeholders, so no values are bound.
    db.exec(create, .{}, .{}) catch sqliteErrorReport("Couldn't create table", &db);

    if (args.flag("--migrate")) {
        if (args.flag("--yolo")) try db.exec("PRAGMA synchronous=0;", .{}, .{});
        try db.exec("BEGIN IMMEDIATE;", .{}, .{});
        errdefer db.exec("ROLLBACK;", .{}, .{}) catch
            std.debug.panic("SQLite database errored trying to roll back. Have fun!", .{});

        var stmt = try db.prepare("SELECT ROWID FROM image;");
        defer stmt.deinit();
        var iter = try stmt.iterator(u64, .{});
        while (try iter.next(.{})) |id| {
            try db.exec(
                \\INSERT INTO blob (data, hash)
                \\  SELECT image, hash_full
                \\    FROM image WHERE ROWID = ?;
            , .{}, .{id});
            try db.exec(
                \\UPDATE image
                \\  SET image_id = last_insert_rowid()
                \\  WHERE ROWID = ?
            , .{}, .{id});
            try db.exec(
                \\INSERT INTO blob (data, hash)
                \\  SELECT thumb, hash_thumb
                \\    FROM image WHERE ROWID = ?;
            , .{}, .{id});
            try db.exec(
                \\UPDATE image
                \\  SET thumb_id = last_insert_rowid()
                \\  WHERE ROWID = ?
            , .{}, .{id});
        }
        db.exec("COMMIT;", .{}, .{}) catch
            std.debug.panic("SQLite database errored trying to commit. Not *terrible*, I guess.", .{});
        return;
    }

    const ret = curl.curl_global_init(curl.CURL_GLOBAL_ALL);
    if (ret != curl.CURLE_OK) {
        log.err("cURL global init failure: {s}", .{curl.curl_easy_strerror(ret)});
        return;
    }
    defer curl.curl_global_cleanup();

    const handle = curl.curl_easy_init() orelse return error.CURLHandleInitFailed;
    defer curl.curl_easy_cleanup(handle);

    var response_buffer = std.ArrayList(u8).init(alloc);
    defer response_buffer.deinit();

    _ = curl.curl_easy_setopt(handle, curl.CURLOPT_ERRORBUFFER, &curlerr);

    const key = std.os.getenv("DERPI_KEY");

    if (args.option("-t")) |millis_str| {
        const millis = std.fmt.parseInt(u64, millis_str, 10) catch {
            log.err("Fetch wait time must be a positive integer denoting some number of milliseconds.", .{});
            return;
        };
        fetch_wait = millis;
    }

    for (args.options("-i")) |id_str| {
        log.info("Iterating over all specified command line IDs.", .{});
        const id = std.fmt.parseInt(u64, id_str, 10) catch {
            log.err("Image ID must be a positive integer.", .{});
            continue;
        };
        log.info("Operating on ID {d}, per -i.", .{id});
        runActions(&db, id, &response_buffer, alloc, handle, &args, null);
    }

    if (args.flag("-a")) {
        log.info("Iterating over all registered IDs.", .{});
        var stmt = try db.prepare("SELECT id FROM image WHERE id IS NOT NULL");
        defer stmt.deinit();
        var iter = try stmt.iterator(u64, .{});
        while (try iter.next(.{})) |id| {
            log.info("Operating on ID {d}, per -a.", .{id});
            runActions(&db, id, &response_buffer, alloc, handle, &args, null);
        }
    }

    for (args.options("-l")) |path| {
        log.info("Iterating over IDs listed in {s}.", .{path});
        var file = std.fs.cwd().openFile(path, .{ .read = true }) catch |err| {
            log.err("Couldn't open file {s}: {}", .{ path, err });
            continue;
        };
        defer file.close();
        var reader = file.reader();
        var buffer: [128]u8 = undefined;
        while (try reader.readUntilDelimiterOrEof(&buffer, '\n')) |line| {
            // Tolerate CRLF line endings, stray blanks and empty lines.
            const id_str = std.mem.trim(u8, line, " \t\r");
            if (id_str.len == 0) continue;
            const id = std.fmt.parseInt(u64, id_str, 10) catch {
                log.err("Image ID must be a positive integer.", .{});
                continue;
            };
            log.info("Operating on ID {d}, per -l.", .{id});
            runActions(&db, id, &response_buffer, alloc, handle, &args, null);
        }
    }

    const searches = args.options("-s");
    if (searches.len > 0) {
        //var reader = std.io.fixedBufferStream(
        //    @embedFile("/etc/ssl/certs/ca-certificates.crt"),
        //).reader();
        //const trust = try tls.x509.CertificateChain.from_pem(alloc, reader);
        // catch |a| {
        //    log.err("Could not load the certificates: {}", .{a});
        //    return;
        //};

        const sort_ascending = args.option("-O");
        const sort_descending = args.option("-o");
        if (sort_ascending != null and sort_descending != null) {
            log.err("Can't sort up *and* down, dummy.", .{});
            return;
        }
        // Descending is the default when neither -o nor -O is given.
        const sort_order: []const u8 = if (sort_ascending != null) "asc" else "desc";
        const sort_by = sort_descending orelse sort_ascending orelse "id";

        var page: u64 = 1;
        if (args.option("-p")) |page_str| {
            page = std.fmt.parseInt(u64, page_str, 10) catch {
                log.err("Page must be a positive integer.", .{});
                return;
            };
        }

        // 0 means "no limit".
        var max_pages: u64 = 0;
        if (args.option("-P")) |nr| {
            max_pages = std.fmt.parseInt(u64, nr, 10) catch {
                log.err("Pages maximum must be a positive integer.", .{});
                return;
            };
        }

        const key_value: []const u8 = key orelse "";
        const key_param: []const u8 = if (key != null) "&key=" else "";

        var search_response = std.ArrayList(u8).init(alloc);
        defer search_response.deinit();

        for (searches) |search| {
            const esearch = try uri.escapeString(alloc, search);
            defer alloc.free(esearch);

            var pages: u64 = 0;
            log.info("Iterating over search \"{s}\", starting on page {d}.", .{ search, page });
            paging: while (true) {
                pages += 1;
                // `pages` counts the page about to be fetched, so stop only
                // once it exceeds the limit; otherwise -P N fetches N-1 pages.
                if (max_pages > 0 and pages > max_pages) {
                    return;
                }
                log.info("Doing page {d}, {d}/{d}.", .{ page, pages, max_pages });
                _ = try std.fmt.bufPrintZ(
                    urlbuf[0..],
                    api_base ++ "/search/images?q={s}&page={d}&sd={s}&sf={s}&per_page=50{s}{s}",
                    .{
                        esearch,
                        page,
                        sort_order,
                        sort_by,
                        key_param,
                        key_value,
                    },
                );
                search_response.clearRetainingCapacity();
                try easyFetch(handle, &urlbuf, &search_response);

                // NOTE(review): the parsed value is never released here; check
                // whether the json package expects the caller to free it.
                const val = try json.parse(alloc, search_response.items);

                // A reply without an "images" array used to leave this loop
                // spinning on the same page forever; treat it as malformed.
                const images_val = val.get(.{"images"}) orelse {
                    log.err("Malformed reply from Derpi: no \"images\" member.", .{});
                    return;
                };
                const images = unwrap(images_val, .Array) orelse {
                    log.err("Malformed reply from Derpi: \"images\" is not an array.", .{});
                    return;
                };

                for (images) |i| {
                    var buffer: [1024 * 20]u8 = undefined;
                    const pid = unwrap(i.get("id") orelse {
                        log.err("Malformed reply from Derpi.", .{});
                        return;
                    }, .Int) orelse {
                        log.err("Malformed reply from Derpi, but in a different way.", .{});
                        return;
                    };
                    const id = if (pid >= 0) @intCast(u64, pid) else {
                        log.err("Malformed reply from Derpi, but in a third way.", .{});
                        return;
                    };

                    // Wrap the search hit as {"image": ...}, the same shape the
                    // generated columns of the `image` table extract from.
                    var stream = std.io.fixedBufferStream(buffer[0..]);
                    const data = json.Value{
                        .Object = &[_]json.Member{
                            json.Member{
                                .key = "image",
                                .value = i,
                            },
                        },
                    };
                    try data.format("", .{}, stream.writer());
                    const jason = stream.getWritten();

                    runActions(
                        &db,
                        id,
                        &response_buffer,
                        alloc,
                        handle,
                        &args,
                        jason,
                    );
                }

                // A full page (50 hits, matching per_page) means there may be more.
                if (images.len == 50) {
                    page += 1;
                } else {
                    break :paging;
                }
            }
            page = 1;
        }
    }
}

/// Returns the payload of the tagged union `un` if its active tag is `tag`,
/// otherwise null.
pub fn unwrap(
    un: anytype,
    comptime tag: std.meta.Tag(@TypeOf(un)),
) ?std.meta.TagPayload(@TypeOf(un), tag) {
    if (un != tag) return null;
    return @field(un, @tagName(tag));
}

/// Shared error policy for the per-ID actions: `error.GO_ON` is ignored,
/// `error.FATAL` closes the database and exits with status 1, anything else
/// closes the database and exits with status 2.
fn handleActionError(db: *sqlite.Db, e: anyerror) void {
    switch (e) {
        error.GO_ON => {},
        error.FATAL => {
            db.deinit();
            std.os.exit(1);
        },
        else => {
            db.deinit();
            std.os.exit(2);
        },
    }
}

/// Runs every action selected on the command line (-r, -m, -d, -e) for `id`.
/// `meta`, when given, is already-downloaded metadata that -m stores instead
/// of fetching it. `resp` and the global hash buffers are cleared between
/// steps.
fn runActions(
    db: *sqlite.Db,
    id: u64,
    resp: *std.ArrayList(u8),
    alloc: *std.mem.Allocator,
    handle: *curl.CURL,
    args: anytype,
    meta: ?[]const u8,
) void {
    if (args.flag("-r")) {
        registerID(db, id) catch |e| handleActionError(db, e);
    }

    if (args.flag("-m")) {
        if (meta) |m| {
            storeMetadata(db, id, m) catch |e| handleActionError(db, e);
        } else {
            getMetadata(db, id, resp, handle) catch |e| handleActionError(db, e);
        }
    }
    resp.clearRetainingCapacity();
    resetHashBuffers();

    if (args.flag("-d")) {
        getImage(db, id, resp, alloc, handle) catch |e| handleActionError(db, e);
    }
    resp.clearRetainingCapacity();
    resetHashBuffers();

    if (args.flag("-e")) {
        extractImage(db, id, alloc) catch |e| handleActionError(db, e);
    }
}

/// Inserts `id` into the `image` table unless it is already there.
/// Returns `error.GO_ON` for recoverable database errors and `error.FATAL`
/// when the commit fails.
fn registerID(db: *sqlite.Db, id: u64) !void {
    log.info("Registering ID {d}.", .{id});
    const existing = db.one(
        bool,
        "SELECT true FROM image WHERE id = ?;",
        .{},
        .{ .id = id },
    ) catch {
        sqliteErrorReport("SQLite error while checking if ID already present", db);
        return error.GO_ON;
    };
    if (existing != null) {
        log.info("ID {d} already registered.", .{id});
        return;
    }

    try db.exec("BEGIN IMMEDIATE", .{}, .{});
    // Best effort: the OR ROLLBACK clause may already have ended the transaction.
    errdefer db.exec("ROLLBACK;", .{}, .{}) catch {};
    db.exec(
        \\INSERT OR ROLLBACK
        \\  INTO image (id)
        \\  VALUES (?);
    , .{}, .{ .id = id }) catch {
        sqliteErrorReport("Couldn't insert ID into database.", db);
        return error.GO_ON;
    };
    db.exec("COMMIT", .{}, .{}) catch {
        sqliteErrorReport("FATAL: couldn't commit database", db);
        return error.FATAL;
    };
}

/// Reports whether the row for `id` already has metadata.
/// Returns `error.GO_ON` if the lookup itself fails.
fn metadataPresent(db: *sqlite.Db, id: u64) !bool {
    const row = db.one(
        bool,
        "SELECT true FROM image WHERE id = ? AND metadata IS NOT NULL;",
        .{},
        .{ .id = id },
    ) catch {
        sqliteErrorReport("SQLite error while checking for metadata precence.", db);
        return error.GO_ON;
    };
    return row != null;
}

/// Writes `metadata` and its hash for `id` inside one transaction.
/// Returns `error.GO_ON` for recoverable failures and `error.FATAL` when the
/// commit fails.
fn saveMetadata(db: *sqlite.Db, id: u64, metadata: []const u8) !void {
    try db.exec("BEGIN IMMEDIATE;", .{}, .{});
    // Best effort: the OR ROLLBACK clause may already have ended the transaction.
    errdefer db.exec("ROLLBACK;", .{}, .{}) catch {};
    db.exec(
        \\INSERT OR ROLLBACK
        \\  INTO
        \\    image (id, metadata)
        \\  VALUES (?, ?)
        \\  ON CONFLICT (id)
        \\    DO UPDATE
        \\      SET metadata=excluded.metadata;
    , .{}, .{ .id = id, .metadata = metadata }) catch {
        // The ID is formatted here; the old message passed a literal "{d}".
        log.err("Couldn't add metadata for ID {d} to database.: {}", .{ id, db.getDetailedError() });
        return error.GO_ON;
    };
    hashit(metadata) catch |err| {
        log.err("Couldn't hash metadata for ID {d}: {s}", .{ id, err });
        return error.GO_ON;
    };
    db.exec(
        "UPDATE OR ROLLBACK image SET hash_meta = ? WHERE id = ?",
        .{},
        .{ hash_buf2[0..], id },
    ) catch {
        sqliteErrorReport("Couldn't set metadata hash", db);
        return error.GO_ON;
    };
    db.exec("COMMIT", .{}, .{}) catch {
        sqliteErrorReport("FATAL: couldn't commit database", db);
        return error.FATAL;
    };
}

/// Stores already-downloaded `metadata` for `id` unless the row has metadata
/// already. Metadata that is not valid JSON is logged and skipped.
fn storeMetadata(
    db: *sqlite.Db,
    id: u64,
    metadata: []const u8,
) !void {
    log.info("Storing metadata for ID {d}.", .{id});
    if (try metadataPresent(db, id)) {
        log.info("Metadata for ID {d} already acquired. Use -u to replace.", .{id});
        return;
    }
    if (!std.json.validate(metadata)) {
        // Previously skipped without any trace.
        log.err("Invalid metadata for ID {d}", .{id});
        return;
    }
    try saveMetadata(db, id, metadata);
}

/// Downloads the metadata for `id` into `resp` and stores it, unless the row
/// has metadata already. A failed download or an invalid JSON reply yields
/// `error.FATAL`.
fn getMetadata(
    db: *sqlite.Db,
    id: u64,
    resp: *std.ArrayList(u8),
    handle: *curl.CURL,
) !void {
    log.info("Downloading metadata for ID {d}.", .{id});
    if (try metadataPresent(db, id)) {
        log.info("Metadata for ID {d} already acquired. Use -u to replace.", .{id});
        return;
    }

    _ = try std.fmt.bufPrintZ(
        urlbuf[0..],
        api_base ++ "/images/{d}",
        .{id},
    );
    easyFetch(handle, &urlbuf, resp) catch {
        log.info("Failed to download metadata for ID {d}.", .{id});
        return error.FATAL;
    };

    if (!std.json.validate(resp.items)) {
        log.err("Invalid metadata for ID {d}", .{id});
        return error.FATAL;
    }
    try saveMetadata(db, id, resp.items);
}

/// Downloads the full image and the thumbnail for `id` (using the URLs the
/// database derives from the stored metadata) and stores each together with
/// its hash. Parts that are already stored are skipped.
fn getImage(
    db: *sqlite.Db,
    id: u64,
    resp: *std.ArrayList(u8),
    alloc: *std.mem.Allocator,
    handle: *curl.CURL,
) !void {
    log.info("Downloading image and thumbnail for ID {d}.", .{id});
    const row = db.oneAlloc(
        struct {
            full_url: ?[:0]u8,
            thumb_url: ?[:0]u8,
        },
        alloc,
        "SELECT full_url, thumb_url FROM image WHERE id = ?",
        .{},
        .{ .id = id },
    ) catch {
        sqliteErrorReport("SQLite error while getting image URLs", db);
        return error.GO_ON;
    };
    const res = row orelse {
        log.err("No metadata for id {d} available", .{id});
        return;
    };
    // Release both URLs on every exit path; freeing each one inside its own
    // branch leaked the thumbnail URL whenever the full-image branch returned.
    defer {
        if (res.full_url) |u| alloc.free(u);
        if (res.thumb_url) |u| alloc.free(u);
    }

    if (res.full_url) |url| blk: {
        const skipper = db.one(bool,
            \\SELECT true FROM image
            \\  WHERE id = ? AND image IS NOT NULL;
        , .{}, .{id}) catch {
            sqliteErrorReport("SQLite error while checking if image is already downloaded", db);
            return error.GO_ON;
        };
        if (skipper) |_| {
            log.info("Image for ID {d} already downloaded.", .{id});
            break :blk;
        }
        easyFetch(handle, url, resp) catch {
            log.info("Failed to download fullsize image for ID {d}", .{id});
            return error.FATAL;
        };
        try db.exec("BEGIN IMMEDIATE;", .{}, .{});
        errdefer db.exec("ROLLBACK;", .{}, .{}) catch {};
        db.exec(
            "UPDATE OR ROLLBACK image SET image = ? WHERE id = ?",
            .{},
            .{
                .image = resp.items,
                .id = id,
            },
        ) catch {
            sqliteErrorReport("Couldn't add image to DB.", db);
            return error.GO_ON;
        };
        hashit(resp.items) catch |err| {
            log.err("Couldn't hash image for ID {d}: {s}", .{ id, err });
            return error.GO_ON;
        };
        db.exec(
            "UPDATE OR ROLLBACK image SET hash_full = ? WHERE id = ?",
            .{},
            .{ hash_buf2[0..], id },
        ) catch {
            sqliteErrorReport("Couldn't set iamge hash", db);
            return error.GO_ON;
        };
        db.exec("COMMIT", .{}, .{}) catch {
            sqliteErrorReport("FATAL: couldn't commit database", db);
            return error.FATAL;
        };
        // Start the thumbnail download from clean buffers.
        resp.clearRetainingCapacity();
        resetHashBuffers();
    }

    if (res.thumb_url) |url| blk: {
        const skipper = db.one(bool,
            \\SELECT true FROM image
            \\  WHERE id = ? AND thumb IS NOT NULL;
        , .{}, .{id}) catch {
            sqliteErrorReport("SQLite error while checking if thumb is already downloaded", db);
            return error.GO_ON;
        };
        if (skipper) |_| {
            log.info("Thumb for ID {d} already downloaded.", .{id});
            break :blk;
        }
        easyFetch(handle, url, resp) catch {
            log.info("Failed to download thumbnail image for ID {d}", .{id});
            return error.GO_ON;
        };
        try db.exec("BEGIN IMMEDIATE;", .{}, .{});
        errdefer db.exec("ROLLBACK;", .{}, .{}) catch {};
        db.exec(
            "UPDATE OR ROLLBACK image SET thumb = ? WHERE id = ?",
            .{},
            .{
                .thumb = resp.items,
                .id = id,
            },
        ) catch {
            sqliteErrorReport("Couldn't add thumb to DB", db);
            return error.GO_ON;
        };
        hashit(resp.items) catch |err| {
            log.err("Couldn't hash thumb for ID {d}: {s}", .{ id, err });
            return error.GO_ON;
        };
        db.exec(
            "UPDATE OR ROLLBACK image SET hash_thumb = ? WHERE id = ?",
            .{},
            .{ hash_buf2[0..], id },
        ) catch {
            sqliteErrorReport("Couldn't add thumb hash", db);
            return error.GO_ON;
        };
        db.exec("COMMIT", .{}, .{}) catch {
            sqliteErrorReport("FATAL: couldn't commit database", db);
            return error.FATAL;
        };
    }
}

/// Writes the stored image for `id` to `images/<zero-padded id>.<extension>`
/// below the current directory, creating the directory if needed. The
/// extension falls back to "unknown" when the database has none.
fn extractImage(db: *sqlite.Db, id: u64, alloc: *std.mem.Allocator) !void {
    log.info("Extracting image for ID {d}.", .{id});
    const row = db.oneAlloc(
        struct {
            image: ?[:0]u8,
            extension: ?[:0]u8,
        },
        alloc,
        "SELECT image, extension FROM image WHERE id = ?",
        .{},
        .{ .id = id },
    ) catch {
        sqliteErrorReport("SQLite error while reading image", db);
        return error.GO_ON;
    };
    defer {
        if (row) |r| {
            if (r.image) |i| alloc.free(i);
            if (r.extension) |e| alloc.free(e);
        }
    }

    const res = row orelse return;

    const fallback_extension = comptime @as([]const u8, "unknown");
    const extension = if (res.extension) |e| e else fallback_extension;
    var name_buf = [_]u8{0} ** 64;
    const file_name = try std.fmt.bufPrint(
        name_buf[0..],
        "{d:0>10}.{s}",
        .{
            id,
            extension,
        },
    );

    const image = res.image orelse {
        log.info("No image data for ID {d}.", .{id});
        return;
    };

    var dir = try std.fs.cwd().makeOpenPath("images", .{
        .access_sub_paths = true,
    });
    // The directory handle was previously never closed.
    defer dir.close();
    var file = try dir.createFile(
        file_name,
        .{
            .read = false,
            .truncate = true,
        },
    );
    defer file.close();
    try file.writeAll(image);
    log.info("Extracted image for ID {d}.", .{id});
}

/// Fetches `url` with `handle`, appending the response body to `resp`.
/// When `fetch_wait` is non-zero, sleeps so that at least that many
/// milliseconds pass between two requests. Any libcurl failure is logged and
/// reported as `error.FUCK`.
fn easyFetch(handle: *curl.CURL, url: [*:0]const u8, resp: *std.ArrayList(u8)) !void {
    if (fetch_wait > 0) {
        if (fetch_timer) |*timer| {
            const elapsed_ms = timer.read() / std.time.ns_per_ms;
            if (elapsed_ms < fetch_wait) {
                std.time.sleep((fetch_wait - elapsed_ms) * std.time.ns_per_ms);
            }
            timer.reset();
        } else {
            // First request: nothing to wait for, just start measuring.
            fetch_timer = try std.time.Timer.start();
        }
    }

    var ret = curl.curl_easy_setopt(handle, curl.CURLOPT_URL, url);
    if (ret != curl.CURLE_OK) {
        curlErrorReport("cURL set url:", ret);
        return error.FUCK;
    }

    ret = curl.curl_easy_setopt(handle, curl.CURLOPT_WRITEFUNCTION, writeToArrayListCallback);
    if (ret != curl.CURLE_OK) {
        curlErrorReport("cURL set writefunction:", ret);
        return error.FUCK;
    }

    ret = curl.curl_easy_setopt(handle, curl.CURLOPT_WRITEDATA, resp);
    if (ret != curl.CURLE_OK) {
        curlErrorReport("cURL set writedata:", ret);
        return error.FUCK;
    }

    ret = curl.curl_easy_setopt(handle, curl.CURLOPT_USERAGENT, "Derpiloader 0.1 (linux)");
    if (ret != curl.CURLE_OK) {
        curlErrorReport("cURL set user agent:", ret);
        return error.FUCK;
    }

    ret = curl.curl_easy_perform(handle);
    if (ret != curl.CURLE_OK) {
        curlErrorReport("cURL perform:", ret);
        return error.FUCK;
    }

    log.info("Got {d} bytes", .{resp.items.len});
}

/// libcurl write callback: appends the received chunk to the
/// `std.ArrayList(u8)` passed as `user_data`. Returns the number of bytes
/// consumed, or 0 when the append fails.
/// The sizes are `usize` to match the `size_t` parameters and return value
/// of libcurl's write-callback prototype.
fn writeToArrayListCallback(
    data: *c_void,
    size: usize,
    nmemb: usize,
    user_data: *c_void,
) callconv(.C) usize {
    var buffer = @intToPtr(*std.ArrayList(u8), @ptrToInt(user_data));
    var typed_data = @intToPtr([*]u8, @ptrToInt(data));
    const total = nmemb * size;
    buffer.appendSlice(typed_data[0..total]) catch return 0;
    return total;
}