From 1da6cf255754eae763a86b07e96e0f3a7b6e86e3 Mon Sep 17 00:00:00 2001 From: Christopher Snowhill Date: Tue, 22 Feb 2022 23:10:02 -0800 Subject: [PATCH] Deduplicate artwork using hashes Artwork deduplication should be done with hashes, not by full data comparison. This should be a lot faster loading artwork from files now, especially if the playlist already contains a lot of unique artwork. Signed-off-by: Christopher Snowhill --- Cog.xcodeproj/project.pbxproj | 6 ++ Utils/RedundantPlaylistDataStore.h | 2 +- Utils/RedundantPlaylistDataStore.m | 14 +++-- Utils/SHA256Digest.h | 22 +++++++ Utils/SHA256Digest.m | 42 +++++++++++++ Utils/SQLiteStore.h | 2 +- Utils/SQLiteStore.m | 99 +++++++++++++++++++++++++++--- 7 files changed, 172 insertions(+), 15 deletions(-) create mode 100644 Utils/SHA256Digest.h create mode 100644 Utils/SHA256Digest.m diff --git a/Cog.xcodeproj/project.pbxproj b/Cog.xcodeproj/project.pbxproj index b1cce53ec..f59f28ade 100644 --- a/Cog.xcodeproj/project.pbxproj +++ b/Cog.xcodeproj/project.pbxproj @@ -136,6 +136,7 @@ 8370D73F2775AE1300245CE0 /* libsqlite3.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 8370D73E2775AE1300245CE0 /* libsqlite3.tbd */; }; 8377C66327B8CF6300E8BC0F /* SpectrumView.m in Sources */ = {isa = PBXBuildFile; fileRef = 8377C66127B8CF6300E8BC0F /* SpectrumView.m */; }; 8377C6B927B900F000E8BC0F /* SpectrumItem.m in Sources */ = {isa = PBXBuildFile; fileRef = 8377C6B827B900F000E8BC0F /* SpectrumItem.m */; }; + 8381A09227C5F72F00A1C530 /* SHA256Digest.m in Sources */ = {isa = PBXBuildFile; fileRef = 8381A09127C5F72F00A1C530 /* SHA256Digest.m */; }; 8384914018083E4E00E7332D /* filetype.icns in Resources */ = {isa = PBXBuildFile; fileRef = 8384913D18083E4E00E7332D /* filetype.icns */; }; 8384915918083EAB00E7332D /* infoTemplate.pdf in Resources */ = {isa = PBXBuildFile; fileRef = 8384914318083EAB00E7332D /* infoTemplate.pdf */; }; 8384915A18083EAB00E7332D /* missingArt@2x.png in Resources */ = {isa = PBXBuildFile; fileRef = 
8384914418083EAB00E7332D /* missingArt@2x.png */; }; @@ -946,6 +947,8 @@ 8377C66427B8CF7A00E8BC0F /* VisualizationController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = VisualizationController.h; path = Audio/Visualization/VisualizationController.h; sourceTree = "<group>"; }; 8377C6B727B900F000E8BC0F /* SpectrumItem.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = SpectrumItem.h; path = Visualization/SpectrumItem.h; sourceTree = "<group>"; }; 8377C6B827B900F000E8BC0F /* SpectrumItem.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; name = SpectrumItem.m; path = Visualization/SpectrumItem.m; sourceTree = "<group>"; }; + 8381A09027C5F72F00A1C530 /* SHA256Digest.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = SHA256Digest.h; sourceTree = "<group>"; }; + 8381A09127C5F72F00A1C530 /* SHA256Digest.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = SHA256Digest.m; sourceTree = "<group>"; }; 8384912518080F2D00E7332D /* Logging.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Logging.h; sourceTree = "<group>"; }; 8384913D18083E4E00E7332D /* filetype.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; path = filetype.icns; sourceTree = "<group>"; }; 8384914318083EAB00E7332D /* infoTemplate.pdf */ = {isa = PBXFileReference; lastKnownFileType = image.pdf; name = infoTemplate.pdf; path = Images/infoTemplate.pdf; sourceTree = "<group>"; }; @@ -1212,6 +1215,8 @@ 8370D73C277419F700245CE0 /* SQLiteStore.m */, 83988F0C27BE0A5900A0E89A /* RedundantPlaylistDataStore.h */, 83988F0D27BE0A5900A0E89A /* RedundantPlaylistDataStore.m */, + 8381A09027C5F72F00A1C530 /* SHA256Digest.h */, + 8381A09127C5F72F00A1C530 /* SHA256Digest.m */, ); path = Utils; sourceTree = "<group>"; @@ -2524,6 +2529,7 @@ 179D03260E0CB2500064A77A /* PathNode.m in Sources */, 179D03270E0CB2500064A77A /* PathWatcher.m in Sources */, 179D03280E0CB2500064A77A /* 
SmartFolderNode.m in Sources */, + 8381A09227C5F72F00A1C530 /* SHA256Digest.m in Sources */, 173855FF0E0CC81F00488CD4 /* FileTreeOutlineView.m in Sources */, 07D971E60ED1DAA800E7602E /* TagEditorController.m in Sources */, 17E0D5EA0F520F02005B6FED /* MainWindow.m in Sources */, diff --git a/Utils/RedundantPlaylistDataStore.h b/Utils/RedundantPlaylistDataStore.h index e31b49cfd..2927f7fa5 100644 --- a/Utils/RedundantPlaylistDataStore.h +++ b/Utils/RedundantPlaylistDataStore.h @@ -16,7 +16,7 @@ NS_ASSUME_NONNULL_BEGIN @interface RedundantPlaylistDataStore : NSObject { NSMutableArray *stringStore; - NSMutableArray *artStore; + NSMutableDictionary *artStore; } - (id)init; diff --git a/Utils/RedundantPlaylistDataStore.m b/Utils/RedundantPlaylistDataStore.m index 6273cecd3..f8b96f0ee 100644 --- a/Utils/RedundantPlaylistDataStore.m +++ b/Utils/RedundantPlaylistDataStore.m @@ -10,6 +10,8 @@ #import "RedundantPlaylistDataStore.h" +#import "SHA256Digest.h" + @implementation RedundantPlaylistDataStore - (id)init { @@ -17,7 +19,7 @@ if(self) { stringStore = [[NSMutableArray alloc] init]; - artStore = [[NSMutableArray alloc] init]; + artStore = [[NSMutableDictionary alloc] init]; } return self; @@ -38,12 +40,14 @@ - (NSData *)coalesceArt:(NSData *)in { if(in == nil) return in; - NSUInteger index = [artStore indexOfObject:in]; - if(index == NSNotFound) { - [artStore addObject:in]; + NSString *key = [SHA256Digest digestDataAsString:in]; + + NSData *ret = [artStore objectForKey:key]; + if(ret == nil) { + [artStore setObject:in forKey:key]; return in; } else { - return [artStore objectAtIndex:index]; + return ret; } } diff --git a/Utils/SHA256Digest.h b/Utils/SHA256Digest.h new file mode 100644 index 000000000..00242c0db --- /dev/null +++ b/Utils/SHA256Digest.h @@ -0,0 +1,22 @@ +// +// SHA256Digest.h +// Cog +// +// Created by Christopher Snowhill on 2/22/22. 
+// + +#import <Foundation/Foundation.h> + +NS_ASSUME_NONNULL_BEGIN + +@interface SHA256Digest : NSObject + ++ (NSData *)digestBytes:(const void *)bytes length:(size_t)length; ++ (NSData *)digestData:(const NSData *)data; + ++ (NSString *)digestBytesAsString:(const void *)bytes length:(size_t)length; ++ (NSString *)digestDataAsString:(const NSData *)data; + +@end + +NS_ASSUME_NONNULL_END diff --git a/Utils/SHA256Digest.m b/Utils/SHA256Digest.m new file mode 100644 index 000000000..a69627882 --- /dev/null +++ b/Utils/SHA256Digest.m @@ -0,0 +1,42 @@ +// +// SHA256Digest.m +// Cog +// +// Created by Christopher Snowhill on 2/22/22. +// + +#import "SHA256Digest.h" + +#import <CommonCrypto/CommonDigest.h> + +@implementation SHA256Digest + ++ (NSData *)digestBytes:(const void *)bytes length:(size_t)length { + uint8_t result[CC_SHA256_DIGEST_LENGTH]; + CC_SHA256_CTX ctx; + CC_SHA256_Init(&ctx); + CC_SHA256_Update(&ctx, bytes, (CC_LONG)length); + CC_SHA256_Final(&result[0], &ctx); + return [NSData dataWithBytes:&result[0] length:sizeof(result)]; +} + ++ (NSData *)digestData:(const NSData *)data { + return [SHA256Digest digestBytes:[data bytes] length:[data length]]; +} + ++ (NSString *)digestBytesAsString:(const void *)bytes length:(size_t)length { + NSData *hashData = [SHA256Digest digestBytes:bytes length:length]; + length = [hashData length]; + NSMutableString *result = [NSMutableString stringWithCapacity:length * 2]; + const uint8_t *values = (const uint8_t *)[hashData bytes]; + for(size_t i = 0; i < length; ++i) { + [result appendFormat:@"%02x", values[i]]; + } + return [NSString stringWithString:result]; +} + ++ (NSString *)digestDataAsString:(const NSData *)data { + return [SHA256Digest digestBytesAsString:[data bytes] length:[data length]]; +} + +@end diff --git a/Utils/SQLiteStore.h b/Utils/SQLiteStore.h index 281d8427c..2d9eb5fa1 100644 --- a/Utils/SQLiteStore.h +++ b/Utils/SQLiteStore.h @@ -16,7 +16,7 @@ @private sqlite3 *g_database; @private - sqlite3_stmt *stmt[39]; + sqlite3_stmt *stmt[41]; @private NSMutableArray 
*databaseMirror; NSMutableDictionary *artTable; diff --git a/Utils/SQLiteStore.m b/Utils/SQLiteStore.m index 3a1ad6833..3f2f9aca9 100644 --- a/Utils/SQLiteStore.m +++ b/Utils/SQLiteStore.m @@ -5,9 +5,12 @@ // Created by Christopher Snowhill on 12/22/21. // +#import + #import "SQLiteStore.h" #import "Logging.h" -#import + +#import "SHA256Digest.h" NSString *getDatabasePath(void) { NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDomainMask, YES); @@ -16,7 +19,7 @@ NSString *getDatabasePath(void) { return [basePath stringByAppendingPathComponent:filename]; } -static int64_t currentSchemaVersion = 2; +static int64_t currentSchemaVersion = 3; NSArray *createSchema(void) { return @[ @@ -27,9 +30,11 @@ NSArray *createSchema(void) { );", @"CREATE TABLE IF NOT EXISTS artdictionary ( \ artid INTEGER PRIMARY KEY AUTOINCREMENT, \ + arthash BLOB NOT NULL, \ referencecount INTEGER, \ value BLOB NOT NULL \ );", + @"CREATE UNIQUE INDEX idx_art_hash ON artdictionary (arthash);", @"CREATE TABLE IF NOT EXISTS knowntracks ( \ trackid INTEGER PRIMARY KEY AUTOINCREMENT, \ referencecount INTEGER, \ @@ -87,12 +92,14 @@ enum { stmt_remove_string, stmt_select_art, + stmt_select_art_all, stmt_select_art_refcount, stmt_select_art_value, stmt_bump_art, stmt_pop_art, stmt_add_art, stmt_remove_art, + stmt_add_art_renamed, stmt_select_track, stmt_select_track_refcount, @@ -180,13 +187,21 @@ enum { const char *query_remove_string = "DELETE FROM stringdictionary WHERE (stringid = ?)"; enum { - select_art_in_value = 1, + select_art_in_arthash = 1, select_art_out_art_id = 0, select_art_out_reference_count, }; -const char *query_select_art = "SELECT artid, referencecount FROM artdictionary WHERE (value = ?) LIMIT 1"; +const char *query_select_art = "SELECT artid, referencecount FROM artdictionary WHERE (arthash = ?) 
LIMIT 1"; + +enum { + select_art_all_out_id = 0, + select_art_all_out_referencecount, + select_art_all_out_value, +}; + +const char *query_select_art_all = "SELECT artid, referencecount, value FROM artdictionary"; enum { select_art_refcount_in_id = 1, @@ -217,10 +232,11 @@ enum { const char *query_pop_art = "UPDATE artdictionary SET referencecount = referencecount - 1 WHERE (artid = ?) LIMIT 1"; enum { - add_art_in_value = 1, + add_art_in_hash = 1, + add_art_in_value, }; -const char *query_add_art = "INSERT INTO artdictionary (referencecount, value) VALUES (1, ?)"; +const char *query_add_art = "INSERT INTO artdictionary (referencecount, arthash, value) VALUES (1, ?, ?)"; enum { remove_art_in_id = 1, @@ -228,6 +244,15 @@ enum { const char *query_remove_art = "DELETE FROM artdictionary WHERE (artid = ?)"; +enum { + add_art_renamed_in_id = 1, + add_art_renamed_in_referencecount, + add_art_renamed_in_hash, + add_art_renamed_in_value, +}; + +const char *query_add_art_renamed = "INSERT INTO artdictionary_v2 (artid, referencecount, arthash, value) VALUES (?, ?, ?, ?)"; + enum { select_track_in_id = 1, @@ -607,6 +632,62 @@ static SQLiteStore *g_sharedStore = NULL; } break; + case 2: + // Schema 2 to 3: Add arthash blob field to the artdictionary table, requires transmutation + { + if(sqlite3_exec(g_database, "CREATE TABLE IF NOT EXISTS artdictionary_v2 ( " + " artid INTEGER PRIMARY KEY AUTOINCREMENT, " + " arthash BLOB NOT NULL, " + " referencecount INTEGER, " + " value BLOB NOT NULL); " + "CREATE UNIQUE INDEX idx_art_hash ON artdictionary_v2 (arthash);", + NULL, NULL, &error)) { + DLog(@"SQLite error: %s", error); + return nil; + } + + if(PREPARE(select_art_all) || + PREPARE(add_art_renamed)) + return nil; + + // Add the art hashes to the table + st = stmt[stmt_select_art_all]; + sqlite3_stmt *sta = stmt[stmt_add_art_renamed]; + + if(sqlite3_reset(st)) + return nil; + + while(sqlite3_step(st) == SQLITE_ROW) { + int64_t artId = sqlite3_column_int64(st, 
select_art_all_out_id); + int64_t referenceCount = sqlite3_column_int64(st, select_art_all_out_referencecount); + const void *artBytes = sqlite3_column_blob(st, select_art_all_out_value); + size_t artLength = sqlite3_column_bytes(st, select_art_all_out_value); + NSData *hash = [SHA256Digest digestBytes:artBytes length:artLength]; + if(sqlite3_reset(sta) || + sqlite3_bind_int64(sta, add_art_renamed_in_id, artId) || + sqlite3_bind_int64(sta, add_art_renamed_in_referencecount, referenceCount) || + sqlite3_bind_blob64(sta, add_art_renamed_in_hash, [hash bytes], [hash length], SQLITE_STATIC) || + sqlite3_bind_blob64(sta, add_art_renamed_in_value, artBytes, artLength, SQLITE_STATIC) || + sqlite3_step(sta) != SQLITE_DONE) + return nil; + } + + sqlite3_reset(sta); + + sqlite3_finalize(sta); + sqlite3_finalize(st); + + stmt[stmt_select_art_all] = NULL; + stmt[stmt_add_art_renamed] = NULL; + + if(sqlite3_exec(g_database, "PRAGMA foreign_keys=off; BEGIN TRANSACTION; DROP TABLE artdictionary; ALTER TABLE artdictionary_v2 RENAME TO artdictionary; COMMIT; PRAGMA foreign_keys=on;", NULL, NULL, &error)) { + DLog(@"SQLite error: %s", error); + return nil; + } + } + + break; + default: break; } @@ -664,7 +745,6 @@ static SQLiteStore *g_sharedStore = NULL; return nil; } #undef PREPARE - size_t count = [self playlistGetCount]; databaseMirror = [[NSMutableArray alloc] init]; @@ -847,10 +927,12 @@ static SQLiteStore *g_sharedStore = NULL; return -1; } + NSData *digest = [SHA256Digest digestData:*art]; + sqlite3_stmt *st = stmt[stmt_select_art]; if(sqlite3_reset(st) || - sqlite3_bind_blob64(st, select_art_in_value, [*art bytes], [*art length], SQLITE_STATIC)) { + sqlite3_bind_blob64(st, select_art_in_arthash, [digest bytes], [digest length], SQLITE_STATIC)) { return -1; } @@ -873,6 +955,7 @@ static SQLiteStore *g_sharedStore = NULL; st = stmt[stmt_add_art]; if(sqlite3_reset(st) || + sqlite3_bind_blob64(st, add_art_in_hash, [digest bytes], [digest length], SQLITE_STATIC) || 
sqlite3_bind_blob64(st, add_art_in_value, [*art bytes], [*art length], SQLITE_STATIC) || sqlite3_step(st) != SQLITE_DONE || sqlite3_reset(st)) {