Deduplicate artwork using hashes

Artwork deduplication should be done by comparing hashes, not by comparing
the full data. This should make loading artwork from files a lot faster,
especially if the playlist already contains a lot of unique artwork.

Signed-off-by: Christopher Snowhill <kode54@gmail.com>
CQTexperiment
Christopher Snowhill 2022-02-22 23:10:02 -08:00
parent 9a2ac6ae5a
commit 1da6cf2557
7 changed files with 172 additions and 15 deletions

View File

@ -136,6 +136,7 @@
8370D73F2775AE1300245CE0 /* libsqlite3.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 8370D73E2775AE1300245CE0 /* libsqlite3.tbd */; };
8377C66327B8CF6300E8BC0F /* SpectrumView.m in Sources */ = {isa = PBXBuildFile; fileRef = 8377C66127B8CF6300E8BC0F /* SpectrumView.m */; };
8377C6B927B900F000E8BC0F /* SpectrumItem.m in Sources */ = {isa = PBXBuildFile; fileRef = 8377C6B827B900F000E8BC0F /* SpectrumItem.m */; };
8381A09227C5F72F00A1C530 /* SHA256Digest.m in Sources */ = {isa = PBXBuildFile; fileRef = 8381A09127C5F72F00A1C530 /* SHA256Digest.m */; };
8384914018083E4E00E7332D /* filetype.icns in Resources */ = {isa = PBXBuildFile; fileRef = 8384913D18083E4E00E7332D /* filetype.icns */; };
8384915918083EAB00E7332D /* infoTemplate.pdf in Resources */ = {isa = PBXBuildFile; fileRef = 8384914318083EAB00E7332D /* infoTemplate.pdf */; };
8384915A18083EAB00E7332D /* missingArt@2x.png in Resources */ = {isa = PBXBuildFile; fileRef = 8384914418083EAB00E7332D /* missingArt@2x.png */; };
@ -946,6 +947,8 @@
8377C66427B8CF7A00E8BC0F /* VisualizationController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = VisualizationController.h; path = Audio/Visualization/VisualizationController.h; sourceTree = "<group>"; };
8377C6B727B900F000E8BC0F /* SpectrumItem.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = SpectrumItem.h; path = Visualization/SpectrumItem.h; sourceTree = "<group>"; };
8377C6B827B900F000E8BC0F /* SpectrumItem.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; name = SpectrumItem.m; path = Visualization/SpectrumItem.m; sourceTree = "<group>"; };
8381A09027C5F72F00A1C530 /* SHA256Digest.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = SHA256Digest.h; sourceTree = "<group>"; };
8381A09127C5F72F00A1C530 /* SHA256Digest.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = SHA256Digest.m; sourceTree = "<group>"; };
8384912518080F2D00E7332D /* Logging.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Logging.h; sourceTree = "<group>"; };
8384913D18083E4E00E7332D /* filetype.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; path = filetype.icns; sourceTree = "<group>"; };
8384914318083EAB00E7332D /* infoTemplate.pdf */ = {isa = PBXFileReference; lastKnownFileType = image.pdf; name = infoTemplate.pdf; path = Images/infoTemplate.pdf; sourceTree = "<group>"; };
@ -1212,6 +1215,8 @@
8370D73C277419F700245CE0 /* SQLiteStore.m */,
83988F0C27BE0A5900A0E89A /* RedundantPlaylistDataStore.h */,
83988F0D27BE0A5900A0E89A /* RedundantPlaylistDataStore.m */,
8381A09027C5F72F00A1C530 /* SHA256Digest.h */,
8381A09127C5F72F00A1C530 /* SHA256Digest.m */,
);
path = Utils;
sourceTree = "<group>";
@ -2524,6 +2529,7 @@
179D03260E0CB2500064A77A /* PathNode.m in Sources */,
179D03270E0CB2500064A77A /* PathWatcher.m in Sources */,
179D03280E0CB2500064A77A /* SmartFolderNode.m in Sources */,
8381A09227C5F72F00A1C530 /* SHA256Digest.m in Sources */,
173855FF0E0CC81F00488CD4 /* FileTreeOutlineView.m in Sources */,
07D971E60ED1DAA800E7602E /* TagEditorController.m in Sources */,
17E0D5EA0F520F02005B6FED /* MainWindow.m in Sources */,

View File

@ -16,7 +16,7 @@ NS_ASSUME_NONNULL_BEGIN
@interface RedundantPlaylistDataStore : NSObject {
NSMutableArray *stringStore;
NSMutableArray *artStore;
NSMutableDictionary *artStore;
}
- (id)init;

View File

@ -10,6 +10,8 @@
#import "RedundantPlaylistDataStore.h"
#import "SHA256Digest.h"
@implementation RedundantPlaylistDataStore
- (id)init {
@ -17,7 +19,7 @@
if(self) {
stringStore = [[NSMutableArray alloc] init];
artStore = [[NSMutableArray alloc] init];
artStore = [[NSMutableDictionary alloc] init];
}
return self;
@ -38,12 +40,14 @@
- (NSData *)coalesceArt:(NSData *)in {
if(in == nil) return in;
NSUInteger index = [artStore indexOfObject:in];
if(index == NSNotFound) {
[artStore addObject:in];
NSString *key = [SHA256Digest digestDataAsString:in];
NSData *ret = [artStore objectForKey:key];
if(ret == nil) {
[artStore setObject:in forKey:key];
return in;
} else {
return [artStore objectAtIndex:index];
return ret;
}
}

22
Utils/SHA256Digest.h Normal file
View File

@ -0,0 +1,22 @@
//
// SHA256Digest.h
// Cog
//
// Created by Christopher Snowhill on 2/22/22.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Stateless helper exposing class methods that compute SHA-256 digests,
/// either as raw bytes or as a hexadecimal string.
@interface SHA256Digest : NSObject
/// Computes the SHA-256 digest of a raw byte buffer.
/// @param bytes Pointer to the first byte to hash.
/// @param length Number of bytes to hash, starting at `bytes`.
/// @return The raw digest bytes (CC_SHA256_DIGEST_LENGTH bytes long).
+ (NSData *)digestBytes:(const void *)bytes length:(size_t)length;
/// Computes the SHA-256 digest of the contents of `data`.
/// @param data The data object whose bytes are hashed.
/// @return The raw digest bytes, as produced by `digestBytes:length:`.
+ (NSData *)digestData:(const NSData *)data;
/// Computes the SHA-256 digest of a raw byte buffer and formats it as text.
/// @param bytes Pointer to the first byte to hash.
/// @param length Number of bytes to hash, starting at `bytes`.
/// @return The digest as lowercase hexadecimal, two characters per digest byte.
+ (NSString *)digestBytesAsString:(const void *)bytes length:(size_t)length;
/// Computes the SHA-256 digest of the contents of `data` and formats it as text.
/// @param data The data object whose bytes are hashed.
/// @return The digest as lowercase hexadecimal, as produced by
///         `digestBytesAsString:length:`.
+ (NSString *)digestDataAsString:(const NSData *)data;
@end
NS_ASSUME_NONNULL_END

42
Utils/SHA256Digest.m Normal file
View File

@ -0,0 +1,42 @@
//
// SHA256Digest.m
// Cog
//
// Created by Christopher Snowhill on 2/22/22.
//
#import "SHA256Digest.h"
#import <CommonCrypto/CommonDigest.h>
@implementation SHA256Digest

/// Computes the SHA-256 digest of a raw byte buffer.
/// @param bytes Pointer to the first byte to hash.
/// @param length Number of bytes to hash, starting at `bytes`.
/// @return The raw digest bytes (CC_SHA256_DIGEST_LENGTH bytes long).
+ (NSData *)digestBytes:(const void *)bytes length:(size_t)length {
	uint8_t result[CC_SHA256_DIGEST_LENGTH];
	CC_SHA256_CTX ctx;
	CC_SHA256_Init(&ctx);

	// CC_SHA256_Update takes a 32-bit CC_LONG length. Casting a size_t straight
	// to CC_LONG would silently truncate buffers larger than UINT32_MAX bytes
	// and hash only part of the input, so feed the data in bounded chunks.
	const uint8_t *cursor = (const uint8_t *)bytes;
	size_t remaining = length;
	while(remaining > 0) {
		CC_LONG chunk = (remaining > UINT32_MAX) ? (CC_LONG)UINT32_MAX : (CC_LONG)remaining;
		CC_SHA256_Update(&ctx, cursor, chunk);
		cursor += chunk;
		remaining -= chunk;
	}

	CC_SHA256_Final(result, &ctx);
	return [NSData dataWithBytes:result length:sizeof(result)];
}

/// Computes the SHA-256 digest of the contents of `data`.
/// @param data The data object whose bytes are hashed.
/// @return The raw digest bytes.
+ (NSData *)digestData:(const NSData *)data {
	return [SHA256Digest digestBytes:[data bytes] length:[data length]];
}

/// Computes the SHA-256 digest of a raw byte buffer and formats it as text.
/// @param bytes Pointer to the first byte to hash.
/// @param length Number of bytes to hash, starting at `bytes`.
/// @return The digest as lowercase hexadecimal, two characters per digest byte.
+ (NSString *)digestBytesAsString:(const void *)bytes length:(size_t)length {
	NSData *hashData = [SHA256Digest digestBytes:bytes length:length];

	// Use a dedicated local for the digest size instead of overwriting the
	// input-length parameter.
	size_t hashLength = [hashData length];
	const uint8_t *values = (const uint8_t *)[hashData bytes];

	NSMutableString *result = [NSMutableString stringWithCapacity:hashLength * 2];
	for(size_t i = 0; i < hashLength; ++i) {
		[result appendFormat:@"%02x", values[i]];
	}

	// Hand back an immutable string so callers can safely use it as a key.
	return [NSString stringWithString:result];
}

/// Computes the SHA-256 digest of the contents of `data` and formats it as text.
/// @param data The data object whose bytes are hashed.
/// @return The digest as lowercase hexadecimal.
+ (NSString *)digestDataAsString:(const NSData *)data {
	return [SHA256Digest digestBytesAsString:[data bytes] length:[data length]];
}

@end

View File

@ -16,7 +16,7 @@
@private
sqlite3 *g_database;
@private
sqlite3_stmt *stmt[39];
sqlite3_stmt *stmt[41];
@private
NSMutableArray *databaseMirror;
NSMutableDictionary *artTable;

View File

@ -5,9 +5,12 @@
// Created by Christopher Snowhill on 12/22/21.
//
#import <Foundation/Foundation.h>
#import "SQLiteStore.h"
#import "Logging.h"
#import <Foundation/Foundation.h>
#import "SHA256Digest.h"
NSString *getDatabasePath(void) {
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDomainMask, YES);
@ -16,7 +19,7 @@ NSString *getDatabasePath(void) {
return [basePath stringByAppendingPathComponent:filename];
}
static int64_t currentSchemaVersion = 2;
static int64_t currentSchemaVersion = 3;
NSArray *createSchema(void) {
return @[
@ -27,9 +30,11 @@ NSArray *createSchema(void) {
);",
@"CREATE TABLE IF NOT EXISTS artdictionary ( \
artid INTEGER PRIMARY KEY AUTOINCREMENT, \
arthash BLOB NOT NULL, \
referencecount INTEGER, \
value BLOB NOT NULL \
);",
@"CREATE UNIQUE INDEX idx_art_hash ON artdictionary (arthash);",
@"CREATE TABLE IF NOT EXISTS knowntracks ( \
trackid INTEGER PRIMARY KEY AUTOINCREMENT, \
referencecount INTEGER, \
@ -87,12 +92,14 @@ enum {
stmt_remove_string,
stmt_select_art,
stmt_select_art_all,
stmt_select_art_refcount,
stmt_select_art_value,
stmt_bump_art,
stmt_pop_art,
stmt_add_art,
stmt_remove_art,
stmt_add_art_renamed,
stmt_select_track,
stmt_select_track_refcount,
@ -180,13 +187,21 @@ enum {
const char *query_remove_string = "DELETE FROM stringdictionary WHERE (stringid = ?)";
enum {
select_art_in_value = 1,
select_art_in_arthash = 1,
select_art_out_art_id = 0,
select_art_out_reference_count,
};
const char *query_select_art = "SELECT artid, referencecount FROM artdictionary WHERE (value = ?) LIMIT 1";
const char *query_select_art = "SELECT artid, referencecount FROM artdictionary WHERE (arthash = ?) LIMIT 1";
enum {
select_art_all_out_id = 0,
select_art_all_out_referencecount,
select_art_all_out_value,
};
const char *query_select_art_all = "SELECT artid, referencecount, value FROM artdictionary";
enum {
select_art_refcount_in_id = 1,
@ -217,10 +232,11 @@ enum {
const char *query_pop_art = "UPDATE artdictionary SET referencecount = referencecount - 1 WHERE (artid = ?) LIMIT 1";
enum {
add_art_in_value = 1,
add_art_in_hash = 1,
add_art_in_value,
};
const char *query_add_art = "INSERT INTO artdictionary (referencecount, value) VALUES (1, ?)";
const char *query_add_art = "INSERT INTO artdictionary (referencecount, arthash, value) VALUES (1, ?, ?)";
enum {
remove_art_in_id = 1,
@ -228,6 +244,15 @@ enum {
const char *query_remove_art = "DELETE FROM artdictionary WHERE (artid = ?)";
enum {
add_art_renamed_in_id = 1,
add_art_renamed_in_referencecount,
add_art_renamed_in_hash,
add_art_renamed_in_value,
};
const char *query_add_art_renamed = "INSERT INTO artdictionary_v2 (artid, referencecount, arthash, value) VALUES (?, ?, ?, ?)";
enum {
select_track_in_id = 1,
@ -607,6 +632,62 @@ static SQLiteStore *g_sharedStore = NULL;
}
break;
case 2:
// Schema 2 to 3: Add arthash blob field to the artdictionary table, requires transmutation
{
if(sqlite3_exec(g_database, "CREATE TABLE IF NOT EXISTS artdictionary_v2 ( "
" artid INTEGER PRIMARY KEY AUTOINCREMENT, "
" arthash BLOB NOT NULL, "
" referencecount INTEGER, "
" value BLOB NOT NULL); "
"CREATE UNIQUE INDEX idx_art_hash ON artdictionary_v2 (arthash);",
NULL, NULL, &error)) {
DLog(@"SQLite error: %s", error);
return nil;
}
if(PREPARE(select_art_all) ||
PREPARE(add_art_renamed))
return nil;
// Add the art hashes to the table
st = stmt[stmt_select_art_all];
sqlite3_stmt *sta = stmt[stmt_add_art_renamed];
if(sqlite3_reset(st))
return nil;
while(sqlite3_step(st) == SQLITE_ROW) {
int64_t artId = sqlite3_column_int64(st, select_art_all_out_id);
int64_t referenceCount = sqlite3_column_int64(st, select_art_all_out_referencecount);
const void *artBytes = sqlite3_column_blob(st, select_art_all_out_value);
size_t artLength = sqlite3_column_bytes(st, select_art_all_out_value);
NSData *hash = [SHA256Digest digestBytes:artBytes length:artLength];
if(sqlite3_reset(sta) ||
sqlite3_bind_int64(sta, add_art_renamed_in_id, artId) ||
sqlite3_bind_int64(sta, add_art_renamed_in_referencecount, referenceCount) ||
sqlite3_bind_blob64(sta, add_art_renamed_in_hash, [hash bytes], [hash length], SQLITE_STATIC) ||
sqlite3_bind_blob64(sta, add_art_renamed_in_value, artBytes, artLength, SQLITE_STATIC) ||
sqlite3_step(sta) != SQLITE_DONE)
return nil;
}
sqlite3_reset(sta);
sqlite3_finalize(sta);
sqlite3_finalize(st);
stmt[stmt_select_art_all] = NULL;
stmt[stmt_add_art_renamed] = NULL;
if(sqlite3_exec(g_database, "PRAGMA foreign_keys=off; BEGIN TRANSACTION; DROP TABLE artdictionary; ALTER TABLE artdictionary_v2 RENAME TO artdictionary; COMMIT; PRAGMA foreign_keys=on;", NULL, NULL, &error)) {
DLog(@"SQLite error: %s", error);
return nil;
}
}
break;
default:
break;
}
@ -664,7 +745,6 @@ static SQLiteStore *g_sharedStore = NULL;
return nil;
}
#undef PREPARE
size_t count = [self playlistGetCount];
databaseMirror = [[NSMutableArray alloc] init];
@ -847,10 +927,12 @@ static SQLiteStore *g_sharedStore = NULL;
return -1;
}
NSData *digest = [SHA256Digest digestData:*art];
sqlite3_stmt *st = stmt[stmt_select_art];
if(sqlite3_reset(st) ||
sqlite3_bind_blob64(st, select_art_in_value, [*art bytes], [*art length], SQLITE_STATIC)) {
sqlite3_bind_blob64(st, select_art_in_arthash, [digest bytes], [digest length], SQLITE_STATIC)) {
return -1;
}
@ -873,6 +955,7 @@ static SQLiteStore *g_sharedStore = NULL;
st = stmt[stmt_add_art];
if(sqlite3_reset(st) ||
sqlite3_bind_blob64(st, add_art_in_hash, [digest bytes], [digest length], SQLITE_STATIC) ||
sqlite3_bind_blob64(st, add_art_in_value, [*art bytes], [*art length], SQLITE_STATIC) ||
sqlite3_step(st) != SQLITE_DONE ||
sqlite3_reset(st)) {