Fighting mp3 tag charset problem:

* ID3v1 tags are now displayed using the default Windows charset instead of always being treated as Latin-1
* only the required bits of the encoding byte are read (fixes some faulty tags)
* faulty UTF-16 text that lacks a byte-order (endianness) mark will now display correctly as well
CQTexperiment
o1 2015-01-15 09:02:24 +03:00
parent b3072709e4
commit 3c4cecd1e4
5 changed files with 108 additions and 7 deletions

View File

@ -174,6 +174,7 @@
32AE5AFF14E70ED700420CA0 /* taglib_config.h in Headers */ = {isa = PBXBuildFile; fileRef = 32AE5A5914E70ED600420CA0 /* taglib_config.h */; settings = {ATTRIBUTES = (Public, ); }; };
32AE5B0014E70F4700420CA0 /* tlist.tcc in Headers */ = {isa = PBXBuildFile; fileRef = 32AE5A4614E70ED600420CA0 /* tlist.tcc */; settings = {ATTRIBUTES = (Public, ); }; };
32AE5B0114E70F4A00420CA0 /* tmap.tcc in Headers */ = {isa = PBXBuildFile; fileRef = 32AE5A4814E70ED600420CA0 /* tmap.tcc */; settings = {ATTRIBUTES = (Public, ); }; };
4872B8881A675CCB00674347 /* libiconv.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 4872B8871A675CCB00674347 /* libiconv.dylib */; };
83790D241809E8CA0073CF51 /* opusfile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 83790D201809E8CA0073CF51 /* opusfile.cpp */; };
83790D251809E8CA0073CF51 /* opusfile.h in Headers */ = {isa = PBXBuildFile; fileRef = 83790D211809E8CA0073CF51 /* opusfile.h */; };
83790D261809E8CA0073CF51 /* opusproperties.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 83790D221809E8CA0073CF51 /* opusproperties.cpp */; };
@ -350,6 +351,7 @@
32AE5A5714E70ED600420CA0 /* wavpackproperties.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = wavpackproperties.cpp; sourceTree = "<group>"; };
32AE5A5814E70ED600420CA0 /* wavpackproperties.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = wavpackproperties.h; sourceTree = "<group>"; };
32AE5A5914E70ED600420CA0 /* taglib_config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = taglib_config.h; path = taglib/taglib_config.h; sourceTree = "<group>"; };
4872B8871A675CCB00674347 /* libiconv.dylib */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libiconv.dylib; path = usr/lib/libiconv.dylib; sourceTree = SDKROOT; };
83790D201809E8CA0073CF51 /* opusfile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = opusfile.cpp; sourceTree = "<group>"; };
83790D211809E8CA0073CF51 /* opusfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = opusfile.h; sourceTree = "<group>"; };
83790D221809E8CA0073CF51 /* opusproperties.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = opusproperties.cpp; sourceTree = "<group>"; };
@ -363,6 +365,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
4872B8881A675CCB00674347 /* libiconv.dylib in Frameworks */,
174C7A370F4FD43100E18B0F /* libz.dylib in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
@ -381,6 +384,7 @@
0867D691FE84028FC02AAC07 /* TagLib */ = {
isa = PBXGroup;
children = (
4872B8871A675CCB00674347 /* libiconv.dylib */,
8E75700C09F318D70080F1EE /* Source */,
089C1665FE841158C02AAC07 /* Resources */,
0867D69AFE84028FC02AAC07 /* External Frameworks and Libraries */,

View File

@ -105,7 +105,7 @@ void TextIdentificationFrame::parseFields(const ByteVector &data)
// read the string data type (the first byte of the field data)
d->textEncoding = String::Type(data[0]);
d->textEncoding = String::Type(data[0]&3);
// split the byte array into chunks based on the string type (two byte delimiter
// for unicode encodings)

View File

@ -32,7 +32,12 @@
#include <string.h>
#include <math.h>
#include <iconv.h>
namespace TagLib {
char default_ascii_encoding[] = "latin1";
char ascii_encoding[256] = "";
inline unsigned short byteSwap(unsigned short x)
{
@ -161,17 +166,61 @@ String::String(char c, Type t)
prepare(t);
}
String::String(const ByteVector &v, Type t)
{
d = new StringPrivate;
if(v.isEmpty())
return;
if(t == Latin1 || t == UTF8) {
int length = 0;
d->data.resize(v.size());
iconv_t encoder = (iconv_t)-1;
if ( t == Latin1 ) {
encoder = iconv_open("utf-8",(*ascii_encoding)?ascii_encoding:default_ascii_encoding);
if ( encoder == (iconv_t)-1 )
encoder = iconv_open("utf-8",default_ascii_encoding);
}
if ( t == Latin1 && encoder != (iconv_t)-1 ) {
size_t srclen = v.size();
char *src = new char[srclen+1];
size_t dstlen = v.size()*6;
char *dst = new char[dstlen+1];
int n=0;
char *src_param = src;
char *dst_param = dst;
size_t src_remaining = srclen;
size_t dst_remaining = dstlen;
for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it)
src[n++] = *it;
src[n++] = 0;
iconv(encoder, &src_param, &src_remaining, &dst_param, &dst_remaining);
t = UTF8;
int length = 0;
d->data.resize(dstlen);
wstring::iterator targetIt = d->data.begin();
for ( int i=0; i<dstlen-dst_remaining; i++ ) {
*targetIt = dst[i];
++targetIt;
++length;
}
d->data.resize(length);
delete[] src;
delete[] dst;
iconv_close(encoder);
t = UTF8;
} else if ( t == UTF8 || ( t == Latin1 && encoder == (iconv_t)-1) ) { // UTF8 string or encoder failed to start
int length = 0;
d->data.resize(v.size()*2);
wstring::iterator targetIt = d->data.begin();
for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) {
*targetIt = uchar(*it);
@ -183,7 +232,19 @@ String::String(const ByteVector &v, Type t)
else {
d->data.resize(v.size() / 2);
wstring::iterator targetIt = d->data.begin();
// Cure some faulty UTF16 headers without endianness: insert endianness byte into the beginning of the dst string.
if ( v.size() > 1 ) {
wchar w = combine(v.data()[0], v.data()[1]);
if ( w != 0xfeff && w != 0xfffe ) {
d->data.resize(v.size()/2 + 1);
targetIt = d->data.begin();
*targetIt = 0xfffe;
++targetIt;
// String append will continue in the loop below.
}
}
for(ByteVector::ConstIterator it = v.begin();
it != v.end() && it + 1 != v.end() && combine(*it, *(it + 1));
it += 2)

View File

@ -55,6 +55,7 @@
#define TStringToQString(s) QString::fromUtf8(s.toCString(true))
namespace TagLib {
extern char ascii_encoding[];
//! A \e wide string class suitable for unicode.

View File

@ -24,6 +24,41 @@
NSMutableDictionary *dict = [[NSMutableDictionary alloc] init];
if ( !*TagLib::ascii_encoding ) {
NSStringEncoding enc = [NSString defaultCStringEncoding];
CFStringEncoding cfenc = CFStringConvertNSStringEncodingToEncoding(enc);
NSString *ref = (NSString *)CFStringConvertEncodingToIANACharSetName(cfenc);
UInt32 cp = CFStringConvertEncodingToWindowsCodepage(cfenc);
// Most tags are using windows codepage, so remap OS X codepage to Windows one.
static struct {
UInt32 from, to;
} codepage_remaps[] = {
{ 10001, 932 }, // Japanese Shift-JIS
{ 10002, 950 }, // Traditional Chinese
{ 10003, 949 }, // Korean
{ 10004, 1256 }, // Arabic
{ 10005, 1255 }, // Hebrew
{ 10006, 1253 }, // Greek
{ 10007, 1251 }, // Cyrillic
{ 10008, 936 }, // Simplified Chinese
{ 10029, 1250 }, // Central European (latin2)
};
int i;
int max = sizeof(codepage_remaps)/sizeof(codepage_remaps[0]);
for ( i=0; i<max; i++ )
if ( codepage_remaps[i].from == cp )
break;
if ( i < max )
sprintf(TagLib::ascii_encoding, "windows-%d", codepage_remaps[i].to);
else
strcpy(TagLib::ascii_encoding, [ref UTF8String]);
}
TagLib::FileRef f((const char *)[[url path] UTF8String], false);
if (!f.isNull())
{