Fighting the MP3 tag charset problem:
* ID3v1 tags are now displayed in the default Windows charset rather than always being treated as Latin-1.
* Only the required bits of the encoding byte are read (cures some faulty-tag problems).
* Faulty UTF-16 headers without an endianness marker (BOM) will now display correctly as well.
CQTexperiment
parent
b3072709e4
commit
3c4cecd1e4
|
@ -174,6 +174,7 @@
|
|||
32AE5AFF14E70ED700420CA0 /* taglib_config.h in Headers */ = {isa = PBXBuildFile; fileRef = 32AE5A5914E70ED600420CA0 /* taglib_config.h */; settings = {ATTRIBUTES = (Public, ); }; };
|
||||
32AE5B0014E70F4700420CA0 /* tlist.tcc in Headers */ = {isa = PBXBuildFile; fileRef = 32AE5A4614E70ED600420CA0 /* tlist.tcc */; settings = {ATTRIBUTES = (Public, ); }; };
|
||||
32AE5B0114E70F4A00420CA0 /* tmap.tcc in Headers */ = {isa = PBXBuildFile; fileRef = 32AE5A4814E70ED600420CA0 /* tmap.tcc */; settings = {ATTRIBUTES = (Public, ); }; };
|
||||
4872B8881A675CCB00674347 /* libiconv.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 4872B8871A675CCB00674347 /* libiconv.dylib */; };
|
||||
83790D241809E8CA0073CF51 /* opusfile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 83790D201809E8CA0073CF51 /* opusfile.cpp */; };
|
||||
83790D251809E8CA0073CF51 /* opusfile.h in Headers */ = {isa = PBXBuildFile; fileRef = 83790D211809E8CA0073CF51 /* opusfile.h */; };
|
||||
83790D261809E8CA0073CF51 /* opusproperties.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 83790D221809E8CA0073CF51 /* opusproperties.cpp */; };
|
||||
|
@ -350,6 +351,7 @@
|
|||
32AE5A5714E70ED600420CA0 /* wavpackproperties.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = wavpackproperties.cpp; sourceTree = "<group>"; };
|
||||
32AE5A5814E70ED600420CA0 /* wavpackproperties.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = wavpackproperties.h; sourceTree = "<group>"; };
|
||||
32AE5A5914E70ED600420CA0 /* taglib_config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = taglib_config.h; path = taglib/taglib_config.h; sourceTree = "<group>"; };
|
||||
4872B8871A675CCB00674347 /* libiconv.dylib */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libiconv.dylib; path = usr/lib/libiconv.dylib; sourceTree = SDKROOT; };
|
||||
83790D201809E8CA0073CF51 /* opusfile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = opusfile.cpp; sourceTree = "<group>"; };
|
||||
83790D211809E8CA0073CF51 /* opusfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = opusfile.h; sourceTree = "<group>"; };
|
||||
83790D221809E8CA0073CF51 /* opusproperties.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = opusproperties.cpp; sourceTree = "<group>"; };
|
||||
|
@ -363,6 +365,7 @@
|
|||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
4872B8881A675CCB00674347 /* libiconv.dylib in Frameworks */,
|
||||
174C7A370F4FD43100E18B0F /* libz.dylib in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
|
@ -381,6 +384,7 @@
|
|||
0867D691FE84028FC02AAC07 /* TagLib */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
4872B8871A675CCB00674347 /* libiconv.dylib */,
|
||||
8E75700C09F318D70080F1EE /* Source */,
|
||||
089C1665FE841158C02AAC07 /* Resources */,
|
||||
0867D69AFE84028FC02AAC07 /* External Frameworks and Libraries */,
|
||||
|
|
|
@ -105,7 +105,7 @@ void TextIdentificationFrame::parseFields(const ByteVector &data)
|
|||
|
||||
// read the string data type (the first byte of the field data)
|
||||
|
||||
d->textEncoding = String::Type(data[0]);
|
||||
d->textEncoding = String::Type(data[0]&3);
|
||||
|
||||
// split the byte array into chunks based on the string type (two byte delimiter
|
||||
// for unicode encodings)
|
||||
|
|
|
@ -32,7 +32,12 @@
|
|||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <iconv.h>
|
||||
|
||||
|
||||
namespace TagLib {
|
||||
char default_ascii_encoding[] = "latin1";
|
||||
char ascii_encoding[256] = "";
|
||||
|
||||
inline unsigned short byteSwap(unsigned short x)
|
||||
{
|
||||
|
@ -161,6 +166,7 @@ String::String(char c, Type t)
|
|||
prepare(t);
|
||||
}
|
||||
|
||||
|
||||
String::String(const ByteVector &v, Type t)
|
||||
{
|
||||
d = new StringPrivate;
|
||||
|
@ -168,10 +174,53 @@ String::String(const ByteVector &v, Type t)
|
|||
if(v.isEmpty())
|
||||
return;
|
||||
|
||||
if(t == Latin1 || t == UTF8) {
|
||||
iconv_t encoder = (iconv_t)-1;
|
||||
if ( t == Latin1 ) {
|
||||
encoder = iconv_open("utf-8",(*ascii_encoding)?ascii_encoding:default_ascii_encoding);
|
||||
if ( encoder == (iconv_t)-1 )
|
||||
encoder = iconv_open("utf-8",default_ascii_encoding);
|
||||
}
|
||||
|
||||
if ( t == Latin1 && encoder != (iconv_t)-1 ) {
|
||||
size_t srclen = v.size();
|
||||
char *src = new char[srclen+1];
|
||||
size_t dstlen = v.size()*6;
|
||||
char *dst = new char[dstlen+1];
|
||||
int n=0;
|
||||
|
||||
char *src_param = src;
|
||||
char *dst_param = dst;
|
||||
size_t src_remaining = srclen;
|
||||
size_t dst_remaining = dstlen;
|
||||
|
||||
for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it)
|
||||
src[n++] = *it;
|
||||
src[n++] = 0;
|
||||
|
||||
iconv(encoder, &src_param, &src_remaining, &dst_param, &dst_remaining);
|
||||
t = UTF8;
|
||||
|
||||
int length = 0;
|
||||
d->data.resize(v.size());
|
||||
|
||||
d->data.resize(dstlen);
|
||||
wstring::iterator targetIt = d->data.begin();
|
||||
for ( int i=0; i<dstlen-dst_remaining; i++ ) {
|
||||
*targetIt = dst[i];
|
||||
++targetIt;
|
||||
++length;
|
||||
}
|
||||
|
||||
d->data.resize(length);
|
||||
|
||||
delete[] src;
|
||||
delete[] dst;
|
||||
|
||||
iconv_close(encoder);
|
||||
|
||||
t = UTF8;
|
||||
} else if ( t == UTF8 || ( t == Latin1 && encoder == (iconv_t)-1) ) { // UTF8 string or encoder failed to start
|
||||
int length = 0;
|
||||
d->data.resize(v.size()*2);
|
||||
wstring::iterator targetIt = d->data.begin();
|
||||
for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) {
|
||||
*targetIt = uchar(*it);
|
||||
|
@ -184,6 +233,18 @@ String::String(const ByteVector &v, Type t)
|
|||
d->data.resize(v.size() / 2);
|
||||
wstring::iterator targetIt = d->data.begin();
|
||||
|
||||
// Cure some faulty UTF16 headers without endianness: insert endianness byte into the beginning of the dst string.
|
||||
if ( v.size() > 1 ) {
|
||||
wchar w = combine(v.data()[0], v.data()[1]);
|
||||
if ( w != 0xfeff && w != 0xfffe ) {
|
||||
d->data.resize(v.size()/2 + 1);
|
||||
targetIt = d->data.begin();
|
||||
*targetIt = 0xfffe;
|
||||
++targetIt;
|
||||
// String append will continue in the loop below.
|
||||
}
|
||||
}
|
||||
|
||||
for(ByteVector::ConstIterator it = v.begin();
|
||||
it != v.end() && it + 1 != v.end() && combine(*it, *(it + 1));
|
||||
it += 2)
|
||||
|
|
|
@ -55,6 +55,7 @@
|
|||
#define TStringToQString(s) QString::fromUtf8(s.toCString(true))
|
||||
|
||||
namespace TagLib {
|
||||
extern char ascii_encoding[];
|
||||
|
||||
//! A \e wide string class suitable for unicode.
|
||||
|
||||
|
|
|
@ -24,6 +24,41 @@
|
|||
|
||||
NSMutableDictionary *dict = [[NSMutableDictionary alloc] init];
|
||||
|
||||
if ( !*TagLib::ascii_encoding ) {
|
||||
NSStringEncoding enc = [NSString defaultCStringEncoding];
|
||||
CFStringEncoding cfenc = CFStringConvertNSStringEncodingToEncoding(enc);
|
||||
NSString *ref = (NSString *)CFStringConvertEncodingToIANACharSetName(cfenc);
|
||||
UInt32 cp = CFStringConvertEncodingToWindowsCodepage(cfenc);
|
||||
|
||||
// Most tags are using windows codepage, so remap OS X codepage to Windows one.
|
||||
|
||||
static struct {
|
||||
UInt32 from, to;
|
||||
} codepage_remaps[] = {
|
||||
{ 10001, 932 }, // Japanese Shift-JIS
|
||||
{ 10002, 950 }, // Traditional Chinese
|
||||
{ 10003, 949 }, // Korean
|
||||
{ 10004, 1256 }, // Arabic
|
||||
{ 10005, 1255 }, // Hebrew
|
||||
{ 10006, 1253 }, // Greek
|
||||
{ 10007, 1251 }, // Cyrillic
|
||||
{ 10008, 936 }, // Simplified Chinese
|
||||
{ 10029, 1250 }, // Central European (latin2)
|
||||
};
|
||||
|
||||
int i;
|
||||
int max = sizeof(codepage_remaps)/sizeof(codepage_remaps[0]);
|
||||
for ( i=0; i<max; i++ )
|
||||
if ( codepage_remaps[i].from == cp )
|
||||
break;
|
||||
if ( i < max )
|
||||
sprintf(TagLib::ascii_encoding, "windows-%d", codepage_remaps[i].to);
|
||||
else
|
||||
strcpy(TagLib::ascii_encoding, [ref UTF8String]);
|
||||
|
||||
}
|
||||
|
||||
|
||||
TagLib::FileRef f((const char *)[[url path] UTF8String], false);
|
||||
if (!f.isNull())
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue