diff --git a/source/ChanSort.Loader.MediaTek/Serializer.cs b/source/ChanSort.Loader.MediaTek/Serializer.cs index 1475e81..e4b7dff 100644 --- a/source/ChanSort.Loader.MediaTek/Serializer.cs +++ b/source/ChanSort.Loader.MediaTek/Serializer.cs @@ -1,5 +1,4 @@ -using System.Collections.Generic; -using System.IO; +using System.IO; using System.Linq; using System.Text; using System.Xml; @@ -11,8 +10,12 @@ namespace ChanSort.Loader.MediaTek; public class Serializer : SerializerBase { /* - * Some Android based TVs export (in addition to the brand specific channel list files) a file named MtkChannelList.xml - * Examples are Philips channel list formats 120 and 125 + * Some Android based TVs export an XML file with the format described below. + * Examples are Philips channel list formats 120 and 125 and Sony BRAVIA 7 (2024). + * However there are differences between Philips and Sony: + * - Sony lacks a number of XML elements + * - Sony seems to manage TV, Radio and Data channels internally in separate lists, all starting at 1, while Philips seems to use one combined list with no duplicate major_channel_numbers + * * * * @@ -26,6 +29,9 @@ public class Serializer : SerializerBase * * service://SERVICE_LIST_GENERAL_SATELLITE/[service_list_id]/[major_channel_number] * + * + * The following elements exist in the Philips lists but not in the Sony's sdb.xml + * * SID * TSID * NID @@ -47,13 +53,13 @@ public class Serializer : SerializerBase private byte[] content; private string textContent; private readonly StringBuilder fileInfo = new(); - - private readonly Dictionary listsById = new(); + private readonly bool splitTvRadioData; #region ctor() - public Serializer(string inputFile) : base(inputFile) + public Serializer(string inputFile, bool separateTvRadioData = false) : base(inputFile) { + this.splitTvRadioData = separateTvRadioData; this.Features.ChannelNameEdit = ChannelNameEditMode.All; this.Features.DeleteMode = DeleteMode.NotSupported; this.Features.FavoritesMode = 
FavoritesMode.None; @@ -134,7 +140,7 @@ private void ReadServiceListInfos(XmlNode serviceListInfosNode) #region ReadServiceList() private void ReadServiceList(XmlElement node) { - SignalSource ss = SignalSource.Tv | SignalSource.Radio | SignalSource.Data | SignalSource.Dvb; + var ss = SignalSource.Dvb; var slt = node.GetAttribute("service_list_type"); if (slt.Contains("SATELLITE")) ss |= SignalSource.Sat; @@ -143,12 +149,8 @@ private void ReadServiceList(XmlElement node) else if (slt.Contains("TERR")) ss |= SignalSource.Antenna; - // service_list_id example: SERVICE_LIST_GENERAL_SATELLITE/17 - var serviceListId = node.GetAttribute("service_list_id"); - - var list = new ChannelList(ss, serviceListId); - this.listsById[serviceListId] = list; + //var serviceListId = node.GetAttribute("service_list_id"); int idx = 0; foreach (var child in node.ChildNodes) @@ -156,16 +158,14 @@ private void ReadServiceList(XmlElement node) if (!(child is XmlElement si && si.LocalName == "service_info")) continue; - ReadChannel(si, ss, idx++, list); + ReadChannel(si, ss, idx++); } - - this.DataRoot.AddChannelList(list); } #endregion #region ReadChannel() - private ChannelInfo ReadChannel(XmlElement si, SignalSource ss, int idx, ChannelList list) + private void ReadChannel(XmlElement si, SignalSource ss, int idx) { // record_id example: service://SERVICE_LIST_GENERAL_SATELLITE/17/1 var recIdUri = si.GetElementString("record_id") ?? 
""; @@ -197,13 +197,28 @@ private ChannelInfo ReadChannel(XmlElement si, SignalSource ss, int idx, Channel else if ((ss & SignalSource.Cable) != 0) chan.ChannelOrTransponder = LookupData.Instance.GetDvbcTransponder(chan.FreqInMhz).ToString(); - var elements = si.GetElementsByTagName("major_channel_number", si.NamespaceURI); - list.ReadOnly |= elements.Count == 1 && elements[0].Attributes["editable", si.NamespaceURI].InnerText == "false"; - list.AddChannel(chan); + if (splitTvRadioData) + ss |= LookupData.Instance.IsRadioTvOrData(chan.ServiceType); + else + ss |= SignalSource.Tv | SignalSource.Radio | SignalSource.Data; - return chan; + var list = DataRoot.GetChannelList(ss); + if (list == null) + { + var name = (ss & SignalSource.Antenna) != 0 ? "Antenna" : (ss & SignalSource.Cable) != 0 ? "Cable" : (ss & SignalSource.Sat) != 0 ? "Sat" : (ss & SignalSource.Ip) != 0 ? "IP" : "Other"; + if (splitTvRadioData) + name += " " + ((ss & SignalSource.Tv) != 0 ? " TV" : (ss & SignalSource.Radio) != 0 ? " Radio" : " Data"); + + list = new ChannelList(ss, name); + this.DataRoot.AddChannelList(list); + } + + var elements = si.GetElementsByTagName("major_channel_number", si.NamespaceURI); + list.ReadOnly |= elements.Count == 1 && elements[0].Attributes!["editable", si.NamespaceURI].InnerText == "false"; + + list.AddChannel(chan); } #endregion @@ -231,10 +246,11 @@ public override void Save() continue; var si = ch.Xml; - si["major_channel_number"].InnerText = ch.NewProgramNr.ToString(); - si["service_name"].InnerText = ch.Name; - si["lock"].InnerText = ch.Lock ? "1" : "0"; - si["visible_service"].InnerText = ch.Hidden ? "1" : "3"; + si["major_channel_number"]!.InnerText = ch.NewProgramNr.ToString(); + si["service_name"]!.InnerText = ch.Name; + si["visible_service"]!.InnerText = ch.Hidden ? "1" : "3"; + if (si["lock"] != null) // Sony lists don't have this element + si["lock"].InnerText = ch.Lock ? 
"1" : "0"; } } diff --git a/source/ChanSort.Loader.Philips/XmlSerializer.cs b/source/ChanSort.Loader.Philips/XmlSerializer.cs index 4d06518..5d93f67 100644 --- a/source/ChanSort.Loader.Philips/XmlSerializer.cs +++ b/source/ChanSort.Loader.Philips/XmlSerializer.cs @@ -591,74 +591,99 @@ private string DecodeName(string input, NameType nameType) return input; // The Philips encodes names is a complete mess. - // Each character is represented as two bytes, with the low byte first and the high second, but this isn't utf16. - // All observed files have the "high" byte always as 0x00 - // If looking only at the odd bytes, this can either be encoded in some random locale, a valid utf8 sequence or 1 byte characters mixed with big-endian utf16 double-bytes characters. + // Two hex digits are combined to an integer with the low byte first and the high byte second. + // If the high byte is 0xFF, it is treated as if it were 0x00. + // If any of the high bytes is non-zero, the encoding is little-endian utf16. + // If all high bytes are 0, some guesswork is needed to decode the sequence of odd bytes. It can be encoded as + // - a valid utf8 sequence + // - 1 byte per character in some undetermined locale + // - 1 byte per character mixed with 2 bytes big-endian utf16 (v125) // according to https://github.com/PredatH0r/ChanSort/issues/347 Philips seems use a locale dependent encoding for favorite list names, // writing "0xAA 0x00" to the file for an 8 bit code point. Congratulations, well done! - // In version 120/125 umlauts in channel names are encoded as 1 byte CP-1252/UTF16 code point + 0xFF as the second byte (i.e. 
for "Ä" it is 0xC4 0xFF instead of 0xC4 0x00) - // Also: 0x62 0x00 0x65 0x00 0x49 0x00 0x4e 0x00 0x20 0x00 0x01 0x00 0x30 0x00 0x5a - here 0x01 0x00 0x30 0x00 refers to U+0130 (the upper case I with dot), in "beIN İZ" + // Version 100: CAN use little-endian UTF16: 0x11 0x04 0x35 0x04 0x3B 0x04 0x30 0x04 0x40 0x04 0x43 0x04 0x41 0x04 0x4C 0x04 0x20 0x00 0x31 0x00 0x20 0x00 0x48 0x00 0x44 0x00 for "Беларусь 1 HD" + // also 100: odd bytes contain UTF8, high are all 0: https://github.com/PredatH0r/ChanSort/issues/421: + // 0x38 0x00 0x20 0x00 0xD0 0x00 0xBA 0x00 0xD0 0x00 0xB0 0x00 0xD0 0x00 0xBD 0x00 0xD0 0x00 0xB0 0x00 0xD0 0x00 0xBB 0x00 0x20 0x00 0x48 0x00 0x44 0x00 for "8 канал HD" - // https://github.com/PredatH0r/ChanSort/issues/421: 0x38 0x00 0x20 0x00 0xD0 0x00 0xBA ... seems to contain cyrillic UTF-8 encoding in channel names instead of UTF-16 + // Version 120/125: Umlauts in channel names are encoded as 1 byte CP-1252/UTF16 code point + 0xFF as the second byte (i.e. for "Ä" it is 0xC4 0xFF instead of 0xC4 0x00) + // Also 125: 0x62 0x00 0x65 0x00 0x49 0x00 0x4e 0x00 0x20 0x00 0x01 0x00 0x30 0x00 0x5a for "beIN İZ" where 0x01 0x00 0x30 0x00 refers to U+0130 "İ" - var hexParts = input.Split(' '); var utf16 = new MemoryStream(); var utf8 = new MemoryStream(); - bool highByte = false; + bool isHighByte = false; + int intValue = 0; bool invalidUtf8 = false; byte bigEndianUnicodeHighByte = 0; - int bigEndianUnicodeIndex = -1; + bool isBigEndianUtf16InOddBytes = false; + var hexParts = input.Split(' '); foreach (var part in hexParts) { if (part == "") continue; - var val = (byte)ParseInt(part); - invalidUtf8 |= highByte && val != 0; - if (highByte && val == 0xff) // hack-around for version 120 - val = 0; + var curByte = (byte)ParseInt(part); + + if (!isHighByte) + { + intValue = curByte; + isHighByte = true; + continue; + } + + isHighByte = false; + if (curByte == 0xff) // hack-around for version 120 where 0xFFxx is actually a CP1252 code point xx + curByte = 0; + + 
intValue += curByte << 8; + invalidUtf8 |= curByte != 0; + + if (intValue == 0) // break when reaching a 0x00 0x00 sequence + break; + + if (!invalidUtf8) + utf8.WriteByte((byte)intValue); - if (bigEndianUnicodeIndex >= 0) // special handling when a character < 32 was detected, which means we have a messed up "HI 00 LO 00" encoding for an UTF16 character (where HI is < 32) + if (isBigEndianUtf16InOddBytes) // special handling when a character < 32 was detected, which means we may have a "HI 00 LO 00" encoding for a UTF16 character { - ++bigEndianUnicodeIndex; - if (bigEndianUnicodeIndex == 2) + if (curByte == 0) // expected case where LO is followed by 00 { - utf16.WriteByte(val); + utf16.WriteByte((byte)intValue); utf16.WriteByte(bigEndianUnicodeHighByte); bigEndianUnicodeHighByte = 0; } - else if (bigEndianUnicodeIndex == 3) - bigEndianUnicodeIndex = -1; + else // fallback to write full 4 byte sequence + { + utf16.WriteByte(bigEndianUnicodeHighByte); + utf16.WriteByte(0); + utf16.WriteByte((byte)(intValue >> 8)); + utf16.WriteByte(curByte); + } + + isBigEndianUtf16InOddBytes = false; } else { - if (!highByte) + if (intValue < 32) // an int < 32 is likely the high byte of a "HI 00 LO 00" encoded UTF16 character { - if (val < 32 && val != 0) // a char < 32 is likely the high byte of a "HI 00 LO 00" encoded UTF16 character - { - bigEndianUnicodeHighByte = val; - bigEndianUnicodeIndex = 0; - invalidUtf8 = true; - } - else if (!invalidUtf8) - utf8.WriteByte(val); + isBigEndianUtf16InOddBytes = true; + bigEndianUnicodeHighByte = (byte)intValue; + invalidUtf8 = true; + } + else + { + utf16.WriteByte((byte)(intValue & 0xFF)); + utf16.WriteByte(curByte); } - if (bigEndianUnicodeIndex < 0) - utf16.WriteByte(val); } - - - highByte = !highByte; } - // in the FavList the name can be a random locale based on the country setting (other than CP-1252 or U-0000-00FF) + // in the FavList the name can be a random locale based on the country setting (other than CP-1252 or 
U-0000-00FF, e.g. Turkish) if (nameType == NameType.FavList) return this.DefaultEncoding.GetString(utf8.GetBuffer(), 0, (int)utf8.Length).TrimGarbage(); - // e.g. for cyrillic names, where only the low-byte is used for an utf8 encoding while the high-byte is always 0 + // best-effort utf8 decoding if (!invalidUtf8 && Tools.IsUtf8(utf8.GetBuffer(), 0, (int)utf8.Length)) return Encoding.UTF8.GetString(utf8.GetBuffer(), 0, (int)utf8.Length).TrimGarbage(); diff --git a/source/ChanSort.Loader.Sony/ChanSort.Loader.Sony.csproj b/source/ChanSort.Loader.Sony/ChanSort.Loader.Sony.csproj index 034b54b..ab29112 100644 --- a/source/ChanSort.Loader.Sony/ChanSort.Loader.Sony.csproj +++ b/source/ChanSort.Loader.Sony/ChanSort.Loader.Sony.csproj @@ -23,6 +23,7 @@ + diff --git a/source/ChanSort.Loader.Sony/SonyPlugin.cs b/source/ChanSort.Loader.Sony/SonyPlugin.cs index 7be50b1..69c4d5c 100644 --- a/source/ChanSort.Loader.Sony/SonyPlugin.cs +++ b/source/ChanSort.Loader.Sony/SonyPlugin.cs @@ -1,4 +1,5 @@ -using ChanSort.Api; +using System.IO; +using ChanSort.Api; namespace ChanSort.Loader.Sony { @@ -10,6 +11,14 @@ public class SonyPlugin : ISerializerPlugin public SerializerBase CreateSerializer(string inputFile) { + using (var rdr = new StreamReader(inputFile)) + { + var line1 = rdr.ReadLine() ?? ""; + var line2 = rdr.ReadLine() ?? 
""; + if (line1.Contains("") || line2.Contains("")) + return new MediaTek.Serializer(inputFile, true); + } + return new Serializer(inputFile); } } diff --git a/source/ChanSort.sln b/source/ChanSort.sln index cccd911..6d54f04 100644 --- a/source/ChanSort.sln +++ b/source/ChanSort.sln @@ -5,15 +5,16 @@ VisualStudioVersion = 17.0.32112.339 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ChanSort", "ChanSort\ChanSort.csproj", "{5FAFDABC-A52F-498C-BD2F-AFFC4119797A}" ProjectSection(ProjectDependencies) = postProject + {4D5AF0A3-1B96-42C8-910D-0C4852EA22F4} = {4D5AF0A3-1B96-42C8-910D-0C4852EA22F4} {4E68F218-5135-4D92-8C17-14FAA5D4CBF3} = {4E68F218-5135-4D92-8C17-14FAA5D4CBF3} - {74A18C6F-09FF-413E-90D9-827066FA5B36} = {74A18C6F-09FF-413E-90D9-827066FA5B36} {68DA8072-3A29-4076-9F64-D66F38349585} = {68DA8072-3A29-4076-9F64-D66F38349585} + {74A18C6F-09FF-413E-90D9-827066FA5B36} = {74A18C6F-09FF-413E-90D9-827066FA5B36} {A1C9A98D-368A-44E8-9B7F-7EACA46C9EC5} = {A1C9A98D-368A-44E8-9B7F-7EACA46C9EC5} - {F6F02792-07F1-48D5-9AF3-F945CA5E3931} = {F6F02792-07F1-48D5-9AF3-F945CA5E3931} - {E972D8A1-2F5F-421C-AC91-CFF45E5191BE} = {E972D8A1-2F5F-421C-AC91-CFF45E5191BE} - {4D5AF0A3-1B96-42C8-910D-0C4852EA22F4} = {4D5AF0A3-1B96-42C8-910D-0C4852EA22F4} + {A5C22199-1C51-4265-89CA-A7183F1BDB8B} = {A5C22199-1C51-4265-89CA-A7183F1BDB8B} {B594DDA4-7BD5-450E-B648-668E0F659813} = {B594DDA4-7BD5-450E-B648-668E0F659813} {D093E7EE-D3AD-4E7B-AF82-C6918CA017FB} = {D093E7EE-D3AD-4E7B-AF82-C6918CA017FB} + {E972D8A1-2F5F-421C-AC91-CFF45E5191BE} = {E972D8A1-2F5F-421C-AC91-CFF45E5191BE} + {F6F02792-07F1-48D5-9AF3-F945CA5E3931} = {F6F02792-07F1-48D5-9AF3-F945CA5E3931} EndProjectSection EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ChanSort.Api", "ChanSort.Api\ChanSort.Api.csproj", "{DCCFFA08-472B-4D17-BB90-8F513FC01392}" @@ -160,7 +161,7 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChanSort.Loader.TechniSat", EndProject 
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ChanSort.Loader.Amdb", "ChanSort.Loader.Amdb\ChanSort.Loader.Amdb.csproj", "{30E9D084-6F3C-41A9-9B46-846178C91BDB}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChanSort.Loader.MediaTek", "ChanSort.Loader.MediaTek\ChanSort.Loader.MediaTek.csproj", "{5FC54726-B7EC-4A81-919F-F924110C723E}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ChanSort.Loader.MediaTek", "ChanSort.Loader.MediaTek\ChanSort.Loader.MediaTek.csproj", "{5FC54726-B7EC-4A81-919F-F924110C723E}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/source/ChanSort/ChanSort.csproj b/source/ChanSort/ChanSort.csproj index 4cf1f3c..66d830a 100644 --- a/source/ChanSort/ChanSort.csproj +++ b/source/ChanSort/ChanSort.csproj @@ -145,6 +145,7 @@ + diff --git a/source/changelog.md b/source/changelog.md index 3fb7b04..045e7c2 100644 --- a/source/changelog.md +++ b/source/changelog.md @@ -1,7 +1,11 @@ ChanSort Change Log =================== -2024-09-31 +2024-10-02 +- Philips formats 100-125: further improvements/fixes for decoding non-latin characters +- Sony Bravia 7 (2024) sdb.xml: support to read it as a reference list (the TV will ignore any edits to this file) + +2024-10-01 - experimental support for Philips channel list format 125 (with automatic sync to MtkChannelList.xml) - experimental support for MtkChannelList.xml (which is part of several MediaTek based Google TVs, e.g. Philips formats 120 and 125) - Philips formats 100-125: improved decoding of non-latin characters (turkish, cyrillic, ...)