diff --git a/OpenMcdf.sln b/OpenMcdf.sln index f2cc4d1a..80183e9b 100644 --- a/OpenMcdf.sln +++ b/OpenMcdf.sln @@ -34,6 +34,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TestFiles", "TestFiles", "{ sources\TestFiles\report.xls = sources\TestFiles\report.xls sources\TestFiles\reportREAD.xls = sources\TestFiles\reportREAD.xls sources\TestFiles\report_name_fix.xls = sources\TestFiles\report_name_fix.xls + sources\Test\TestFiles\SampleWorkBook_bug98.xls = sources\Test\TestFiles\SampleWorkBook_bug98.xls sources\TestFiles\testbad.ole = sources\TestFiles\testbad.ole sources\Test\TestFiles\winUnicodeDictionary.doc = sources\Test\TestFiles\winUnicodeDictionary.doc sources\Test\TestFiles\wstr_presets.doc = sources\Test\TestFiles\wstr_presets.doc diff --git a/sources/OpenMcdf.Extensions/OLEProperties/OLEPropertiesContainer.cs b/sources/OpenMcdf.Extensions/OLEProperties/OLEPropertiesContainer.cs index e3b66889..0dc62fbc 100644 --- a/sources/OpenMcdf.Extensions/OLEProperties/OLEPropertiesContainer.cs +++ b/sources/OpenMcdf.Extensions/OLEProperties/OLEPropertiesContainer.cs @@ -227,9 +227,12 @@ public void Save(CFStream cfStream) } }; + PropertyFactory factory = + this.ContainerType == ContainerType.DocumentSummaryInfo ? DocumentSummaryInfoPropertyFactory.Instance : DefaultPropertyFactory.Instance; + foreach (var op in this.Properties) { - ITypedPropertyValue p = PropertyFactory.Instance.NewProperty(op.VTType, this.Context.CodePage); + ITypedPropertyValue p = factory.NewProperty(op.VTType, this.Context.CodePage, op.PropertyIdentifier); p.Value = op.Value; ps.PropertySet0.Properties.Add(p); ps.PropertySet0.PropertyIdentifierAndOffsets.Add(new PropertyIdentifierAndOffset() { PropertyIdentifier = op.PropertyIdentifier, Offset = 0 }); @@ -264,7 +267,7 @@ public void Save(CFStream cfStream) // Add the properties themselves foreach (var op in this.UserDefinedProperties.Properties) { - ITypedPropertyValue p = PropertyFactory.Instance.NewProperty(op.VTType, ps.PropertySet1.PropertyContext.CodePage); + ITypedPropertyValue p = DefaultPropertyFactory.Instance.NewProperty(op.VTType, ps.PropertySet1.PropertyContext.CodePage, op.PropertyIdentifier); p.Value = op.Value; ps.PropertySet1.Properties.Add(p); ps.PropertySet1.PropertyIdentifierAndOffsets.Add(new PropertyIdentifierAndOffset() { PropertyIdentifier = op.PropertyIdentifier, Offset = 0 }); diff --git a/sources/OpenMcdf.Extensions/OLEProperties/PropertyFactory.cs b/sources/OpenMcdf.Extensions/OLEProperties/PropertyFactory.cs index 4055a491..934291ce 100644 --- a/sources/OpenMcdf.Extensions/OLEProperties/PropertyFactory.cs +++ b/sources/OpenMcdf.Extensions/OLEProperties/PropertyFactory.cs @@ -1,35 +1,20 @@ using OpenMcdf.Extensions.OLEProperties.Interfaces; using System; -using System.Collections.Generic; using System.Text; using System.IO; -using System.Threading; namespace OpenMcdf.Extensions.OLEProperties { - internal class PropertyFactory + internal abstract class PropertyFactory { - private static ThreadLocal instance - = new ThreadLocal(() => { return new PropertyFactory(); }); - - public static PropertyFactory Instance + static PropertyFactory() { - get - { - #if NETSTANDARD2_0_OR_GREATER - Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); #endif - return instance.Value; - } - } - - private PropertyFactory() - { - } - public ITypedPropertyValue NewProperty(VTPropertyType vType, int codePage, bool isVariant = false) + public ITypedPropertyValue NewProperty(VTPropertyType vType, int codePage, uint propertyIdentifier, bool isVariant = false) { ITypedPropertyValue pr = null; @@ -75,9 +60,11 @@ public ITypedPropertyValue NewProperty(VTPropertyType vType, int codePage, bool pr = new VT_UI8_Property(vType, isVariant); break; case VTPropertyType.VT_BSTR: - case VTPropertyType.VT_LPSTR: pr = new VT_LPSTR_Property(vType, codePage, isVariant); break; + case VTPropertyType.VT_LPSTR: + pr = CreateLpstrProperty(vType, codePage, propertyIdentifier, isVariant); + break; case VTPropertyType.VT_LPWSTR: pr = new VT_LPWSTR_Property(vType, codePage, isVariant); break; @@ -94,7 +81,7 @@ public ITypedPropertyValue NewProperty(VTPropertyType vType, int codePage, bool pr = new VT_EMPTY_Property(vType, isVariant); break; case VTPropertyType.VT_VARIANT_VECTOR: - pr = new VT_VariantVector(vType, codePage, isVariant); + pr = new VT_VariantVector(vType, codePage, isVariant, this, propertyIdentifier); break; case VTPropertyType.VT_CF: pr = new VT_CF_Property(vType, isVariant); @@ -113,6 +100,11 @@ public ITypedPropertyValue NewProperty(VTPropertyType vType, int codePage, bool return pr; } + protected virtual ITypedPropertyValue CreateLpstrProperty(VTPropertyType vType, int codePage, uint propertyIdentifier, bool isVariant) + { + return new VT_LPSTR_Property(vType, codePage, isVariant); + } + #region Property implementations private class VT_EMPTY_Property : TypedPropertyValue @@ -399,7 +391,7 @@ public override void WriteScalarValue(System.IO.BinaryWriter bw, DateTime pValue } } - private class VT_LPSTR_Property : TypedPropertyValue + protected class VT_LPSTR_Property : TypedPropertyValue { private byte[] data; @@ -498,6 +490,14 @@ public override void WriteScalarValue(BinaryWriter bw, string pValue) } } + protected class VT_Unaligned_LPSTR_Property : VT_LPSTR_Property + { + public VT_Unaligned_LPSTR_Property(VTPropertyType vType, int codePage, bool isVariant) : base(vType, codePage, isVariant) + { + this.NeedsPadding = false; + } + } + private class VT_LPWSTR_Property : TypedPropertyValue { @@ -726,10 +726,15 @@ public override void WriteScalarValue(BinaryWriter bw, object pValue) private class VT_VariantVector : TypedPropertyValue { private readonly int codePage; + private readonly PropertyFactory factory; + private readonly uint propertyIdentifier; - public VT_VariantVector(VTPropertyType vType, int codePage, bool isVariant) : base(vType, isVariant) + public VT_VariantVector(VTPropertyType vType, int codePage, bool isVariant, PropertyFactory factory, uint propertyIdentifier) : base(vType, isVariant) { this.codePage = codePage; + this.factory = factory; + this.propertyIdentifier = propertyIdentifier; + this.NeedsPadding = false; } public override object ReadScalarValue(System.IO.BinaryReader br) @@ -737,7 +742,7 @@ public override object ReadScalarValue(System.IO.BinaryReader br) VTPropertyType vType = (VTPropertyType)br.ReadUInt16(); br.ReadUInt16(); // Ushort Padding - ITypedPropertyValue p = PropertyFactory.Instance.NewProperty(vType, codePage, true); + ITypedPropertyValue p = factory.NewProperty(vType, codePage, propertyIdentifier, true); p.Read(br); return p; } @@ -753,4 +758,27 @@ public override void WriteScalarValue(BinaryWriter bw, object pValue) #endregion } + + // The default property factory. + internal sealed class DefaultPropertyFactory : PropertyFactory + { + public static PropertyFactory Instance { get; } = new DefaultPropertyFactory(); + } + + // A separate factory for DocumentSummaryInformation properties, to handle special cases with unaligned strings. + internal sealed class DocumentSummaryInfoPropertyFactory : PropertyFactory + { + public static PropertyFactory Instance { get; } = new DocumentSummaryInfoPropertyFactory(); + + protected override ITypedPropertyValue CreateLpstrProperty(VTPropertyType vType, int codePage, uint propertyIdentifier, bool isVariant) + { + // PIDDSI_HEADINGPAIR and PIDDSI_DOCPARTS use unaligned (unpadded) strings - the others are padded + if (propertyIdentifier == 0x0000000C || propertyIdentifier == 0x0000000D) + { + return new VT_Unaligned_LPSTR_Property(vType, codePage, isVariant); + } + + return base.CreateLpstrProperty(vType, codePage, propertyIdentifier, isVariant); + } + } } diff --git a/sources/OpenMcdf.Extensions/OLEProperties/PropertySetStream.cs b/sources/OpenMcdf.Extensions/OLEProperties/PropertySetStream.cs index 8ee40ce9..673f82e4 100644 --- a/sources/OpenMcdf.Extensions/OLEProperties/PropertySetStream.cs +++ b/sources/OpenMcdf.Extensions/OLEProperties/PropertySetStream.cs @@ -50,6 +50,9 @@ public void Read(System.IO.BinaryReader br) PropertySet0.Size = br.ReadUInt32(); PropertySet0.NumProperties = br.ReadUInt32(); + // Create appropriate property factory based on the stream type + Guid docSummaryGuid = new Guid(WellKnownFMTID.FMTID_DocSummaryInformation); + PropertyFactory factory = FMTID0 == docSummaryGuid ? DocumentSummaryInfoPropertyFactory.Instance : DefaultPropertyFactory.Instance; // Read property offsets (P0) for (int i = 0; i < PropertySet0.NumProperties; i++) @@ -66,7 +69,7 @@ public void Read(System.IO.BinaryReader br) for (int i = 0; i < PropertySet0.NumProperties; i++) { br.BaseStream.Seek(Offset0 + PropertySet0.PropertyIdentifierAndOffsets[i].Offset, System.IO.SeekOrigin.Begin); - PropertySet0.Properties.Add(ReadProperty(PropertySet0.PropertyIdentifierAndOffsets[i].PropertyIdentifier, PropertySet0.PropertyContext.CodePage, br)); + PropertySet0.Properties.Add(ReadProperty(PropertySet0.PropertyIdentifierAndOffsets[i].PropertyIdentifier, PropertySet0.PropertyContext.CodePage, br, factory)); } if (NumPropertySets == 2) @@ -91,7 +94,7 @@ public void Read(System.IO.BinaryReader br) for (int i = 0; i < PropertySet1.NumProperties; i++) { br.BaseStream.Seek(Offset1 + PropertySet1.PropertyIdentifierAndOffsets[i].Offset, System.IO.SeekOrigin.Begin); - PropertySet1.Properties.Add(ReadProperty(PropertySet1.PropertyIdentifierAndOffsets[i].PropertyIdentifier, PropertySet1.PropertyContext.CodePage, br)); + PropertySet1.Properties.Add(ReadProperty(PropertySet1.PropertyIdentifierAndOffsets[i].PropertyIdentifier, PropertySet1.PropertyContext.CodePage, br, DefaultPropertyFactory.Instance)); } } } @@ -222,14 +225,14 @@ public void Write(System.IO.BinaryWriter bw) - private IProperty ReadProperty(uint propertyIdentifier, int codePage, BinaryReader br) + private IProperty ReadProperty(uint propertyIdentifier, int codePage, BinaryReader br, PropertyFactory factory) { if (propertyIdentifier != 0) { VTPropertyType vType = (VTPropertyType)br.ReadUInt16(); br.ReadUInt16(); // Ushort Padding - ITypedPropertyValue pr = PropertyFactory.Instance.NewProperty(vType, codePage); + ITypedPropertyValue pr = factory.NewProperty(vType, codePage, propertyIdentifier); pr.Read(br); return pr; diff --git a/sources/OpenMcdf.Extensions/OLEProperties/TypedPropertyValue.cs b/sources/OpenMcdf.Extensions/OLEProperties/TypedPropertyValue.cs index f0314f91..dece59d6 100644 --- a/sources/OpenMcdf.Extensions/OLEProperties/TypedPropertyValue.cs +++ b/sources/OpenMcdf.Extensions/OLEProperties/TypedPropertyValue.cs @@ -44,6 +44,8 @@ public bool IsVariant get { return isVariant; } } + protected virtual bool NeedsPadding { get; set; } = true; + private PropertyDimensions CheckPropertyDimensions(VTPropertyType vtType) { if ((((ushort)vtType) & 0x1000) != 0) @@ -73,40 +75,50 @@ public virtual object Value public void Read(System.IO.BinaryReader br) { long currentPos = br.BaseStream.Position; - int size = 0; - int m = 0; switch (this.PropertyDimensions) { case PropertyDimensions.IsScalar: - this.propertyValue = ReadScalarValue(br); - size = (int)(br.BaseStream.Position - currentPos); + { + this.propertyValue = ReadScalarValue(br); + int size = (int)(br.BaseStream.Position - currentPos); - m = (int)size % 4; + int m = (int)size % 4; + + if (m > 0 && this.NeedsPadding) + br.ReadBytes(4 - m); // padding + } - if (m > 0 && !IsVariant) - br.ReadBytes(4 - m); // padding break; case PropertyDimensions.IsVector: - uint nItems = br.ReadUInt32(); + { + uint nItems = br.ReadUInt32(); - List res = new List(); + List res = new List(); - for (int i = 0; i < nItems; i++) - { - T s = ReadScalarValue(br); + for (int i = 0; i < nItems; i++) + { + T s = ReadScalarValue(br); - res.Add(s); - } + res.Add(s); - this.propertyValue = res; - size = (int)(br.BaseStream.Position - currentPos); + // The padding in a vector can be per-item + int itemSize = (int)(br.BaseStream.Position - currentPos); - m = (int)size % 4; - if (m > 0 && !IsVariant) - br.ReadBytes(4 - m); // padding + int pad = (int)itemSize % 4; + if (pad > 0 && this.NeedsPadding) + br.ReadBytes(4 - pad); // padding + } + + this.propertyValue = res; + int size = (int)(br.BaseStream.Position - currentPos); + + int m = (int)size % 4; + if (m > 0 && this.NeedsPadding) + br.ReadBytes(4 - m); // padding + } break; default: break; @@ -120,7 +132,6 @@ public void Write(BinaryWriter bw) long currentPos = bw.BaseStream.Position; int size = 0; int m = 0; - bool needsPadding = HasPadding(); switch (this.PropertyDimensions) { @@ -133,7 +144,7 @@ public void Write(BinaryWriter bw) size = (int)(bw.BaseStream.Position - currentPos); m = (int)size % 4; - if (m > 0 && needsPadding) + if (m > 0 && this.NeedsPadding) for (int i = 0; i < 4 - m; i++) // padding bw.Write((byte)0); break; @@ -147,37 +158,23 @@ public void Write(BinaryWriter bw) for (int i = 0; i < ((List)this.propertyValue).Count; i++) { WriteScalarValue(bw, ((List)this.propertyValue)[i]); + + size = (int)(bw.BaseStream.Position - currentPos); + m = (int)size % 4; + + if (m > 0 && this.NeedsPadding) + for (int q = 0; q < 4 - m; q++) // padding + bw.Write((byte)0); } size = (int)(bw.BaseStream.Position - currentPos); m = (int)size % 4; - if (m > 0 && needsPadding) - for (int i = 0; i < m; i++) // padding + if (m > 0 && this.NeedsPadding) + for (int i = 0; i < 4 - m; i++) // padding bw.Write((byte)0); break; } } - - private bool HasPadding() - { - - VTPropertyType vt = (VTPropertyType)((ushort)this.VTType & 0x00FF); - - switch (vt) - { - case VTPropertyType.VT_LPSTR: - if (this.IsVariant) return false; - if (dim == PropertyDimensions.IsVector) return false; - break; - case VTPropertyType.VT_VARIANT_VECTOR: - if (dim == PropertyDimensions.IsVector) return false; - break; - default: - return true; - } - - return true; - } } } diff --git a/sources/Test/OpenMcdf.Extensions.Test/OLEPropertiesExtensionsTest.cs b/sources/Test/OpenMcdf.Extensions.Test/OLEPropertiesExtensionsTest.cs index 51a68dd7..824bc0e5 100644 --- a/sources/Test/OpenMcdf.Extensions.Test/OLEPropertiesExtensionsTest.cs +++ b/sources/Test/OpenMcdf.Extensions.Test/OLEPropertiesExtensionsTest.cs @@ -3,6 +3,7 @@ using System.IO; using Microsoft.VisualStudio.TestTools.UnitTesting; using System.Linq; +using System.Collections.Generic; using OpenMcdf.Extensions.OLEProperties; namespace OpenMcdf.Extensions.Test @@ -443,6 +444,28 @@ public void Test_DOCUMENT_SUMMARY_INFO_ADD_CUSTOM() } } + + // Try to read a document which contains Vector/String properties + // refs https://github.com/ironfede/openmcdf/issues/98 + [TestMethod] + public void Test_SUMMARY_INFO_READ_LPWSTRING_VECTOR() + { + using (CompoundFile cf = new CompoundFile("SampleWorkBook_bug98.xls")) + { + var co = cf.RootStorage.GetStream("\u0005DocumentSummaryInformation").AsOLEPropertiesContainer(); + + var docPartsProperty = co.Properties.FirstOrDefault(property => property.PropertyIdentifier == 13); //13 == PIDDSI_DOCPARTS + + Assert.IsNotNull(docPartsProperty); + + var docPartsValues = docPartsProperty.Value as IEnumerable; + Assert.AreEqual(3, docPartsValues.Count()); + Assert.AreEqual("Sheet1\0", docPartsValues.ElementAt(0)); + Assert.AreEqual("Sheet2\0", docPartsValues.ElementAt(1)); + Assert.AreEqual("Sheet3\0", docPartsValues.ElementAt(2)); + } + } + [TestMethod] public void Test_CLSID_PROPERTY() { @@ -452,6 +475,7 @@ public void Test_CLSID_PROPERTY() var co = cf.RootStorage.GetStream("\u0005C3teagxwOttdbfkuIaamtae3Ie").AsOLEPropertiesContainer(); var clsidProp = co.Properties.First(x => x.PropertyName == "DocumentID"); Assert.AreEqual(guid, clsidProp.Value); + } } } diff --git a/sources/Test/TestFiles/SampleWorkBook_bug98.xls b/sources/Test/TestFiles/SampleWorkBook_bug98.xls new file mode 100644 index 00000000..063c04f5 Binary files /dev/null and b/sources/Test/TestFiles/SampleWorkBook_bug98.xls differ