Skip to content

Commit

Permalink
works a bit
Browse files Browse the repository at this point in the history
  • Loading branch information
richardlehane committed Dec 6, 2014
1 parent cc9331a commit a1e4192
Show file tree
Hide file tree
Showing 14 changed files with 551 additions and 322 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ Example usage:

Install with `go get github.com/richardlehane/msoleps`

*I'm being developed and am not yet ready...*
*Status: currently works for simple property sets such as SummaryInformation. Not all types are implemented yet (e.g. Vector, Array), and property set bags are not implemented yet.*

[![Build Status](https://travis-ci.org/richardlehane/msoleps.png?branch=master)](https://travis-ci.org/richardlehane/msoleps)
146 changes: 125 additions & 21 deletions msoleps.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,40 @@
// Copyright 2014 Richard Lehane. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package msoleps implements a reader for Microsoft OLE Property Set Data structures
// (http://msdn.microsoft.com/en-au/library/dd942421.aspx), a generic persistence format
// for simple typed metadata.

// Example:
// file, _ := os.Open("test/test.doc")
// defer file.Close()
// doc, err := mscfb.NewReader(file)
// if err != nil {
// log.Fatal(err)
// }
// props := msoleps.New()
// for entry, err := doc.Next(); err == nil; entry, err = doc.Next() {
// if msoleps.IsMSOLEPS(entry.Initial) {
// if oerr := props.Reset(doc); oerr != nil {
// log.Fatal(oerr)
// }
// for _, prop := range props.Property {
// fmt.Printf("Name: %s; Type: %s; Value: %v", prop.Name, prop.Type(), prop)
// }
// }
// }

package msoleps

import (
Expand All @@ -6,7 +43,6 @@ import (
"errors"
"io"

"github.com/richardlehane/msoleps/sets"
"github.com/richardlehane/msoleps/types"
)

Expand All @@ -16,23 +52,22 @@ var (
ErrSeek = errors.New("msoleps: can't seek backwards")
)

// check the first uint16 of an MSCFB name to see if this is a MSOLEPS stream
// IsMSOLEPS reports whether i, the first uint16 character of an mscfb entry
// name, identifies the entry as an MSOLEPS stream, i.e. whether it equals 0x0005.
func IsMSOLEPS(i uint16) bool {
	return i == 0x0005
}

// Reader is a reader for MS OLE Property Set Data structures
type Reader struct {
Property []*Property
CLSID types.Guid
SystemID uint32

b *bytes.Buffer
buf []byte
pSetStream *propertySetStream
pSets [2]*propertySet
b *bytes.Buffer
buf []byte
*propertySetStream
pSets [2]*propertySet
}

func New() *Reader {
Expand All @@ -58,35 +93,35 @@ func (r *Reader) start(rdr io.Reader) error {
}
r.buf = r.b.Bytes()
// read the header (property stream details)
r.pSetStream = &propertySetStream{}
if err := binary.Read(r.b, binary.LittleEndian, r.pSetStream); err != nil {
return ErrRead
pss, err := makePropertySetStream(r.buf)
if err != nil {
return err
}
// sanity checks to find obvious errors
switch {
case r.pSetStream.ByteOrder != 0xFFFE, r.pSetStream.Version > 0x0001, r.pSetStream.NumPropertySets > 0x00000002:
case pss.byteOrder != 0xFFFE, pss.version > 0x0001, pss.numPropertySets > 0x00000002:
return ErrFormat
}
r.propertySetStream = pss
// identify the property identifiers and offsets
ps, err := r.getPropertySet(r.pSetStream.OffsetA)
ps, err := r.getPropertySet(pss.offsetA)
if err != nil {
return err
}
plen := len(ps.idsOffs)
r.pSets[0] = ps
if r.pSetStream.NumPropertySets == 2 {
psb, err := r.getPropertySet(r.pSetStream.OffsetB)
var psb *propertySet
if pss.numPropertySets == 2 {
psb, err = r.getPropertySet(pss.offsetB)
if err != nil {
return err
}
r.pSets[1] = psb
plen += len(psb.idsOffs)
}
r.Property = make([]*Property, plen)
var dict map[uint32]string
if r.pSetStream.FmtidA == types.MustGuidFromString("{F29F85E0-4FF9-1068-AB91-08002B27B3D9}") {
dict = sets.SummaryInformation.Dict
} else {
dict, ok := propertySets[pss.fmtidA]
if !ok {
dict = ps.dict
if dict == nil {
dict = make(map[uint32]string)
Expand All @@ -95,14 +130,36 @@ func (r *Reader) start(rdr io.Reader) error {
for i, v := range ps.idsOffs {
r.Property[i] = &Property{}
r.Property[i].Name = dict[v.id]
t, _ := types.Evaluate(r.buf[int(v.offset+r.pSetStream.OffsetA):])
t, _ := types.Evaluate(r.buf[int(v.offset+pss.offsetA):])
if t.Type() == "CodeString" {
cs := t.(*types.CodeString)
cs.SetId(ps.code)
t = types.Type(cs)
}
r.Property[i].T = t
}
if pss.numPropertySets != 2 {
return nil
}
dict, ok = propertySets[pss.fmtidB]
if !ok {
dict = psb.dict
if dict == nil {
dict = make(map[uint32]string)
}
}
for i, v := range psb.idsOffs {
i += len(ps.idsOffs)
r.Property[i] = &Property{}
r.Property[i].Name = dict[v.id]
t, _ := types.Evaluate(r.buf[int(v.offset+pss.offsetB):])
if t.Type() == "CodeString" {
cs := t.(*types.CodeString)
cs.SetId(psb.code)
t = types.Type(cs)
}
r.Property[i].T = t
}
return nil
}

Expand All @@ -125,7 +182,54 @@ func (r *Reader) getPropertySet(o uint32) (*propertySet, error) {
}
}
if dictOff > 0 {
dictOff++ // just letting it compile - unfinished bit
var err error
pSet.dict, err = r.getDictionary(dictOff+o, pSet.code)
if err != nil {
return nil, err
}
}
return pSet, nil
}

// getDictionary decodes the dictionary (property identifier -> name) stored
// at byte offset o of r.buf.
//
// The dictionary starts with a little-endian uint32 entry count. Each entry is
// a uint32 property identifier, a uint32 length and the name itself. When code
// is 0x04B0 the name is decoded with types.MakeUnicode, the length counts
// 2-byte characters, and an odd length is followed by 2 bytes of padding.
// Otherwise the name is decoded with types.MakeCodeString, tagged with code,
// and the length counts single bytes.
//
// It returns (nil, nil) when the entry count is zero, and ErrFormat when the
// offset or any entry falls outside the buffer or a name cannot be decoded.
func (r *Reader) getDictionary(o uint32, code types.CodePageID) (map[uint32]string, error) {
	// o comes from the file: reject it rather than panic on the slice below.
	if uint64(o) > uint64(len(r.buf)) {
		return nil, ErrFormat
	}
	b := r.buf[int(o):]
	if len(b) < 4 {
		return nil, ErrFormat
	}
	num := binary.LittleEndian.Uint32(b[:4])
	if num == 0 {
		return nil, nil
	}
	size := uint64(len(b))
	dict := make(map[uint32]string)
	// e is the offset of the next entry within b. It is kept as a uint64 so
	// that adding an untrusted 32-bit length can never wrap around.
	e := uint64(4)
	for i := uint32(0); i < num; i++ {
		// Each entry needs at least its identifier and length fields.
		if e > size || size-e < 8 {
			return nil, ErrFormat
		}
		entry := b[e:]
		id := binary.LittleEndian.Uint32(entry[:4])
		l := uint64(binary.LittleEndian.Uint32(entry[4:8]))
		var s types.Type
		var err error
		if code == 0x04B0 {
			// The decoder is handed the slice starting at the length field.
			s, err = types.MakeUnicode(entry[4:])
			if err != nil {
				return nil, ErrFormat
			}
			e += 8 + l*2
			if l%2 != 0 {
				e += 2 // skip the padding after an odd number of 2-byte characters
			}
		} else {
			s, err = types.MakeCodeString(entry[4:])
			if err != nil {
				return nil, ErrFormat
			}
			cs, ok := s.(*types.CodeString)
			if !ok {
				return nil, ErrFormat
			}
			cs.SetId(code)
			s = cs
			e += 8 + l
		}
		dict[id] = s.String()
	}
	return dict, nil
}
13 changes: 9 additions & 4 deletions msoleps_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package msoleps

import (
"fmt"
"os"
"testing"
)
Expand All @@ -19,9 +18,6 @@ func testFile(t *testing.T, path string) *Reader {
if err != nil {
t.Errorf("Error opening file; Returns error: ", err)
}
for _, prop := range doc.Property {
fmt.Printf("%s: %s\n", prop.Name, prop)
}
return doc
}

Expand All @@ -30,18 +26,27 @@ func TestDocSum(t *testing.T) {
if len(doc.Property) != 12 {
t.Error("Expecting 12 properties, got %d", len(doc.Property))
}
if doc.Property[1].String() != "Australian Broadcasting Corporation" {
t.Errorf("Expecting 'ABC' as second property, got %s", doc.Property[1])
}
}

// TestSum reads the file at testSum and checks that it yields 17 properties,
// the sixth of which renders as "Normal".
func TestSum(t *testing.T) {
	doc := testFile(t, testSum)
	// Abort on a count mismatch: indexing below would panic on a short slice.
	if len(doc.Property) != 17 {
		t.Fatalf("Expecting 17 properties, got %d", len(doc.Property))
	}
	if got := doc.Property[5].String(); got != "Normal" {
		t.Errorf("Expecting 'Normal' as sixth property, got %s", got)
	}
}

// TestSum1 reads the file at testSum1 and checks that it yields 3 properties,
// the first of which renders as "Mail".
func TestSum1(t *testing.T) {
	doc := testFile(t, testSum1)
	// Abort on a count mismatch: indexing below would panic on an empty slice.
	if len(doc.Property) != 3 {
		t.Fatalf("Expecting 3 properties, got %d", len(doc.Property))
	}
	if got := doc.Property[0].String(); got != "Mail" {
		t.Errorf("Expecting 'Mail' as first property, got %s", got)
	}
}
59 changes: 51 additions & 8 deletions property.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
// Copyright 2014 Richard Lehane. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package msoleps

import "github.com/richardlehane/msoleps/types"
import (
"encoding/binary"
"github.com/richardlehane/msoleps/types"
)

type Property struct {
Name string
Expand All @@ -16,15 +33,41 @@ func (p *Property) Type() string {
}

type propertySetStream struct {
ByteOrder uint16
Version uint16
byteOrder uint16
version uint16
SystemID uint32
CLSID types.Guid
NumPropertySets uint32
FmtidA types.Guid
OffsetA uint32
FmtidB types.Guid
OffsetB uint32
numPropertySets uint32
fmtidA types.Guid
offsetA uint32
fmtidB types.Guid // This can be absent (i.e. not null)
offsetB uint32
}

// makePropertySetStream decodes the property set stream header found at the
// start of b.
//
// Fields are read little-endian at fixed offsets: byte order (0), version (2),
// system identifier (4), CLSID (8), number of property sets (24), then the
// FMTID (28) and offset (44) of the first property set. The second FMTID (48)
// and offset (64) are read only when the header declares exactly two property
// sets.
//
// It returns ErrFormat if b is too short for the fields that have to be read
// or if a GUID cannot be decoded. The decoded values themselves are not
// validated here.
func makePropertySetStream(b []byte) (*propertySetStream, error) {
	if len(b) < 48 {
		return nil, ErrFormat
	}
	// guidAt decodes the GUID that starts at off, reporting ErrFormat rather
	// than panicking if decoding fails or yields an unexpected type.
	guidAt := func(off int) (types.Guid, error) {
		var g types.Guid
		t, err := types.MakeGuid(b[off:])
		if err != nil {
			return g, ErrFormat
		}
		g, ok := t.(types.Guid)
		if !ok {
			return g, ErrFormat
		}
		return g, nil
	}

	var err error
	ps := &propertySetStream{}
	ps.byteOrder = binary.LittleEndian.Uint16(b[:2])
	ps.version = binary.LittleEndian.Uint16(b[2:4])
	ps.SystemID = binary.LittleEndian.Uint32(b[4:8])
	if ps.CLSID, err = guidAt(8); err != nil {
		return nil, err
	}
	ps.numPropertySets = binary.LittleEndian.Uint32(b[24:28])
	if ps.fmtidA, err = guidAt(28); err != nil {
		return nil, err
	}
	ps.offsetA = binary.LittleEndian.Uint32(b[44:48])
	// The second FMTID/offset pair is only present when two sets are declared.
	if ps.numPropertySets != 2 {
		return ps, nil
	}
	if len(b) < 68 {
		return nil, ErrFormat
	}
	if ps.fmtidB, err = guidAt(48); err != nil {
		return nil, err
	}
	ps.offsetB = binary.LittleEndian.Uint32(b[64:68])
	return ps, nil
}

type propertySet struct {
Expand Down
8 changes: 0 additions & 8 deletions sets/sets.go

This file was deleted.

27 changes: 0 additions & 27 deletions sets/summaryInformation.go

This file was deleted.

Loading

0 comments on commit a1e4192

Please sign in to comment.