1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
package Spreadsheet::ParseXLSX;
use strict;
use warnings;
use Archive::Zip;
use Spreadsheet::ParseExcel;
use XML::Twig;
sub new {
bless {}, shift;
}
sub parse {
my $self = shift;
my ($filename) = @_;
$self->{Zip} = Archive::Zip->new;
die "Can't open $filename as zip file"
unless $self->{Zip}->read($filename) == Archive::Zip::AZ_OK;
$self->{Workbook} = $self->_parse_workbook;
}
sub _parse_workbook {
my $self = shift;
my $files = $self->_extract_files;
# ...
}
sub _extract_files {
my $self = shift;
my $type_base =
'http://schemas.openxmlformats.org/officeDocument/2006/relationships';
my $rels = $self->_parse_xml(
$self->_rels_for('')
);
my $wb_name = ($rels->find_nodes(
qq<//Relationship[\@Type="$type_base/officeDocument"]>
))[0]->att('Target');
my $wb_xml = $self->_parse_xml($wb_name);
my $path_base = $self->_base_path_for($wb_name);
my $wb_rels = $self->_parse_xml(
$self->_rels_for($wb_name)
);
my $strings_xml = $self->_parse_xml(
$path_base . ($wb_rels->find_nodes(
qq<//Relationship[\@Type="$type_base/sharedStrings"]>
))[0]->att('Target')
);
my $styles_xml = $self->_parse_xml(
$path_base . ($wb_rels->find_nodes(
qq<//Relationship[\@Type="$type_base/styles"]>
))[0]->att('Target')
);
my @worksheet_xml = map {
$self->_parse_xml($path_base . $_->att('Target'))
} $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/worksheet"]>);
my @themes_xml = map {
$self->_parse_xml($path_base . $_->att('Target'))
} $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/theme"]>);
return {
workbook => $wb_xml,
strings => $strings_xml,
styles => $styles_xml,
sheets => \@worksheet_xml,
themes => \@themes_xml,
};
}
sub _parse_xml {
my $self = shift;
my ($subfile) = @_;
my $member = $self->{Zip}->memberNamed($subfile);
die "no subfile named $subfile" unless $member;
my $xml = XML::Twig->new;
$xml->parse($member->contents);
return $xml;
}
sub _rels_for {
my $self = shift;
my ($file) = @_;
my @path = split '/', $file;
my $name = pop @path;
$name = '' unless defined $name;
push @path, '_rels';
push @path, "$name.rels";
return join '/', @path;
}
sub _base_path_for {
my $self = shift;
my ($file) = @_;
my @path = split '/', $file;
pop @path;
return join('/', @path) . '/';
}
1;
|