summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMeredith Howard <mhoward@roomag.org>2014-03-18 18:13:04 -0400
committerMeredith Howard <mhoward@roomag.org>2014-03-18 18:13:04 -0400
commitd8e9aaa9842494483eb6f60feb85b116c039f54a (patch)
treecb8559bb8d1d6af3921ca53347ad33e3edcc0430
parentd999c50d9444a89e4746006a25f7cfe9b56cddca (diff)
downloadspreadsheet-parsexlsx-d8e9aaa9842494483eb6f60feb85b116c039f54a.tar.gz
spreadsheet-parsexlsx-d8e9aaa9842494483eb6f60feb85b116c039f54a.zip
Parse shared strings with XML::Twig handlers (one <si> element at a time, purging the twig after each), rather than building a tree of the entire XML document just to copy it into an array.
-rw-r--r--lib/Spreadsheet/ParseXLSX.pm38
1 files changed, 27 insertions, 11 deletions
diff --git a/lib/Spreadsheet/ParseXLSX.pm b/lib/Spreadsheet/ParseXLSX.pm
index 1b2ddda..07b83a2 100644
--- a/lib/Spreadsheet/ParseXLSX.pm
+++ b/lib/Spreadsheet/ParseXLSX.pm
@@ -284,14 +284,24 @@ sub _parse_shared_strings {
my $self = shift;
my ($strings) = @_;
- return [
- map {
- my $node = $_;
- # XXX this discards information about formatting within cells
- # not sure how to represent that
- { Text => join('', map { $_->text } $node->find_nodes('.//t')) }
- } $strings->find_nodes('//si')
- ];
+ my $PkgStr = [];
+
+ if ($strings) {
+ my $xml = XML::Twig->new(
+ twig_handlers => {
+ 'si' => sub {
+ my ( $twig, $si ) = @_;
+
+ push @$PkgStr, {
+ Text => join( '', map { $_->text } $si->find_nodes('.//t') )
+ };
+ $twig->purge;
+ },
+ }
+ );
+ $xml->parse( $strings );
+ }
+ return $PkgStr;
}
sub _parse_themes {
@@ -573,9 +583,15 @@ sub _extract_files {
$zip,
$self->_rels_for($wb_name)
);
- my ($strings_xml) = map {
- $self->_parse_xml($zip, $path_base . $_->att('Target'))
- } $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/sharedStrings"]>);
+
+ my $strings_xml = eval {
+ $zip->memberNamed( $path_base
+ .( $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/sharedStrings"]>) )[0]->att('Target')
+ )->contents;
+ };
+ warn "got strings: " . length $strings_xml;
+ warn '$@: '. $@;
+
my $styles_xml = $self->_parse_xml(
$zip,
$path_base . ($wb_rels->find_nodes(