diff options
author | Meredith Howard <mhoward@roomag.org> | 2014-03-18 18:13:04 -0400 |
---|---|---|
committer | Meredith Howard <mhoward@roomag.org> | 2014-03-18 18:13:04 -0400 |
commit | d8e9aaa9842494483eb6f60feb85b116c039f54a (patch) | |
tree | cb8559bb8d1d6af3921ca53347ad33e3edcc0430 | |
parent | d999c50d9444a89e4746006a25f7cfe9b56cddca (diff) | |
download | spreadsheet-parsexlsx-d8e9aaa9842494483eb6f60feb85b116c039f54a.tar.gz spreadsheet-parsexlsx-d8e9aaa9842494483eb6f60feb85b116c039f54a.zip |
Parse Shared Strings in twig mode, rather than building a tree of the entire xml doc just to copy it into an array.
-rw-r--r-- | lib/Spreadsheet/ParseXLSX.pm | 38 |
1 files changed, 27 insertions, 11 deletions
```diff
diff --git a/lib/Spreadsheet/ParseXLSX.pm b/lib/Spreadsheet/ParseXLSX.pm
index 1b2ddda..07b83a2 100644
--- a/lib/Spreadsheet/ParseXLSX.pm
+++ b/lib/Spreadsheet/ParseXLSX.pm
@@ -284,14 +284,24 @@ sub _parse_shared_strings {
     my $self = shift;
     my ($strings) = @_;
 
-    return [
-        map {
-            my $node = $_;
-            # XXX this discards information about formatting within cells
-            # not sure how to represent that
-            { Text => join('', map { $_->text } $node->find_nodes('.//t')) }
-        } $strings->find_nodes('//si')
-    ];
+    my $PkgStr = [];
+
+    if ($strings) {
+        my $xml = XML::Twig->new(
+            twig_handlers => {
+                'si' => sub {
+                    my ( $twig, $si ) = @_;
+
+                    push @$PkgStr, {
+                        Text => join( '', map { $_->text } $si->find_nodes('.//t') )
+                    };
+                    $twig->purge;
+                },
+            }
+        );
+        $xml->parse( $strings );
+    }
+    return $PkgStr;
 }
 
 sub _parse_themes {
@@ -573,9 +583,15 @@ sub _extract_files {
         $zip,
         $self->_rels_for($wb_name)
     );
-    my ($strings_xml) = map {
-        $self->_parse_xml($zip, $path_base . $_->att('Target'))
-    } $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/sharedStrings"]>);
+
+    my $strings_xml = eval {
+        $zip->memberNamed( $path_base
+            .( $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/sharedStrings"]>) )[0]->att('Target')
+        )->contents;
+    };
+    warn "got strings: " . length $strings_xml;
+    warn '$@: '. $@;
+
     my $styles_xml = $self->_parse_xml(
         $zip,
         $path_base . ($wb_rels->find_nodes(
```