From d8e9aaa9842494483eb6f60feb85b116c039f54a Mon Sep 17 00:00:00 2001
From: Meredith Howard
Date: Tue, 18 Mar 2014 18:13:04 -0400
Subject: Parse Shared Strings in twig mode, rather than building a tree of the entire xml doc just to copy it into an array.

---
 lib/Spreadsheet/ParseXLSX.pm | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/lib/Spreadsheet/ParseXLSX.pm b/lib/Spreadsheet/ParseXLSX.pm
index 1b2ddda..07b83a2 100644
--- a/lib/Spreadsheet/ParseXLSX.pm
+++ b/lib/Spreadsheet/ParseXLSX.pm
@@ -284,14 +284,24 @@ sub _parse_shared_strings {
     my $self = shift;
     my ($strings) = @_;
 
-    return [
-        map {
-            my $node = $_;
-            # XXX this discards information about formatting within cells
-            # not sure how to represent that
-            { Text => join('', map { $_->text } $node->find_nodes('.//t')) }
-        } $strings->find_nodes('//si')
-    ];
+    my $PkgStr = [];
+
+    if ($strings) {
+        my $xml = XML::Twig->new(
+            twig_handlers => {
+                'si' => sub {
+                    my ( $twig, $si ) = @_;
+
+                    push @$PkgStr, {
+                        Text => join( '', map { $_->text } $si->find_nodes('.//t') )
+                    };
+                    $twig->purge;
+                },
+            }
+        );
+        $xml->parse( $strings );
+    }
+    return $PkgStr;
 }
 
 sub _parse_themes {
@@ -573,9 +583,15 @@ sub _extract_files {
         $zip,
         $self->_rels_for($wb_name)
     );
-    my ($strings_xml) = map {
-        $self->_parse_xml($zip, $path_base . $_->att('Target'))
-    } $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/sharedStrings"]>);
+
+    my $strings_xml = eval {
+        $zip->memberNamed( $path_base
+            .( $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/sharedStrings"]>) )[0]->att('Target')
+        )->contents;
+    };
+    warn "got strings: " . length $strings_xml;
+    warn '$@: '. $@;
+
     my $styles_xml = $self->_parse_xml(
         $zip,
         $path_base . ($wb_rels->find_nodes(
-- 
cgit v1.2.3-54-g00ecf