From 15859bd34334223f22ceb137b79a9cf5e8d8cc99 Mon Sep 17 00:00:00 2001 From: Jesse Luehrs Date: Sun, 22 May 2016 05:42:15 -0400 Subject: not all files have styles either --- lib/Spreadsheet/ParseXLSX.pm | 127 +++++++++++++++++++++++++++++-------------- t/bug-32.t | 18 +++++- t/data/bug-32-2.xlsx | Bin 0 -> 533859 bytes 3 files changed, 102 insertions(+), 43 deletions(-) create mode 100644 t/data/bug-32-2.xlsx diff --git a/lib/Spreadsheet/ParseXLSX.pm b/lib/Spreadsheet/ParseXLSX.pm index 34ae21e..2d46173 100644 --- a/lib/Spreadsheet/ParseXLSX.pm +++ b/lib/Spreadsheet/ParseXLSX.pm @@ -408,6 +408,7 @@ sub _parse_sheet { my $format_idx = $cell->att('s') || 0; my $format = $sheet->{_Book}{Format}[$format_idx]; + die "unknown format $format_idx" unless $format; $format->{Merged} = !!grep { $row == $_->[0] && $col == $_->[1] } @merged_cells; @@ -510,6 +511,81 @@ sub _parse_styles { my $self = shift; my ($workbook, $styles) = @_; + # these defaults are from + # http://social.msdn.microsoft.com/Forums/en-US/oxmlsdk/thread/e27aaf16-b900-4654-8210-83c5774a179c + my %default_format_str = ( + 0 => 'GENERAL', + 1 => '0', + 2 => '0.00', + 3 => '#,##0', + 4 => '#,##0.00', + 5 => '$#,##0_);($#,##0)', + 6 => '$#,##0_);[Red]($#,##0)', + 7 => '$#,##0.00_);($#,##0.00)', + 8 => '$#,##0.00_);[Red]($#,##0.00)', + 9 => '0%', + 10 => '0.00%', + 11 => '0.00E+00', + 12 => '# ?/?', + 13 => '# ??/??', + 14 => 'm/d/yyyy', + 15 => 'd-mmm-yy', + 16 => 'd-mmm', + 17 => 'mmm-yy', + 18 => 'h:mm AM/PM', + 19 => 'h:mm:ss AM/PM', + 20 => 'h:mm', + 21 => 'h:mm:ss', + 22 => 'm/d/yyyy h:mm', + 37 => '#,##0_);(#,##0)', + 38 => '#,##0_);[Red](#,##0)', + 39 => '#,##0.00_);(#,##0.00)', + 40 => '#,##0.00_);[Red](#,##0.00)', + 45 => 'mm:ss', + 46 => '[h]:mm:ss', + 47 => 'mm:ss.0', + 48 => '##0.0E+0', + 49 => '@', + ); + + if (!$styles) { + # XXX i guess? + my $font = Spreadsheet::ParseExcel::Font->new( + Height => 12, + Color => '#000000', + Name => '', + ); + my $format = Spreadsheet::ParseExcel::Format->new( + IgnoreFont => 1, + IgnoreFill => 1, + IgnoreBorder => 1, + IgnoreAlignment => 1, + IgnoreNumberFormat => 1, + IgnoreProtection => 1, + FontNo => 0, + Font => $font, + FmtIdx => 0, + Lock => 1, + Hidden => 0, + AlignH => 0, + Wrap => 0, + AlignV => 2, + Rotate => 0, + Indent => 0, + Shrink => 0, + BdrStyle => [0, 0, 0, 0], + BdrColor => [undef, undef, undef, undef], + BdrDiag => [0, 0, undef], + Fill => [0, undef, undef], + ); + + return { + FormatStr => \%default_format_str, + Font => [ $font ], + Format => [ $format ], + }; + } + my %halign = ( center => 2, centerContinuous => 6, @@ -613,41 +689,8 @@ sub _parse_styles { } } $styles->find_nodes('//s:borders/s:border'); - # these defaults are from - # http://social.msdn.microsoft.com/Forums/en-US/oxmlsdk/thread/e27aaf16-b900-4654-8210-83c5774a179c my %format_str = ( - 0 => 'GENERAL', - 1 => '0', - 2 => '0.00', - 3 => '#,##0', - 4 => '#,##0.00', - 5 => '$#,##0_);($#,##0)', - 6 => '$#,##0_);[Red]($#,##0)', - 7 => '$#,##0.00_);($#,##0.00)', - 8 => '$#,##0.00_);[Red]($#,##0.00)', - 9 => '0%', - 10 => '0.00%', - 11 => '0.00E+00', - 12 => '# ?/?', - 13 => '# ??/??', - 14 => 'm/d/yyyy', - 15 => 'd-mmm-yy', - 16 => 'd-mmm', - 17 => 'mmm-yy', - 18 => 'h:mm AM/PM', - 19 => 'h:mm:ss AM/PM', - 20 => 'h:mm', - 21 => 'h:mm:ss', - 22 => 'm/d/yyyy h:mm', - 37 => '#,##0_);(#,##0)', - 38 => '#,##0_);[Red](#,##0)', - 39 => '#,##0.00_);(#,##0.00)', - 40 => '#,##0.00_);[Red](#,##0.00)', - 45 => 'mm:ss', - 46 => '[h]:mm:ss', - 47 => 'mm:ss.0', - 48 => '##0.0E+0', - 49 => '@', + %default_format_str, (map { $_->att('numFmtId') => $_->att('formatCode') } $styles->find_nodes('//s:numFmts/s:numFmt')), @@ -794,12 +837,12 @@ sub _extract_files { $zip->memberNamed($get_path->($_->att('Target')))->contents } $wb_rels->find_nodes(qq); - my $styles_xml = $self->_parse_xml( - $zip, - $get_path->(($wb_rels->find_nodes( - qq - ))[0]->att('Target')) - ); + my ($styles_xml) = map { + $self->_parse_xml( + $zip, + $get_path->($_->att('Target')) + ) + } $wb_rels->find_nodes(qq); my %worksheet_xml = map { if ( my $sheetfile = $zip->memberNamed($get_path->($_->att('Target')))->contents ) { @@ -813,9 +856,11 @@ sub _extract_files { return { workbook => $wb_xml, - styles => $styles_xml, sheets => \%worksheet_xml, themes => \%themes_xml, + ($styles_xml + ? (styles => $styles_xml) + : ()), ($strings_xml ? (strings => $strings_xml) : ()), diff --git a/t/bug-32.t b/t/bug-32.t index c9f095a..57346f7 100644 --- a/t/bug-32.t +++ b/t/bug-32.t @@ -5,7 +5,21 @@ use Test::More; use Spreadsheet::ParseXLSX; -my $wb = Spreadsheet::ParseXLSX->new->parse('t/data/bug-32.xlsx'); -pass('it parses successfully'); +{ + my $wb = Spreadsheet::ParseXLSX->new->parse('t/data/bug-32.xlsx'); + + my $ws1 = $wb->worksheet(0); + like($ws1->get_cell(0, 0)->value, qr/^PURSUANT/); + + my $ws2 = $wb->worksheet(1); + like($ws2->get_cell(0, 0)->value, qr/^QMS/); +} + +{ + my $wb = Spreadsheet::ParseXLSX->new->parse('t/data/bug-32-2.xlsx'); + + my $ws = $wb->worksheet(0); + is($ws->get_cell(1, 1)->value, 93); +} done_testing; diff --git a/t/data/bug-32-2.xlsx b/t/data/bug-32-2.xlsx new file mode 100644 index 0000000..2aa0e0e Binary files /dev/null and b/t/data/bug-32-2.xlsx differ -- cgit v1.2.3-54-g00ecf