summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jesse Luehrs <doy@tozt.net> 2013-09-09 10:18:29 -0400
committer: Jesse Luehrs <doy@tozt.net> 2013-09-09 10:18:29 -0400
commit: 70e0ccd7243ee9448a36118d70e4441fab267315 (patch)
tree: a05d95c0953a3cf475097f4f8fb20b5383cb45bf
parent: a74ed93e5eba524e87271c702043c9cddae27d9d (diff)
download: spreadsheet-parsexlsx-70e0ccd7243ee9448a36118d70e4441fab267315.tar.gz
spreadsheet-parsexlsx-70e0ccd7243ee9448a36118d70e4441fab267315.zip
fix string lookups when cells have complicated formatting (#6)
If a cell has multiple different formats, its text shows up as multiple rich text parts within a single shared string table entry, so make sure to count each shared string table entry only once (joining the text of its parts) rather than counting every part as a separate string.
-rw-r--r-- lib/Spreadsheet/ParseXLSX.pm 7
-rw-r--r-- t/bug-6.t 30
2 files changed, 35 insertions, 2 deletions
diff --git a/lib/Spreadsheet/ParseXLSX.pm b/lib/Spreadsheet/ParseXLSX.pm
index 1aa14eb..6efc3b8 100644
--- a/lib/Spreadsheet/ParseXLSX.pm
+++ b/lib/Spreadsheet/ParseXLSX.pm
@@ -238,8 +238,11 @@ sub _parse_shared_strings {
return [
map {
- { Text => $_->text } # XXX are Unicode, Rich, or Ext important?
- } $strings->find_nodes('//t')
+ my $node = $_;
+ # XXX this discards information about formatting within cells
+ # not sure how to represent that
+ { Text => join('', map { $_->text } $node->find_nodes('t')) }
+ } $strings->find_nodes('//si')
];
}
diff --git a/t/bug-6.t b/t/bug-6.t
index 503d6fa..41e6fc0 100644
--- a/t/bug-6.t
+++ b/t/bug-6.t
@@ -8,4 +8,34 @@ use Spreadsheet::ParseXLSX;
my $wb = Spreadsheet::ParseXLSX->new->parse('t/data/bug-6.xlsx');
is($wb->worksheet_count, 8);
+my %cells = (
+ 7 => {
+ 0 => 'mfg fdproc',
+ 1 => 'Tom Forsythe',
+ },
+ 8 => {
+ 0 => 'ent bartend-402 data max prodigy max 203 dpi',
+ 1 => 'Dave Levos ; Tommy Holland',
+ },
+ 9 => {
+ 0 => 'ent bartend-402 inter px4i 400 dpi rw',
+ 1 => 'Tommy Holland; Dave Levos',
+ },
+ 10 => {
+ 0 => 'opr-mfg asmb inst ro',
+ 1 => 'Chris McGee',
+ },
+);
+
+my $ws = $wb->worksheet('DSGroups');
+my ($row_min, $row_max) = $ws->row_range;
+my ($col_min, $col_max) = $ws->col_range;
+for my $row (sort { $a <=> $b } keys %cells) {
+ for my $col (sort { $a <=> $b } keys %{ $cells{$row} }) {
+ my $cell = $ws->get_cell($row, $col);
+ next unless $cell;
+ is($cell->value, $cells{$row}{$col}, "correct value for ($row, $col)");
+ }
+}
+
done_testing;