fix string lookups when cells have complicated formatting (#6)

If a cell has multiple different formats, its text shows up as multiple separate rich text parts within one shared string table entry, so make sure to count each shared string table entry only once.
author: Jesse Luehrs <doy@tozt.net> 2013-09-09 10:18:29 -0400
committer: Jesse Luehrs <doy@tozt.net> 2013-09-09 10:18:29 -0400
commit: 70e0ccd7243ee9448a36118d70e4441fab267315 (patch)
tree: a05d95c0953a3cf475097f4f8fb20b5383cb45bf
parent: a74ed93e5eba524e87271c702043c9cddae27d9d (diff)
download: spreadsheet-parsexlsx-70e0ccd7243ee9448a36118d70e4441fab267315.tar.gz
spreadsheet-parsexlsx-70e0ccd7243ee9448a36118d70e4441fab267315.zip
2 files changed, 35 insertions, 2 deletions
diff --git a/lib/Spreadsheet/ParseXLSX.pm b/lib/Spreadsheet/ParseXLSX.pm
index 1aa14eb..6efc3b8 100644
--- a/lib/Spreadsheet/ParseXLSX.pm
+++ b/lib/Spreadsheet/ParseXLSX.pm
@@ -238,8 +238,11 @@ sub _parse_shared_strings {
 
     return [
         map {
-            { Text => $_->text } # XXX are Unicode, Rich, or Ext important?
-        } $strings->find_nodes('//t')
+            my $node = $_;
+            # XXX this discards information about formatting within cells
+            # not sure how to represent that
+            { Text => join('', map { $_->text } $node->find_nodes('t')) }
+        } $strings->find_nodes('//si')
     ];
 }
 
diff --git a/t/bug-6.t b/t/bug-6.t
index 503d6fa..41e6fc0 100644
--- a/t/bug-6.t
+++ b/t/bug-6.t
@@ -8,4 +8,34 @@ use Spreadsheet::ParseXLSX;
 my $wb = Spreadsheet::ParseXLSX->new->parse('t/data/bug-6.xlsx');
 is($wb->worksheet_count, 8);
 
+my %cells = (
+    7 => {
+        0 => 'mfg fdproc',
+        1 => 'Tom Forsythe',
+    },
+    8 => {
+        0 => 'ent bartend-402 data max prodigy max 203 dpi',
+        1 => 'Dave Levos ; Tommy Holland',
+    },
+    9 => {
+        0 => 'ent bartend-402 inter px4i 400 dpi rw',
+        1 => 'Tommy Holland; Dave Levos',
+    },
+    10 => {
+        0 => 'opr-mfg asmb inst ro',
+        1 => 'Chris McGee',
+    },
+);
+
+my $ws = $wb->worksheet('DSGroups');
+my ($row_min, $row_max) = $ws->row_range;
+my ($col_min, $col_max) = $ws->col_range;
+for my $row (sort { $a <=> $b } keys %cells) {
+    for my $col (sort { $a <=> $b } keys %{ $cells{$row} }) {
+        my $cell = $ws->get_cell($row, $col);
+        next unless $cell;
+        is($cell->value, $cells{$row}{$col}, "correct value for ($row, $col)");
+    }
+}
+
 done_testing;
author	Jesse Luehrs <doy@tozt.net>	2013-09-09 10:18:29 -0400
committer	Jesse Luehrs <doy@tozt.net>	2013-09-09 10:18:29 -0400
commit	70e0ccd7243ee9448a36118d70e4441fab267315 (patch)
tree	a05d95c0953a3cf475097f4f8fb20b5383cb45bf
parent	a74ed93e5eba524e87271c702043c9cddae27d9d (diff)
download	spreadsheet-parsexlsx-70e0ccd7243ee9448a36118d70e4441fab267315.tar.gz spreadsheet-parsexlsx-70e0ccd7243ee9448a36118d70e4441fab267315.zip