From 51c7608a44671076096db3ce40b3710fb8affc37 Mon Sep 17 00:00:00 2001 From: Alexey Mazurin Date: Thu, 19 Mar 2015 10:21:25 +0400 Subject: Support of encrypted xlsx files Added support for reading encrypted xlsx files in accordance with [MS-OFFCRYPTO]: Office Document Cryptography Structure https://msdn.microsoft.com/en-us/library/cc313071(v=office.12).aspx Support of standard and agile encryption modified: lib/Spreadsheet/ParseXLSX.pm new file: t/data/encryption-agile-123q.xlsx new file: t/data/encryption-standard-default-password.xlsx new file: t/encryption.t --- lib/Spreadsheet/ParseXLSX.pm | 501 ++++++++++++++++++++++- t/data/encryption-agile-123q.xlsx | Bin 0 -> 13312 bytes t/data/encryption-standard-default-password.xlsx | Bin 0 -> 12288 bytes t/encryption.t | 30 ++ 4 files changed, 517 insertions(+), 14 deletions(-) create mode 100644 t/data/encryption-agile-123q.xlsx create mode 100644 t/data/encryption-standard-default-password.xlsx create mode 100644 t/encryption.t diff --git a/lib/Spreadsheet/ParseXLSX.pm b/lib/Spreadsheet/ParseXLSX.pm index ece46ab..a3c2c74 100644 --- a/lib/Spreadsheet/ParseXLSX.pm +++ b/lib/Spreadsheet/ParseXLSX.pm @@ -10,6 +10,15 @@ use Scalar::Util 'openhandle'; use Spreadsheet::ParseExcel 0.61; use XML::Twig; +use Crypt::Mode::CBC; +use Crypt::Mode::ECB; +use Digest::SHA (); + +use OLE::Storage_Lite; +use MIME::Base64 (); +use Encode (); +use File::Temp 'tempfile'; + =head1 SYNOPSIS use Spreadsheet::ParseXLSX; @@ -31,7 +40,18 @@ Returns a new parser instance. Takes no parameters. =cut sub new { - bless {}, shift; + my $self = bless {}, shift; + my ($param) = @_; + + if (ref($param) eq 'HASH') { + if (exists($param->{password})) { + $self->{password} = $param->{password}; + } + if (exists($param->{formatter})) { + $self->{formatter} = $param->{formatter}; + } + } + return $self; } =method parse($file, $formatter) @@ -45,26 +65,74 @@ The C<$formatter> argument is an optional formatter class as described in L{formatter}; + $password = $param1->{password}; + } else { + $formatter = $param1; + $password = $param2; + } + + $formatter = $formatter || $self->{formatter}; + $password = $password || $self->{password}; - my $zip = Archive::Zip->new; my $workbook = Spreadsheet::ParseExcel::Workbook->new; + if (openhandle($file)) { - bless $file, 'IO::File' if ref($file) eq 'GLOB'; # sigh - $zip->readFromFileHandle($file) == Archive::Zip::AZ_OK - or die "Can't open filehandle as a zip file"; + if (ref($file) eq 'GLOB') { + read($file, $signature, 2); + seek($file, -2, IO::File::SEEK_CUR); + } else { + $file->read($signature, 2); + $file->seek(-2, IO::File::SEEK_CUR); + } $workbook->{File} = undef; + } elsif (!ref($file)) { + my $fh = IO::File->new(); + if ($fh->open("<$file")) { + $workbook->{File} = $file; + $fh->read($signature, 2); + $fh->seek(-2, IO::File::SEEK_CUR); + $file = $fh; + } } - elsif (!ref($file)) { - $zip->read($file) == Archive::Zip::AZ_OK - or die "Can't open file '$file' as a zip file"; - $workbook->{File} = $file; - } - else { - die "Argument to 'new' must be a filename or open filehandle"; + + if ($signature eq "\xd0\xcf") { + $tempfile = $file = Spreadsheet::ParseXLSX::decryptor->open($file, $password); } - return $self->_parse_workbook($zip, $workbook, $formatter); + eval { + my $zip = Archive::Zip->new; + if (openhandle($file)) { + bless $file, 'IO::File' if ref($file) eq 'GLOB'; # sigh + $zip->readFromFileHandle($file) == Archive::Zip::AZ_OK + or die "Can't open filehandle as a zip file"; + } + elsif (!ref($file)) { + $zip->read($file) == Archive::Zip::AZ_OK + or die "Can't open file '$file' as a zip file"; + $workbook->{File} = $file; + } + else { + die "Argument to 'new' must be a filename or open filehandle"; + } + + $self->_parse_workbook($zip, $workbook, $formatter); + }; + if ($tempfile) { + unlink $tempfile; + }; + die $@ if $@; + + return $workbook; } sub _parse_workbook { @@ -870,6 +938,411 @@ sub _apply_tint { return scalar hls2rgb($h, $l, $s); } +package Spreadsheet::ParseXLSX::decryptor; + +use strict; +use warnings; + +sub open { + my $class = shift; + + my ($filename, $password) = @_; + + $password = $password || 'VelvetSweatshop'; + + my ($infoFile, $packageFile) = _getCompoundData($filename, ['EncryptionInfo', 'EncryptedPackage']); + + my $xlsx; + + eval { + my $infoFH = IO::File->new(); + $infoFH->open($infoFile); + $infoFH->binmode(); + + my $buffer; + $infoFH->read($buffer, 8); + my ($majorVers, $minorVers) = unpack('SS', $buffer); + + if ($majorVers == 4 && $minorVers == 4) { + $xlsx = agileDecryption($infoFH, $packageFile, $password); + } else { + $xlsx = standardDecryption($infoFH, $packageFile, $password); + } + $infoFH->close(); + }; + unlink $infoFile, $packageFile; + die $@ if $@; + + return $xlsx; +} + +sub _getCompoundData { + my $filename = shift; + my $names = shift; + + my @files; + + my $storage = OLE::Storage_Lite->new($filename); + + foreach my $name (@{$names}) { + my @data = $storage->getPpsSearch([OLE::Storage_Lite::Asc2Ucs($name)], 1, 1); + if ($#data < 0) { + push @files, undef; + } else { + my ($fh, $filename) = File::Temp::tempfile(); + my $out = IO::Handle->new_from_fd($fh, 'w') || die "TempFile error!"; + $out->write($data[0]->{Data}); + $out->close(); + push @files, $filename; + } + } + + return @files; +} + +sub standardDecryption { + my ($infoFH, $packageFile, $password) = @_; + + my $buffer; + my $n = $infoFH->read($buffer, 24); + + my ($encryptionHeaderSize, undef, undef, $algID, $algIDHash, $keyBits) = unpack('LLLLLL', $buffer); + + $infoFH->seek($encryptionHeaderSize - 0x14, IO::File::SEEK_CUR); + + $infoFH->read($buffer, 4); + + my $saltSize = unpack('L', $buffer); + + my ($salt, $encryptedVerifier, $verifierHashSize, $encryptedVerifierHash); + + $infoFH->read($salt, 16); + $infoFH->read($encryptedVerifier, 16); + + $infoFH->read($buffer, 4); + $verifierHashSize = unpack('L', $buffer); + + $infoFH->read($encryptedVerifierHash, 32); + $infoFH->close(); + + my ($cipherAlgorithm, $hashAlgorithm); + + if ($algID == 0x0000660E || $algID == 0x0000660F || $algID == 0x0000660E) { + $cipherAlgorithm = 'AES'; + } else { + die sprintf('Unsupported encryption algorithm: 0x%.8x', $algID); + } + + if ($algIDHash == 0x00008004) { + $hashAlgorithm = 'SHA-1'; + } else { + die sprintf('Unsupported hash algorithm: 0x%.8x', $algIDHash); + } + + my $decryptor = Spreadsheet::ParseXLSX::decryptor::Standard->new({ + cipherAlgorithm => $cipherAlgorithm, + cipherChaining => 'ECB', + hashAlgorithm => $hashAlgorithm, + salt => $salt, + password => $password, + keyBits => $keyBits, + spinCount => 50000 + }); + + $decryptor->verifyPassword($encryptedVerifier, $encryptedVerifierHash); + + my $in = new IO::File; + $in->open("<$packageFile") || die 'File/handle opening error'; + $in->binmode(); + + my ($fh, $filename) = File::Temp::tempfile(); + binmode($fh); + my $out = IO::Handle->new_from_fd($fh, 'w') || die "TempFile error!"; + + my $inbuf; + $in->read($inbuf, 8); + my $fileSize = unpack('L', $inbuf); + + $decryptor->decryptFile($in, $out, 1024, $fileSize); + + $in->close(); + $out->close(); + + return $filename; +} + +sub agileDecryption { + my ($infoFH, $packageFile, $password) = @_; + + my $xml = XML::Twig->new; + $xml->parse($infoFH); + + my ($info) = $xml->find_nodes('//encryption/keyEncryptors/keyEncryptor/p:encryptedKey'); + + my $encryptedVerifierHashInput = MIME::Base64::decode($info->att('encryptedVerifierHashInput')); + my $encryptedVerifierHashValue = MIME::Base64::decode($info->att('encryptedVerifierHashValue')); + my $encryptedKeyValue = MIME::Base64::decode($info->att('encryptedKeyValue')); + + my $keyDecryptor = Spreadsheet::ParseXLSX::decryptor::Agile->new({ + cipherAlgorithm => $info->att('cipherAlgorithm'), + cipherChaining => $info->att('cipherChaining'), + hashAlgorithm => $info->att('hashAlgorithm'), + salt => MIME::Base64::decode($info->att('saltValue')), + password => $password, + keyBits => 0 + $info->att('keyBits'), + spinCount => 0 + $info->att('spinCount'), + blockSize => 0 + $info->att('blockSize') + }); + + $keyDecryptor->verifyPassword($encryptedVerifierHashInput, $encryptedVerifierHashValue); + + my $key = $keyDecryptor->decrypt($encryptedKeyValue, "\x14\x6e\x0b\xe7\xab\xac\xd0\xd6"); + + ($info) = $xml->find_nodes('//encryption/keyData'); + + my $fileDecryptor = Spreadsheet::ParseXLSX::decryptor::Agile->new({ + cipherAlgorithm => $info->att('cipherAlgorithm'), + cipherChaining => $info->att('cipherChaining'), + hashAlgorithm => $info->att('hashAlgorithm'), + salt => MIME::Base64::decode($info->att('saltValue')), + password => $password, + keyBits => 0 + $info->att('keyBits'), + blockSize => 0 + $info->att('blockSize') + }); + + my $in = new IO::File; + $in->open("<$packageFile") || die 'File/handle opening error'; + $in->binmode(); + + my ($fh, $filename) = File::Temp::tempfile(); + binmode($fh); + my $out = IO::Handle->new_from_fd($fh, 'w') || die "TempFile error!"; + + my $inbuf; + $in->read($inbuf, 8); + my $fileSize = unpack('L', $inbuf); + + $fileDecryptor->decryptFile($in, $out, 4096, $key, $fileSize); + + $in->close(); + $out->close(); + + return $filename; +} + +sub new { + my $class = shift; + my $self = shift; + + $self->{keyLength} = $self->{keyBits} / 8; + + if ($self->{hashAlgorithm} eq 'SHA512') { + $self->{hashProc} = \&Digest::SHA::sha512; + } elsif ($self->{hashAlgorithm} eq 'SHA-1') { + $self->{hashProc} = \&Digest::SHA::sha1; + } elsif ($self->{hashAlgorithm} eq 'SHA256') { + $self->{hashProc} = \&Digest::SHA::sha256; + } else { + die "Unsupported hash algorithm: $self->{hashAlgorithm}"; + } + + return bless $self, $class; +} + +package Spreadsheet::ParseXLSX::decryptor::Agile; + +use strict; +use warnings; + +use parent -norequire, 'Spreadsheet::ParseXLSX::decryptor'; + +sub new { + my $class = shift; + my $self = Spreadsheet::ParseXLSX::decryptor->new(@_); + bless $self, $class; +} + +sub decrypt { + my $self = shift; + my ($encryptedValue, $blockKey) = @_; + + my $key = $self->_generateDecryptionKey($blockKey); + my $iv = $self->_generateInitializationVector('', $self->{blockSize}); + my $cbc = Crypt::Mode::CBC->new($self->{cipherAlgorithm}, 0); + return $cbc->decrypt($encryptedValue, $key, $iv); +} + +sub _generateDecryptionKey { + my $self = shift; + my ($blockKey) = @_; + + my $hash; + + unless ($self->{pregeneratedKey}) { + $hash = $self->{hashProc}->($self->{salt} . Encode::encode('UTF-16LE', $self->{password})); + for (my $i = 0; $i < $self->{spinCount}; $i++) { + $hash = $self->{hashProc}->(pack('L', $i) . $hash); + } + $self->{pregeneratedKey} = $hash; + } + + $hash = $self->{hashProc}->($self->{pregeneratedKey} . $blockKey); + + if (length($hash) > $self->{keyLength}) { + $hash = substr($hash, 0, $self->{keyLength}); + } elsif (length($hash) < $self->{keyLength}) { + $hash .= "\x36" x ($self->{keyLength} - length($hash)); + } + return $hash; +} + +sub _generateInitializationVector { + my $self = shift; + my ($blockKey, $blockSize) = @_; + + my $iv; + if ($blockKey) { + $iv = $self->{hashProc}->($self->{salt} . $blockKey); + } else { + $iv = $self->{salt}; + } + + if (length($iv) > $blockSize) { + $iv = substr($iv, 0, $blockSize); + } elsif (length($iv) < $blockSize) { + $iv = $iv . ("\x36" x ($blockSize - length($iv))); + } + + return $iv; +} + +sub decryptFile { + my $self = shift; + my ($inFile, $outFile, $bufferLength, $key, $fileSize) = @_; + + my $cbc = Crypt::Mode::CBC->new($self->{cipherAlgorithm}, 0); + + my $inbuf; + my $i = 0; + + while (($fileSize > 0) && (my $inlen = $inFile->read($inbuf, $bufferLength))) { + my $blockId = pack('L', $i); + + my $iv = $self->_generateInitializationVector($blockId, $self->{blockSize}); + + if ($inlen < $bufferLength) { + $inbuf .= "\x00" x ($bufferLength - $inlen); + } + + my $outbuf = $cbc->decrypt($inbuf, $key, $iv); + if ($fileSize < $inlen) { + $inlen = $fileSize; + } + + $outFile->write($outbuf, $inlen); + $i++; + $fileSize -= $inlen; + } +} + +sub verifyPassword { + my $self = shift; + + my ($encryptedVerifier, $encryptedVerifierHash) = @_; + + my $encryptedVerifierHash0 = $self->{hashProc}->($self->decrypt($encryptedVerifier, "\xfe\xa7\xd2\x76\x3b\x4b\x9e\x79")); + $encryptedVerifierHash = $self->decrypt($encryptedVerifierHash, "\xd7\xaa\x0f\x6d\x30\x61\x34\x4e"); + + die "Wrong password: $self" unless ($encryptedVerifierHash0 eq $encryptedVerifierHash); +} + +package Spreadsheet::ParseXLSX::decryptor::Standard; + +use strict; +use warnings; + +use parent -norequire, 'Spreadsheet::ParseXLSX::decryptor'; + +sub new { + my $class = shift; + my $self = Spreadsheet::ParseXLSX::decryptor->new(@_); + bless $self, $class; +} + +sub decrypt { + my $self = shift; + my ($encryptedValue) = @_; + + my $key = $self->_generateDecryptionKey("\x00" x 4); + my $ecb = Crypt::Mode::ECB->new($self->{cipherAlgorithm}, 0); + return $ecb->decrypt($encryptedValue, $key); +} + +sub decryptFile { + my $self = shift; + my ($inFile, $outFile, $bufferLength, $fileSize) = @_; + + my $key = $self->_generateDecryptionKey("\x00" x 4); + my $ecb = Crypt::Mode::ECB->new($self->{cipherAlgorithm}, 0); + + my $inbuf; + my $i = 0; + + while (($fileSize > 0) && (my $inlen = $inFile->read($inbuf, $bufferLength))) { + if ($inlen < $bufferLength) { + $inbuf .= "\x00" x ($bufferLength - $inlen); + } + + my $outbuf = $ecb->decrypt($inbuf, $key); + if ($fileSize < $inlen) { + $inlen = $fileSize; + } + + $outFile->write($outbuf, $inlen); + $i++; + $fileSize -= $inlen; + } +} + +sub _generateDecryptionKey { + my $self = shift; + my ($blockKey) = @_; + + my $hash; + unless ($self->{pregeneratedKey}) { + $hash = $self->{hashProc}->($self->{salt} . Encode::encode('UTF-16LE', $self->{password})); + for (my $i = 0; $i < $self->{spinCount}; $i++) { + $hash = $self->{hashProc}->(pack('L', $i) . $hash); + } + $self->{pregeneratedKey} = $hash; + } + + $hash = $self->{hashProc}->($self->{pregeneratedKey} . $blockKey); + + my $x1 = $self->{hashProc}->(("\x36" x 64) ^ $hash); + if (length($x1) >= $self->{keyLength}) { + $hash = substr($x1, 0, $self->{keyLength}); + } else { + my $x2 = $self->{hashProc}->(("\x5C" x 64) ^ $hash); + $hash = substr($x1 . $x2, 0, $self->{keyLength}); + } + + return $hash; +} + +sub verifyPassword { + my $self = shift; + + my ($encryptedVerifier, $encryptedVerifierHash) = @_; + + my $verifier = $self->decrypt($encryptedVerifier); + my $verifierHash = $self->decrypt($encryptedVerifierHash); + + my $verifierHash0 = $self->{hashProc}->($verifier); + + die "Wrong password: $self" unless ($verifierHash0 eq substr($verifierHash, 0, length($verifierHash0))); +} + =head1 INCOMPATIBILITIES This module returns data using classes from L, so for diff --git a/t/data/encryption-agile-123q.xlsx b/t/data/encryption-agile-123q.xlsx new file mode 100644 index 0000000..be3b561 Binary files /dev/null and b/t/data/encryption-agile-123q.xlsx differ diff --git a/t/data/encryption-standard-default-password.xlsx b/t/data/encryption-standard-default-password.xlsx new file mode 100644 index 0000000..a863c1b Binary files /dev/null and b/t/data/encryption-standard-default-password.xlsx differ diff --git a/t/encryption.t b/t/encryption.t new file mode 100644 index 0000000..6def0f4 --- /dev/null +++ b/t/encryption.t @@ -0,0 +1,30 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Test::More; + +use lib::Spreadsheet::ParseXLSX; + +my $parser = Spreadsheet::ParseXLSX->new(); +my $workbook = $parser->parse("t/data/encryption-agile-123q.xlsx", {password => '123q'}); + +my $worksheet; +my $cell; + +$worksheet = $workbook->worksheet(0); +ok(defined($workbook)); + +$cell = $worksheet->get_cell(1, 1); +ok(defined($cell) && $cell->value() eq 'abcdefgABCDEFG'); + + +open FH, "t/data/encryption-standard-default-password.xlsx"; +$workbook = $parser->parse(\*FH); + +ok(defined($workbook)); + +$worksheet = $workbook->worksheet(0); +$cell = $worksheet->get_cell(22, 8); +ok(defined($cell) && $cell->value() == 1911); + +done_testing; -- cgit v1.2.3-54-g00ecf From f1ea9d08ca15e498697dd8d28e11f555bcd5e19c Mon Sep 17 00:00:00 2001 From: Alexey Mazurin Date: Thu, 19 Mar 2015 10:58:09 +0400 Subject: t/encryption.t typo fix --- t/encryption.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/encryption.t b/t/encryption.t index 6def0f4..cbd913e 100644 --- a/t/encryption.t +++ b/t/encryption.t @@ -3,7 +3,7 @@ use strict; use warnings; use Test::More; -use lib::Spreadsheet::ParseXLSX; +use Spreadsheet::ParseXLSX; my $parser = Spreadsheet::ParseXLSX->new(); my $workbook = $parser->parse("t/data/encryption-agile-123q.xlsx", {password => '123q'}); -- cgit v1.2.3-54-g00ecf From a2104941f31a97bd3661b930dc135d2b019e4725 Mon Sep 17 00:00:00 2001 From: Alexey Mazurin Date: Thu, 19 Mar 2015 11:16:18 +0400 Subject: Removed tabs and extra spaces in ParseXLSX.pm --- lib/Spreadsheet/ParseXLSX.pm | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/Spreadsheet/ParseXLSX.pm b/lib/Spreadsheet/ParseXLSX.pm index a3c2c74..dca3c7c 100644 --- a/lib/Spreadsheet/ParseXLSX.pm +++ b/lib/Spreadsheet/ParseXLSX.pm @@ -970,7 +970,7 @@ sub open { } $infoFH->close(); }; - unlink $infoFile, $packageFile; + unlink $infoFile, $packageFile; die $@ if $@; return $xlsx; @@ -1133,7 +1133,7 @@ sub agileDecryption { sub new { my $class = shift; my $self = shift; - + $self->{keyLength} = $self->{keyBits} / 8; if ($self->{hashAlgorithm} eq 'SHA512') { @@ -1250,10 +1250,10 @@ sub verifyPassword { my ($encryptedVerifier, $encryptedVerifierHash) = @_; - my $encryptedVerifierHash0 = $self->{hashProc}->($self->decrypt($encryptedVerifier, "\xfe\xa7\xd2\x76\x3b\x4b\x9e\x79")); - $encryptedVerifierHash = $self->decrypt($encryptedVerifierHash, "\xd7\xaa\x0f\x6d\x30\x61\x34\x4e"); + my $encryptedVerifierHash0 = $self->{hashProc}->($self->decrypt($encryptedVerifier, "\xfe\xa7\xd2\x76\x3b\x4b\x9e\x79")); + $encryptedVerifierHash = $self->decrypt($encryptedVerifierHash, "\xd7\xaa\x0f\x6d\x30\x61\x34\x4e"); - die "Wrong password: $self" unless ($encryptedVerifierHash0 eq $encryptedVerifierHash); + die "Wrong password: $self" unless ($encryptedVerifierHash0 eq $encryptedVerifierHash); } package Spreadsheet::ParseXLSX::decryptor::Standard; @@ -1340,7 +1340,7 @@ sub verifyPassword { my $verifierHash0 = $self->{hashProc}->($verifier); - die "Wrong password: $self" unless ($verifierHash0 eq substr($verifierHash, 0, length($verifierHash0))); + die "Wrong password: $self" unless ($verifierHash0 eq substr($verifierHash, 0, length($verifierHash0))); } =head1 INCOMPATIBILITIES -- cgit v1.2.3-54-g00ecf