From 3dbfe7e710feae4ac8fd9705cab46233291911d3 Mon Sep 17 00:00:00 2001 From: Thomas Mueller Date: Tue, 8 Jan 2013 00:20:11 +0100 Subject: [PATCH 1/4] generated zip archives now support utf8 file names refs #1086 --- lib/private/archive/zip.php | 196 +++++++++++++++++++++++++++++++++++- lib/private/files.php | 25 +++-- 2 files changed, 205 insertions(+), 16 deletions(-) diff --git a/lib/private/archive/zip.php b/lib/private/archive/zip.php index 8a866716a794..5f0e32e7afb4 100644 --- a/lib/private/archive/zip.php +++ b/lib/private/archive/zip.php @@ -12,12 +12,12 @@ class OC_Archive_ZIP extends OC_Archive{ */ private $zip=null; private $path; + private $modified=false; function __construct($source) { $this->path=$source; $this->zip=new ZipArchive(); - if($this->zip->open($source, ZipArchive::CREATE)) { - }else{ + if(!$this->zip->open($source, ZipArchive::CREATE)) { OCP\Util::writeLog('files_archive', 'Error while opening archive '.$source, OCP\Util::WARN); } } @@ -27,6 +27,7 @@ function __construct($source) { * @return bool */ function addFolder($path) { + $this->modified=true; return $this->zip->addEmptyDir($path); } /** @@ -42,7 +43,8 @@ function addFile($path, $source='') { $result=$this->zip->addFromString($path, $source); } if($result) { - $this->zip->close();//close and reopen to save the zip + $this->modified=true; + $this->close();//close and reopen to save the zip $this->zip->open($this->path); } return $result; @@ -57,6 +59,7 @@ function rename($source, $dest) { $source=$this->stripPath($source); $dest=$this->stripPath($dest); $this->zip->renameName($source, $dest); + $this->modified=true; } /** * get the uncompressed size of a file in the archive @@ -146,6 +149,7 @@ function fileExists($path) { * @return bool */ function remove($path) { + $this->modified=true; if($this->fileExists($path.'/')) { return $this->zip->deleteName($path.'/'); }else{ @@ -198,4 +202,190 @@ private function stripPath($path) { return $path; } } + + public function close() { + 
$this->zip->close(); + if ($this->modified) { + $this->fix_utf8_flags(); + $this->modified = false; + } + } + + + /** + * Add unicode flag to all files in archive. + * + * NOTE: single disk archives only, no ZIP64 support. + * Taken from https://github.com/skodak/moodle/blob/master/lib/filestorage/zip_archive.php + * Original license: GPL3 + * Re-licensed by Petr Škoda, https://github.com/skodak + * + * @return bool success, modifies the file contents + */ + protected function fix_utf8_flags() { + if (!file_exists($this->path)) { + return true; + } + + // Note: the ZIP structure is described at http://www.pkware.com/documents/casestudies/APPNOTE.TXT + if (!$fp = fopen($this->path, 'rb+')) { + return false; + } + if (!$filesize = filesize($this->path)) { + return false; + } + + $centralend = self::zip_get_central_end($fp, $filesize); + + if ($centralend === false or $centralend['disk'] !== 0 or $centralend['disk_start'] !== 0 or $centralend['offset'] === 0xFFFFFFFF) { + // Single disk archives only and no support for ZIP64, sorry. + fclose($fp); + return false; + } + + fseek($fp, $centralend['offset']); + $data = fread($fp, $centralend['size']); + $pos = 0; + $files = array(); + for ($i = 0; $i < $centralend['entries']; $i++) { + $file = self::zip_parse_file_header($data, $centralend, $pos); + if ($file === false) { + // Wrong header, sorry. + fclose($fp); + return false; + } + + $newgeneral = $file['general'] | pow(2, 11); + if ($newgeneral === $file['general']) { + // Nothing to do with this file. + continue; + } + + if (preg_match('/^[a-zA-Z0-9_\-\.]*$/', $file['name'])) { + // ASCII file names are always ok. + continue; + } + if ($file['extra']) { + // Most probably not created by php zip ext, better to skip it. + continue; + } + if (self::fix_utf8($file['name']) !== $file['name']) { + // Does not look like a valid utf-8 encoded file name, skip it. + continue; + } + + // Read local file header. 
+ fseek($fp, $file['local_offset']); + $localfile = unpack('Vsig/vversion_req/vgeneral/vmethod/vmtime/vmdate/Vcrc/Vsize_compressed/Vsize/vname_length/vextra_length', fread($fp, 30)); + if ($localfile['sig'] !== 0x04034b50) { + // Borked file! + fclose($fp); + return false; + } + + $file['local'] = $localfile; + $files[] = $file; + } + + foreach ($files as $file) { + $localfile = $file['local']; + // Add the unicode flag in central file header. + fseek($fp, $file['central_offset'] + 8); + if (ftell($fp) === $file['central_offset'] + 8) { + $newgeneral = $file['general'] | pow(2, 11); + fwrite($fp, pack('v', $newgeneral)); + } + // Modify local file header too. + fseek($fp, $file['local_offset'] + 6); + if (ftell($fp) === $file['local_offset'] + 6) { + $newgeneral = $localfile['general'] | pow(2, 11); + fwrite($fp, pack('v', $newgeneral)); + } + } + + fclose($fp); + return true; + } + + /** + * Read end of central signature of ZIP file. + * @internal + * @static + * @param resource $fp + * @param int $filesize + * @return array|bool + */ + public static function zip_get_central_end($fp, $filesize) { + // Find end of central directory record. + fseek($fp, $filesize - 22); + $info = unpack('Vsig', fread($fp, 4)); + if ($info['sig'] === 0x06054b50) { + // There is no comment. + fseek($fp, $filesize - 22); + $data = fread($fp, 22); + } else { + // There is some comment with 0xFF max size - that is 65557. + fseek($fp, $filesize - 65557); + $data = fread($fp, 65557); + } + + $pos = strpos($data, pack('V', 0x06054b50)); + if ($pos === false) { + // Borked ZIP structure! 
+ return false; + } + $centralend = unpack('Vsig/vdisk/vdisk_start/vdisk_entries/ventries/Vsize/Voffset/vcomment_length', substr($data, $pos, 22)); + if ($centralend['comment_length']) { + $centralend['comment'] = substr($data, 22, $centralend['comment_length']); + } else { + $centralend['comment'] = ''; + } + + return $centralend; + } + + + /** + * Parse file header + * @internal + * @param string $data + * @param array $centralend + * @param int $pos (modified) + * @return array|bool file info + */ + public static function zip_parse_file_header($data, $centralend, &$pos) { + $file = unpack('Vsig/vversion/vversion_req/vgeneral/vmethod/Vmodified/Vcrc/Vsize_compressed/Vsize/vname_length/vextra_length/vcomment_length/vdisk/vattr/Vattrext/Vlocal_offset', substr($data, $pos, 46)); + $file['central_offset'] = $centralend['offset'] + $pos; + $pos = $pos + 46; + if ($file['sig'] !== 0x02014b50) { + // Borked ZIP structure! + return false; + } + $file['name'] = substr($data, $pos, $file['name_length']); + $pos = $pos + $file['name_length']; + $file['extra'] = array(); + $file['extra_data'] = ''; + if ($file['extra_length']) { + $extradata = substr($data, $pos, $file['extra_length']); + $file['extra_data'] = $extradata; + while (strlen($extradata) > 4) { + $extra = unpack('vid/vsize', substr($extradata, 0, 4)); + $extra['data'] = substr($extradata, 4, $extra['size']); + $extradata = substr($extradata, 4 + $extra['size']); + $file['extra'][] = $extra; + } + $pos = $pos + $file['extra_length']; + } + if ($file['comment_length']) { + $pos = $pos + $file['comment_length']; + $file['comment'] = substr($data, $pos, $file['comment_length']); + } else { + $file['comment'] = ''; + } + return $file; + } + + private static function fix_utf8($value) { + return iconv('UTF-8', 'UTF-8//IGNORE', $value); + } } diff --git a/lib/private/files.php b/lib/private/files.php index 6ffa14c0d91b..71ddabcf0317 100644 --- a/lib/private/files.php +++ b/lib/private/files.php @@ -42,6 +42,7 @@ static 
public function getDirectoryContent($path){ * @param string $dir * @param string $file ; separated list of files to download * @param boolean $only_header ; boolean to only send header of the request + * @internal param \file $file ; separated list of files to download */ public static function get($dir, $files, $only_header = false) { $xsendfile = false; @@ -59,18 +60,14 @@ public static function get($dir, $files, $only_header = false) { self::validateZipDownload($dir, $files); $executionTime = intval(ini_get('max_execution_time')); set_time_limit(0); - $zip = new ZipArchive(); $filename = OC_Helper::tmpFile('.zip'); - if ($zip->open($filename, ZIPARCHIVE::CREATE | ZIPARCHIVE::OVERWRITE)!==true) { - $l = OC_L10N::get('lib'); - throw new Exception($l->t('cannot open "%s"', array($filename))); - } + $zip = new OC_Archive_ZIP($filename); foreach ($files as $file) { $file = $dir . '/' . $file; if (\OC\Files\Filesystem::is_file($file)) { $tmpFile = \OC\Files\Filesystem::toTmpFile($file); self::$tmpFiles[] = $tmpFile; - $zip->addFile($tmpFile, basename($file)); + $zip->addFile(basename($file), $tmpFile); } elseif (\OC\Files\Filesystem::is_dir($file)) { self::zipAddDir($file, $zip); } @@ -91,12 +88,8 @@ public static function get($dir, $files, $only_header = false) { self::validateZipDownload($dir, $files); $executionTime = intval(ini_get('max_execution_time')); set_time_limit(0); - $zip = new ZipArchive(); $filename = OC_Helper::tmpFile('.zip'); - if ($zip->open($filename, ZIPARCHIVE::CREATE | ZIPARCHIVE::OVERWRITE)!==true) { - $l = OC_L10N::get('lib'); - throw new Exception($l->t('cannot open "%s"', array($filename))); - } + $zip = new OC_Archive_ZIP($filename); $file = $dir . '/' . 
$files; self::zipAddDir($file, $zip); $zip->close(); @@ -199,9 +192,15 @@ private static function addSendfileHeader($filename) { } } + /** + * Add directory to ZIP file + * @param $dir + * @param OC_Archive_ZIP $zip + * @param string $internalDir + */ public static function zipAddDir($dir, $zip, $internalDir='') { $dirname=basename($dir); - $zip->addEmptyDir($internalDir.$dirname); + $zip->addFolder($internalDir.$dirname); $internalDir.=$dirname.='/'; $files=OC_Files::getDirectoryContent($dir); foreach($files as $file) { @@ -210,7 +209,7 @@ public static function zipAddDir($dir, $zip, $internalDir='') { if(\OC\Files\Filesystem::is_file($file)) { $tmpFile=\OC\Files\Filesystem::toTmpFile($file); OC_Files::$tmpFiles[]=$tmpFile; - $zip->addFile($tmpFile, $internalDir.$filename); + $zip->addFile($internalDir.$filename, $tmpFile); }elseif(\OC\Files\Filesystem::is_dir($file)) { self::zipAddDir($file, $zip, $internalDir); } From 8d355e74d0471b75ccf6e799d5991c289c5ffa9e Mon Sep 17 00:00:00 2001 From: Vincent Petry Date: Tue, 10 Dec 2013 18:14:42 +0100 Subject: [PATCH 2/4] Now setting host OS flag of generated ZIP files The host OS flag needs to be set to "*nix" (0x03) instead of MS-DOS (0x00) so that some unzip tools like Linux "unzip" and "ark" can correctly interpret UTF-8 characters in file names --- lib/private/archive/zip.php | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/lib/private/archive/zip.php b/lib/private/archive/zip.php index 5f0e32e7afb4..f2583b86e022 100644 --- a/lib/private/archive/zip.php +++ b/lib/private/archive/zip.php @@ -289,6 +289,20 @@ protected function fix_utf8_flags() { foreach ($files as $file) { $localfile = $file['local']; + // Set host OS + fseek($fp, $file['central_offset'] + 5); + if (ftell($fp) === $file['central_offset'] + 5) { + $hostOS = unpack('C', fread($fp, 1)); + // only change if ZipArchive didn't set anything + // this is a hint for some zip tools like Linux unzip + // and fixed encoding issues + if ($hostOS[1] 
=== 0) { + fseek($fp, -1, SEEK_CUR); + // set to "*nix" (0x03) + fwrite($fp, pack('C', 3)); + } + } + // Add the unicode flag in central file header. fseek($fp, $file['central_offset'] + 8); if (ftell($fp) === $file['central_offset'] + 8) { From 44df20f9d75179258e0db375e827ae9ad04780d6 Mon Sep 17 00:00:00 2001 From: Vincent Petry Date: Wed, 11 Dec 2013 08:47:39 +0100 Subject: [PATCH 3/4] Fixed file/dir permissions in zip file entries --- lib/private/archive/zip.php | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/private/archive/zip.php b/lib/private/archive/zip.php index f2583b86e022..af26121d5abf 100644 --- a/lib/private/archive/zip.php +++ b/lib/private/archive/zip.php @@ -302,6 +302,25 @@ protected function fix_utf8_flags() { fwrite($fp, pack('C', 3)); } } + // Set correct file/dir permissions + fseek($fp, $file['central_offset'] + 38); + if (ftell($fp) === $file['central_offset'] + 38) { + $attrext = $file['attrext']; + if ($attrext === 0) { + // is dir ? + if (substr($file['name'], -1) === '/') { + // dir flag 04 + 0755 (rwxr-xr-x) octal + $attrext = 040755 << 0x10; + // MS-DOS directory flag + $attrext |= 0x10; + } + else { + // regular file flag 010 + 0644 (rw-r--r--) octal + $attrext = 0100644 << 0x10; + } + fwrite($fp, pack('V', $attrext)); + } + } // Add the unicode flag in central file header. fseek($fp, $file['central_offset'] + 8); From b45defd40d6e07c3557e2edaab66271bdfb443a1 Mon Sep 17 00:00:00 2001 From: Vincent Petry Date: Thu, 12 Dec 2013 11:43:24 +0100 Subject: [PATCH 4/4] Only process zip file once at the end Do not close and reopen for each file entry, but only do the utf-8 fixing once after the final close. 
--- lib/private/archive/zip.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/private/archive/zip.php b/lib/private/archive/zip.php index af26121d5abf..d1f74e6532e7 100644 --- a/lib/private/archive/zip.php +++ b/lib/private/archive/zip.php @@ -44,8 +44,6 @@ function addFile($path, $source='') { } if($result) { $this->modified=true; - $this->close();//close and reopen to save the zip - $this->zip->open($this->path); } return $result; }