SpreadsheetReader_ODS.php 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. <?php
  2. /**
  3. * Class for parsing ODS files
  4. *
  5. * @author Martins Pilsetnieks
  6. */
  class SpreadsheetReader_ODS implements Iterator, Countable
  {
      /**
       * @var array Reader options: the defaults below overlaid with whatever was passed to the constructor.
       *  NOTE(review): ReturnDateTimeObjects is accepted and stored, but nothing in this class reads it.
       */
      private $Options = array(
          'TempDir' => '',
          'ReturnDateTimeObjects' => false
      );

      /**
       * @var string Per-instance extraction directory (with trailing separator), set by the constructor
       */
      private $TempDir = '';

      /**
       * @var string Path to temporary content file
       */
      private $ContentPath = '';

      /**
       * @var XMLReader|bool XML reader over content.xml, false when no content could be opened
       */
      private $Content = false;

      /**
       * @var bool Whether the last read positioned the reader on usable data (what valid() reports)
       */
      private $Valid = false;

      /**
       * @var array|bool Sheet names indexed by sheet position, false until Sheets() has built the list
       */
      private $Sheets = false;

      /**
       * @var array|null Cells of the most recently read row, null before anything was read
       */
      private $CurrentRow = null;

      /**
       * @var int Number of the sheet we're currently reading
       */
      private $CurrentSheet = 0;

      /**
       * @var int Number of next() calls since the last rewind (current() undoes its own implicit call)
       */
      private $Index = 0;

      /**
       * @var bool True while the reader is positioned inside the selected table:table element
       */
      private $TableOpen = false;

      /**
       * @var bool True while the reader is positioned inside a table:table-row whose cells are not read yet
       */
      private $RowOpen = false;

      /**
       * Extracts content.xml from the ODS archive into a fresh temporary directory and opens it.
       *
       * If the archive has no content.xml, or it cannot be extracted or opened, no exception is
       * thrown; the reader simply stays invalid (valid() returns false, Sheets() returns an empty list).
       *
       * @param string Path to file
       * @param array Options:
       *  TempDir => string Temporary directory path
       *  ReturnDateTimeObjects => bool True => dates and times will be returned as PHP DateTime objects, false => as strings
       *
       * @throws Exception When the file is not readable or cannot be opened as a ZIP archive
       */
      public function __construct($Filepath, array $Options = null)
      {
          if (!is_readable($Filepath))
          {
              throw new Exception('SpreadsheetReader_ODS: File not readable ('.$Filepath.')');
          }

          if ($Options)
          {
              $this -> Options = array_merge($this -> Options, $Options);
          }

          // Fall back to the system temp dir when no writable directory was supplied
          $BaseDir = isset($Options['TempDir']) && is_writable($Options['TempDir']) ?
              $Options['TempDir'] :
              sys_get_temp_dir();

          // Every instance extracts into its own subdirectory so parallel readers do not collide
          $this -> TempDir = rtrim($BaseDir, DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR.uniqid().DIRECTORY_SEPARATOR;

          $Zip = new ZipArchive;
          $Status = $Zip -> open($Filepath);

          if ($Status !== true)
          {
              throw new Exception('SpreadsheetReader_ODS: File not readable ('.$Filepath.') (Error '.$Status.')');
          }

          if ($Zip -> locateName('content.xml') !== false && $Zip -> extractTo($this -> TempDir, 'content.xml'))
          {
              $this -> ContentPath = $this -> TempDir.'content.xml';
          }

          $Zip -> close();

          if ($this -> ContentPath && is_readable($this -> ContentPath))
          {
              $Reader = new XMLReader;
              // Only report the reader as valid when the XML file could actually be opened
              if ($Reader -> open($this -> ContentPath))
              {
                  $this -> Content = $Reader;
                  $this -> Valid = true;
              }
          }
      }

      /**
       * Destructor, destroys all that remains (closes the reader, deletes the temp file and its directory)
       */
      public function __destruct()
      {
          // The reader must be closed before the file is deleted
          if ($this -> Content instanceof XMLReader)
          {
              $this -> Content -> close();
          }
          $this -> Content = false;

          if ($this -> ContentPath && file_exists($this -> ContentPath))
          {
              @unlink($this -> ContentPath);
          }
          $this -> ContentPath = '';

          // Remove the per-instance directory as well; best effort, like the unlink above
          if ($this -> TempDir && is_dir($this -> TempDir))
          {
              @rmdir($this -> TempDir);
          }
      }

      /**
       * Retrieves an array with information about sheets in the current file
       *
       * The list is built once with a separate XMLReader and cached, so calling this does not
       * disturb the iteration position and does not depend on how far iteration has progressed.
       *
       * @return array List of sheets (key is sheet index, value is name)
       */
      public function Sheets()
      {
          if ($this -> Sheets === false)
          {
              $this -> Sheets = array();

              if ($this -> Content instanceof XMLReader)
              {
                  $SheetReader = new XMLReader;
                  if ($SheetReader -> open($this -> ContentPath))
                  {
                      $HasNode = $SheetReader -> read();
                      while ($HasNode)
                      {
                          if ($SheetReader -> nodeType == XMLReader::ELEMENT && $SheetReader -> name == 'table:table')
                          {
                              $this -> Sheets[] = $SheetReader -> getAttribute('table:name');
                              // next() skips the table's subtree and already lands on the following node,
                              // which must be examined as-is; a read() here would step past it
                              $HasNode = $SheetReader -> next();
                          }
                          else
                          {
                              $HasNode = $SheetReader -> read();
                          }
                      }

                      $SheetReader -> close();
                  }
              }
          }
          return $this -> Sheets;
      }

      /**
       * Changes the current sheet in the file to another
       *
       * @param int Sheet index
       *
       * @return bool True if sheet was successfully changed, false otherwise.
       */
      public function ChangeSheet($Index)
      {
          $Index = (int)$Index;
          $Sheets = $this -> Sheets();

          if (isset($Sheets[$Index]))
          {
              $this -> CurrentSheet = $Index;
              $this -> rewind();
              return true;
          }

          return false;
      }

      // !Iterator interface methods
      /**
       * Rewind the Iterator to the first element.
       * Similar to the reset() function for arrays in PHP
       */
      #[\ReturnTypeWillChange]
      public function rewind()
      {
          // current() reads the first row but puts Index back to 0, so Index alone does not
          // tell whether the XML reader has moved; the row/table state has to be checked too
          $HasAdvanced = $this -> Index > 0
              || $this -> CurrentRow !== null
              || $this -> TableOpen
              || $this -> RowOpen;

          if ($HasAdvanced && $this -> Content instanceof XMLReader)
          {
              // If the worksheet was already read from, the XML file is reopened.
              // Otherwise it is at the beginning anyway
              $this -> Content -> close();
              $this -> Valid = ($this -> Content -> open($this -> ContentPath) !== false);

              $this -> TableOpen = false;
              $this -> RowOpen = false;
              $this -> CurrentRow = null;
          }

          $this -> Index = 0;
      }

      /**
       * Return the current element.
       * Similar to the current() function for arrays in PHP
       *
       * @return mixed current element from the collection
       */
      #[\ReturnTypeWillChange]
      public function current()
      {
          if ($this -> Index == 0 && is_null($this -> CurrentRow))
          {
              // Nothing read yet: load the first row without counting it as an iteration step
              $this -> next();
              $this -> Index--;
          }
          return $this -> CurrentRow;
      }

      /**
       * Move forward to next element.
       * Similar to the next() function for arrays in PHP
       *
       * @return array Cells of the row that was read; empty when there was nothing (more) to read
       */
      #[\ReturnTypeWillChange]
      public function next()
      {
          $this -> Index++;
          $this -> CurrentRow = array();

          if (!($this -> Content instanceof XMLReader))
          {
              // No content was opened in the constructor; there is nothing to iterate
              $this -> Valid = false;
              return $this -> CurrentRow;
          }

          if (!$this -> TableOpen)
          {
              $this -> SeekCurrentSheet();
          }

          if ($this -> TableOpen && !$this -> RowOpen)
          {
              $this -> SeekNextRow();
          }

          if ($this -> RowOpen)
          {
              $this -> ReadRowCells();
          }

          return $this -> CurrentRow;
      }

      /**
       * Advances the reader to the start tag of the table whose position equals CurrentSheet,
       * setting TableOpen when it is found and Valid to whether the reader still has a node.
       */
      private function SeekCurrentSheet()
      {
          $TableCounter = 0;
          $HasNode = $this -> Content -> read();

          while ($HasNode)
          {
              if ($this -> Content -> name == 'table:table' && $this -> Content -> nodeType != XMLReader::END_ELEMENT)
              {
                  if ($TableCounter == $this -> CurrentSheet)
                  {
                      $this -> TableOpen = true;
                      break;
                  }

                  $TableCounter++;
                  // Skip the whole unwanted table; next() leaves the reader on the node after it,
                  // which is examined on the next pass without another read()
                  $HasNode = $this -> Content -> next();
              }
              else
              {
                  $HasNode = $this -> Content -> read();
              }
          }

          $this -> Valid = $HasNode;
      }

      /**
       * Advances the reader to the next row start tag inside the open table.
       * Reaching a table:table node instead closes the table and ends the iteration.
       */
      private function SeekNextRow()
      {
          while ($this -> Valid = $this -> Content -> read())
          {
              $NodeName = $this -> Content -> name;

              if ($NodeName == 'table:table')
              {
                  $this -> TableOpen = false;
                  $this -> Content -> next('office:document-content');
                  $this -> Valid = false;
                  return;
              }

              if ($NodeName == 'table:table-row' && $this -> Content -> nodeType != XMLReader::END_ELEMENT)
              {
                  // A self-closing row has no cells and no end tag. It is returned as an empty row
                  // and must not be left open, or the next row's start tag would be taken for its end
                  $this -> RowOpen = !$this -> Content -> isEmptyElement;
                  return;
              }
          }
      }

      /**
       * Reads the cells of the open row into CurrentRow, up to the next table:table-row node.
       */
      private function ReadRowCells()
      {
          $LastCellContent = '';

          while ($this -> Valid = $this -> Content -> read())
          {
              switch ($this -> Content -> name)
              {
                  case 'table:table-cell':
                      $IsEndTag = ($this -> Content -> nodeType == XMLReader::END_ELEMENT);

                      if (!$IsEndTag && !$this -> Content -> isEmptyElement)
                      {
                          // Start of a cell with content: take its whole text for now,
                          // any text:p inside replaces it further down
                          $LastCellContent = $this -> Content -> readString();
                          break;
                      }

                      if (!$IsEndTag)
                      {
                          // Self-closing cell, there is no content at all
                          $LastCellContent = '';
                      }

                      $this -> CurrentRow[] = $LastCellContent;

                      // NOTE(review): for cells with content the attribute is read while positioned
                      // on the end tag, as before - confirm the installed libxml exposes it there
                      $RepeatedColumnCount = (int)$this -> Content -> getAttribute('table:number-columns-repeated');

                      // Checking if larger than one because the value is already added to the row once before
                      if ($RepeatedColumnCount > 1)
                      {
                          $this -> CurrentRow = array_pad(
                              $this -> CurrentRow,
                              count($this -> CurrentRow) + $RepeatedColumnCount - 1,
                              $LastCellContent
                          );
                      }
                      break;
                  case 'text:p':
                      if ($this -> Content -> nodeType != XMLReader::END_ELEMENT)
                      {
                          $LastCellContent = $this -> Content -> readString();
                      }
                      break;
                  case 'table:table-row':
                      $this -> RowOpen = false;
                      break 2;
              }
          }
      }

      /**
       * Return the identifying key of the current element.
       * Similar to the key() function for arrays in PHP
       *
       * @return mixed either an integer or a string
       */
      #[\ReturnTypeWillChange]
      public function key()
      {
          return $this -> Index;
      }

      /**
       * Check if there is a current element after calls to rewind() or next().
       * Used to check if we've iterated to the end of the collection
       *
       * @return boolean FALSE if there's nothing more to iterate over
       */
      #[\ReturnTypeWillChange]
      public function valid()
      {
          return $this -> Valid;
      }

      // !Countable interface method
      /**
       * Ostensibly should return the count of the contained items but this just returns the number
       * of rows read so far. It's not really correct but at least coherent.
       *
       * @return int Current index plus one
       */
      #[\ReturnTypeWillChange]
      public function count()
      {
          return $this -> Index + 1;
      }
  }
  296. ?>