Data.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. /* Data.cpp
  2. *
  3. * Copyright (C) 1992-2006,2008-2018 Paul Boersma
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. * See the GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. #include "Collection.h"
  19. Thing_implement (Daata, Thing, 0);
  20. structMelderDir Data_directoryBeingRead { };
  21. void structDaata :: v_copy (Daata /* thee */) {
  22. }
  23. bool structDaata :: v_equal (Daata /* thee */) {
  24. return true;
  25. } // names of "identical" objects are allowed to be different
  26. bool structDaata :: v_canWriteAsEncoding (int /* encoding */) {
  27. return true;
  28. }
  29. void structDaata :: v_writeText (MelderFile /* openFile */) {
  30. }
  31. void structDaata :: v_readText (MelderReadText, int /* formatVersion */) {
  32. }
  33. void structDaata :: v_writeBinary (FILE *) {
  34. }
  35. void structDaata :: v_readBinary (FILE *, int /*formatVersion*/) {
  36. }
  37. autoDaata _Data_copy (Daata me) {
  38. try {
  39. if (! me) return autoDaata();
  40. autoDaata thee = Thing_newFromClass (my classInfo).static_cast_move <structDaata> ();
  41. my v_copy (thee.get());
  42. Thing_setName (thee.get(), my name.get());
  43. return thee;
  44. } catch (MelderError) {
  45. Melder_throw (me, U": not copied.");
  46. }
  47. }
  48. bool Data_equal (Daata me, Daata thee) {
  49. if (my classInfo != thy classInfo) return false; // different class: not equal
  50. int offset = sizeof (struct structDaata); // we already compared the methods, and are going to skip the names
  51. if (! memcmp ((char *) me + offset, (char *) thee + offset, my classInfo -> size - offset)) // BUG: not necessarily portable
  52. return true; // no shallow differences
  53. return my v_equal (thee);
  54. }
  55. bool Data_canWriteAsEncoding (Daata me, int encoding) {
  56. return my v_canWriteAsEncoding (encoding);
  57. }
  58. bool Data_canWriteText (Daata me) {
  59. return my v_writable ();
  60. }
  61. void Data_writeText (Daata me, MelderFile openFile) {
  62. my v_writeText (openFile);
  63. if (ferror (openFile -> filePointer))
  64. Melder_throw (U"I/O error.");
  65. }
  66. MelderFile Data_createTextFile (Daata me, MelderFile file, bool verbose) {
  67. autoMelderFile mfile = MelderFile_create (file);
  68. #if defined (_WIN32)
  69. file -> requiresCRLF = true;
  70. #endif
  71. file -> verbose = verbose;
  72. file -> outputEncoding = (int) Melder_getOutputEncoding ();
  73. if (file -> outputEncoding == (int) kMelder_textOutputEncoding::ASCII_THEN_UTF16)
  74. file -> outputEncoding = Data_canWriteAsEncoding (me, kMelder_textOutputEncoding_ASCII) ?
  75. kMelder_textOutputEncoding_ASCII : (int) kMelder_textOutputEncoding::UTF16;
  76. else if (file -> outputEncoding == (int) kMelder_textOutputEncoding::ISO_LATIN1_THEN_UTF16)
  77. file -> outputEncoding = Data_canWriteAsEncoding (me, kMelder_textOutputEncoding_ISO_LATIN1) ?
  78. kMelder_textOutputEncoding_ISO_LATIN1 : (int) kMelder_textOutputEncoding::UTF16;
  79. if (file -> outputEncoding == (int) kMelder_textOutputEncoding::UTF16) {
  80. binputu16 (0xfeff, file -> filePointer);
  81. }
  82. return mfile.transfer();
  83. }
  84. static void _Data_writeToTextFile (Daata me, MelderFile file, bool verbose) {
  85. try {
  86. if (! Data_canWriteText (me))
  87. Melder_throw (U"Objects of class ", my classInfo -> className, U" cannot be written to a text file.");
  88. autoMelderFile mfile = Data_createTextFile (me, file, verbose);
  89. #ifndef _WIN32
  90. flockfile (file -> filePointer); // BUG
  91. #endif
  92. MelderFile_write (file, U"File type = \"ooTextFile\"\nObject class = \"", my classInfo -> className);
  93. if (my classInfo -> version > 0)
  94. MelderFile_write (file, U" ", my classInfo -> version);
  95. MelderFile_write (file, U"\"\n");
  96. Data_writeText (me, file);
  97. MelderFile_writeCharacter (file, U'\n');
  98. #ifndef _WIN32
  99. if (file -> filePointer) funlockfile (file -> filePointer);
  100. #endif
  101. mfile.close ();
  102. } catch (MelderError) {
  103. #ifndef _WIN32
  104. if (file -> filePointer) funlockfile (file -> filePointer); // the file pointer is null before Data_createTextFile() and after mfile.close()
  105. #endif
  106. throw;
  107. }
  108. }
  109. void Data_writeToTextFile (Daata me, MelderFile file) {
  110. try {
  111. _Data_writeToTextFile (me, file, true);
  112. } catch (MelderError) {
  113. Melder_throw (me, U": not written to text file ", file, U".");
  114. }
  115. }
  116. void Data_writeToShortTextFile (Daata me, MelderFile file) {
  117. try {
  118. _Data_writeToTextFile (me, file, false);
  119. } catch (MelderError) {
  120. Melder_throw (me, U": not written to short text file ", file, U".");
  121. }
  122. }
  123. bool Data_canWriteBinary (Daata me) {
  124. return my v_writable ();
  125. }
  126. void Data_writeBinary (Daata me, FILE *f) {
  127. my v_writeBinary (f);
  128. if (ferror (f))
  129. Melder_throw (U"I/O error.");
  130. }
  131. void Data_writeToBinaryFile (Daata me, MelderFile file) {
  132. try {
  133. if (! Data_canWriteBinary (me))
  134. Melder_throw (U"Objects of class ", my classInfo -> className, U" cannot be written to a generic binary file.");
  135. autoMelderFile mfile = MelderFile_create (file);
  136. if (fprintf (file -> filePointer, "ooBinaryFile") < 0)
  137. Melder_throw (U"Cannot write first bytes of file.");
  138. binputw8 (
  139. my classInfo -> version > 0 ?
  140. Melder_cat (my classInfo -> className, U" ", my classInfo -> version) :
  141. my classInfo -> className,
  142. file -> filePointer);
  143. Data_writeBinary (me, file -> filePointer);
  144. mfile.close ();
  145. } catch (MelderError) {
  146. Melder_throw (me, U": not written to binary file ", file, U".");
  147. }
  148. }
  149. bool Data_canReadText (Daata me) {
  150. return my v_writable ();
  151. }
  152. void Data_readText (Daata me, MelderReadText text, int formatVersion) {
  153. try {
  154. my v_readText (text, formatVersion);
  155. my v_repair ();
  156. } catch (MelderError) {
  157. Melder_throw (Thing_className (me), U" not read.");
  158. }
  159. }
  160. autoDaata Data_readFromTextFile (MelderFile file) {
  161. try {
  162. autoMelderReadText text = MelderReadText_createFromFile (file);
  163. const mutablestring32 line = MelderReadText_readLine (text.get());
  164. if (! line)
  165. Melder_throw (U"No lines.");
  166. /*
  167. Allow for a future version of text files (we have no plans).
  168. This check was written on 2017-09-10.
  169. See below at `Data_readFromBinaryFile` for a more serious proposal.
  170. */
  171. if (str32str (line, U"ooText2File"))
  172. Melder_throw (U"This Praat version cannot read this Praat file. Please download a newer version of Praat.");
  173. char32 *end = str32str (line, U"ooTextFile"); // oo format?
  174. autoDaata me;
  175. int formatVersion;
  176. if (end) {
  177. autostring32 klas = texgetw16 (text.get());
  178. me = Thing_newFromClassName (klas.get(), & formatVersion).static_cast_move <structDaata> ();
  179. } else {
  180. end = str32str (line, U"TextFile");
  181. if (! end)
  182. Melder_throw (U"Not an old-type text file; should not occur.");
  183. *end = U'\0';
  184. me = Thing_newFromClassName (line, nullptr).static_cast_move <structDaata> ();
  185. formatVersion = -1; // old version
  186. }
  187. MelderFile_getParentDir (file, & Data_directoryBeingRead);
  188. Data_readText (me.get(), text.get(), formatVersion);
  189. file -> format = structMelderFile :: Format :: text;
  190. return me;
  191. } catch (MelderError) {
  192. Melder_throw (U"Data not read from text file ", file, U".");
  193. }
  194. }
  195. bool Data_canReadBinary (Daata me) {
  196. return my v_writable ();
  197. }
  198. void Data_readBinary (Daata me, FILE *f, int formatVersion) {
  199. try {
  200. my v_readBinary (f, formatVersion);
  201. if (feof (f))
  202. Melder_throw (U"Early end of file.");
  203. if (ferror (f))
  204. Melder_throw (U"I/O error.");
  205. my v_repair ();
  206. } catch (MelderError) {
  207. Melder_throw (Thing_className (me), U" not read.");
  208. }
  209. }
  210. autoDaata Data_readFromBinaryFile (MelderFile file) {
  211. try {
  212. autofile f = Melder_fopen (file, "rb");
  213. char line [200];
  214. size_t n = fread (line, 1, 199, f); line [n] = '\0';
  215. /*
  216. Allow for a future version of binary files, which can handle 64-bit integers
  217. and are perhaps written in little-endian format.
  218. This check was written on 2017-09-10, and should stay for at least a year;
  219. ooBinary2 files can therefore be implemented from some moment after 2018-09-10.
  220. Please compare with `Data_readFromTextFile` above.
  221. */
  222. if (strstr (line, "ooBinary2File"))
  223. Melder_throw (U"This Praat version cannot read this Praat file. Please download a newer version of Praat.");
  224. char *end = strstr (line, "ooBinaryFile");
  225. autoDaata me;
  226. int formatVersion;
  227. if (end) {
  228. fseek (f, strlen ("ooBinaryFile"), 0);
  229. autostring8 klas = bingets8 (f);
  230. me = Thing_newFromClassName (Melder_peek8to32 (klas.get()), & formatVersion).static_cast_move <structDaata> ();
  231. } else {
  232. end = strstr (line, "BinaryFile");
  233. if (! end) {
  234. Melder_throw (U"File ", file, U" is not a Data binary file.");
  235. }
  236. *end = '\0';
  237. me = Thing_newFromClassName (Melder_peek8to32 (line), nullptr).static_cast_move <structDaata> ();
  238. formatVersion = -1; // old version: override version number, which was set to 0 by newFromClassName
  239. rewind (f);
  240. fread (line, 1, (size_t) (end - line) + strlen ("BinaryFile"), f);
  241. }
  242. MelderFile_getParentDir (file, & Data_directoryBeingRead);
  243. Data_readBinary (me.get(), f, formatVersion);
  244. file -> format = structMelderFile :: Format :: binary;
  245. f.close (file);
  246. return me;
  247. } catch (MelderError) {
  248. Melder_throw (U"Data not read from binary file ", file, U".");
  249. }
  250. }
  251. static int defaultPublish (autoDaata /* me */) {
  252. return 0; // nothing published
  253. }
  254. static int (*thePublish) (autoDaata) = defaultPublish;
  255. int Data_publish (autoDaata me) {
  256. return thePublish (me.move());
  257. }
  258. void Data_setPublishProc (int (*publish) (autoDaata)) {
  259. thePublish = publish ? publish : defaultPublish;
  260. }
  261. /* Generic reading. */
  262. static int numFileTypeRecognizers = 0;
  263. static Data_FileTypeRecognizer fileTypeRecognizers [100];
  264. void Data_recognizeFileType (Data_FileTypeRecognizer recognizer) {
  265. Melder_assert (numFileTypeRecognizers < 100);
  266. fileTypeRecognizers [++ numFileTypeRecognizers] = recognizer;
  267. }
  268. autoDaata Data_readFromFile (MelderFile file) {
  269. char header [513];
  270. autofile f = Melder_fopen (file, "rb");
  271. size_t nread_u = fread (& header [0], 1, 512, f);
  272. integer nread = (integer) nread_u; // we know it cannot be more than 512
  273. f.close (file);
  274. header [nread] = 0;
  275. /***** 1. Is this file a text file as defined in Data.cpp? *****/
  276. if (nread > 11) {
  277. int numberOfBytesInFileType = 0;
  278. char *p = strstr (header, "TextFile");
  279. if (p) {
  280. numberOfBytesInFileType = 8;
  281. } else {
  282. p = strstr (header, "Text2File"); // future version?
  283. numberOfBytesInFileType = 9;
  284. }
  285. if (p && p - header < nread - numberOfBytesInFileType && p - header < 40)
  286. return Data_readFromTextFile (file);
  287. }
  288. if (nread > 22) {
  289. char headerCopy [101];
  290. memcpy (headerCopy, header, 100);
  291. headerCopy [100] = '\0';
  292. for (int i = 0; i < 100; i ++)
  293. if (headerCopy [i] == '\0') headerCopy [i] = '\001';
  294. char *p = strstr (headerCopy, "T\001e\001x\001t\001F\001i\001l\001e");
  295. if (p && p - headerCopy < nread - 15 && p - headerCopy < 80)
  296. return Data_readFromTextFile (file);
  297. }
  298. /***** 2. Is this file a binary file as defined in Data.cpp? *****/
  299. if (nread > 13) {
  300. int numberOfBytesInFileType = 0;
  301. char *p = strstr (header, "BinaryFile");
  302. if (p) {
  303. numberOfBytesInFileType = 10;
  304. } else {
  305. p = strstr (header, "Binary2File"); // future version
  306. numberOfBytesInFileType = 11;
  307. }
  308. if (p && p - header < nread - numberOfBytesInFileType && p - header < 40)
  309. return Data_readFromBinaryFile (file);
  310. }
  311. /***** 3. Is this file of a type for which a recognizer has been installed? *****/
  312. MelderFile_getParentDir (file, & Data_directoryBeingRead);
  313. for (int i = 1; i <= numFileTypeRecognizers; i ++) {
  314. autoDaata object = fileTypeRecognizers [i] (nread, header, file);
  315. if (object) {
  316. if (object -> classInfo == classDaata) // dummy object? the recognizer could have had a side effect, such as drawing a picture
  317. return autoDaata ();
  318. return object;
  319. }
  320. }
  321. /***** 4. Is this a common text file? *****/
  322. int i = 0;
  323. for (; i < nread; i ++)
  324. if (header [i] < 32 || header [i] > 126) // not ASCII? (note: this expression happens to work correctly for both signed and unsigned char)
  325. break;
  326. if (i >= nread) return Data_readFromTextFile (file);
  327. Melder_throw (U"File ", file, U" not recognized.");
  328. }
  329. /* Recursive routines for working with struct members. */
  330. int Data_Description_countMembers (Data_Description structDescription) {
  331. int count = 0;
  332. for (Data_Description desc = structDescription; desc -> name; desc ++)
  333. count ++;
  334. if (structDescription [0]. type == inheritwa) {
  335. Data_Description parentDescription = ((Daata) _Thing_dummyObject ((ClassInfo) structDescription [0]. tagType)) -> v_description ();
  336. if (parentDescription)
  337. return count + Data_Description_countMembers (parentDescription);
  338. }
  339. return count;
  340. }
  341. Data_Description Data_Description_findMatch (Data_Description structDescription, conststring32 name) {
  342. for (Data_Description desc = structDescription; desc -> name; desc ++)
  343. if (str32equ (name, desc -> name)) return desc;
  344. if (structDescription [0]. type == inheritwa) {
  345. Data_Description parentDescription = ((Daata) _Thing_dummyObject ((ClassInfo) structDescription [0]. tagType)) -> v_description ();
  346. if (parentDescription)
  347. return Data_Description_findMatch (parentDescription, name);
  348. }
  349. return nullptr; // not found
  350. }
  351. Data_Description Data_Description_findNumberUse (Data_Description structDescription, conststring32 string) {
  352. for (Data_Description desc = structDescription; desc -> name; desc ++) {
  353. if (desc -> max1 && str32equ (desc -> max1, string)) return desc;
  354. if (desc -> max2 && str32equ (desc -> max2, string)) return desc;
  355. }
  356. if (structDescription [0]. type == inheritwa) {
  357. Data_Description parentDescription = ((Daata) _Thing_dummyObject ((ClassInfo) structDescription [0]. tagType)) -> v_description ();
  358. if (parentDescription)
  359. return Data_Description_findNumberUse (parentDescription, string);
  360. }
  361. return nullptr;
  362. }
  363. /* Retrieving data from object + description. */
  364. int64 Data_Description_integer (void *address, Data_Description description) {
  365. switch (description -> type) {
  366. case bytewa: return * (signed char *) ((char *) address + description -> offset);
  367. case int16wa: return * (int16 *) ((char *) address + description -> offset);
  368. case intwa: return * (int *) ((char *) address + description -> offset);
  369. case integerwa: return * (integer *) ((char *) address + description -> offset);
  370. case ubytewa: return * (unsigned char *) ((char *) address + description -> offset);
  371. case uintwa: return * (unsigned int *) ((char *) address + description -> offset);
  372. case uintegerwa: return (int64) * (uinteger *) ((char *) address + description -> offset); // ignore numbers above 2^63 - 1
  373. case questionwa: return * (bool *) ((char *) address + description -> offset);
  374. case objectwa: return (* (Collection *) ((char *) address + description -> offset))->size; // FIXME: alignment not guaranteed
  375. case collectionofwa: return ( (Collection) ((char *) address + description -> offset))->size; // FIXME: alignment not guaranteed
  376. case collectionwa: return (* (Collection *) ((char *) address + description -> offset))->size; // FIXME: alignment not guaranteed
  377. default: return 0;
  378. }
  379. }
  380. int Data_Description_evaluateInteger (void *structAddress, Data_Description structDescription,
  381. conststring32 formula, integer *result)
  382. {
  383. if (! formula) { // this was a VECTOR_FROM array
  384. *result = 1;
  385. return 1;
  386. }
  387. if (formula [0] >= U'a' && formula [0] <= U'z') {
  388. char32 buffer [100], *minus1, *psize;
  389. Data_Description sizeDescription;
  390. str32cpy (buffer, formula);
  391. if ((minus1 = str32str (buffer, U" - 1")) != nullptr)
  392. *minus1 = U'\0'; // strip trailing " - 1", but remember
  393. if ((psize = str32str (buffer, U" -> size")) != nullptr)
  394. *psize = U'\0'; // strip trailing " -> size"
  395. if (! (sizeDescription = Data_Description_findMatch (structDescription, buffer))) {
  396. *result = 0;
  397. return 0 /*Melder_error ("Cannot find member \"%ls\".", buffer)*/;
  398. }
  399. *result = Data_Description_integer (structAddress, sizeDescription);
  400. if (minus1) *result -= 1;
  401. } else {
  402. *result = Melder_atoi (formula);
  403. }
  404. return 1;
  405. }
  406. /* End of file Data.cpp */