melder_files.cpp 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161
  1. /* melder_files.cpp
  2. *
  3. * Copyright (C) 1992-2008,2010-2018 Paul Boersma, 2013 Tom Naughton
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. * See the GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /*
  19. * pb 2002/03/07 GPL
  20. * rvs&pb 2002/03/07 url support
  21. * pb 2002/03/10 Mach compatibility
  22. * pb 2003/09/12 MelderFile_getMacType
  23. * pb 2003/09/14 MelderDir_relativePathToFile
  24. * pb 2004/09/25 use /tmp as temporary directory
  25. * pb 2004/10/16 C++ compatible structs
  26. * pb 2005/11/07 Windows: use %USERPROFILE% rather than %HOMESHARE%%HOMEPATH%
  27. * rvs&pb 2005/11/18 url support
  28. * pb 2006/01/21 MelderFile_writeText does not create temporary file
  29. * pb 2006/08/03 openForWriting
  30. * rvs 2006/08/12 curl: do not fail on error
  31. * pb 2006/08/12 check whether unicodeName exists
  32. * pb 2006/10/28 erased MacOS 9 stuff
  33. * Erez Volk 2007/05/14 FLAC support
  34. * pb 2007/05/28 wchar
  35. * pb 2007/06/09 more wchar
  36. * pb 2007/08/12 more wchar
  37. * pb 2007/10/05 FSFindFolder
  38. * pb 2008/11/01 warn after finding final tabs (not just spaces) in file names
  39. * pb 2010/12/14 more high Unicode compatibility
  40. * pb 2011/04/05 C++
  41. */
  42. #if defined (UNIX)
  43. #include <unistd.h>
  44. #include <sys/stat.h>
  45. #endif
  46. #if defined (CURLPRESENT)
  47. #include <curl/curl.h>
  48. #endif
  49. #ifdef _WIN32
  50. #include <windows.h>
  51. #endif
  52. #if defined (macintosh)
  53. #include "macport_on.h"
  54. #include <CoreFoundation/CoreFoundation.h>
  55. #include "macport_off.h"
  56. #endif
  57. #include <errno.h>
  58. #include "melder.h"
  59. #include "../kar/UnicodeData.h"
  60. //#include "flac_FLAC_stream_encoder.h"
  61. extern "C" int FLAC__stream_encoder_finish (FLAC__StreamEncoder *);
  62. extern "C" void FLAC__stream_encoder_delete (FLAC__StreamEncoder *);
  63. #if defined (macintosh)
  64. #include <sys/stat.h>
  65. #define UNIX
  66. #include <unistd.h>
  67. #endif
  68. static char32 theShellDirectory [kMelder_MAXPATH+1];
  69. void Melder_rememberShellDirectory () {
  70. structMelderDir shellDir { };
  71. Melder_getDefaultDir (& shellDir);
  72. str32cpy (theShellDirectory, Melder_dirToPath (& shellDir));
  73. }
  74. conststring32 Melder_getShellDirectory () {
  75. return & theShellDirectory [0];
  76. }
  77. void Melder_str32To8bitFileRepresentation_inplace (conststring32 string, char *utf8) {
  78. #if defined (macintosh)
  79. /*
  80. On the Mac, the POSIX path name is stored in canonically decomposed UTF-8 encoding.
  81. The path is probably in precomposed UTF-32.
  82. So we first convert to UTF-16, then turn into CFString, then decompose, then convert to UTF-8.
  83. */
  84. UniChar unipath [kMelder_MAXPATH+1];
  85. int64 n = str32len (string), n_utf16 = 0;
  86. for (int64 i = 0; i < n; i ++) {
  87. char32 kar = (char32) string [i]; // change sign (bit 32 is never used)
  88. if (kar <= 0x00'FFFF) {
  89. unipath [n_utf16 ++] = (UniChar) kar; // including null byte; guarded truncation
  90. } else if (kar <= 0x10'FFFF) {
  91. kar -= 0x01'0000;
  92. unipath [n_utf16 ++] = (UniChar) (0x00'D800 | (kar >> 10)); // correct truncation, because UTF-32 has fewer than 27 bits (in fact it has 21 bits)
  93. unipath [n_utf16 ++] = (UniChar) (0x00'DC00 | (kar & 0x00'03FF));
  94. } else {
  95. unipath [n_utf16 ++] = UNICODE_REPLACEMENT_CHARACTER;
  96. }
  97. }
  98. unipath [n_utf16] = u'\0';
  99. CFStringRef cfpath = CFStringCreateWithCharacters (nullptr, unipath, n_utf16);
  100. CFMutableStringRef cfpath2 = CFStringCreateMutableCopy (nullptr, 0, cfpath);
  101. CFRelease (cfpath);
  102. CFStringNormalize (cfpath2, kCFStringNormalizationFormD); // Mac requires decomposed characters
  103. CFStringGetCString (cfpath2, (char *) utf8, kMelder_MAXPATH+1, kCFStringEncodingUTF8); // Mac POSIX requires UTF-8
  104. CFRelease (cfpath2);
  105. #elif defined (UNIX) || defined (__CYGWIN__)
  106. Melder_32to8_inplace (string, utf8);
  107. #elif defined (_WIN32)
  108. int n = str32len (string), i, j;
  109. for (i = 0, j = 0; i < n; i ++) {
  110. utf8 [j ++] = string [i] <= 255 ? string [i] : '?'; // the usual replacement on Windows
  111. }
  112. utf8 [j] = '\0';
  113. #else
  114. //#error Unsupported platform.
  115. #endif
  116. }
  117. #if defined (UNIX)
  118. void Melder_8bitFileRepresentationToStr32_inplace (const char *path8, char32 *path32) {
  119. #if defined (macintosh)
  120. CFStringRef cfpath = CFStringCreateWithCString (nullptr, path8, kCFStringEncodingUTF8);
  121. if (! cfpath) {
  122. /*
  123. Probably something wrong, like a disk was disconnected in the meantime.
  124. */
  125. Melder_8to32_inplace (path8, path32, kMelder_textInputEncoding::UTF8);
  126. Melder_throw (U"Unusual error finding or creating file ", path32, U".");
  127. }
  128. CFMutableStringRef cfpath2 = CFStringCreateMutableCopy (nullptr, 0, cfpath);
  129. CFRelease (cfpath);
  130. CFStringNormalize (cfpath2, kCFStringNormalizationFormC); // Praat requires composed characters
  131. integer n_utf16 = CFStringGetLength (cfpath2);
  132. integer n_utf32 = 0;
  133. for (integer i = 0; i < n_utf16; i ++) {
  134. char32 kar1 = CFStringGetCharacterAtIndex (cfpath2, i);
  135. if (kar1 >= 0x00'D800 && kar1 <= 0x00'DBFF) {
  136. char32 kar2 = (char32) CFStringGetCharacterAtIndex (cfpath2, ++ i); // convert up
  137. if (kar2 >= 0x00'DC00 && kar2 <= 0x00'DFFF) {
  138. kar1 = (((kar1 & 0x3FF) << 10) | (kar2 & 0x3FF)) + 0x01'0000;
  139. } else {
  140. kar1 = UNICODE_REPLACEMENT_CHARACTER;
  141. }
  142. }
  143. path32 [n_utf32 ++] = kar1;
  144. }
  145. path32 [n_utf32] = U'\0';
  146. CFRelease (cfpath2);
  147. #else
  148. Melder_8to32_inplace (path8, path32, kMelder_textInputEncoding::UTF8);
  149. #endif
  150. }
  151. #endif
  152. conststring32 MelderFile_name (MelderFile file) {
  153. #if defined (UNIX)
  154. char32 *slash = str32rchr (file -> path, U'/');
  155. return slash ? slash + 1 : file -> path;
  156. #elif defined (_WIN32)
  157. char32 *backslash = str32rchr (file -> path, U'\\');
  158. return backslash ? backslash + 1 : file -> path;
  159. #else
  160. return nullptr;
  161. #endif
  162. }
  163. conststring32 MelderDir_name (MelderDir dir) {
  164. #if defined (UNIX)
  165. char32 *slash = str32rchr (dir -> path, U'/');
  166. return slash ? slash + 1 : dir -> path;
  167. #elif defined (_WIN32)
  168. char32 *backslash = str32rchr (dir -> path, U'\\');
  169. return backslash ? backslash + 1 : dir -> path;
  170. #else
  171. return nullptr;
  172. #endif
  173. }
  174. void Melder_pathToDir (conststring32 path, MelderDir dir) {
  175. str32cpy (dir -> path, path);
  176. }
  177. void Melder_pathToFile (conststring32 path, MelderFile file) {
  178. /*
  179. * This handles complete path names only.
  180. *
  181. * Used if we know for sure that we have a complete path name,
  182. * i.e. if the program determined the name (fileselector, printing, prefs).
  183. */
  184. str32cpy (file -> path, path);
  185. }
  186. void Melder_relativePathToFile (conststring32 path, MelderFile file) {
  187. /*
  188. * This handles complete and partial path names,
  189. * and translates slashes to native directory separators.
  190. *
  191. * Used if we do not know for sure that we have a complete path name,
  192. * i.e. if the user determined the name (scripting).
  193. */
  194. #if defined (UNIX)
  195. /*
  196. * We assume that Unix complete path names start with a slash.
  197. */
  198. if (path [0] == U'~' && path [1] == U'/') {
  199. Melder_sprint (file -> path,kMelder_MAXPATH+1, Melder_peek8to32 (getenv ("HOME")), & path [1]);
  200. } else if (path [0] == U'/' || str32equ (path, U"<stdout>") || str32str (path, U"://")) {
  201. str32cpy (file -> path, path);
  202. } else {
  203. structMelderDir dir { };
  204. Melder_getDefaultDir (& dir); // BUG
  205. if (dir. path [0] == U'/' && dir. path [1] == U'\0') {
  206. Melder_sprint (file -> path,kMelder_MAXPATH+1, U"/", path);
  207. } else {
  208. Melder_sprint (file -> path,kMelder_MAXPATH+1, dir. path, U"/", path);
  209. }
  210. }
  211. #elif defined (_WIN32)
  212. /*
  213. * We assume that Win32 complete path names look like:
  214. * C:\WINDOWS\CTRL32.DLL
  215. * LPT1:
  216. * \\host\path
  217. */
  218. structMelderDir dir { };
  219. if (path [0] == U'~' && path [1] == U'/') {
  220. Melder_getHomeDir (& dir);
  221. Melder_sprint (file -> path,kMelder_MAXPATH+1, dir. path, & path [1]);
  222. for (;;) {
  223. char32 *slash = str32chr (file -> path, U'/');
  224. if (! slash) break;
  225. *slash = U'\\';
  226. }
  227. return;
  228. }
  229. if (str32chr (path, U'/') && ! str32str (path, U"://")) {
  230. char32 winPath [kMelder_MAXPATH+1];
  231. Melder_sprint (winPath,kMelder_MAXPATH+1, path);
  232. for (;;) {
  233. char32 *slash = str32chr (winPath, U'/');
  234. if (! slash) break;
  235. *slash = U'\\';
  236. }
  237. Melder_relativePathToFile (winPath, file);
  238. return;
  239. }
  240. if (str32chr (path, U':') || path [0] == U'\\' && path [1] == U'\\' || str32equ (path, U"<stdout>")) {
  241. Melder_sprint (file -> path,kMelder_MAXPATH+1, path);
  242. } else {
  243. Melder_getDefaultDir (& dir); // BUG
  244. Melder_sprint (file -> path,kMelder_MAXPATH+1,
  245. dir. path,
  246. dir. path [0] != U'\0' && dir. path [str32len (dir. path) - 1] == U'\\' ? U"" : U"\\",
  247. path);
  248. }
  249. #endif
  250. }
  251. conststring32 Melder_dirToPath (MelderDir dir) {
  252. return & dir -> path [0];
  253. }
  254. conststring32 Melder_fileToPath (MelderFile file) {
  255. return & file -> path [0];
  256. }
  257. void MelderFile_copy (MelderFile file, MelderFile copy) {
  258. str32cpy (copy -> path, file -> path);
  259. }
  260. void MelderDir_copy (MelderDir dir, MelderDir copy) {
  261. str32cpy (copy -> path, dir -> path);
  262. }
  263. bool MelderFile_equal (MelderFile file1, MelderFile file2) {
  264. return str32equ (file1 -> path, file2 -> path);
  265. }
  266. bool MelderDir_equal (MelderDir dir1, MelderDir dir2) {
  267. return str32equ (dir1 -> path, dir2 -> path);
  268. }
  269. void MelderFile_setToNull (MelderFile file) {
  270. file -> path [0] = U'\0';
  271. }
  272. bool MelderFile_isNull (MelderFile file) {
  273. return ! file || file -> path [0] == U'\0';
  274. }
  275. void MelderDir_setToNull (MelderDir dir) {
  276. dir -> path [0] = U'\0';
  277. }
  278. bool MelderDir_isNull (MelderDir dir) {
  279. return dir -> path [0] == U'\0';
  280. }
  281. void MelderDir_getFile (MelderDir parent, conststring32 fileName, MelderFile file) {
  282. #if defined (UNIX)
  283. if (parent -> path [0] == U'/' && parent -> path [1] == U'\0') {
  284. Melder_sprint (file -> path,kMelder_MAXPATH+1, U"/", fileName);
  285. } else {
  286. Melder_sprint (file -> path,kMelder_MAXPATH+1, parent -> path, U"/", fileName);
  287. }
  288. #elif defined (_WIN32)
  289. if (str32rchr (file -> path, U'\\') - file -> path == str32len (file -> path) - 1) {
  290. Melder_sprint (file -> path,kMelder_MAXPATH+1, parent -> path, fileName);
  291. } else {
  292. Melder_sprint (file -> path,kMelder_MAXPATH+1, parent -> path, U"\\", fileName);
  293. }
  294. #endif
  295. }
  296. void MelderDir_relativePathToFile (MelderDir dir, conststring32 path, MelderFile file) {
  297. structMelderDir saveDir { };
  298. Melder_getDefaultDir (& saveDir);
  299. Melder_setDefaultDir (dir);
  300. Melder_relativePathToFile (path, file);
  301. Melder_setDefaultDir (& saveDir);
  302. }
  303. #ifndef UNIX
  304. static void Melder_getDesktop (MelderDir dir) {
  305. dir -> path [0] = U'\0';
  306. }
  307. #endif
  308. void MelderFile_getParentDir (MelderFile file, MelderDir parent) {
  309. #if defined (UNIX)
  310. /*
  311. The parent of /usr/hello.txt is /usr.
  312. The parent of /hello.txt is /.
  313. */
  314. str32cpy (parent -> path, file -> path);
  315. char32 *slash = str32rchr (parent -> path, U'/');
  316. if (slash) *slash = U'\0';
  317. if (parent -> path [0] == U'\0') str32cpy (parent -> path, U"/");
  318. #elif defined (_WIN32)
  319. /*
  320. The parent of C:\WINDOWS\CTRL.DLL is C:\WINDOWS.
  321. The parent of E:\Praat.exe is E:\.
  322. The parent of \\Swine\Apps\init.txt is \\Swine\Apps.
  323. The parent of \\Swine\init.txt is \\Swine\. (BUG ?)
  324. */
  325. str32cpy (parent -> path, file -> path);
  326. char32 *colon = str32chr (parent -> path, U':');
  327. if (colon) {
  328. char32 *backslash = str32rchr (parent -> path, U'\\');
  329. if (backslash) { // C:\WINDOWS\CTRL.DLL or C:\AUTOEXEC.BAT
  330. if (backslash - colon == 1) { // C:\AUTOEXEC.BAT
  331. * (backslash + 1) = U'\0'; // C:\ - !!! aargh this was a bug after converting this line to line comments
  332. } else { // C:\WINDOWS\CTRL.DLL
  333. *backslash = U'\0'; // C:\WINDOWS
  334. }
  335. } else { /* ??? */
  336. Melder_getDesktop (parent); // empty string
  337. }
  338. } else if (parent -> path [0] == U'\\' && parent -> path [1] == U'\\') {
  339. char32 *backslash = str32rchr (parent -> path + 2, U'\\');
  340. if (backslash) { // \\Swine\Apps\init.txt or \\Swine\init.txt
  341. char32 *leftBackslash = str32chr (parent -> path + 2, U'\\');
  342. if (backslash - leftBackslash == 0) { // \\Swine\init.txt
  343. * (backslash + 1) = U'\0'; // \\Swine\ - !!! dear developer, don't delete this hyphen, lest the line ends in a backslash
  344. } else { // \\Swine\Apps\init.txt
  345. *backslash = U'\0'; // \\Swine\Apps
  346. }
  347. } else { // \\init.txt ???
  348. Melder_getDesktop (parent); // empty string
  349. }
  350. } else { // unknown path type
  351. Melder_getDesktop (parent); // empty string
  352. }
  353. #endif
  354. }
  355. void MelderDir_getParentDir (MelderDir dir, MelderDir parent) {
  356. #if defined (UNIX)
  357. /*
  358. The parent of /usr/local is /usr.
  359. The parent of /usr is /.
  360. The parent of / is "".
  361. */
  362. str32cpy (parent -> path, dir -> path);
  363. char32 *slash = str32rchr (parent -> path, U'/');
  364. if (slash) {
  365. if (slash - parent -> path == 0) {
  366. if (slash [1] == U'\0') { // child is "/"
  367. parent -> path [0] = U'\0'; // parent is ""
  368. } else { // child is "/usr"
  369. slash [1] = '\0'; // parent is "/"
  370. }
  371. } else { // child is "/usr/local"
  372. *slash = U'\0'; // parent is "/usr"
  373. }
  374. } else {
  375. parent -> path [0] = U'\0'; // some failure; desktop
  376. }
  377. #elif defined (_WIN32)
  378. /*
  379. The parent of C:\WINDOWS is C:\.
  380. The parent of E:\ is the desktop.
  381. The parent of \\Swine\ is the desktop. (BUG ?)
  382. */
  383. str32cpy (parent -> path, dir -> path);
  384. char32 *colon = str32chr (parent -> path, U':');
  385. if (colon) {
  386. int length = str32len (parent -> path);
  387. char32 *backslash = str32rchr (parent -> path, U'\\');
  388. if (backslash) { // C:\WINDOWS\FONTS or C:\WINDOWS or C:\ - (cannot add a line comment with a backslash)
  389. if (backslash - parent -> path == length - 1) { // C:\ -
  390. Melder_getDesktop (parent); // empty string
  391. } else if (backslash - colon == 1) { // C:\WINDOWS
  392. * (backslash + 1) = U'\0'; // C:\ -
  393. } else { // C:\WINDOWS\FONTS
  394. *backslash = U'\0'; // C:\WINDOWS
  395. }
  396. } else { // LPT1: ???
  397. Melder_getDesktop (parent); // empty string
  398. }
  399. } else if (parent -> path [0] == U'\\' && parent -> path [1] == U'\\') {
  400. int length = str32len (parent -> path);
  401. char32 *backslash = str32rchr (parent -> path + 2, U'\\');
  402. if (backslash) { // \\Swine\Apps\Praats or \\Swine\Apps or \\Swine\ -
  403. if (backslash - parent -> path == length - 1) { // \\Swine\ -
  404. Melder_getDesktop (parent); // empty string
  405. } else { // \\Swine\Apps\Praats or \\Swine\Apps
  406. char32 *leftBackslash = str32chr (parent -> path + 2, U'\\');
  407. if (backslash - leftBackslash == 0) { // \\Swine\Apps
  408. * (backslash + 1) = U'\0'; // \\Swine\ -
  409. } else { // \\Swine\Apps\Praats
  410. *backslash = U'\0'; // \\Swine\Apps
  411. }
  412. }
  413. } else { // \\Swine ???
  414. Melder_getDesktop (parent); // empty string
  415. }
  416. } else { // unknown path type.
  417. Melder_getDesktop (parent); // empty string
  418. }
  419. #endif
  420. }
  421. bool MelderDir_isDesktop (MelderDir dir) {
  422. return dir -> path [0] == U'\0';
  423. }
  424. void MelderDir_getSubdir (MelderDir parent, conststring32 subdirName, MelderDir subdir) {
  425. #if defined (UNIX)
  426. if (parent -> path [0] == U'/' && parent -> path [1] == U'\0') {
  427. Melder_sprint (subdir -> path,kMelder_MAXPATH+1, U"/", subdirName);
  428. } else {
  429. Melder_sprint (subdir -> path,kMelder_MAXPATH+1, parent -> path, U"/", subdirName);
  430. }
  431. #elif defined (_WIN32)
  432. int length = str32len (parent -> path);
  433. char32 *backslash = str32rchr (parent -> path, U'\\');
  434. if (backslash && backslash - parent -> path == length - 1) { // C:\ or \\Swine\ -
  435. Melder_sprint (subdir -> path, kMelder_MAXPATH+1, parent -> path, subdirName);
  436. } else { // C:\WINDOWS or \\Swine\Apps or even C:
  437. Melder_sprint (subdir -> path,kMelder_MAXPATH+1, parent -> path, U"\\", subdirName);
  438. }
  439. #endif
  440. }
  441. void Melder_getHomeDir (MelderDir homeDir) {
  442. #if defined (UNIX)
  443. char *home = getenv ("HOME");
  444. str32cpy (homeDir -> path, home ? Melder_peek8to32 (home) : U"/");
  445. #elif defined (_WIN32)
  446. WCHAR driveW [kMelder_MAXPATH+1], pathW [kMelder_MAXPATH+1];
  447. DWORD n = GetEnvironmentVariableW (L"USERPROFILE", pathW, kMelder_MAXPATH+1);
  448. if (n > kMelder_MAXPATH) Melder_throw (U"Home directory name too long.");
  449. if (n > 0) {
  450. Melder_sprint (homeDir -> path,kMelder_MAXPATH+1, Melder_peekWto32 (pathW));
  451. return;
  452. }
  453. n = GetEnvironmentVariableW (L"HOMEDRIVE", driveW, kMelder_MAXPATH+1);
  454. if (n > kMelder_MAXPATH) Melder_throw (U"Home drive name too long.");
  455. if (n > 0) {
  456. GetEnvironmentVariable (L"HOMEPATH", pathW, kMelder_MAXPATH+1);
  457. Melder_sprint (homeDir -> path,kMelder_MAXPATH+1, Melder_peekWto32 (driveW), Melder_peekWto32 (pathW));
  458. return;
  459. }
  460. MelderDir_setToNull (homeDir); // Windows 95 and 98: alas
  461. #endif
  462. }
  463. void Melder_getPrefDir (MelderDir prefDir) {
  464. #if defined (macintosh)
  465. structMelderDir homeDir { };
  466. Melder_getHomeDir (& homeDir);
  467. Melder_sprint (prefDir -> path,kMelder_MAXPATH+1, homeDir. path, U"/Library/Preferences");
  468. #elif defined (UNIX)
  469. /*
  470. * Preferences files go into the home directory.
  471. */
  472. Melder_getHomeDir (prefDir);
  473. #elif defined (_WIN32)
  474. /*
  475. * On Windows 95, preferences files went in the Windows directory.
  476. * On shared systems (NT, 2000, XP), preferences files go into the home directory.
  477. * TODO: at some point, these files should be moved to HOME\AppData\Roaming\Praat.
  478. */
  479. Melder_getHomeDir (prefDir);
  480. #endif
  481. }
  482. void Melder_getTempDir (MelderDir tempDir) {
  483. #if defined (macintosh)
  484. Melder_sprint (tempDir -> path,kMelder_MAXPATH+1, Melder_peek8to32 (getenv ("TMPDIR"))); // or append /TemporaryItems
  485. // confstr with _CS_DARWIN_USER_TEMP_DIR
  486. #else
  487. (void) tempDir;
  488. #endif
  489. }
  490. #ifdef CURLPRESENT
  /*
      Whether curl_global_init() has been called (and not yet undone by Melder_files_cleanUp()).
  */
  static int curl_initialized = 0;

  /*
      libcurl write callback: appends the received bytes to the stdio stream passed as `userp`.
      Returns what fwrite() returns, i.e. the number of complete items written.

      `userp` must be a FILE *; the conversion from void * has to be explicit in C++.
  */
  static size_t write_URL_data_to_file (void *buffer, size_t size, size_t nmemb, void *userp) {
      return fwrite (buffer, size, nmemb, (FILE *) userp);
  }

  /*
      libcurl read callback: fills `buffer` with data from the stdio stream passed as `userp`.
      Returns what fread() returns, i.e. the number of complete items read.

      `userp` must be a FILE *; the conversion from void * has to be explicit in C++.
  */
  static size_t read_URL_data_from_file (void *buffer, size_t size, size_t nmemb, void *userp) {
      return fread (buffer, size, nmemb, (FILE *) userp);
  }
  498. #endif
  499. FILE * Melder_fopen (MelderFile file, const char *type) {
  500. if (MelderFile_isNull (file)) Melder_throw (U"Cannot open null file.");
  501. if (! Melder_isTracing)
  502. Melder_assert (str32equ (Melder_double (1.5), U"1.5")); // check locale settings; because of the required file portability Praat cannot stand "1,5"
  503. /*
  504. * On the Unix-like systems (including MacOS), the path has to be converted to 8-bit characters in UTF-8 encoding.
  505. * On MacOS, the characters also have to be decomposed.
  506. */
  507. char utf8path [kMelder_MAXPATH+1];
  508. Melder_str32To8bitFileRepresentation_inplace (file -> path, utf8path);
  509. FILE *f;
  510. file -> openForWriting = ( type [0] == 'w' || type [0] == 'a' || strchr (type, '+') );
  511. if (str32equ (file -> path, U"<stdout>") && file -> openForWriting) {
  512. f = stdout;
  513. #ifdef CURLPRESENT
  514. } else if (strstr (utf8path, "://") && file -> openForWriting) {
  515. Melder_assert (type [0] == 'w'); // reject "append" and "random" access
  516. f = tmpfile (); // open a temporary file for writing
  517. } else if (strstr (utf8path, "://") && ! file -> openForWriting) {
  518. CURLcode CURLreturn;
  519. CURL *CURLhandle;
  520. char errorbuffer [CURL_ERROR_SIZE] = "";
  521. f = tmpfile (); // open a temporary file for writing
  522. if (! curl_initialized) {
  523. CURLreturn = curl_global_init (CURL_GLOBAL_ALL);
  524. curl_initialized = 1;
  525. };
  526. CURLhandle = curl_easy_init (); // initialize session
  527. /*
  528. * Set up the connection parameters.
  529. */
  530. /* Debugging: Verbose messages */
  531. /* CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_VERBOSE, 1); */
  532. /* Do not fail on error. */
  533. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_FAILONERROR, 0);
  534. /* Store error messages in a buffer. */
  535. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_ERRORBUFFER, errorbuffer);
  536. /* The file stream to store the URL. */
  537. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_FILE, f);
  538. /* The function to write to the file, necessary for Win32. */
  539. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_WRITEFUNCTION, write_URL_data_to_file);
  540. /* The actual URL to handle. */
  541. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_URL, utf8path);
  542. /* Get the URL and write it to the given file. */
  543. CURLreturn = curl_easy_perform (CURLhandle);
  544. /* Handle errors. */
  545. if (CURLreturn) {
  546. Melder_appendError (Melder_peek8to32 (errorbuffer));
  547. f = nullptr;
  548. };
  549. /* Clean up session. */
  550. curl_easy_cleanup (CURLhandle);
  551. /* Do something with the file. Why? */
  552. if (f) rewind (f);
  553. #endif
  554. } else {
  555. #if defined (_WIN32) && ! defined (__CYGWIN__)
  556. f = _wfopen (Melder_peek32toW (file -> path), Melder_peek32toW (Melder_peek8to32 (type)));
  557. #else
  558. f = fopen ((char *) utf8path, type);
  559. #endif
  560. }
  561. if (! f) {
  562. char32 *path = file -> path;
  563. Melder_appendError (U"Cannot ", type [0] == 'r' ? U"open" : type [0] == 'a' ? U"append to" : U"create",
  564. U" file ", file, U".");
  565. if (path [0] == U'\0')
  566. Melder_appendError (U"Hint: empty file name.");
  567. else if (path [0] == U' ' || path [0] == U'\t')
  568. Melder_appendError (U"Hint: file name starts with a space or tab.");
  569. else if (path [str32len (path) - 1] == U' ' || path [str32len (path) - 1] == U'\t')
  570. Melder_appendError (U"Hint: file name ends in a space or tab.");
  571. else if (str32chr (path, U'\n'))
  572. Melder_appendError (U"Hint: file name contains a newline symbol.");
  573. throw MelderError ();
  574. return nullptr;
  575. }
  576. return f;
  577. }
  578. void Melder_fclose (MelderFile file, FILE *f) {
  579. if (! f) return;
  580. #if defined (CURLPRESENT)
  581. if (str32str (file -> wpath, U"://") && file -> openForWriting) {
  582. unsigned char utf8path [kMelder_MAXPATH+1];
  583. Melder_str32To8bitFileRepresentation_inplace (file -> path, utf8path);
  584. /* Rewind the file. */
  585. if (f) rewind (f);
  586. CURLcode CURLreturn;
  587. CURL *CURLhandle;
  588. char errorbuffer [CURL_ERROR_SIZE] = "";
  589. /* Start global init (necessary only ONCE). */
  590. if (! curl_initialized) {
  591. CURLreturn = curl_global_init (CURL_GLOBAL_ALL);
  592. curl_initialized = 1;
  593. };
  594. CURLhandle = curl_easy_init (); /* Initialize session. */
  595. /*
  596. * Set up the connection parameters.
  597. */
  598. /* Debugging: Verbose messages */
  599. /* CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_VERBOSE, 1); */
  600. /* Catch FILE: protocol errors. No solution yet */
  601. if (str32str (file -> path, U"file://") || str32str (file -> path, U"FILE://")) {
  602. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_VERBOSE, 1);
  603. }
  604. /* Do not return Error pages, just fail. */
  605. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_FAILONERROR, 1);
  606. /* Store error messages in a buffer. */
  607. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_ERRORBUFFER, errorbuffer);
  608. /* Send header. */
  609. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_HEADER, 1);
  610. /* Upload. */
  611. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_UPLOAD, 1);
  612. /* The actual URL to handle. */
  613. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_URL, utf8path);
  614. /* The function to write to the peer, necessary for Win32. */
  615. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_READFUNCTION, read_URL_data_from_file);
  616. CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_READDATA, f);
  617. /* Get the URL and write the file to it. */
  618. CURLreturn = curl_easy_perform (CURLhandle);
  619. /* Handle errors. */
  620. if (CURLreturn) {
  621. curl_easy_cleanup (CURLhandle);
  622. f = nullptr;
  623. Melder_throw (Melder_peek8to32 (errorbuffer), U"\n");
  624. };
  625. /* Clean up session */
  626. curl_easy_cleanup (CURLhandle);
  627. }
  628. #endif
  629. if (f != stdout && fclose (f) == EOF)
  630. Melder_throw (U"Error closing file ", file, U".");
  631. }
  /*
      Undoes the global curl initialization, if it was performed.
      Does nothing when CURLPRESENT is not defined.
  */
  void Melder_files_cleanUp () {
      #if defined (CURLPRESENT)
          if (! curl_initialized)
              return;
          curl_global_cleanup ();
          curl_initialized = 0;
      #endif
  }
  640. bool MelderFile_exists (MelderFile file) {
  641. #if defined (UNIX)
  642. char utf8path [kMelder_MAXPATH+1];
  643. Melder_str32To8bitFileRepresentation_inplace (file -> path, utf8path);
  644. struct stat statistics;
  645. return ! stat (utf8path, & statistics);
  646. #else
  647. try {
  648. autofile f = Melder_fopen (file, "rb");
  649. f.close (file);
  650. return true;
  651. } catch (MelderError) {
  652. Melder_clearError ();
  653. return false;
  654. }
  655. #endif
  656. }
  657. bool MelderFile_readable (MelderFile file) {
  658. try {
  659. autofile f = Melder_fopen (file, "rb");
  660. f.close (file);
  661. return true;
  662. } catch (MelderError) {
  663. Melder_clearError ();
  664. return false;
  665. }
  666. }
  667. integer MelderFile_length (MelderFile file) {
  668. #if defined (UNIX)
  669. char utf8path [kMelder_MAXPATH+1];
  670. Melder_str32To8bitFileRepresentation_inplace (file -> path, utf8path);
  671. struct stat statistics;
  672. if (stat ((char *) utf8path, & statistics) != 0) return -1;
  673. return statistics. st_size;
  674. #else
  675. try {
  676. autofile f = Melder_fopen (file, "r");
  677. fseek (f, 0, SEEK_END);
  678. integer length = ftell (f);
  679. f.close (file);
  680. return length;
  681. } catch (MelderError) {
  682. Melder_clearError ();
  683. return -1;
  684. }
  685. #endif
  686. }
  687. void MelderFile_delete (MelderFile file) {
  688. if (! file) return;
  689. #if defined (UNIX)
  690. char utf8path [kMelder_MAXPATH+1];
  691. Melder_str32To8bitFileRepresentation_inplace (file -> path, utf8path);
  692. remove ((char *) utf8path);
  693. #elif defined (_WIN32)
  694. DeleteFile (Melder_peek32toW (file -> path));
  695. #endif
  696. }
  697. char32 * Melder_peekExpandBackslashes (conststring32 message) {
  698. static char32 names [11] [kMelder_MAXPATH+1];
  699. static int index = 0;
  700. if (++ index == 11) index = 0;
  701. char32 *to = & names [index] [0];
  702. for (const char32 *from = & message [0]; *from != '\0'; from ++, to ++) {
  703. *to = *from;
  704. if (*from == U'\\') { * ++ to = U'b'; * ++ to = U's'; }
  705. }
  706. *to = U'\0';
  707. return & names [index] [0];
  708. }
  709. conststring32 MelderFile_messageName (MelderFile file) {
  710. return Melder_cat (U"“", file -> path, U"”"); // BUG: is cat allowed here?
  711. }
  712. void Melder_getDefaultDir (MelderDir dir) {
  713. #if defined (UNIX)
  714. char path [kMelder_MAXPATH+1];
  715. getcwd (path, kMelder_MAXPATH+1);
  716. Melder_8bitFileRepresentationToStr32_inplace (path, dir -> path);
  717. #elif defined (_WIN32)
  718. static WCHAR dirPathW [kMelder_MAXPATH+1];
  719. GetCurrentDirectory (kMelder_MAXPATH+1, dirPathW);
  720. Melder_sprint (dir -> path,kMelder_MAXPATH+1, Melder_peekWto32 (dirPathW));
  721. #endif
  722. }
  723. void Melder_setDefaultDir (MelderDir dir) {
  724. #if defined (UNIX)
  725. chdir (Melder_peek32to8 (dir -> path));
  726. #elif defined (_WIN32)
  727. SetCurrentDirectory (Melder_peek32toW (dir -> path));
  728. #endif
  729. }
  730. void MelderFile_setDefaultDir (MelderFile file) {
  731. structMelderDir dir { };
  732. MelderFile_getParentDir (file, & dir);
  733. Melder_setDefaultDir (& dir);
  734. }
  735. void Melder_createDirectory (MelderDir parent, conststring32 dirName, int mode) {
  736. #if defined (UNIX)
  737. structMelderFile file { };
  738. if (dirName [0] == U'/') {
  739. Melder_sprint (file. path,kMelder_MAXPATH+1, dirName); // absolute path
  740. } else if (parent -> path [0] == U'/' && parent -> path [1] == U'\0') {
  741. Melder_sprint (file. path,kMelder_MAXPATH+1, U"/", dirName); // relative path in root directory
  742. } else {
  743. Melder_sprint (file. path,kMelder_MAXPATH+1, parent -> path, U"/", dirName); // relative path
  744. }
  745. char utf8path [kMelder_MAXPATH+1];
  746. Melder_str32To8bitFileRepresentation_inplace (file. path, utf8path);
  747. if (mkdir (utf8path, mode) == -1 && errno != EEXIST) // ignore if directory already exists
  748. Melder_throw (U"Cannot create directory ", & file, U".");
  749. #elif defined (_WIN32)
  750. structMelderFile file { };
  751. SECURITY_ATTRIBUTES sa;
  752. (void) mode;
  753. sa. nLength = sizeof (SECURITY_ATTRIBUTES);
  754. sa. lpSecurityDescriptor = nullptr;
  755. sa. bInheritHandle = false;
  756. if (str32chr (dirName, U':') || dirName [0] == U'/' && dirName [1] == U'/') {
  757. Melder_sprint (file. path,kMelder_MAXPATH+1, dirName); // absolute path
  758. } else {
  759. Melder_sprint (file. path,kMelder_MAXPATH+1, parent -> path, U"/", dirName); // relative path
  760. }
  761. if (! CreateDirectoryW (Melder_peek32toW (file. path), & sa) && GetLastError () != ERROR_ALREADY_EXISTS) // ignore if directory already exists
  762. Melder_throw (U"Cannot create directory ", & file, U".");
  763. #else
  764. //#error Unsupported operating system.
  765. #endif
  766. }
  767. static size_t fread_multi (char *buffer, size_t numberOfBytes, FILE *f) {
  768. off_t offset = 0;
  769. size_t numberOfBytesRead = 0;
  770. const size_t chunkSize = 1'000'000'000;
  771. while (numberOfBytes > chunkSize) {
  772. size_t numberOfBytesReadInChunk = fread (buffer + offset, sizeof (char), chunkSize, f);
  773. numberOfBytesRead += numberOfBytesReadInChunk;
  774. if (numberOfBytesReadInChunk < chunkSize)
  775. return numberOfBytesRead;
  776. numberOfBytes -= chunkSize;
  777. offset += chunkSize;
  778. }
  779. size_t numberOfBytesReadInLastChunk = fread (buffer + offset, sizeof (char), numberOfBytes, f);
  780. numberOfBytesRead += numberOfBytesReadInLastChunk;
  781. return numberOfBytesRead;
  782. }
  783. autostring32 MelderFile_readText (MelderFile file, autostring8 *string8) {
  784. try {
  785. int type = 0; // 8-bit
  786. autostring32 text;
  787. autofile f = Melder_fopen (file, "rb");
  788. if (fseeko (f, 0, SEEK_END) < 0)
  789. Melder_throw (U"Cannot count the bytes in the file.");
  790. Melder_assert (sizeof (off_t) >= 8);
  791. int64 length = ftello (f);
  792. rewind (f);
  793. if (length >= 2) {
  794. int firstByte = fgetc (f), secondByte = fgetc (f);
  795. if (firstByte == 0xFE && secondByte == 0xFF) {
  796. type = 1; // big-endian 16-bit
  797. } else if (firstByte == 0xFF && secondByte == 0xFE) {
  798. type = 2; // little-endian 16-bit
  799. } else if (firstByte == 0xEF && secondByte == 0xBB && length >= 3) {
  800. int thirdByte = fgetc (f);
  801. if (thirdByte == 0xBF)
  802. type = -1; // UTF-8 with BOM
  803. }
  804. }
  805. if (type <= 0) {
  806. if (type == -1) {
  807. length -= 3;
  808. fseeko (f, 3, SEEK_SET);
  809. } else {
  810. rewind (f); // length and type already set correctly.
  811. }
  812. autostring8 text8bit (length);
  813. Melder_assert (text8bit);
  814. size_t numberOfBytesRead = fread_multi (text8bit.get(), (size_t) length, f);
  815. Melder_require ((int64) numberOfBytesRead == length,
  816. U"The file contains ", length, U" bytes",
  817. type == -1 ? U" after the byte-order mark" : U"",
  818. U", but we could read only ", numberOfBytesRead, U" of them."
  819. );
  820. text8bit [length] = '\0';
  821. /*
  822. * Count and repair null bytes.
  823. */
  824. if (length > 0) {
  825. int64 numberOfNullBytes = 0;
  826. for (char *p = & text8bit [length - 1]; (int64) (p - text8bit.get()) >= 0; p --) {
  827. if (*p == '\0') {
  828. numberOfNullBytes += 1;
  829. /*
  830. * Shift.
  831. */
  832. for (char *q = p; (int64) (q - text8bit.get()) < length; q ++)
  833. *q = q [1];
  834. }
  835. }
  836. if (numberOfNullBytes > 0) {
  837. Melder_warning (U"Ignored ", numberOfNullBytes, U" null bytes in text file ", file, U".");
  838. }
  839. }
  840. if (string8) {
  841. *string8 = text8bit.move();
  842. (void) Melder_killReturns_inplace (string8->get());
  843. return autostring32(); // OK
  844. } else {
  845. text = Melder_8to32 (text8bit.get(), kMelder_textInputEncoding::UNDEFINED);
  846. }
  847. } else {
  848. length = length / 2 - 1; // Byte Order Mark subtracted. Length = number of UTF-16 codes
  849. text = autostring32 (length + 1);
  850. if (type == 1) {
  851. for (int64 i = 0; i < length; i ++) {
  852. char16 kar1 = bingetu16 (f);
  853. if (kar1 < 0xD800) {
  854. text [i] = (char32) kar1; // convert up without sign extension
  855. } else if (kar1 < 0xDC00) {
  856. length --;
  857. char16 kar2 = bingetu16 (f);
  858. if (kar2 >= 0xDC00 && kar2 <= 0xDFFF) {
  859. text [i] = (char32) (0x010000 +
  860. (char32) (((char32) kar1 & 0x0003FF) << 10) +
  861. (char32) ((char32) kar2 & 0x0003FF));
  862. } else {
  863. text [i] = UNICODE_REPLACEMENT_CHARACTER;
  864. }
  865. } else if (kar1 < 0xE000) {
  866. text [i] = UNICODE_REPLACEMENT_CHARACTER;
  867. } else {
  868. text [i] = (char32) kar1; // convert up without sign extension
  869. }
  870. }
  871. } else {
  872. for (int64 i = 0; i < length; i ++) {
  873. char16 kar1 = bingetu16LE (f);
  874. if (kar1 < 0xD800) {
  875. text [i] = (char32) kar1; // convert up without sign extension
  876. } else if (kar1 < 0xDC00) {
  877. length --;
  878. char16 kar2 = bingetu16LE (f);
  879. if (kar2 >= 0xDC00 && kar2 <= 0xDFFF) {
  880. text [i] = (char32) (0x01'0000 +
  881. (char32) (((char32) kar1 & 0x00'03FF) << 10) +
  882. (char32) ((char32) kar2 & 0x00'03FF));
  883. } else {
  884. text [i] = UNICODE_REPLACEMENT_CHARACTER;
  885. }
  886. } else if (kar1 < 0xE000) {
  887. text [i] = UNICODE_REPLACEMENT_CHARACTER;
  888. } else if (kar1 <= 0xFFFF) {
  889. text [i] = (char32) kar1; // convert up without sign extension
  890. } else {
  891. Melder_fatal (U"MelderFile_readText: unsigned short greater than 0xFFFF: should not occur.");
  892. }
  893. }
  894. }
  895. text [length] = U'\0';
  896. (void) Melder_killReturns_inplace (text.get());
  897. }
  898. f.close (file);
  899. return text;
  900. } catch (MelderError) {
  901. Melder_throw (U"Error reading file ", file, U".");
  902. }
  903. }
  904. void Melder_fwrite32to8 (conststring32 string, FILE *f) {
  905. /*
  906. * Precondition:
  907. * the string's encoding is UTF-32.
  908. * Failure:
  909. * if the precondition does not hold, we don't crash,
  910. * but the characters that are written may be incorrect.
  911. */
  912. for (const char32* p = string; *p != U'\0'; p ++) {
  913. char32 kar = *p;
  914. if (kar <= 0x00'007F) {
  915. #ifdef _WIN32
  916. if (kar == U'\n')
  917. fputc (13, f);
  918. #endif
  919. fputc ((int) kar, f); // because fputc wants an int instead of an uint8 (guarded conversion)
  920. } else if (kar <= 0x00'07FF) {
  921. fputc (0xC0 | (kar >> 6), f);
  922. fputc (0x80 | (kar & 0x00'003F), f);
  923. } else if (kar <= 0x00FFFF) {
  924. fputc (0xE0 | (kar >> 12), f);
  925. fputc (0x80 | ((kar >> 6) & 0x00'003F), f);
  926. fputc (0x80 | (kar & 0x00'003F), f);
  927. } else {
  928. fputc (0xF0 | (kar >> 18), f);
  929. fputc (0x80 | ((kar >> 12) & 0x00'003F), f);
  930. fputc (0x80 | ((kar >> 6) & 0x00'003F), f);
  931. fputc (0x80 | (kar & 0x00'003F), f);
  932. }
  933. }
  934. }
  935. void MelderFile_writeText (MelderFile file, conststring32 text, kMelder_textOutputEncoding outputEncoding) {
  936. if (! text)
  937. text = U"";
  938. autofile f = Melder_fopen (file, "wb");
  939. if (outputEncoding == kMelder_textOutputEncoding::UTF8) {
  940. Melder_fwrite32to8 (text, f);
  941. } else if ((outputEncoding == kMelder_textOutputEncoding::ASCII_THEN_UTF16 && Melder_isValidAscii (text)) ||
  942. (outputEncoding == kMelder_textOutputEncoding::ISO_LATIN1_THEN_UTF16 && Melder_isEncodable (text, kMelder_textOutputEncoding_ISO_LATIN1)))
  943. {
  944. #ifdef _WIN32
  945. #define flockfile(f) (void) 0
  946. #define funlockfile(f) (void) 0
  947. #define putc_unlocked putc
  948. #endif
  949. flockfile (f);
  950. size_t n = str32len (text);
  951. for (size_t i = 0; i < n; i ++) {
  952. char32 kar = text [i];
  953. #ifdef _WIN32
  954. if (kar == U'\n')
  955. putc_unlocked (13, f);
  956. #endif
  957. putc_unlocked (kar, f);
  958. }
  959. funlockfile (f);
  960. } else {
  961. binputu16 (0xFEFF, f); // Byte Order Mark
  962. size_t n = str32len (text);
  963. for (size_t i = 0; i < n; i ++) {
  964. char32 kar = text [i];
  965. #ifdef _WIN32
  966. if (kar == U'\n')
  967. binputu16 (13, f);
  968. #endif
  969. if (kar <= 0x00'FFFF) {
  970. binputu16 ((char16) kar, f); // guarded conversion down
  971. } else if (kar <= 0x10'FFFF) {
  972. kar -= 0x010000;
  973. binputu16 (0xD800 | (uint16) (kar >> 10), f);
  974. binputu16 (0xDC00 | (uint16) ((char16) kar & 0x3ff), f);
  975. } else {
  976. binputu16 (UNICODE_REPLACEMENT_CHARACTER, f);
  977. }
  978. }
  979. }
  980. f.close (file);
  981. }
  982. void MelderFile_appendText (MelderFile file, conststring32 text) {
  983. if (! text) text = U"";
  984. autofile f1;
  985. try {
  986. f1.reset (Melder_fopen (file, "rb"));
  987. } catch (MelderError) {
  988. Melder_clearError (); // it's OK if the file didn't exist yet...
  989. MelderFile_writeText (file, text, Melder_getOutputEncoding ()); // because then we just "write"
  990. return;
  991. }
  992. /*
  993. * The file already exists and is open. Determine its type.
  994. */
  995. int firstByte = fgetc (f1), secondByte = fgetc (f1);
  996. f1.close (file);
  997. int type = 0;
  998. if (firstByte == 0xfe && secondByte == 0xff) {
  999. type = 1; // big-endian 16-bit
  1000. } else if (firstByte == 0xff && secondByte == 0xfe) {
  1001. type = 2; // little-endian 16-bit
  1002. }
  1003. if (type == 0) {
  1004. kMelder_textOutputEncoding outputEncoding = Melder_getOutputEncoding ();
  1005. if (outputEncoding == kMelder_textOutputEncoding::UTF8) { // TODO: read as file's encoding
  1006. autofile f2 = Melder_fopen (file, "ab");
  1007. Melder_fwrite32to8 (text, f2);
  1008. f2.close (file);
  1009. } else if ((outputEncoding == kMelder_textOutputEncoding::ASCII_THEN_UTF16 && Melder_isEncodable (text, kMelder_textOutputEncoding_ASCII))
  1010. || (outputEncoding == kMelder_textOutputEncoding::ISO_LATIN1_THEN_UTF16 && Melder_isEncodable (text, kMelder_textOutputEncoding_ISO_LATIN1)))
  1011. {
  1012. /*
  1013. * Append ASCII or ISOLatin1 text to ASCII or ISOLatin1 file.
  1014. */
  1015. autofile f2 = Melder_fopen (file, "ab");
  1016. int64 n = str32len (text);
  1017. for (int64 i = 0; i < n; i ++) {
  1018. char32 kar = text [i];
  1019. #ifdef _WIN32
  1020. if (kar == U'\n')
  1021. fputc (13, f2);
  1022. #endif
  1023. fputc ((char8) kar, f2);
  1024. }
  1025. f2.close (file);
  1026. } else {
  1027. /*
  1028. * Convert to wide character file.
  1029. */
  1030. autostring32 oldText = MelderFile_readText (file);
  1031. autofile f2 = Melder_fopen (file, "wb");
  1032. binputu16 (0xfeff, f2);
  1033. int64 n = str32len (oldText.get());
  1034. for (int64 i = 0; i < n; i ++) {
  1035. char32 kar = oldText [i];
  1036. #ifdef _WIN32
  1037. if (kar == U'\n')
  1038. binputu16 (13, f2);
  1039. #endif
  1040. if (kar <= 0x00'FFFF) {
  1041. binputu16 ((uint16) kar, f2); // guarded conversion down
  1042. } else if (kar <= 0x10'FFFF) {
  1043. kar -= 0x01'0000;
  1044. binputu16 ((uint16) (0x00'D800 | (kar >> 10)), f2);
  1045. binputu16 ((uint16) (0x00'DC00 | (kar & 0x00'03ff)), f2);
  1046. } else {
  1047. binputu16 (UNICODE_REPLACEMENT_CHARACTER, f2);
  1048. }
  1049. }
  1050. n = str32len (text);
  1051. for (int64 i = 0; i < n; i ++) {
  1052. char32 kar = text [i];
  1053. #ifdef _WIN32
  1054. if (kar == U'\n')
  1055. binputu16 (13, f2);
  1056. #endif
  1057. if (kar <= 0x00FFFF) {
  1058. binputu16 ((uint16) kar, f2); // guarded conversion down
  1059. } else if (kar <= 0x10'FFFF) {
  1060. kar -= 0x01'0000;
  1061. binputu16 ((uint16) (0x00'D800 | (kar >> 10)), f2);
  1062. binputu16 ((uint16) (0x00'DC00 | (kar & 0x00'03ff)), f2);
  1063. } else {
  1064. binputu16 (UNICODE_REPLACEMENT_CHARACTER, f2);
  1065. }
  1066. }
  1067. f2.close (file);
  1068. }
  1069. } else {
  1070. autofile f2 = Melder_fopen (file, "ab");
  1071. int64 n = str32len (text);
  1072. for (int64 i = 0; i < n; i ++) {
  1073. if (type == 1) {
  1074. char32 kar = text [i];
  1075. #ifdef _WIN32
  1076. if (kar == U'\n')
  1077. binputu16 (13, f2);
  1078. #endif
  1079. if (kar <= 0x00'FFFF) {
  1080. binputu16 ((uint16) kar, f2); // guarded conversion down
  1081. } else if (kar <= 0x10'FFFF) {
  1082. kar -= 0x01'0000;
  1083. binputu16 ((uint16) (0x00'D800 | (kar >> 10)), f2);
  1084. binputu16 ((uint16) (0x00'DC00 | (kar & 0x00'03ff)), f2);
  1085. } else {
  1086. binputu16 (UNICODE_REPLACEMENT_CHARACTER, f2);
  1087. }
  1088. } else {
  1089. char32 kar = text [i];
  1090. #ifdef _WIN32
  1091. if (kar == U'\n')
  1092. binputu16LE (13, f2);
  1093. #endif
  1094. if (kar <= 0x00'FFFF) {
  1095. binputu16LE ((uint16) kar, f2); // guarded conversion down
  1096. } else if (kar <= 0x10FFFF) {
  1097. kar -= 0x01'0000;
  1098. binputu16LE ((uint16) (0x00'D800 | (kar >> 10)), f2);
  1099. binputu16LE ((uint16) (0x00'DC00 | (kar & 0x00'03ff)), f2);
  1100. } else {
  1101. binputu16LE (UNICODE_REPLACEMENT_CHARACTER, f2);
  1102. }
  1103. }
  1104. }
  1105. f2.close (file);
  1106. }
  1107. }
  1108. /* End of file melder_files.cpp */