os_unix.c 82 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759
  1. /*
  2. ** 2004 May 22
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. ** This file contains code that is specific to Unix systems.
  14. */
  15. #include "sqliteInt.h"
  16. #if OS_UNIX /* This file is used on unix only */
  17. /* #define SQLITE_ENABLE_LOCKING_STYLE 0 */
  18. /*
  19. ** These #defines should enable >2GB file support on Posix if the
  20. ** underlying operating system supports it. If the OS lacks
  21. ** large file support, these should be no-ops.
  22. **
  23. ** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
  24. ** on the compiler command line. This is necessary if you are compiling
  25. ** on a recent machine (ex: RedHat 7.2) but you want your code to work
  26. ** on an older machine (ex: RedHat 6.0). If you compile on RedHat 7.2
  27. ** without this option, LFS is enabled. But LFS does not exist in the kernel
  28. ** in RedHat 6.0, so the code won't work. Hence, for maximum binary
  29. ** portability you should omit LFS.
  30. */
  31. #ifndef SQLITE_DISABLE_LFS
  32. # define _LARGE_FILE 1
  33. # ifndef _FILE_OFFSET_BITS
  34. # define _FILE_OFFSET_BITS 64
  35. # endif
  36. # define _LARGEFILE_SOURCE 1
  37. #endif
  38. /*
  39. ** standard include files.
  40. */
  41. #include <sys/types.h>
  42. #include <sys/stat.h>
  43. #include <fcntl.h>
  44. #include <unistd.h>
  45. #include <time.h>
  46. #include <sys/time.h>
  47. #include <errno.h>
  48. #ifdef SQLITE_ENABLE_LOCKING_STYLE
  49. #include <sys/ioctl.h>
  50. #include <sys/param.h>
  51. #include <sys/mount.h>
  52. #endif /* SQLITE_ENABLE_LOCKING_STYLE */
  53. /*
  54. ** If we are to be thread-safe, include the pthreads header and define
  55. ** the SQLITE_UNIX_THREADS macro.
  56. */
  57. #if SQLITE_THREADSAFE
  58. # include <pthread.h>
  59. # define SQLITE_UNIX_THREADS 1
  60. #endif
  61. /*
  62. ** Default permissions when creating a new file
  63. */
  64. #ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
  65. # define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
  66. #endif
  67. /*
  68. ** Maximum supported path-length.
  69. */
  70. #define MAX_PATHNAME 512
  71. /*
  72. ** The unixFile structure is subclass of sqlite3_file specific for the unix
  73. ** portability layer.
  74. */
  75. typedef struct unixFile unixFile;
  /* Per-open-file state used by the unix layer.  It holds the descriptor of
  ** the file (h) and of its directory (dirfd), the lock level held through
  ** this descriptor (locktype), and pointers to the openCnt and lockInfo
  ** records kept for the underlying inode.  Fields guarded by #if/#ifdef
  ** exist only in the corresponding build configurations. */
  76. struct unixFile {
  77. sqlite3_io_methods const *pMethod; /* Always the first entry */
  78. #ifdef SQLITE_TEST
  79. /* In test mode, increase the size of this structure a bit so that
  80. ** it is larger than the struct CrashFile defined in test6.c.
  81. */
  82. char aPadding[32];
  83. #endif
  84. struct openCnt *pOpen; /* Info about all open fd's on this inode */
  85. struct lockInfo *pLock; /* Info about locks on this inode */
  86. #ifdef SQLITE_ENABLE_LOCKING_STYLE
  87. void *lockingContext; /* Locking style specific state */
  88. #endif /* SQLITE_ENABLE_LOCKING_STYLE */
  89. int h; /* The file descriptor */
  90. unsigned char locktype; /* The type of lock held on this fd */
  91. int dirfd; /* File descriptor for the directory */
  92. #if SQLITE_THREADSAFE
  93. pthread_t tid; /* The thread that "owns" this unixFile (written by SET_THREADID, compared by CHECK_THREADID) */
  94. #endif
  95. };
  96. /*
  97. ** Include code that is common to all os_*.c files
  98. */
  99. #include "os_common.h"
  100. /*
  101. ** Define various macros that are missing from some systems.
  102. */
  103. #ifndef O_LARGEFILE
  104. # define O_LARGEFILE 0
  105. #endif
  106. #ifdef SQLITE_DISABLE_LFS
  107. # undef O_LARGEFILE
  108. # define O_LARGEFILE 0
  109. #endif
  110. #ifndef O_NOFOLLOW
  111. # define O_NOFOLLOW 0
  112. #endif
  113. #ifndef O_BINARY
  114. # define O_BINARY 0
  115. #endif
  116. /*
  117. ** The DJGPP compiler environment looks mostly like Unix, but it
  118. ** lacks the fcntl() system call. So redefine fcntl() to be something
  119. ** that always succeeds. This means that locking does not occur under
  120. ** DJGPP. But it is DOS - what did you expect?
  121. */
  122. #ifdef __DJGPP__
  123. # define fcntl(A,B,C) 0
  124. #endif
  125. /*
  126. ** The threadid macro resolves to the thread-id or to 0. Used for
  127. ** testing and debugging only.
  128. */
  129. #if SQLITE_THREADSAFE
  130. #define threadid pthread_self()
  131. #else
  132. #define threadid 0
  133. #endif
  134. /*
  135. ** Set or check the unixFile.tid field. This field is set when an unixFile
  136. ** is first opened. All subsequent uses of the unixFile verify that the
  137. ** same thread is operating on the unixFile. Some operating systems do
  138. ** not allow locks to be overridden by other threads and that restriction
  139. ** means that sqlite3* database handles cannot be moved from one thread
  140. ** to another. This logic makes sure a user does not try to do that
  141. ** by mistake.
  142. **
  143. ** Version 3.3.1 (2006-01-15): unixFile can be moved from one thread to
  144. ** another as long as we are running on a system that supports threads
  145. ** overriding each others locks (which is now the most common behavior)
  146. ** or if no locks are held. But the unixFile.pLock field needs to be
  147. ** recomputed because its key includes the thread-id. See the
  148. ** transferOwnership() function below for additional information
  149. */
  150. #if SQLITE_THREADSAFE
  151. # define SET_THREADID(X) (X)->tid = pthread_self()
  152. # define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
  153. !pthread_equal((X)->tid, pthread_self()))
  154. #else
  155. # define SET_THREADID(X)
  156. # define CHECK_THREADID(X) 0
  157. #endif
  158. /*
  159. ** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
  160. ** section 6.5.2.2 lines 483 through 490 specify that when a process
  161. ** sets or clears a lock, that operation overrides any prior locks set
  162. ** by the same process. It does not explicitly say so, but this implies
  163. ** that it overrides locks set by the same process using a different
  164. ** file descriptor. Consider this test case:
  165. **
  166. ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
  167. ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
  168. **
  169. ** Suppose ./file1 and ./file2 are really the same file (because
  170. ** one is a hard or symbolic link to the other) then if you set
  171. ** an exclusive lock on fd1, then try to get an exclusive lock
  172. ** on fd2, it works. I would have expected the second lock to
  173. ** fail since there was already a lock on the file due to fd1.
  174. ** But not so. Since both locks came from the same process, the
  175. ** second overrides the first, even though they were on different
  176. ** file descriptors opened on different file names.
  177. **
  178. ** Bummer. If you ask me, this is broken. Badly broken. It means
  179. ** that we cannot use POSIX locks to synchronize file access among
  180. ** competing threads of the same process. POSIX locks will work fine
  181. ** to synchronize access for threads in separate processes, but not
  182. ** threads within the same process.
  183. **
  184. ** To work around the problem, SQLite has to manage file locks internally
  185. ** on its own. Whenever a new database is opened, we have to find the
  186. ** specific inode of the database file (the inode is determined by the
  187. ** st_dev and st_ino fields of the stat structure that fstat() fills in)
  188. ** and check for locks already existing on that inode. When locks are
  189. ** created or removed, we have to look at our own internal record of the
  190. ** locks to see if another thread has previously set a lock on that same
  191. ** inode.
  192. **
  193. ** The sqlite3_file structure for POSIX is no longer just an integer file
  194. ** descriptor. It is now a structure that holds the integer file
  195. ** descriptor and a pointer to a structure that describes the internal
  196. ** locks on the corresponding inode. There is one locking structure
  197. ** per inode, so if the same inode is opened twice, both unixFile structures
  198. ** point to the same locking structure. The locking structure keeps
  199. ** a reference count (so we will know when to delete it) and a "cnt"
  200. ** field that tells us its internal lock status. cnt==0 means the
  201. ** file is unlocked. cnt==-1 means the file has an exclusive lock.
  202. ** cnt>0 means there are cnt shared locks on the file.
  203. **
  204. ** Any attempt to lock or unlock a file first checks the locking
  205. ** structure. The fcntl() system call is only invoked to set a
  206. ** POSIX lock if the internal lock structure transitions between
  207. ** a locked and an unlocked state.
  208. **
  209. ** 2004-Jan-11:
  210. ** More recent discoveries about POSIX advisory locks. (The more
  211. ** I discover, the more I realize that POSIX advisory locks are
  212. ** an abomination.)
  213. **
  214. ** If you close a file descriptor that points to a file that has locks,
  215. ** all locks on that file that are owned by the current process are
  216. ** released. To work around this problem, each unixFile structure contains
  217. ** a pointer to an openCnt structure. There is one openCnt structure
  218. ** per open inode, which means that multiple unixFile can point to a single
  219. ** openCnt. When an attempt is made to close an unixFile, if there are
  220. ** other unixFile open on the same inode that are holding locks, the call
  221. ** to close() the file descriptor is deferred until all of the locks clear.
  222. ** The openCnt structure keeps a list of file descriptors that need to
  223. ** be closed and that list is walked (and cleared) when the last lock
  224. ** clears.
  225. **
  226. ** First, under Linux threads, because each thread has a separate
  227. ** process ID, lock operations in one thread do not override locks
  228. ** to the same file in other threads. Linux threads behave like
  229. ** separate processes in this respect. But, if you close a file
  230. ** descriptor in linux threads, all locks are cleared, even locks
  231. ** on other threads and even though the other threads have different
  232. ** process IDs. Linux threads is inconsistent in this respect.
  233. ** (I'm beginning to think that linux threads is an abomination too.)
  234. ** The consequence of this all is that the hash table for the lockInfo
  235. ** structure has to include the process id as part of its key because
  236. ** locks in different threads are treated as distinct. But the
  237. ** openCnt structure should not include the process id in its
  238. ** key because close() clears lock on all threads, not just the current
  239. ** thread. Were it not for this goofiness in linux threads, we could
  240. ** combine the lockInfo and openCnt structures into a single structure.
  241. **
  242. ** 2004-Jun-28:
  243. ** On some versions of linux, threads can override each others locks.
  244. ** On others not. Sometimes you can change the behavior on the same
  245. ** system by setting the LD_ASSUME_KERNEL environment variable. The
  246. ** POSIX standard is silent as to which behavior is correct, as far
  247. ** as I can tell, so other versions of unix might show the same
  248. ** inconsistency. There is little doubt in my mind that posix
  249. ** advisory locks and linux threads are profoundly broken.
  250. **
  251. ** To work around the inconsistencies, we have to test at runtime
  252. ** whether or not threads can override each others locks. This test
  253. ** is run once, the first time any lock is attempted. A static
  254. ** variable is set to record the results of this test for future
  255. ** use.
  256. */
  257. /*
  258. ** An instance of the following structure serves as the key used
  259. ** to locate a particular lockInfo structure given its inode.
  260. **
  261. ** If threads cannot override each others locks, then we set the
  262. ** lockKey.tid field to the thread ID. If threads can override
  263. ** each others locks then tid is always set to zero. tid is omitted
  264. ** if we compile without threading support.
  265. */
  /* Lookup key for lockHash.  The whole structure is passed to the hash
  ** routines by address and sizeof(), so every field takes part in the key. */
  266. struct lockKey {
  267. dev_t dev; /* Device number */
  268. ino_t ino; /* Inode number */
  269. #if SQLITE_THREADSAFE
  270. pthread_t tid; /* Thread ID or zero if threads can override each other */
  271. #endif
  272. };
  273. /*
  274. ** An instance of the following structure is allocated for each open
  275. ** inode on each thread with a different process ID. (Threads have
  276. ** different process IDs on linux, but not on most other unixes.)
  277. **
  278. ** A single inode can have multiple file descriptors, so each unixFile
  279. ** structure contains a pointer to an instance of this object and this
  280. ** object keeps a count of the number of unixFile pointing to it.
  281. */
  /* Reference-counted record of the lock state for one lockKey.  It is
  ** removed from lockHash and freed by releaseLockInfo() when nRef drops
  ** to zero. */
  282. struct lockInfo {
  283. struct lockKey key; /* The lookup key */
  284. int cnt; /* Number of SHARED locks held */
  285. int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
  286. int nRef; /* Number of pointers to this structure */
  287. };
  288. /*
  289. ** An instance of the following structure serves as the key used
  290. ** to locate a particular openCnt structure given its inode. This
  291. ** is the same as the lockKey except that the thread ID is omitted.
  292. */
  /* Lookup key for openHash: the (device, inode) pair only, with no
  ** thread ID component. */
  293. struct openKey {
  294. dev_t dev; /* Device number */
  295. ino_t ino; /* Inode number */
  296. };
  297. /*
  298. ** An instance of the following structure is allocated for each open
  299. ** inode. This structure keeps track of the number of locks on that
  300. ** inode. If a close is attempted against an inode that is holding
  301. ** locks, the close is deferred until all locks clear by adding the
  302. ** file descriptor to be closed to the pending list.
  303. */
  /* Reference-counted per-inode record of outstanding locks and of file
  ** descriptors whose close() has been deferred.  It is removed from
  ** openHash and freed, together with aPending, by releaseOpenCnt() when
  ** nRef drops to zero. */
  304. struct openCnt {
  305. struct openKey key; /* The lookup key */
  306. int nRef; /* Number of pointers to this structure */
  307. int nLock; /* Number of outstanding locks */
  308. int nPending; /* Number of pending close() operations */
  309. int *aPending; /* Malloced space holding fd's awaiting a close() */
  310. };
  311. /*
  312. ** These hash tables map inodes and file descriptors (really, lockKey and
  313. ** openKey structures) into lockInfo and openCnt structures. Access to
  314. ** these hash tables must be protected by a mutex.
  315. */
  /* lockHash is keyed by struct lockKey and openHash by struct openKey.
  ** Both use binary keys (SQLITE_HASH_BINARY) and start out empty. */
  316. static Hash lockHash = {SQLITE_HASH_BINARY, 0, 0, 0, 0, 0};
  317. static Hash openHash = {SQLITE_HASH_BINARY, 0, 0, 0, 0, 0};
  318. #ifdef SQLITE_ENABLE_LOCKING_STYLE
  319. /*
  320. ** The locking styles are associated with the different file locking
  321. ** capabilities supported by different file systems.
  322. **
  323. ** POSIX locking style fully supports shared and exclusive byte-range locks
  324. ** AFP locking only supports exclusive byte-range locks
  325. ** FLOCK only supports a single file-global exclusive lock
  326. ** DOTLOCK isn't a true locking style, it refers to the use of a special
  327. ** file named the same as the database file with a '.lock' extension, this
  328. ** can be used on file systems that do not offer any reliable file locking
  329. ** NO locking means that no locking will be attempted, this is only used for
  330. ** read-only file systems currently
  331. ** UNSUPPORTED means that no locking will be attempted, this is only used for
  332. ** file systems that are known to be unsupported
  333. */
  /* Locking strategy selected for a database file.  The numeric values are
  ** significant: sqlite3DetectLockingStyle() casts the integer macro
  ** SQLITE_FIXED_LOCKING_STYLE directly to this type, so reordering the
  ** enumerators changes the meaning of that build option. */
  334. typedef enum {
  335. posixLockingStyle = 0, /* standard posix-advisory locks */
  336. afpLockingStyle, /* use afp locks */
  337. flockLockingStyle, /* use flock() */
  338. dotlockLockingStyle, /* use <file>.lock files */
  339. noLockingStyle, /* useful for read-only file system */
  340. unsupportedLockingStyle /* indicates unsupported file system */
  341. } sqlite3LockingStyle;
  342. #endif /* SQLITE_ENABLE_LOCKING_STYLE */
  343. /*
  344. ** Helper functions to obtain and relinquish the global mutex.
  345. */
  /* Enter the SQLITE_MUTEX_STATIC_MASTER mutex.  Pair each call with a
  ** call to leaveMutex(). */
  346. static void enterMutex(){
  347. sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER));
  348. }
  /* Leave the SQLITE_MUTEX_STATIC_MASTER mutex previously entered with
  ** enterMutex(). */
  349. static void leaveMutex(){
  350. sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER));
  351. }
  352. #if SQLITE_THREADSAFE
  353. /*
  354. ** This variable records whether or not threads can override each others
  355. ** locks.
  356. **
  357. ** 0: No. Threads cannot override each others locks.
  358. ** 1: Yes. Threads can override each others locks.
  359. ** -1: We don't know yet.
  360. **
  361. ** On some systems, we know at compile-time if threads can override each
  362. ** others locks. On those systems, the SQLITE_THREAD_OVERRIDE_LOCK macro
  363. ** will be set appropriately. On other systems, we have to check at
  364. ** runtime. On these latter systems, SQLITE_THREAD_OVERRIDE_LOCK is
  365. ** undefined.
  366. **
  367. ** This variable normally has file scope only. But during testing, we make
  368. ** it a global so that the test code can change its value in order to verify
  369. ** that the right stuff happens in either case.
  370. */
  /* Default to -1 ("unknown") unless the build supplies a value. */
  371. #ifndef SQLITE_THREAD_OVERRIDE_LOCK
  372. # define SQLITE_THREAD_OVERRIDE_LOCK -1
  373. #endif
  /* The variable has external linkage only in SQLITE_TEST builds; otherwise
  ** it is private to this file.  testThreadLockingBehavior() assigns it the
  ** result of the runtime probe. */
  374. #ifdef SQLITE_TEST
  375. int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
  376. #else
  377. static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
  378. #endif
  379. /*
  380. ** This structure holds information passed into individual test
  381. ** threads by the testThreadLockingBehavior() routine.
  382. */
  /* One instance is handed to each probe thread.  threadLockingTest() passes
  ** fd and lock to fcntl(F_SETLK) and stores the fcntl() return value in
  ** result. */
  383. struct threadTestData {
  384. int fd; /* File to be locked */
  385. struct flock lock; /* The locking operation */
  386. int result; /* Result of the locking operation */
  387. };
  388. #ifdef SQLITE_LOCK_TRACE
  389. /*
  390. ** Print out information about all locking operations.
  391. **
  392. ** This routine is used for troubleshooting locks on multithreaded
  393. ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
  394. ** command-line option on the compiler. This code is normally
  395. ** turned off.
  396. */
  397. static int lockTrace(int fd, int op, struct flock *p){
  398. char *zOpName, *zType;
  399. int s;
  400. int savedErrno;
  401. if( op==F_GETLK ){
  402. zOpName = "GETLK";
  403. }else if( op==F_SETLK ){
  404. zOpName = "SETLK";
  405. }else{
  406. s = fcntl(fd, op, p);
  407. sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
  408. return s;
  409. }
  410. if( p->l_type==F_RDLCK ){
  411. zType = "RDLCK";
  412. }else if( p->l_type==F_WRLCK ){
  413. zType = "WRLCK";
  414. }else if( p->l_type==F_UNLCK ){
  415. zType = "UNLCK";
  416. }else{
  417. assert( 0 );
  418. }
  419. assert( p->l_whence==SEEK_SET );
  420. s = fcntl(fd, op, p);
  421. savedErrno = errno;
  422. sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
  423. threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
  424. (int)p->l_pid, s);
  425. if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
  426. struct flock l2;
  427. l2 = *p;
  428. fcntl(fd, F_GETLK, &l2);
  429. if( l2.l_type==F_RDLCK ){
  430. zType = "RDLCK";
  431. }else if( l2.l_type==F_WRLCK ){
  432. zType = "WRLCK";
  433. }else if( l2.l_type==F_UNLCK ){
  434. zType = "UNLCK";
  435. }else{
  436. assert( 0 );
  437. }
  438. sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
  439. zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
  440. }
  441. errno = savedErrno;
  442. return s;
  443. }
  444. #define fcntl lockTrace
  445. #endif /* SQLITE_LOCK_TRACE */
  446. /*
  447. ** The testThreadLockingBehavior() routine launches two separate
  448. ** threads on this routine. This routine attempts to lock a file
  449. ** descriptor then returns. The success or failure of that attempt
  450. ** allows the testThreadLockingBehavior() procedure to determine
  451. ** whether or not threads can override each others locks.
  452. */
  453. static void *threadLockingTest(void *pArg){
  454. struct threadTestData *pData = (struct threadTestData*)pArg;
  455. pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
  456. return pArg;
  457. }
  458. /*
  459. ** This procedure attempts to determine whether or not threads
  460. ** can override each others locks then sets the
  461. ** threadsOverrideEachOthersLocks variable appropriately.
  462. */
  463. static void testThreadLockingBehavior(int fd_orig){
  464. int fd;
  465. struct threadTestData d[2];
  466. pthread_t t[2];
  467. fd = dup(fd_orig);
  468. if( fd<0 ) return;
  469. memset(d, 0, sizeof(d));
  470. d[0].fd = fd;
  471. d[0].lock.l_type = F_RDLCK;
  472. d[0].lock.l_len = 1;
  473. d[0].lock.l_start = 0;
  474. d[0].lock.l_whence = SEEK_SET;
  475. d[1] = d[0];
  476. d[1].lock.l_type = F_WRLCK;
  477. pthread_create(&t[0], 0, threadLockingTest, &d[0]);
  478. pthread_create(&t[1], 0, threadLockingTest, &d[1]);
  479. pthread_join(t[0], 0);
  480. pthread_join(t[1], 0);
  481. close(fd);
  482. threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
  483. }
  484. #endif /* SQLITE_THREADSAFE */
  485. /*
  486. ** Release a lockInfo structure previously allocated by findLockInfo().
  487. */
  488. static void releaseLockInfo(struct lockInfo *pLock){
  489. if (pLock == NULL)
  490. return;
  491. pLock->nRef--;
  492. if( pLock->nRef==0 ){
  493. sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
  494. sqlite3_free(pLock);
  495. }
  496. }
  497. /*
  498. ** Release an openCnt structure previously allocated by findLockInfo().
  499. */
  500. static void releaseOpenCnt(struct openCnt *pOpen){
  501. if (pOpen == NULL)
  502. return;
  503. pOpen->nRef--;
  504. if( pOpen->nRef==0 ){
  505. sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
  506. free(pOpen->aPending);
  507. sqlite3_free(pOpen);
  508. }
  509. }
  510. #ifdef SQLITE_ENABLE_LOCKING_STYLE
  511. /*
  512. ** Tests a byte-range locking query to see if byte range locks are
  513. ** supported, if not we fall back to dotlockLockingStyle.
  514. */
  515. static sqlite3LockingStyle sqlite3TestLockingStyle(
  516. const char *filePath,
  517. int fd
  518. ){
  519. /* test byte-range lock using fcntl */
  520. struct flock lockInfo;
  521. lockInfo.l_len = 1;
  522. lockInfo.l_start = 0;
  523. lockInfo.l_whence = SEEK_SET;
  524. lockInfo.l_type = F_RDLCK;
  525. if( fcntl(fd, F_GETLK, &lockInfo)!=-1 ) {
  526. return posixLockingStyle;
  527. }
  528. /* testing for flock can give false positives. So if if the above test
  529. ** fails, then we fall back to using dot-lock style locking.
  530. */
  531. return dotlockLockingStyle;
  532. }
  533. /*
  534. ** Examines the f_fstypename entry in the statfs structure as returned by
  535. ** stat() for the file system hosting the database file, assigns the
  536. ** appropriate locking style based on its value. These values and
  537. ** assignments are based on Darwin/OSX behavior and have not been tested on
  538. ** other systems.
  539. */
  540. static sqlite3LockingStyle sqlite3DetectLockingStyle(
  541. const char *filePath,
  542. int fd
  543. ){
  544. #ifdef SQLITE_FIXED_LOCKING_STYLE
  545. return (sqlite3LockingStyle)SQLITE_FIXED_LOCKING_STYLE;
  546. #else
  547. struct statfs fsInfo;
  548. if (statfs(filePath, &fsInfo) == -1)
  549. return sqlite3TestLockingStyle(filePath, fd);
  550. if (fsInfo.f_flags & MNT_RDONLY)
  551. return noLockingStyle;
  552. if( (!strcmp(fsInfo.f_fstypename, "hfs")) ||
  553. (!strcmp(fsInfo.f_fstypename, "ufs")) )
  554. return posixLockingStyle;
  555. if(!strcmp(fsInfo.f_fstypename, "afpfs"))
  556. return afpLockingStyle;
  557. if(!strcmp(fsInfo.f_fstypename, "nfs"))
  558. return sqlite3TestLockingStyle(filePath, fd);
  559. if(!strcmp(fsInfo.f_fstypename, "smbfs"))
  560. return flockLockingStyle;
  561. if(!strcmp(fsInfo.f_fstypename, "msdos"))
  562. return dotlockLockingStyle;
  563. if(!strcmp(fsInfo.f_fstypename, "webdav"))
  564. return unsupportedLockingStyle;
  565. return sqlite3TestLockingStyle(filePath, fd);
  566. #endif /* SQLITE_FIXED_LOCKING_STYLE */
  567. }
  568. #endif /* SQLITE_ENABLE_LOCKING_STYLE */
  569. /*
  570. ** Given a file descriptor, locate lockInfo and openCnt structures that
  571. ** describes that file descriptor. Create new ones if necessary. The
  572. ** return values might be uninitialized if an error occurs.
  573. **
  574. ** Return the number of errors.
  575. */
  576. static int findLockInfo(
  577. int fd, /* The file descriptor used in the key */
  578. struct lockInfo **ppLock, /* Return the lockInfo structure here */
  579. struct openCnt **ppOpen /* Return the openCnt structure here */
  580. ){
  581. int rc;
  582. struct lockKey key1;
  583. struct openKey key2;
  584. struct stat statbuf;
  585. struct lockInfo *pLock;
  586. struct openCnt *pOpen;
  587. rc = fstat(fd, &statbuf);
  588. if( rc!=0 ) return 1;
  589. memset(&key1, 0, sizeof(key1));
  590. key1.dev = statbuf.st_dev;
  591. key1.ino = statbuf.st_ino;
  592. #if SQLITE_THREADSAFE
  593. if( threadsOverrideEachOthersLocks<0 ){
  594. testThreadLockingBehavior(fd);
  595. }
  596. key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
  597. #endif
  598. memset(&key2, 0, sizeof(key2));
  599. key2.dev = statbuf.st_dev;
  600. key2.ino = statbuf.st_ino;
  601. pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
  602. if( pLock==0 ){
  603. struct lockInfo *pOld;
  604. pLock = sqlite3_malloc( sizeof(*pLock) );
  605. if( pLock==0 ){
  606. rc = 1;
  607. goto exit_findlockinfo;
  608. }
  609. pLock->key = key1;
  610. pLock->nRef = 1;
  611. pLock->cnt = 0;
  612. pLock->locktype = 0;
  613. pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
  614. if( pOld!=0 ){
  615. assert( pOld==pLock );
  616. sqlite3_free(pLock);
  617. rc = 1;
  618. goto exit_findlockinfo;
  619. }
  620. }else{
  621. pLock->nRef++;
  622. }
  623. *ppLock = pLock;
  624. if( ppOpen!=0 ){
  625. pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
  626. if( pOpen==0 ){
  627. struct openCnt *pOld;
  628. pOpen = sqlite3_malloc( sizeof(*pOpen) );
  629. if( pOpen==0 ){
  630. releaseLockInfo(pLock);
  631. rc = 1;
  632. goto exit_findlockinfo;
  633. }
  634. pOpen->key = key2;
  635. pOpen->nRef = 1;
  636. pOpen->nLock = 0;
  637. pOpen->nPending = 0;
  638. pOpen->aPending = 0;
  639. pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
  640. if( pOld!=0 ){
  641. assert( pOld==pOpen );
  642. sqlite3_free(pOpen);
  643. releaseLockInfo(pLock);
  644. rc = 1;
  645. goto exit_findlockinfo;
  646. }
  647. }else{
  648. pOpen->nRef++;
  649. }
  650. *ppOpen = pOpen;
  651. }
  652. exit_findlockinfo:
  653. return rc;
  654. }
  #ifdef SQLITE_DEBUG
  /*
  ** Helper for the trace output of debugging binaries.  Returns the string
  ** representation of the supplied integer lock-type, or "ERROR" if the
  ** value is not one of the known lock types.
  */
  static const char *locktypeName(int locktype){
    const char *zName = "ERROR";
    switch( locktype ){
      case NO_LOCK:        zName = "NONE";       break;
      case SHARED_LOCK:    zName = "SHARED";     break;
      case RESERVED_LOCK:  zName = "RESERVED";   break;
      case PENDING_LOCK:   zName = "PENDING";    break;
      case EXCLUSIVE_LOCK: zName = "EXCLUSIVE";  break;
      default:                                   break;
    }
    return zName;
  }
  #endif
  /*
  ** If we are currently in a different thread than the thread that the
  ** unixFile argument belongs to, then transfer ownership of the unixFile
  ** over to the current thread.
  **
  ** A unixFile is only owned by a thread on systems where one thread is
  ** unable to override locks created by a different thread.  RedHat9 is
  ** an example of such a system.
  **
  ** Ownership transfer is only allowed if the unixFile is currently
  ** unlocked.  If the unixFile is locked and the ownership is wrong, then
  ** SQLITE_MISUSE is returned.  SQLITE_OK is returned if everything works.
  ** If the lockInfo structure for the new owner cannot be obtained, the
  ** non-zero result of findLockInfo() is returned and pFile is left exactly
  ** as it was: still owned by the previous thread and still holding its
  ** previous lockInfo reference.
  */
  #if SQLITE_THREADSAFE
  static int transferOwnership(unixFile *pFile){
    int rc;
    pthread_t hSelf;
    struct lockInfo *pNew;

    if( threadsOverrideEachOthersLocks ){
      /* Ownership transfers not needed on this system */
      return SQLITE_OK;
    }
    hSelf = pthread_self();
    if( pthread_equal(pFile->tid, hSelf) ){
      /* We are still in the same thread */
      OSTRACE1("No-transfer, same thread\n");
      return SQLITE_OK;
    }
    if( pFile->locktype!=NO_LOCK ){
      /* We cannot change ownership while we are holding a lock! */
      return SQLITE_MISUSE;
    }
    OSTRACE4("Transfer ownership of %d from %d to %d\n",
             pFile->h, pFile->tid, hSelf);

    if( pFile->pLock==NULL ){
      pFile->tid = hSelf;
      return SQLITE_OK;
    }

    /* Acquire the lockInfo for the current thread BEFORE giving up the old
    ** one.  Releasing first would leave pFile->pLock pointing at a possibly
    ** freed structure whenever the lookup fails.
    */
    pNew = 0;
    rc = findLockInfo(pFile->h, &pNew, 0);
    if( rc!=0 ){
      return rc;
    }
    releaseLockInfo(pFile->pLock);
    pFile->pLock = pNew;
    pFile->tid = hSelf;
    OSTRACE5("LOCK %d is now %s(%s,%d)\n", pFile->h,
             locktypeName(pFile->locktype),
             locktypeName(pFile->pLock->locktype), pFile->pLock->cnt);
    return rc;
  }
  #else
    /* On single-threaded builds, ownership transfer is a no-op */
  # define transferOwnership(X) SQLITE_OK
  #endif
  721. /*
  722. ** Seek to the offset passed as the second argument, then read cnt
  723. ** bytes into pBuf. Return the number of bytes actually read.
  724. **
  725. ** NB: If you define USE_PREAD or USE_PREAD64, then it might also
  726. ** be necessary to define _XOPEN_SOURCE to be 500. This varies from
  727. ** one system to another. Since SQLite does not define USE_PREAD
  728. ** any any form by default, we will not attempt to define _XOPEN_SOURCE.
  729. ** See tickets #2741 and #2681.
  730. */
  731. static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
  732. int got;
  733. i64 newOffset;
  734. TIMER_START;
  735. #if defined(USE_PREAD)
  736. got = pread(id->h, pBuf, cnt, offset);
  737. SimulateIOError( got = -1 );
  738. #elif defined(USE_PREAD64)
  739. got = pread64(id->h, pBuf, cnt, offset);
  740. SimulateIOError( got = -1 );
  741. #else
  742. newOffset = lseek(id->h, offset, SEEK_SET);
  743. SimulateIOError( newOffset-- );
  744. if( newOffset!=offset ){
  745. return -1;
  746. }
  747. got = read(id->h, pBuf, cnt);
  748. #endif
  749. TIMER_END;
  750. OSTRACE5("READ %-3d %5d %7lld %d\n", id->h, got, offset, TIMER_ELAPSED);
  751. return got;
  752. }
  753. /*
  754. ** Read data from a file into a buffer. Return SQLITE_OK if all
  755. ** bytes were read successfully and SQLITE_IOERR if anything goes
  756. ** wrong.
  757. */
  758. static int unixRead(
  759. sqlite3_file *id,
  760. void *pBuf,
  761. int amt,
  762. sqlite3_int64 offset
  763. ){
  764. int got;
  765. assert( id );
  766. got = seekAndRead((unixFile*)id, offset, pBuf, amt);
  767. if( got==amt ){
  768. return SQLITE_OK;
  769. }else if( got<0 ){
  770. return SQLITE_IOERR_READ;
  771. }else{
  772. memset(&((char*)pBuf)[got], 0, amt-got);
  773. return SQLITE_IOERR_SHORT_READ;
  774. }
  775. }
  776. /*
  777. ** Seek to the offset in id->offset then read cnt bytes into pBuf.
  778. ** Return the number of bytes actually read. Update the offset.
  779. */
  780. static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
  781. int got;
  782. i64 newOffset;
  783. TIMER_START;
  784. #if defined(USE_PREAD)
  785. got = pwrite(id->h, pBuf, cnt, offset);
  786. #elif defined(USE_PREAD64)
  787. got = pwrite64(id->h, pBuf, cnt, offset);
  788. #else
  789. newOffset = lseek(id->h, offset, SEEK_SET);
  790. if( newOffset!=offset ){
  791. return -1;
  792. }
  793. got = write(id->h, pBuf, cnt);
  794. #endif
  795. TIMER_END;
  796. OSTRACE5("WRITE %-3d %5d %7lld %d\n", id->h, got, offset, TIMER_ELAPSED);
  797. return got;
  798. }
  799. /*
  800. ** Write data from a buffer into a file. Return SQLITE_OK on success
  801. ** or some other error code on failure.
  802. */
  803. static int unixWrite(
  804. sqlite3_file *id,
  805. const void *pBuf,
  806. int amt,
  807. sqlite3_int64 offset
  808. ){
  809. int wrote = 0;
  810. assert( id );
  811. assert( amt>0 );
  812. while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
  813. amt -= wrote;
  814. offset += wrote;
  815. pBuf = &((char*)pBuf)[wrote];
  816. }
  817. SimulateIOError(( wrote=(-1), amt=1 ));
  818. SimulateDiskfullError(( wrote=0, amt=1 ));
  819. if( amt>0 ){
  820. if( wrote<0 ){
  821. return SQLITE_IOERR_WRITE;
  822. }else{
  823. return SQLITE_FULL;
  824. }
  825. }
  826. return SQLITE_OK;
  827. }
  #ifdef SQLITE_TEST
  /*
  ** Test-only counters used to verify that syncs and fullsyncs are
  ** occurring at the right times.  full_fsync() increments
  ** sqlite3_sync_count on every call and sqlite3_fullsync_count on calls
  ** that request a full sync.
  */
  int sqlite3_fullsync_count = 0;
  int sqlite3_sync_count = 0;
  #endif
  /*
  ** The fdatasync() API is used only if the HAVE_FDATASYNC macro is
  ** defined.  Without it, every use of fdatasync below this point is
  ** replaced by fsync.
  */
  #if !defined(HAVE_FDATASYNC)
  # define fdatasync fsync
  #endif

  /*
  ** HAVE_FULLFSYNC is 1 when the F_FULLFSYNC macro is defined and 0 when
  ** it is not.  F_FULLFSYNC is currently only available on Mac OS X, but
  ** that could change.
  */
  #if defined(F_FULLFSYNC)
  # define HAVE_FULLFSYNC 1
  #else
  # define HAVE_FULLFSYNC 0
  #endif
  /*
  ** The fsync() system call does not work as advertised on many unix
  ** systems.  This procedure is an attempt to make it work better.
  **
  ** fd is the descriptor to sync.  fullSync requests an F_FULLFSYNC where
  ** that is available (HAVE_FULLFSYNC).  dataOnly selects fdatasync() over
  ** fsync() on builds without F_FULLFSYNC.  The return value is zero on
  ** success and non-zero on failure.
  **
  ** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful for
  ** testing when we want to run through the test suite quickly.  You are
  ** strongly advised *not* to deploy with SQLITE_NO_SYNC enabled, however,
  ** since with SQLITE_NO_SYNC enabled, an OS crash or power failure will
  ** likely corrupt the database file.
  */
  static int full_fsync(int fd, int fullSync, int dataOnly){
    int rc;

    /* During testing, count every sync and every full sync so that tests
    ** can verify this procedure gets called with the correct arguments.
    */
  #ifdef SQLITE_TEST
    sqlite3_sync_count++;
    if( fullSync ) sqlite3_fullsync_count++;
  #endif

  #ifdef SQLITE_NO_SYNC
    /* Syncing has been compiled out: report success without doing it. */
    rc = SQLITE_OK;
  #elif HAVE_FULLFSYNC
    /* Try F_FULLFSYNC when asked to.  If it was not requested or if it
    ** failed, fall back to a plain fsync().  It shouldn't be possible for
    ** fullfsync to fail on the local file system (on OSX), so a failure
    ** indicates that FULLFSYNC isn't supported for this file system.  For
    ** now the overhead of the superfluous fcntl() call is ignored; it
    ** would be better to detect fullfsync support once.
    */
    rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
    if( rc ) rc = fsync(fd);
  #else
    rc = dataOnly ? fdatasync(fd) : fsync(fd);
  #endif
    return rc;
  }
  905. /*
  906. ** Make sure all writes to a particular file are committed to disk.
  907. **
  908. ** If dataOnly==0 then both the file itself and its metadata (file
  909. ** size, access time, etc) are synced. If dataOnly!=0 then only the
  910. ** file data is synced.
  911. **
  912. ** Under Unix, also make sure that the directory entry for the file
  913. ** has been created by fsync-ing the directory that contains the file.
  914. ** If we do not do this and we encounter a power failure, the directory
  915. ** entry for the journal might not exist after we reboot. The next
  916. ** SQLite to access the file will not know that the journal exists (because
  917. ** the directory entry for the journal was never created) and the transaction
  918. ** will not roll back - possibly leading to database corruption.
  919. */
  920. static int unixSync(sqlite3_file *id, int flags){
  921. int rc;
  922. unixFile *pFile = (unixFile*)id;
  923. int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
  924. int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
  925. /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
  926. assert((flags&0x0F)==SQLITE_SYNC_NORMAL
  927. || (flags&0x0F)==SQLITE_SYNC_FULL
  928. );
  929. assert( pFile );
  930. OSTRACE2("SYNC %-3d\n", pFile->h);
  931. rc = full_fsync(pFile->h, isFullsync, isDataOnly);
  932. SimulateIOError( rc=1 );
  933. if( rc ){
  934. return SQLITE_IOERR_FSYNC;
  935. }
  936. if( pFile->dirfd>=0 ){
  937. OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
  938. HAVE_FULLFSYNC, isFullsync);
  939. #ifndef SQLITE_DISABLE_DIRSYNC
  940. /* The directory sync is only attempted if full_fsync is
  941. ** turned off or unavailable. If a full_fsync occurred above,
  942. ** then the directory sync is superfluous.
  943. */
  944. if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
  945. /*
  946. ** We have received multiple reports of fsync() returning
  947. ** errors when applied to directories on certain file systems.
  948. ** A failed directory sync is not a big deal. So it seems
  949. ** better to ignore the error. Ticket #1657
  950. */
  951. /* return SQLITE_IOERR; */
  952. }
  953. #endif
  954. close(pFile->dirfd); /* Only need to sync once, so close the directory */
  955. pFile->dirfd = -1; /* when we are done. */
  956. }
  957. return SQLITE_OK;
  958. }
  959. /*
  960. ** Truncate an open file to a specified size
  961. */
  962. static int unixTruncate(sqlite3_file *id, i64 nByte){
  963. int rc;
  964. assert( id );
  965. rc = ftruncate(((unixFile*)id)->h, (off_t)nByte);
  966. SimulateIOError( rc=1 );
  967. if( rc ){
  968. return SQLITE_IOERR_TRUNCATE;
  969. }else{
  970. return SQLITE_OK;
  971. }
  972. }
  973. /*
  974. ** Determine the current size of a file in bytes
  975. */
  976. static int unixFileSize(sqlite3_file *id, i64 *pSize){
  977. int rc;
  978. struct stat buf;
  979. assert( id );
  980. rc = fstat(((unixFile*)id)->h, &buf);
  981. SimulateIOError( rc=1 );
  982. if( rc!=0 ){
  983. return SQLITE_IOERR_FSTAT;
  984. }
  985. *pSize = buf.st_size;
  986. return SQLITE_OK;
  987. }
  988. /*
  989. ** This routine checks if there is a RESERVED lock held on the specified
  990. ** file by this or any other process. If such a lock is held, return
  991. ** non-zero. If the file is unlocked or holds only SHARED locks, then
  992. ** return zero.
  993. */
  994. static int unixCheckReservedLock(sqlite3_file *id){
  995. int r = 0;
  996. unixFile *pFile = (unixFile*)id;
  997. assert( pFile );
  998. enterMutex(); /* Because pFile->pLock is shared across threads */
  999. /* Check if a thread in this process holds such a lock */
  1000. if( pFile->pLock->locktype>SHARED_LOCK ){
  1001. r = 1;
  1002. }
  1003. /* Otherwise see if some other process holds it.
  1004. */
  1005. if( !r ){
  1006. struct flock lock;
  1007. lock.l_whence = SEEK_SET;
  1008. lock.l_start = RESERVED_BYTE;
  1009. lock.l_len = 1;
  1010. lock.l_type = F_WRLCK;
  1011. fcntl(pFile->h, F_GETLK, &lock);
  1012. if( lock.l_type!=F_UNLCK ){
  1013. r = 1;
  1014. }
  1015. }
  1016. leaveMutex();
  1017. OSTRACE3("TEST WR-LOCK %d %d\n", pFile->h, r);
  1018. return r;
  1019. }
  1020. /*
  1021. ** Lock the file with the lock specified by parameter locktype - one
  1022. ** of the following:
  1023. **
  1024. ** (1) SHARED_LOCK
  1025. ** (2) RESERVED_LOCK
  1026. ** (3) PENDING_LOCK
  1027. ** (4) EXCLUSIVE_LOCK
  1028. **
  1029. ** Sometimes when requesting one lock state, additional lock states
  1030. ** are inserted in between. The locking might fail on one of the later
  1031. ** transitions leaving the lock state different from what it started but
  1032. ** still short of its goal. The following chart shows the allowed
  1033. ** transitions and the inserted intermediate states:
  1034. **
  1035. ** UNLOCKED -> SHARED
  1036. ** SHARED -> RESERVED
  1037. ** SHARED -> (PENDING) -> EXCLUSIVE
  1038. ** RESERVED -> (PENDING) -> EXCLUSIVE
  1039. ** PENDING -> EXCLUSIVE
  1040. **
  1041. ** This routine will only increase a lock. Use the sqlite3OsUnlock()
  1042. ** routine to lower a locking level.
  1043. */
  1044. static int unixLock(sqlite3_file *id, int locktype){
  1045. /* The following describes the implementation of the various locks and
  1046. ** lock transitions in terms of the POSIX advisory shared and exclusive
  1047. ** lock primitives (called read-locks and write-locks below, to avoid
  1048. ** confusion with SQLite lock names). The algorithms are complicated
  1049. ** slightly in order to be compatible with windows systems simultaneously
  1050. ** accessing the same database file, in case that is ever required.
  1051. **
  1052. ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
  1053. ** byte', each single bytes at well known offsets, and the 'shared byte
  1054. ** range', a range of 510 bytes at a well known offset.
  1055. **
  1056. ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
  1057. ** byte'. If this is successful, a random byte from the 'shared byte
  1058. ** range' is read-locked and the lock on the 'pending byte' released.
  1059. **
  1060. ** A process may only obtain a RESERVED lock after it has a SHARED lock.
  1061. ** A RESERVED lock is implemented by grabbing a write-lock on the
  1062. ** 'reserved byte'.
  1063. **
  1064. ** A process may only obtain a PENDING lock after it has obtained a
  1065. ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
  1066. ** on the 'pending byte'. This ensures that no new SHARED locks can be
  1067. ** obtained, but existing SHARED locks are allowed to persist. A process
  1068. ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
  1069. ** This property is used by the algorithm for rolling back a journal file
  1070. ** after a crash.
  1071. **
  1072. ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
  1073. ** implemented by obtaining a write-lock on the entire 'shared byte
  1074. ** range'. Since all other locks require a read-lock on one of the bytes
  1075. ** within this range, this ensures that no other locks are held on the
  1076. ** database.
  1077. **
  1078. ** The reason a single byte cannot be used instead of the 'shared byte
  1079. ** range' is that some versions of windows do not support read-locks. By
  1080. ** locking a random byte from a range, concurrent SHARED locks may exist
  1081. ** even if the locking primitive used is always a write-lock.
  1082. */
  1083. int rc = SQLITE_OK;
  1084. unixFile *pFile = (unixFile*)id;
  1085. struct lockInfo *pLock = pFile->pLock;
  1086. struct flock lock;
  1087. int s;
  1088. assert( pFile );
  1089. OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h,
  1090. locktypeName(locktype), locktypeName(pFile->locktype),
  1091. locktypeName(pLock->locktype), pLock->cnt , getpid());
  1092. /* If there is already a lock of this type or more restrictive on the
  1093. ** unixFile, do nothing. Don't use the end_lock: exit path, as
  1094. ** enterMutex() hasn't been called yet.
  1095. */
  1096. if( pFile->locktype>=locktype ){
  1097. OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
  1098. locktypeName(locktype));
  1099. return SQLITE_OK;
  1100. }
  1101. /* Make sure the locking sequence is correct
  1102. */
  1103. assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
  1104. assert( locktype!=PENDING_LOCK );
  1105. assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
  1106. /* This mutex is needed because pFile->pLock is shared across threads
  1107. */
  1108. enterMutex();
  1109. /* Make sure the current thread owns the pFile.
  1110. */
  1111. rc = transferOwnership(pFile);
  1112. if( rc!=SQLITE_OK ){
  1113. leaveMutex();
  1114. return rc;
  1115. }
  1116. pLock = pFile->pLock;
  1117. /* If some thread using this PID has a lock via a different unixFile*
  1118. ** handle that precludes the requested lock, return BUSY.
  1119. */
  1120. if( (pFile->locktype!=pLock->locktype &&
  1121. (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
  1122. ){
  1123. rc = SQLITE_BUSY;
  1124. goto end_lock;
  1125. }
  1126. /* If a SHARED lock is requested, and some thread using this PID already
  1127. ** has a SHARED or RESERVED lock, then increment reference counts and
  1128. ** return SQLITE_OK.
  1129. */
  1130. if( locktype==SHARED_LOCK &&
  1131. (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
  1132. assert( locktype==SHARED_LOCK );
  1133. assert( pFile->locktype==0 );
  1134. assert( pLock->cnt>0 );
  1135. pFile->locktype = SHARED_LOCK;
  1136. pLock->cnt++;
  1137. pFile->pOpen->nLock++;
  1138. goto end_lock;
  1139. }
  1140. lock.l_len = 1L;
  1141. lock.l_whence = SEEK_SET;
  1142. /* A PENDING lock is needed before acquiring a SHARED lock and before
  1143. ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
  1144. ** be released.
  1145. */
  1146. if( locktype==SHARED_LOCK
  1147. || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
  1148. ){
  1149. lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
  1150. lock.l_start = PENDING_BYTE;
  1151. s = fcntl(pFile->h, F_SETLK, &lock);
  1152. if( s==(-1) ){
  1153. rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
  1154. goto end_lock;
  1155. }
  1156. }
  1157. /* If control gets to this point, then actually go ahead and make
  1158. ** operating system calls for the specified lock.
  1159. */
  1160. if( locktype==SHARED_LOCK ){
  1161. assert( pLock->cnt==0 );
  1162. assert( pLock->locktype==0 );
  1163. /* Now get the read-lock */
  1164. lock.l_start = SHARED_FIRST;
  1165. lock.l_len = SHARED_SIZE;
  1166. s = fcntl(pFile->h, F_SETLK, &lock);
  1167. /* Drop the temporary PENDING lock */
  1168. lock.l_start = PENDING_BYTE;
  1169. lock.l_len = 1L;
  1170. lock.l_type = F_UNLCK;
  1171. if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
  1172. rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
  1173. goto end_lock;
  1174. }
  1175. if( s==(-1) ){
  1176. rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
  1177. }else{
  1178. pFile->locktype = SHARED_LOCK;
  1179. pFile->pOpen->nLock++;
  1180. pLock->cnt = 1;
  1181. }
  1182. }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
  1183. /* We are trying for an exclusive lock but another thread in this
  1184. ** same process is still holding a shared lock. */
  1185. rc = SQLITE_BUSY;
  1186. }else{
  1187. /* The request was for a RESERVED or EXCLUSIVE lock. It is
  1188. ** assumed that there is a SHARED or greater lock on the file
  1189. ** already.
  1190. */
  1191. assert( 0!=pFile->locktype );
  1192. lock.l_type = F_WRLCK;
  1193. switch( locktype ){
  1194. case RESERVED_LOCK:
  1195. lock.l_start = RESERVED_BYTE;
  1196. break;
  1197. case EXCLUSIVE_LOCK:
  1198. lock.l_start = SHARED_FIRST;
  1199. lock.l_len = SHARED_SIZE;
  1200. break;
  1201. default:
  1202. assert(0);
  1203. }
  1204. s = fcntl(pFile->h, F_SETLK, &lock);
  1205. if( s==(-1) ){
  1206. rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
  1207. }
  1208. }
  1209. if( rc==SQLITE_OK ){
  1210. pFile->locktype = locktype;
  1211. pLock->locktype = locktype;
  1212. }else if( locktype==EXCLUSIVE_LOCK ){
  1213. pFile->locktype = PENDING_LOCK;
  1214. pLock->locktype = PENDING_LOCK;
  1215. }
  1216. end_lock:
  1217. leaveMutex();
  1218. OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
  1219. rc==SQLITE_OK ? "ok" : "failed");
  1220. return rc;
  1221. }
  1222. /*
  1223. ** Lower the locking level on file descriptor pFile to locktype. locktype
  1224. ** must be either NO_LOCK or SHARED_LOCK.
  1225. **
  1226. ** If the locking level of the file descriptor is already at or below
  1227. ** the requested locking level, this routine is a no-op.
  1228. */
  1229. static int unixUnlock(sqlite3_file *id, int locktype){
  1230. struct lockInfo *pLock;
  1231. struct flock lock;
  1232. int rc = SQLITE_OK;
  1233. unixFile *pFile = (unixFile*)id;
  1234. assert( pFile );
  1235. OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
  1236. pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
  1237. assert( locktype<=SHARED_LOCK );
  1238. if( pFile->locktype<=locktype ){
  1239. return SQLITE_OK;
  1240. }
  1241. if( CHECK_THREADID(pFile) ){
  1242. return SQLITE_MISUSE;
  1243. }
  1244. enterMutex();
  1245. pLock = pFile->pLock;
  1246. assert( pLock->cnt!=0 );
  1247. if( pFile->locktype>SHARED_LOCK ){
  1248. assert( pLock->locktype==pFile->locktype );
  1249. if( locktype==SHARED_LOCK ){
  1250. lock.l_type = F_RDLCK;
  1251. lock.l_whence = SEEK_SET;
  1252. lock.l_start = SHARED_FIRST;
  1253. lock.l_len = SHARED_SIZE;
  1254. if( fcntl(pFile->h, F_SETLK, &lock)==(-1) ){
  1255. /* This should never happen */
  1256. rc = SQLITE_IOERR_RDLOCK;
  1257. }
  1258. }
  1259. lock.l_type = F_UNLCK;
  1260. lock.l_whence = SEEK_SET;
  1261. lock.l_start = PENDING_BYTE;
  1262. lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
  1263. if( fcntl(pFile->h, F_SETLK, &lock)!=(-1) ){
  1264. pLock->locktype = SHARED_LOCK;
  1265. }else{
  1266. rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
  1267. }
  1268. }
  1269. if( locktype==NO_LOCK ){
  1270. struct openCnt *pOpen;
  1271. /* Decrement the shared lock counter. Release the lock using an
  1272. ** OS call only when all threads in this same process have released
  1273. ** the lock.
  1274. */
  1275. pLock->cnt--;
  1276. if( pLock->cnt==0 ){
  1277. lock.l_type = F_UNLCK;
  1278. lock.l_whence = SEEK_SET;
  1279. lock.l_start = lock.l_len = 0L;
  1280. if( fcntl(pFile->h, F_SETLK, &lock)!=(-1) ){
  1281. pLock->locktype = NO_LOCK;
  1282. }else{
  1283. rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
  1284. }
  1285. }
  1286. /* Decrement the count of locks against this same file. When the
  1287. ** count reaches zero, close any other file descriptors whose close
  1288. ** was deferred because of outstanding locks.
  1289. */
  1290. pOpen = pFile->pOpen;
  1291. pOpen->nLock--;
  1292. assert( pOpen->nLock>=0 );
  1293. if( pOpen->nLock==0 && pOpen->nPending>0 ){
  1294. int i;
  1295. for(i=0; i<pOpen->nPending; i++){
  1296. close(pOpen->aPending[i]);
  1297. }
  1298. free(pOpen->aPending);
  1299. pOpen->nPending = 0;
  1300. pOpen->aPending = 0;
  1301. }
  1302. }
  1303. leaveMutex();
  1304. pFile->locktype = locktype;
  1305. return rc;
  1306. }
  1307. /*
  1308. ** Close a file.
  1309. */
  1310. static int unixClose(sqlite3_file *id){
  1311. unixFile *pFile = (unixFile *)id;
  1312. if( !pFile ) return SQLITE_OK;
  1313. unixUnlock(id, NO_LOCK);
  1314. if( pFile->dirfd>=0 ) close(pFile->dirfd);
  1315. pFile->dirfd = -1;
  1316. enterMutex();
  1317. if( pFile->pOpen->nLock ){
  1318. /* If there are outstanding locks, do not actually close the file just
  1319. ** yet because that would clear those locks. Instead, add the file
  1320. ** descriptor to pOpen->aPending. It will be automatically closed when
  1321. ** the last lock is cleared.
  1322. */
  1323. int *aNew;
  1324. struct openCnt *pOpen = pFile->pOpen;
  1325. aNew = realloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
  1326. if( aNew==0 ){
  1327. /* If a malloc fails, just leak the file descriptor */
  1328. }else{
  1329. pOpen->aPending = aNew;
  1330. pOpen->aPending[pOpen->nPending] = pFile->h;
  1331. pOpen->nPending++;
  1332. }
  1333. }else{
  1334. /* There are no outstanding locks so we can close the file immediately */
  1335. close(pFile->h);
  1336. }
  1337. releaseLockInfo(pFile->pLock);
  1338. releaseOpenCnt(pFile->pOpen);
  1339. leaveMutex();
  1340. OSTRACE2("CLOSE %-3d\n", pFile->h);
  1341. OpenCounter(-1);
  1342. memset(pFile, 0, sizeof(unixFile));
  1343. return SQLITE_OK;
  1344. }
  1345. #ifdef SQLITE_ENABLE_LOCKING_STYLE
  1346. #pragma mark AFP Support
  /*
  ** The afpLockingContext structure contains all afp lock specific state.
  */
  typedef struct afpLockingContext {
    unsigned long long sharedLockByte;
    char *filePath;   /* Path handed to _AFPFSSetLock() by the AFP routines */
  } afpLockingContext;

  /*
  ** Parameter block for the AFP byte-range lock request issued through the
  ** afpfsByteRangeLock2FSCTL control code.  The field order and types are
  ** part of the request encoding and must not be changed.
  */
  struct ByteRangeLockPB2 {
    unsigned long long offset;        /* offset to first byte to lock */
    unsigned long long length;        /* nbr of bytes to lock */
    unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
    unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
    unsigned char startEndFlag;       /* 1=rel to end of fork, 0=rel to start */
    int fd;                           /* file desc to assoc this lock with */
  };

  #define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2)
  1365. /*
  1366. ** Return 0 on success, 1 on failure. To match the behavior of the
  1367. ** normal posix file locking (used in unixLock for example), we should
  1368. ** provide 'richer' return codes - specifically to differentiate between
  1369. ** 'file busy' and 'file system error' results.
  1370. */
  1371. static int _AFPFSSetLock(
  1372. const char *path,
  1373. int fd,
  1374. unsigned long long offset,
  1375. unsigned long long length,
  1376. int setLockFlag
  1377. ){
  1378. struct ByteRangeLockPB2 pb;
  1379. int err;
  1380. pb.unLockFlag = setLockFlag ? 0 : 1;
  1381. pb.startEndFlag = 0;
  1382. pb.offset = offset;
  1383. pb.length = length;
  1384. pb.fd = fd;
  1385. OSTRACE5("AFPLOCK setting lock %s for %d in range %llx:%llx\n",
  1386. (setLockFlag?"ON":"OFF"), fd, offset, length);
  1387. err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
  1388. if ( err==-1 ) {
  1389. OSTRACE4("AFPLOCK failed to fsctl() '%s' %d %s\n", path, errno,
  1390. strerror(errno));
  1391. return 1; /* error */
  1392. } else {
  1393. return 0;
  1394. }
  1395. }
  1396. /*
  1397. ** This routine checks if there is a RESERVED lock held on the specified
  1398. ** file by this or any other process. If such a lock is held, return
  1399. ** non-zero. If the file is unlocked or holds only SHARED locks, then
  1400. ** return zero.
  1401. */
  1402. static int afpUnixCheckReservedLock(sqlite3_file *id){
  1403. int r = 0;
  1404. unixFile *pFile = (unixFile*)id;
  1405. assert( pFile );
  1406. afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
  1407. /* Check if a thread in this process holds such a lock */
  1408. if( pFile->locktype>SHARED_LOCK ){
  1409. r = 1;
  1410. }
  1411. /* Otherwise see if some other process holds it.
  1412. */
  1413. if ( !r ) {
  1414. /* lock the byte */
  1415. int failed = _AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1,1);
  1416. if (failed) {
  1417. /* if we failed to get the lock then someone else must have it */
  1418. r = 1;
  1419. } else {
  1420. /* if we succeeded in taking the reserved lock, unlock it to restore
  1421. ** the original state */
  1422. _AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1, 0);
  1423. }
  1424. }
  1425. OSTRACE3("TEST WR-LOCK %d %d\n", pFile->h, r);
  1426. return r;
  1427. }
  1428. /* AFP-style locking following the behavior of unixLock, see the unixLock
  1429. ** function comments for details of lock management. */
  1430. static int afpUnixLock(sqlite3_file *id, int locktype)
  1431. {
  1432. int rc = SQLITE_OK;
  1433. unixFile *pFile = (unixFile*)id;
  1434. afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
  1435. int gotPendingLock = 0;
  1436. assert( pFile );
  1437. OSTRACE5("LOCK %d %s was %s pid=%d\n", pFile->h,
  1438. locktypeName(locktype), locktypeName(pFile->locktype), getpid());
  1439. /* If there is already a lock of this type or more restrictive on the
  1440. ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
  1441. ** enterMutex() hasn't been called yet.
  1442. */
  1443. if( pFile->locktype>=locktype ){
  1444. OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
  1445. locktypeName(locktype));
  1446. return SQLITE_OK;
  1447. }
  1448. /* Make sure the locking sequence is correct
  1449. */
  1450. assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
  1451. assert( locktype!=PENDING_LOCK );
  1452. assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
  1453. /* This mutex is needed because pFile->pLock is shared across threads
  1454. */
  1455. enterMutex();
  1456. /* Make sure the current thread owns the pFile.
  1457. */
  1458. rc = transferOwnership(pFile);
  1459. if( rc!=SQLITE_OK ){
  1460. leaveMutex();
  1461. return rc;
  1462. }
  1463. /* A PENDING lock is needed before acquiring a SHARED lock and before
  1464. ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
  1465. ** be released.
  1466. */
  1467. if( locktype==SHARED_LOCK
  1468. || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
  1469. ){
  1470. int failed = _AFPFSSetLock(context->filePath, pFile->h,
  1471. PENDING_BYTE, 1, 1);
  1472. if (failed) {
  1473. rc = SQLITE_BUSY;
  1474. goto afp_end_lock;
  1475. }
  1476. }
  1477. /* If control gets to this point, then actually go ahead and make
  1478. ** operating system calls for the specified lock.
  1479. */
  1480. if( locktype==SHARED_LOCK ){
  1481. int lk, failed;
  1482. int tries = 0;
  1483. /* Now get the read-lock */
  1484. /* note that the quality of the randomness doesn't matter that much */
  1485. lk = random();
  1486. context->sharedLockByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
  1487. failed = _AFPFSSetLock(context->filePath, pFile->h,
  1488. SHARED_FIRST+context->sharedLockByte, 1, 1);
  1489. /* Drop the temporary PENDING lock */
  1490. if (_AFPFSSetLock(context->filePath, pFile->h, PENDING_BYTE, 1, 0)) {
  1491. rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
  1492. goto afp_end_lock;
  1493. }
  1494. if( failed ){
  1495. rc = SQLITE_BUSY;
  1496. } else {
  1497. pFile->locktype = SHARED_LOCK;
  1498. }
  1499. }else{
  1500. /* The request was for a RESERVED or EXCLUSIVE lock. It is
  1501. ** assumed that there is a SHARED or greater lock on the file
  1502. ** already.
  1503. */
  1504. int failed = 0;
  1505. assert( 0!=pFile->locktype );
  1506. if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
  1507. /* Acquire a RESERVED lock */
  1508. failed = _AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1,1);
  1509. }
  1510. if (!failed && locktype == EXCLUSIVE_LOCK) {
  1511. /* Acquire an EXCLUSIVE lock */
  1512. /* Remove the shared lock before trying the range. we'll need to
  1513. ** reestablish the shared lock if we can't get the afpUnixUnlock
  1514. */
  1515. if (!_AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST +
  1516. context->sharedLockByte, 1, 0)) {
  1517. /* now attemmpt to get the exclusive lock range */
  1518. failed = _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST,
  1519. SHARED_SIZE, 1);
  1520. if (failed && _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST +
  1521. context->sharedLockByte, 1, 1)) {
  1522. rc = SQLITE_IOERR_RDLOCK; /* this should never happen */
  1523. }
  1524. } else {
  1525. /* */
  1526. rc = SQLITE_IOERR_UNLOCK; /* this should never happen */
  1527. }
  1528. }
  1529. if( failed && rc == SQLITE_OK){
  1530. rc = SQLITE_BUSY;
  1531. }
  1532. }
  1533. if( rc==SQLITE_OK ){
  1534. pFile->locktype = locktype;
  1535. }else if( locktype==EXCLUSIVE_LOCK ){
  1536. pFile->locktype = PENDING_LOCK;
  1537. }
  1538. afp_end_lock:
  1539. leaveMutex();
  1540. OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
  1541. rc==SQLITE_OK ? "ok" : "failed");
  1542. return rc;
  1543. }
  1544. /*
  1545. ** Lower the locking level on file descriptor pFile to locktype. locktype
  1546. ** must be either NO_LOCK or SHARED_LOCK.
  1547. **
  1548. ** If the locking level of the file descriptor is already at or below
  1549. ** the requested locking level, this routine is a no-op.
  1550. */
  1551. static int afpUnixUnlock(sqlite3_file *id, int locktype) {
  1552. struct flock lock;
  1553. int rc = SQLITE_OK;
  1554. unixFile *pFile = (unixFile*)id;
  1555. afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
  1556. assert( pFile );
  1557. OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
  1558. pFile->locktype, getpid());
  1559. assert( locktype<=SHARED_LOCK );
  1560. if( pFile->locktype<=locktype ){
  1561. return SQLITE_OK;
  1562. }
  1563. if( CHECK_THREADID(pFile) ){
  1564. return SQLITE_MISUSE;
  1565. }
  1566. enterMutex();
  1567. if( pFile->locktype>SHARED_LOCK ){
  1568. if( locktype==SHARED_LOCK ){
  1569. int failed = 0;
  1570. /* unlock the exclusive range - then re-establish the shared lock */
  1571. if (pFile->locktype==EXCLUSIVE_LOCK) {
  1572. failed = _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST,
  1573. SHARED_SIZE, 0);
  1574. if (!failed) {
  1575. /* successfully removed the exclusive lock */
  1576. if (_AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST+
  1577. context->sharedLockByte, 1, 1)) {
  1578. /* failed to re-establish our shared lock */
  1579. rc = SQLITE_IOERR_RDLOCK; /* This should never happen */
  1580. }
  1581. } else {
  1582. /* This should never happen - failed to unlock the exclusive range */
  1583. rc = SQLITE_IOERR_UNLOCK;
  1584. }
  1585. }
  1586. }
  1587. if (rc == SQLITE_OK && pFile->locktype>=PENDING_LOCK) {
  1588. if (_AFPFSSetLock(context->filePath, pFile->h, PENDING_BYTE, 1, 0)){
  1589. /* failed to release the pending lock */
  1590. rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
  1591. }
  1592. }
  1593. if (rc == SQLITE_OK && pFile->locktype>=RESERVED_LOCK) {
  1594. if (_AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1, 0)) {
  1595. /* failed to release the reserved lock */
  1596. rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
  1597. }
  1598. }
  1599. }
  1600. if( locktype==NO_LOCK ){
  1601. int failed = _AFPFSSetLock(context->filePath, pFile->h,
  1602. SHARED_FIRST + context->sharedLockByte, 1, 0);
  1603. if (failed) {
  1604. rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
  1605. }
  1606. }
  1607. if (rc == SQLITE_OK)
  1608. pFile->locktype = locktype;
  1609. leaveMutex();
  1610. return rc;
  1611. }
  1612. /*
  1613. ** Close a file & cleanup AFP specific locking context
  1614. */
  1615. static int afpUnixClose(sqlite3_file *id) {
  1616. unixFile *pFile = (unixFile*)pId;
  1617. if( !pFile ) return SQLITE_OK;
  1618. afpUnixUnlock(*pId, NO_LOCK);
  1619. /* free the AFP locking structure */
  1620. if (pFile->lockingContext != NULL) {
  1621. if (((afpLockingContext *)pFile->lockingContext)->filePath != NULL)
  1622. sqlite3_free(((afpLockingContext*)pFile->lockingContext)->filePath);
  1623. sqlite3_free(pFile->lockingContext);
  1624. }
  1625. if( pFile->dirfd>=0 ) close(pFile->dirfd);
  1626. pFile->dirfd = -1;
  1627. close(pFile->h);
  1628. OSTRACE2("CLOSE %-3d\n", pFile->h);
  1629. OpenCounter(-1);
  1630. return SQLITE_OK;
  1631. }
  1632. #pragma mark flock() style locking
  1633. /*
  1634. ** The flockLockingContext is not used
  1635. */
  1636. typedef void flockLockingContext;
  1637. static int flockUnixCheckReservedLock(sqlite3_file *id) {
  1638. unixFile *pFile = (unixFile*)id;
  1639. if (pFile->locktype == RESERVED_LOCK) {
  1640. return 1; /* already have a reserved lock */
  1641. } else {
  1642. /* attempt to get the lock */
  1643. int rc = flock(pFile->h, LOCK_EX | LOCK_NB);
  1644. if (!rc) {
  1645. /* got the lock, unlock it */
  1646. flock(pFile->h, LOCK_UN);
  1647. return 0; /* no one has it reserved */
  1648. }
  1649. return 1; /* someone else might have it reserved */
  1650. }
  1651. }
  1652. static int flockUnixLock(sqlite3_file *id, int locktype) {
  1653. unixFile *pFile = (unixFile*)id;
  1654. /* if we already have a lock, it is exclusive.
  1655. ** Just adjust level and punt on outta here. */
  1656. if (pFile->locktype > NO_LOCK) {
  1657. pFile->locktype = locktype;
  1658. return SQLITE_OK;
  1659. }
  1660. /* grab an exclusive lock */
  1661. int rc = flock(pFile->h, LOCK_EX | LOCK_NB);
  1662. if (rc) {
  1663. /* didn't get, must be busy */
  1664. return SQLITE_BUSY;
  1665. } else {
  1666. /* got it, set the type and return ok */
  1667. pFile->locktype = locktype;
  1668. return SQLITE_OK;
  1669. }
  1670. }
  1671. static int flockUnixUnlock(sqlite3_file *id, int locktype) {
  1672. unixFile *pFile = (unixFile*)id;
  1673. assert( locktype<=SHARED_LOCK );
  1674. /* no-op if possible */
  1675. if( pFile->locktype==locktype ){
  1676. return SQLITE_OK;
  1677. }
  1678. /* shared can just be set because we always have an exclusive */
  1679. if (locktype==SHARED_LOCK) {
  1680. pFile->locktype = locktype;
  1681. return SQLITE_OK;
  1682. }
  1683. /* no, really, unlock. */
  1684. int rc = flock(pFile->h, LOCK_UN);
  1685. if (rc)
  1686. return SQLITE_IOERR_UNLOCK;
  1687. else {
  1688. pFile->locktype = NO_LOCK;
  1689. return SQLITE_OK;
  1690. }
  1691. }
  1692. /*
  1693. ** Close a file.
  1694. */
  1695. static int flockUnixClose(sqlite3_file *pId) {
  1696. unixFile *pFile = (unixFile*)*pId;
  1697. if( !pFile ) return SQLITE_OK;
  1698. flockUnixUnlock(*pId, NO_LOCK);
  1699. if( pFile->dirfd>=0 ) close(pFile->dirfd);
  1700. pFile->dirfd = -1;
  1701. enterMutex();
  1702. close(pFile->h);
  1703. leaveMutex();
  1704. OSTRACE2("CLOSE %-3d\n", pFile->h);
  1705. OpenCounter(-1);
  1706. return SQLITE_OK;
  1707. }
  1708. #pragma mark Old-School .lock file based locking
  1709. /*
  1710. ** The dotlockLockingContext structure contains all dotlock (.lock) lock
  1711. ** specific state
  1712. */
  1713. typedef struct dotlockLockingContext dotlockLockingContext;
  1714. struct dotlockLockingContext {
  1715. char *lockPath;
  1716. };
  1717. static int dotlockUnixCheckReservedLock(sqlite3_file *id) {
  1718. unixFile *pFile = (unixFile*)id;
  1719. dotlockLockingContext *context =
  1720. (dotlockLockingContext *) pFile->lockingContext;
  1721. if (pFile->locktype == RESERVED_LOCK) {
  1722. return 1; /* already have a reserved lock */
  1723. } else {
  1724. struct stat statBuf;
  1725. if (lstat(context->lockPath,&statBuf) == 0)
  1726. /* file exists, someone else has the lock */
  1727. return 1;
  1728. else
  1729. /* file does not exist, we could have it if we want it */
  1730. return 0;
  1731. }
  1732. }
  1733. static int dotlockUnixLock(sqlite3_file *id, int locktype) {
  1734. unixFile *pFile = (unixFile*)id;
  1735. dotlockLockingContext *context =
  1736. (dotlockLockingContext *) pFile->lockingContext;
  1737. /* if we already have a lock, it is exclusive.
  1738. ** Just adjust level and punt on outta here. */
  1739. if (pFile->locktype > NO_LOCK) {
  1740. pFile->locktype = locktype;
  1741. /* Always update the timestamp on the old file */
  1742. utimes(context->lockPath,NULL);
  1743. return SQLITE_OK;
  1744. }
  1745. /* check to see if lock file already exists */
  1746. struct stat statBuf;
  1747. if (lstat(context->lockPath,&statBuf) == 0){
  1748. return SQLITE_BUSY; /* it does, busy */
  1749. }
  1750. /* grab an exclusive lock */
  1751. int fd = open(context->lockPath,O_RDONLY|O_CREAT|O_EXCL,0600);
  1752. if (fd < 0) {
  1753. /* failed to open/create the file, someone else may have stolen the lock */
  1754. return SQLITE_BUSY;
  1755. }
  1756. close(fd);
  1757. /* got it, set the type and return ok */
  1758. pFile->locktype = locktype;
  1759. return SQLITE_OK;
  1760. }
  1761. static int dotlockUnixUnlock(sqlite3_file *id, int locktype) {
  1762. unixFile *pFile = (unixFile*)id;
  1763. dotlockLockingContext *context =
  1764. (dotlockLockingContext *) pFile->lockingContext;
  1765. assert( locktype<=SHARED_LOCK );
  1766. /* no-op if possible */
  1767. if( pFile->locktype==locktype ){
  1768. return SQLITE_OK;
  1769. }
  1770. /* shared can just be set because we always have an exclusive */
  1771. if (locktype==SHARED_LOCK) {
  1772. pFile->locktype = locktype;
  1773. return SQLITE_OK;
  1774. }
  1775. /* no, really, unlock. */
  1776. unlink(context->lockPath);
  1777. pFile->locktype = NO_LOCK;
  1778. return SQLITE_OK;
  1779. }
  1780. /*
  1781. ** Close a file.
  1782. */
  1783. static int dotlockUnixClose(sqlite3_file *id) {
  1784. unixFile *pFile = (unixFile*)id;
  1785. if( !pFile ) return SQLITE_OK;
  1786. dotlockUnixUnlock(*pId, NO_LOCK);
  1787. /* free the dotlock locking structure */
  1788. if (pFile->lockingContext != NULL) {
  1789. if (((dotlockLockingContext *)pFile->lockingContext)->lockPath != NULL)
  1790. sqlite3_free( ( (dotlockLockingContext *)
  1791. pFile->lockingContext)->lockPath);
  1792. sqlite3_free(pFile->lockingContext);
  1793. }
  1794. if( pFile->dirfd>=0 ) close(pFile->dirfd);
  1795. pFile->dirfd = -1;
  1796. enterMutex();
  1797. close(pFile->h);
  1798. leaveMutex();
  1799. OSTRACE2("CLOSE %-3d\n", pFile->h);
  1800. OpenCounter(-1);
  1801. return SQLITE_OK;
  1802. }
  1803. #pragma mark No locking
  1804. /*
  1805. ** The nolockLockingContext is void
  1806. */
  1807. typedef void nolockLockingContext;
  1808. static int nolockUnixCheckReservedLock(sqlite3_file *id) {
  1809. return 0;
  1810. }
  1811. static int nolockUnixLock(sqlite3_file *id, int locktype) {
  1812. return SQLITE_OK;
  1813. }
  1814. static int nolockUnixUnlock(sqlite3_file *id, int locktype) {
  1815. return SQLITE_OK;
  1816. }
  1817. /*
  1818. ** Close a file.
  1819. */
  1820. static int nolockUnixClose(sqlite3_file *id) {
  1821. unixFile *pFile = (unixFile*)id;
  1822. if( !pFile ) return SQLITE_OK;
  1823. if( pFile->dirfd>=0 ) close(pFile->dirfd);
  1824. pFile->dirfd = -1;
  1825. enterMutex();
  1826. close(pFile->h);
  1827. leaveMutex();
  1828. OSTRACE2("CLOSE %-3d\n", pFile->h);
  1829. OpenCounter(-1);
  1830. return SQLITE_OK;
  1831. }
  1832. #endif /* SQLITE_ENABLE_LOCKING_STYLE */
  1833. /*
  1834. ** Information and control of an open file handle.
  1835. */
  1836. static int unixFileControl(sqlite3_file *id, int op, void *pArg){
  1837. switch( op ){
  1838. case SQLITE_FCNTL_LOCKSTATE: {
  1839. *(int*)pArg = ((unixFile*)id)->locktype;
  1840. return SQLITE_OK;
  1841. }
  1842. }
  1843. return SQLITE_ERROR;
  1844. }
  1845. /*
  1846. ** Return the sector size in bytes of the underlying block device for
  1847. ** the specified file. This is almost always 512 bytes, but may be
  1848. ** larger for some devices.
  1849. **
  1850. ** SQLite code assumes this function cannot fail. It also assumes that
  1851. ** if two files are created in the same file-system directory (i.e.
  1852. ** a database and its journal file) that the sector size will be the
  1853. ** same for both.
  1854. */
  1855. static int unixSectorSize(sqlite3_file *id){
  1856. return SQLITE_DEFAULT_SECTOR_SIZE;
  1857. }
  1858. /*
  1859. ** Return the device characteristics for the file. This is always 0.
  1860. */
  1861. static int unixDeviceCharacteristics(sqlite3_file *id){
  1862. return 0;
  1863. }
  1864. /*
  1865. ** This vector defines all the methods that can operate on an sqlite3_file
  1866. ** for unix.
  1867. */
  1868. static const sqlite3_io_methods sqlite3UnixIoMethod = {
  1869. 1, /* iVersion */
  1870. unixClose,
  1871. unixRead,
  1872. unixWrite,
  1873. unixTruncate,
  1874. unixSync,
  1875. unixFileSize,
  1876. unixLock,
  1877. unixUnlock,
  1878. unixCheckReservedLock,
  1879. unixFileControl,
  1880. unixSectorSize,
  1881. unixDeviceCharacteristics
  1882. };
  1883. #ifdef SQLITE_ENABLE_LOCKING_STYLE
  1884. /*
  1885. ** This vector defines all the methods that can operate on an sqlite3_file
  1886. ** for unix with AFP style file locking.
  1887. */
  1888. static const sqlite3_io_methods sqlite3AFPLockingUnixIoMethod = {
  1889. 1, /* iVersion */
  1890. unixClose,
  1891. unixRead,
  1892. unixWrite,
  1893. unixTruncate,
  1894. unixSync,
  1895. unixFileSize,
  1896. afpUnixLock,
  1897. afpUnixUnlock,
  1898. afpUnixCheckReservedLock,
  1899. unixFileControl,
  1900. unixSectorSize,
  1901. unixDeviceCharacteristics
  1902. };
  1903. /*
  1904. ** This vector defines all the methods that can operate on an sqlite3_file
  1905. ** for unix with flock() style file locking.
  1906. */
  1907. static const sqlite3_io_methods sqlite3FlockLockingUnixIoMethod = {
  1908. 1, /* iVersion */
  1909. flockUnixClose,
  1910. unixRead,
  1911. unixWrite,
  1912. unixTruncate,
  1913. unixSync,
  1914. unixFileSize,
  1915. flockUnixLock,
  1916. flockUnixUnlock,
  1917. flockUnixCheckReservedLock,
  1918. unixFileControl,
  1919. unixSectorSize,
  1920. unixDeviceCharacteristics
  1921. };
  1922. /*
  1923. ** This vector defines all the methods that can operate on an sqlite3_file
  1924. ** for unix with dotlock style file locking.
  1925. */
  1926. static const sqlite3_io_methods sqlite3DotlockLockingUnixIoMethod = {
  1927. 1, /* iVersion */
  1928. dotlockUnixClose,
  1929. unixRead,
  1930. unixWrite,
  1931. unixTruncate,
  1932. unixSync,
  1933. unixFileSize,
  1934. dotlockUnixLock,
  1935. dotlockUnixUnlock,
  1936. dotlockUnixCheckReservedLock,
  1937. unixFileControl,
  1938. unixSectorSize,
  1939. unixDeviceCharacteristics
  1940. };
  1941. /*
  1942. ** This vector defines all the methods that can operate on an sqlite3_file
  1943. ** for unix with dotlock style file locking.
  1944. */
  1945. static const sqlite3_io_methods sqlite3NolockLockingUnixIoMethod = {
  1946. 1, /* iVersion */
  1947. nolockUnixClose,
  1948. unixRead,
  1949. unixWrite,
  1950. unixTruncate,
  1951. unixSync,
  1952. unixFileSize,
  1953. nolockUnixLock,
  1954. nolockUnixUnlock,
  1955. nolockUnixCheckReservedLock,
  1956. unixFileControl,
  1957. unixSectorSize,
  1958. unixDeviceCharacteristics
  1959. };
  1960. #endif /* SQLITE_ENABLE_LOCKING_STYLE */
/*
** Initialize the unixFile structure that pId points to for the already
** open file descriptor h.
** If the lock bookkeeping cannot be set up, close the file and return
** an error.
*/
  1966. #ifdef SQLITE_ENABLE_LOCKING_STYLE
  1967. /*
  1968. ** When locking extensions are enabled, the filepath and locking style
  1969. ** are needed to determine the unixFile pMethod to use for locking operations.
  1970. ** The locking-style specific lockingContext data structure is created
  1971. ** and assigned here also.
  1972. */
  1973. static int fillInUnixFile(
  1974. int h, /* Open file descriptor of file being opened */
  1975. int dirfd, /* Directory file descriptor */
  1976. sqlite3_file *pId, /* Write completed initialization here */
  1977. const char *zFilename, /* Name of the file being opened */
  1978. ){
  1979. sqlite3LockingStyle lockingStyle;
  1980. unixFile *pNew = (unixFile *)pId;
  1981. int rc;
  1982. memset(pNew, 0, sizeof(unixFile));
  1983. lockingStyle = sqlite3DetectLockingStyle(zFilename, h);
  1984. if ( lockingStyle == posixLockingStyle ) {
  1985. enterMutex();
  1986. rc = findLockInfo(h, &pNew->pLock, &pNew->pOpen);
  1987. leaveMutex();
  1988. if( rc ){
  1989. close(h);
  1990. unlink(zFilename);
  1991. return SQLITE_NOMEM;
  1992. }
  1993. } else {
  1994. /* pLock and pOpen are only used for posix advisory locking */
  1995. pNew->pLock = NULL;
  1996. pNew->pOpen = NULL;
  1997. }
  1998. pNew->dirfd = -1;
  1999. pNew->h = h;
  2000. SET_THREADID(pNew);
  2001. pNew = sqlite3_malloc( sizeof(unixFile) );
  2002. if( pNew==0 ){
  2003. close(h);
  2004. enterMutex();
  2005. releaseLockInfo(pNew->pLock);
  2006. releaseOpenCnt(pNew->pOpen);
  2007. leaveMutex();
  2008. return SQLITE_NOMEM;
  2009. }else{
  2010. switch(lockingStyle) {
  2011. case afpLockingStyle: {
  2012. /* afp locking uses the file path so it needs to be included in
  2013. ** the afpLockingContext */
  2014. int nFilename;
  2015. pNew->pMethod = &sqlite3AFPLockingUnixIoMethod;
  2016. pNew->lockingContext =
  2017. sqlite3_malloc(sizeof(afpLockingContext));
  2018. nFilename = strlen(zFilename)+1;
  2019. ((afpLockingContext *)pNew->lockingContext)->filePath =
  2020. sqlite3_malloc(nFilename);
  2021. memcpy(((afpLockingContext *)pNew->lockingContext)->filePath,
  2022. zFilename, nFilename);
  2023. srandomdev();
  2024. break;
  2025. }
  2026. case flockLockingStyle:
  2027. /* flock locking doesn't need additional lockingContext information */
  2028. pNew->pMethod = &sqlite3FlockLockingUnixIoMethod;
  2029. break;
  2030. case dotlockLockingStyle: {
  2031. /* dotlock locking uses the file path so it needs to be included in
  2032. ** the dotlockLockingContext */
  2033. int nFilename;
  2034. pNew->pMethod = &sqlite3DotlockLockingUnixIoMethod;
  2035. pNew->lockingContext = sqlite3_malloc(
  2036. sizeof(dotlockLockingContext));
  2037. nFilename = strlen(zFilename) + 6;
  2038. ((dotlockLockingContext *)pNew->lockingContext)->lockPath =
  2039. sqlite3_malloc( nFilename );
  2040. sqlite3_snprintf(nFilename,
  2041. ((dotlockLockingContext *)pNew->lockingContext)->lockPath,
  2042. "%s.lock", zFilename);
  2043. break;
  2044. }
  2045. case posixLockingStyle:
  2046. /* posix locking doesn't need additional lockingContext information */
  2047. pNew->pMethod = &sqlite3UnixIoMethod;
  2048. break;
  2049. case noLockingStyle:
  2050. case unsupportedLockingStyle:
  2051. default:
  2052. pNew->pMethod = &sqlite3NolockLockingUnixIoMethod;
  2053. }
  2054. OpenCounter(+1);
  2055. return SQLITE_OK;
  2056. }
  2057. }
  2058. #else /* SQLITE_ENABLE_LOCKING_STYLE */
  2059. static int fillInUnixFile(
  2060. int h, /* Open file descriptor on file being opened */
  2061. int dirfd,
  2062. sqlite3_file *pId, /* Write to the unixFile structure here */
  2063. const char *zFilename /* Name of the file being opened */
  2064. ){
  2065. unixFile *pNew = (unixFile *)pId;
  2066. int rc;
  2067. #ifdef FD_CLOEXEC
  2068. fcntl(h, F_SETFD, fcntl(h, F_GETFD, 0) | FD_CLOEXEC);
  2069. #endif
  2070. enterMutex();
  2071. rc = findLockInfo(h, &pNew->pLock, &pNew->pOpen);
  2072. leaveMutex();
  2073. if( rc ){
  2074. if( dirfd>=0 ) close(dirfd);
  2075. close(h);
  2076. return SQLITE_NOMEM;
  2077. }
  2078. OSTRACE3("OPEN %-3d %s\n", h, zFilename);
  2079. pNew->dirfd = -1;
  2080. pNew->h = h;
  2081. pNew->dirfd = dirfd;
  2082. SET_THREADID(pNew);
  2083. pNew->pMethod = &sqlite3UnixIoMethod;
  2084. OpenCounter(+1);
  2085. return SQLITE_OK;
  2086. }
  2087. #endif /* SQLITE_ENABLE_LOCKING_STYLE */
  2088. /*
  2089. ** Open a file descriptor to the directory containing file zFilename.
  2090. ** If successful, *pFd is set to the opened file descriptor and
  2091. ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
  2092. ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
  2093. ** value.
  2094. **
  2095. ** If SQLITE_OK is returned, the caller is responsible for closing
  2096. ** the file descriptor *pFd using close().
  2097. */
  2098. static int openDirectory(const char *zFilename, int *pFd){
  2099. int ii;
  2100. int fd = -1;
  2101. char zDirname[MAX_PATHNAME+1];
  2102. sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
  2103. for(ii=strlen(zDirname); ii>=0 && zDirname[ii]!='/'; ii--);
  2104. if( ii>0 ){
  2105. zDirname[ii] = '\0';
  2106. fd = open(zDirname, O_RDONLY|O_BINARY, 0);
  2107. if( fd>=0 ){
  2108. #ifdef FD_CLOEXEC
  2109. fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
  2110. #endif
  2111. OSTRACE3("OPENDIR %-3d %s\n", fd, zDirname);
  2112. }
  2113. }
  2114. *pFd = fd;
  2115. return (fd>=0?SQLITE_OK:SQLITE_CANTOPEN);
  2116. }
  2117. /*
  2118. ** Open the file zPath.
  2119. **
  2120. ** Previously, the SQLite OS layer used three functions in place of this
  2121. ** one:
  2122. **
  2123. ** sqlite3OsOpenReadWrite();
  2124. ** sqlite3OsOpenReadOnly();
  2125. ** sqlite3OsOpenExclusive();
  2126. **
  2127. ** These calls correspond to the following combinations of flags:
  2128. **
  2129. ** ReadWrite() -> (READWRITE | CREATE)
  2130. ** ReadOnly() -> (READONLY)
  2131. ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
  2132. **
  2133. ** The old OpenExclusive() accepted a boolean argument - "delFlag". If
  2134. ** true, the file was configured to be automatically deleted when the
  2135. ** file handle closed. To achieve the same effect using this new
  2136. ** interface, add the DELETEONCLOSE flag to those specified above for
  2137. ** OpenExclusive().
  2138. */
  2139. static int unixOpen(
  2140. sqlite3_vfs *pVfs,
  2141. const char *zPath,
  2142. sqlite3_file *pFile,
  2143. int flags,
  2144. int *pOutFlags
  2145. ){
  2146. int fd = 0; /* File descriptor returned by open() */
  2147. int dirfd = -1; /* Directory file descriptor */
  2148. int oflags = 0; /* Flags to pass to open() */
  2149. int eType = flags&0xFFFFFF00; /* Type of file to open */
  2150. int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);
  2151. int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);
  2152. int isCreate = (flags & SQLITE_OPEN_CREATE);
  2153. int isReadonly = (flags & SQLITE_OPEN_READONLY);
  2154. int isReadWrite = (flags & SQLITE_OPEN_READWRITE);
  2155. /* If creating a master or main-file journal, this function will open
  2156. ** a file-descriptor on the directory too. The first time unixSync()
  2157. ** is called the directory file descriptor will be fsync()ed and close()d.
  2158. */
  2159. int isOpenDirectory = (isCreate &&
  2160. (eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL)
  2161. );
  2162. /* Check the following statements are true:
  2163. **
  2164. ** (a) Exactly one of the READWRITE and READONLY flags must be set, and
  2165. ** (b) if CREATE is set, then READWRITE must also be set, and
  2166. ** (c) if EXCLUSIVE is set, then CREATE must also be set.
  2167. ** (d) if DELETEONCLOSE is set, then CREATE must also be set.
  2168. */
  2169. assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
  2170. assert(isCreate==0 || isReadWrite);
  2171. assert(isExclusive==0 || isCreate);
  2172. assert(isDelete==0 || isCreate);
  2173. /* The main DB, main journal, and master journal are never automatically
  2174. ** deleted
  2175. */
  2176. assert( eType!=SQLITE_OPEN_MAIN_DB || !isDelete );
  2177. assert( eType!=SQLITE_OPEN_MAIN_JOURNAL || !isDelete );
  2178. assert( eType!=SQLITE_OPEN_MASTER_JOURNAL || !isDelete );
  2179. /* Assert that the upper layer has set one of the "file-type" flags. */
  2180. assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB
  2181. || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL
  2182. || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL
  2183. || eType==SQLITE_OPEN_TRANSIENT_DB
  2184. );
  2185. if( isReadonly ) oflags |= O_RDONLY;
  2186. if( isReadWrite ) oflags |= O_RDWR;
  2187. if( isCreate ) oflags |= O_CREAT;
  2188. if( isExclusive ) oflags |= (O_EXCL|O_NOFOLLOW);
  2189. oflags |= (O_LARGEFILE|O_BINARY);
  2190. memset(pFile, 0, sizeof(unixFile));
  2191. fd = open(zPath, oflags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
  2192. if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
  2193. /* Failed to open the file for read/write access. Try read-only. */
  2194. flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
  2195. flags |= SQLITE_OPEN_READONLY;
  2196. return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);
  2197. }
  2198. if( fd<0 ){
  2199. return SQLITE_CANTOPEN;
  2200. }
  2201. if( isDelete ){
  2202. unlink(zPath);
  2203. }
  2204. if( pOutFlags ){
  2205. *pOutFlags = flags;
  2206. }
  2207. assert(fd!=0);
  2208. if( isOpenDirectory ){
  2209. int rc = openDirectory(zPath, &dirfd);
  2210. if( rc!=SQLITE_OK ){
  2211. close(fd);
  2212. return rc;
  2213. }
  2214. }
  2215. return fillInUnixFile(fd, dirfd, pFile, zPath);
  2216. }
  2217. /*
  2218. ** Delete the file at zPath. If the dirSync argument is true, fsync()
  2219. ** the directory after deleting the file.
  2220. */
  2221. static int unixDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){
  2222. int rc = SQLITE_OK;
  2223. SimulateIOError(return SQLITE_IOERR_DELETE);
  2224. unlink(zPath);
  2225. if( dirSync ){
  2226. int fd;
  2227. rc = openDirectory(zPath, &fd);
  2228. if( rc==SQLITE_OK ){
  2229. if( fsync(fd) ){
  2230. rc = SQLITE_IOERR_DIR_FSYNC;
  2231. }
  2232. close(fd);
  2233. }
  2234. }
  2235. return rc;
  2236. }
  2237. /*
  2238. ** Test the existance of or access permissions of file zPath. The
  2239. ** test performed depends on the value of flags:
  2240. **
  2241. ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists
  2242. ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.
  2243. ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable.
  2244. **
  2245. ** Otherwise return 0.
  2246. */
  2247. static int unixAccess(sqlite3_vfs *pVfs, const char *zPath, int flags){
  2248. int amode = 0;
  2249. switch( flags ){
  2250. case SQLITE_ACCESS_EXISTS:
  2251. amode = F_OK;
  2252. break;
  2253. case SQLITE_ACCESS_READWRITE:
  2254. amode = W_OK|R_OK;
  2255. break;
  2256. case SQLITE_ACCESS_READ:
  2257. amode = R_OK;
  2258. break;
  2259. default:
  2260. assert(!"Invalid flags argument");
  2261. }
  2262. return (access(zPath, amode)==0);
  2263. }
  2264. /*
  2265. ** Create a temporary file name in zBuf. zBuf must be allocated
  2266. ** by the calling process and must be big enough to hold at least
  2267. ** pVfs->mxPathname bytes.
  2268. */
  2269. static int unixGetTempname(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
  2270. static const char *azDirs[] = {
  2271. 0,
  2272. "/var/tmp",
  2273. "/usr/tmp",
  2274. "/tmp",
  2275. ".",
  2276. };
  2277. static const unsigned char zChars[] =
  2278. "abcdefghijklmnopqrstuvwxyz"
  2279. "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  2280. "0123456789";
  2281. int i, j;
  2282. struct stat buf;
  2283. const char *zDir = ".";
  2284. /* It's odd to simulate an io-error here, but really this is just
  2285. ** using the io-error infrastructure to test that SQLite handles this
  2286. ** function failing.
  2287. */
  2288. SimulateIOError( return SQLITE_ERROR );
  2289. azDirs[0] = sqlite3_temp_directory;
  2290. for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
  2291. if( azDirs[i]==0 ) continue;
  2292. if( stat(azDirs[i], &buf) ) continue;
  2293. if( !S_ISDIR(buf.st_mode) ) continue;
  2294. if( access(azDirs[i], 07) ) continue;
  2295. zDir = azDirs[i];
  2296. break;
  2297. }
  2298. if( strlen(zDir) - sizeof(SQLITE_TEMP_FILE_PREFIX) - 17 <=0 ){
  2299. return SQLITE_ERROR;
  2300. }
  2301. do{
  2302. assert( pVfs->mxPathname==MAX_PATHNAME );
  2303. sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);
  2304. j = strlen(zBuf);
  2305. sqlite3Randomness(15, &zBuf[j]);
  2306. for(i=0; i<15; i++, j++){
  2307. zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
  2308. }
  2309. zBuf[j] = 0;
  2310. }while( access(zBuf,0)==0 );
  2311. return SQLITE_OK;
  2312. }
  2313. /*
  2314. ** Turn a relative pathname into a full pathname. The relative path
  2315. ** is stored as a nul-terminated string in the buffer pointed to by
  2316. ** zPath.
  2317. **
  2318. ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes
  2319. ** (in this case, MAX_PATHNAME bytes). The full-path is written to
  2320. ** this buffer before returning.
  2321. */
  2322. static int unixFullPathname(
  2323. sqlite3_vfs *pVfs, /* Pointer to vfs object */
  2324. const char *zPath, /* Possibly relative input path */
  2325. int nOut, /* Size of output buffer in bytes */
  2326. char *zOut /* Output buffer */
  2327. ){
  2328. /* It's odd to simulate an io-error here, but really this is just
  2329. ** using the io-error infrastructure to test that SQLite handles this
  2330. ** function failing. This function could fail if, for example, the
  2331. ** current working directly has been unlinked.
  2332. */
  2333. SimulateIOError( return SQLITE_ERROR );
  2334. assert( pVfs->mxPathname==MAX_PATHNAME );
  2335. zOut[nOut-1] = '\0';
  2336. if( zPath[0]=='/' ){
  2337. sqlite3_snprintf(nOut, zOut, "%s", zPath);
  2338. }else{
  2339. int nCwd;
  2340. if( getcwd(zOut, nOut-1)==0 ){
  2341. return SQLITE_CANTOPEN;
  2342. }
  2343. nCwd = strlen(zOut);
  2344. sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath);
  2345. }
  2346. return SQLITE_OK;
  2347. #if 0
  2348. /*
  2349. ** Remove "/./" path elements and convert "/A/./" path elements
  2350. ** to just "/".
  2351. */
  2352. if( zFull ){
  2353. int i, j;
  2354. for(i=j=0; zFull[i]; i++){
  2355. if( zFull[i]=='/' ){
  2356. if( zFull[i+1]=='/' ) continue;
  2357. if( zFull[i+1]=='.' && zFull[i+2]=='/' ){
  2358. i += 1;
  2359. continue;
  2360. }
  2361. if( zFull[i+1]=='.' && zFull[i+2]=='.' && zFull[i+3]=='/' ){
  2362. while( j>0 && zFull[j-1]!='/' ){ j--; }
  2363. i += 3;
  2364. continue;
  2365. }
  2366. }
  2367. zFull[j++] = zFull[i];
  2368. }
  2369. zFull[j] = 0;
  2370. }
  2371. #endif
  2372. }
  2373. #ifndef SQLITE_OMIT_LOAD_EXTENSION
  2374. /*
  2375. ** Interfaces for opening a shared library, finding entry points
  2376. ** within the shared library, and closing the shared library.
  2377. */
  2378. #include <dlfcn.h>
  2379. static void *unixDlOpen(sqlite3_vfs *pVfs, const char *zFilename){
  2380. return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL);
  2381. }
  2382. /*
  2383. ** SQLite calls this function immediately after a call to unixDlSym() or
  2384. ** unixDlOpen() fails (returns a null pointer). If a more detailed error
  2385. ** message is available, it is written to zBufOut. If no error message
  2386. ** is available, zBufOut is left unmodified and SQLite uses a default
  2387. ** error message.
  2388. */
  2389. static void unixDlError(sqlite3_vfs *pVfs, int nBuf, char *zBufOut){
  2390. char *zErr;
  2391. enterMutex();
  2392. zErr = dlerror();
  2393. if( zErr ){
  2394. sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);
  2395. }
  2396. leaveMutex();
  2397. }
  2398. static void *unixDlSym(sqlite3_vfs *pVfs, void *pHandle, const char *zSymbol){
  2399. return dlsym(pHandle, zSymbol);
  2400. }
  2401. static void unixDlClose(sqlite3_vfs *pVfs, void *pHandle){
  2402. dlclose(pHandle);
  2403. }
  2404. #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
  /* With extension loading compiled out, each dynamic-loading method name
  ** expands to 0 wherever it is used.
  */
  #define unixDlClose  0
  #define unixDlSym    0
  #define unixDlError  0
  #define unixDlOpen   0
  2409. #endif
  2410. /*
  2411. ** Write nBuf bytes of random data to the supplied buffer zBuf.
  2412. */
  2413. static int unixRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
  2414. assert(nBuf>=(sizeof(time_t)+sizeof(int)));
  2415. /* We have to initialize zBuf to prevent valgrind from reporting
  2416. ** errors. The reports issued by valgrind are incorrect - we would
  2417. ** prefer that the randomness be increased by making use of the
  2418. ** uninitialized space in zBuf - but valgrind errors tend to worry
  2419. ** some users. Rather than argue, it seems easier just to initialize
  2420. ** the whole array and silence valgrind, even if that means less randomness
  2421. ** in the random seed.
  2422. **
  2423. ** When testing, initializing zBuf[] to zero is all we do. That means
  2424. ** that we always use the same random number sequence. This makes the
  2425. ** tests repeatable.
  2426. */
  2427. memset(zBuf, 0, nBuf);
  2428. #if !defined(SQLITE_TEST)
  2429. {
  2430. int pid, fd;
  2431. fd = open("/dev/urandom", O_RDONLY);
  2432. if( fd<0 ){
  2433. time_t t;
  2434. time(&t);
  2435. memcpy(zBuf, &t, sizeof(t));
  2436. pid = getpid();
  2437. memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid));
  2438. }else{
  2439. read(fd, zBuf, nBuf);
  2440. close(fd);
  2441. }
  2442. }
  2443. #endif
  2444. return SQLITE_OK;
  2445. }
  2446. /*
  2447. ** Sleep for a little while. Return the amount of time slept.
  2448. ** The argument is the number of microseconds we want to sleep.
  2449. ** The return value is the number of microseconds of sleep actually
  2450. ** requested from the underlying operating system, a number which
  2451. ** might be greater than or equal to the argument, but not less
  2452. ** than the argument.
  2453. */
  2454. static int unixSleep(sqlite3_vfs *pVfs, int microseconds){
  2455. #if defined(HAVE_USLEEP) && HAVE_USLEEP
  2456. usleep(microseconds);
  2457. return microseconds;
  2458. #else
  2459. int seconds = (microseconds+999999)/1000000;
  2460. sleep(seconds);
  2461. return seconds*1000000;
  2462. #endif
  2463. }
  2464. /*
  2465. ** The following variable, if set to a non-zero value, becomes the result
  2466. ** returned from sqlite3OsCurrentTime(). This is used for testing.
  2467. */
  2468. #ifdef SQLITE_TEST
  2469. int sqlite3_current_time = 0;
  2470. #endif
  2471. /*
  2472. ** Find the current time (in Universal Coordinated Time). Write the
  2473. ** current time and date as a Julian Day number into *prNow and
  2474. ** return 0. Return 1 if the time and date cannot be found.
  2475. */
  2476. static int unixCurrentTime(sqlite3_vfs *pVfs, double *prNow){
  2477. #ifdef NO_GETTOD
  2478. time_t t;
  2479. time(&t);
  2480. *prNow = t/86400.0 + 2440587.5;
  2481. #else
  2482. struct timeval sNow;
  2483. gettimeofday(&sNow, 0);
  2484. *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
  2485. #endif
  2486. #ifdef SQLITE_TEST
  2487. if( sqlite3_current_time ){
  2488. *prNow = sqlite3_current_time/86400.0 + 2440587.5;
  2489. }
  2490. #endif
  2491. return 0;
  2492. }
  2493. /*
  2494. ** Return a pointer to the sqlite3DefaultVfs structure. We use
  2495. ** a function rather than give the structure global scope because
  2496. ** some compilers (MSVC) do not allow forward declarations of
  2497. ** initialized structures.
  2498. */
  2499. sqlite3_vfs *sqlite3OsDefaultVfs(void){
  2500. static sqlite3_vfs unixVfs = {
  2501. 1, /* iVersion */
  2502. sizeof(unixFile), /* szOsFile */
  2503. MAX_PATHNAME, /* mxPathname */
  2504. 0, /* pNext */
  2505. "unix", /* zName */
  2506. 0, /* pAppData */
  2507. unixOpen, /* xOpen */
  2508. unixDelete, /* xDelete */
  2509. unixAccess, /* xAccess */
  2510. unixGetTempname, /* xGetTempName */
  2511. unixFullPathname, /* xFullPathname */
  2512. unixDlOpen, /* xDlOpen */
  2513. unixDlError, /* xDlError */
  2514. unixDlSym, /* xDlSym */
  2515. unixDlClose, /* xDlClose */
  2516. unixRandomness, /* xRandomness */
  2517. unixSleep, /* xSleep */
  2518. unixCurrentTime /* xCurrentTime */
  2519. };
  2520. return &unixVfs;
  2521. }
  2522. #endif /* OS_UNIX */