tree-walk.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250
  1. #include "cache.h"
  2. #include "tree-walk.h"
  3. #include "dir.h"
  4. #include "object-store.h"
  5. #include "tree.h"
  6. #include "pathspec.h"
  7. static const char *get_mode(const char *str, unsigned int *modep)
  8. {
  9. unsigned char c;
  10. unsigned int mode = 0;
  11. if (*str == ' ')
  12. return NULL;
  13. while ((c = *str++) != ' ') {
  14. if (c < '0' || c > '7')
  15. return NULL;
  16. mode = (mode << 3) + (c - '0');
  17. }
  18. *modep = mode;
  19. return str;
  20. }
  21. static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size, struct strbuf *err)
  22. {
  23. const char *path;
  24. unsigned int mode, len;
  25. const unsigned hashsz = the_hash_algo->rawsz;
  26. if (size < hashsz + 3 || buf[size - (hashsz + 1)]) {
  27. strbuf_addstr(err, _("too-short tree object"));
  28. return -1;
  29. }
  30. path = get_mode(buf, &mode);
  31. if (!path) {
  32. strbuf_addstr(err, _("malformed mode in tree entry"));
  33. return -1;
  34. }
  35. if (!*path) {
  36. strbuf_addstr(err, _("empty filename in tree entry"));
  37. return -1;
  38. }
  39. len = strlen(path) + 1;
  40. /* Initialize the descriptor entry */
  41. desc->entry.path = path;
  42. desc->entry.mode = canon_mode(mode);
  43. desc->entry.pathlen = len - 1;
  44. hashcpy(desc->entry.oid.hash, (const unsigned char *)path + len);
  45. return 0;
  46. }
  47. static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err)
  48. {
  49. desc->buffer = buffer;
  50. desc->size = size;
  51. if (size)
  52. return decode_tree_entry(desc, buffer, size, err);
  53. return 0;
  54. }
  55. void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
  56. {
  57. struct strbuf err = STRBUF_INIT;
  58. if (init_tree_desc_internal(desc, buffer, size, &err))
  59. die("%s", err.buf);
  60. strbuf_release(&err);
  61. }
  62. int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size)
  63. {
  64. struct strbuf err = STRBUF_INIT;
  65. int result = init_tree_desc_internal(desc, buffer, size, &err);
  66. if (result)
  67. error("%s", err.buf);
  68. strbuf_release(&err);
  69. return result;
  70. }
  71. void *fill_tree_descriptor(struct repository *r,
  72. struct tree_desc *desc,
  73. const struct object_id *oid)
  74. {
  75. unsigned long size = 0;
  76. void *buf = NULL;
  77. if (oid) {
  78. buf = read_object_with_reference(r, oid, tree_type, &size, NULL);
  79. if (!buf)
  80. die("unable to read tree %s", oid_to_hex(oid));
  81. }
  82. init_tree_desc(desc, buf, size);
  83. return buf;
  84. }
  85. static void entry_clear(struct name_entry *a)
  86. {
  87. memset(a, 0, sizeof(*a));
  88. }
  89. static void entry_extract(struct tree_desc *t, struct name_entry *a)
  90. {
  91. *a = t->entry;
  92. }
  93. static int update_tree_entry_internal(struct tree_desc *desc, struct strbuf *err)
  94. {
  95. const void *buf = desc->buffer;
  96. const unsigned char *end = (const unsigned char *)desc->entry.path + desc->entry.pathlen + 1 + the_hash_algo->rawsz;
  97. unsigned long size = desc->size;
  98. unsigned long len = end - (const unsigned char *)buf;
  99. if (size < len)
  100. die(_("too-short tree file"));
  101. buf = end;
  102. size -= len;
  103. desc->buffer = buf;
  104. desc->size = size;
  105. if (size)
  106. return decode_tree_entry(desc, buf, size, err);
  107. return 0;
  108. }
  109. void update_tree_entry(struct tree_desc *desc)
  110. {
  111. struct strbuf err = STRBUF_INIT;
  112. if (update_tree_entry_internal(desc, &err))
  113. die("%s", err.buf);
  114. strbuf_release(&err);
  115. }
  116. int update_tree_entry_gently(struct tree_desc *desc)
  117. {
  118. struct strbuf err = STRBUF_INIT;
  119. if (update_tree_entry_internal(desc, &err)) {
  120. error("%s", err.buf);
  121. strbuf_release(&err);
  122. /* Stop processing this tree after error */
  123. desc->size = 0;
  124. return -1;
  125. }
  126. strbuf_release(&err);
  127. return 0;
  128. }
  129. int tree_entry(struct tree_desc *desc, struct name_entry *entry)
  130. {
  131. if (!desc->size)
  132. return 0;
  133. *entry = desc->entry;
  134. update_tree_entry(desc);
  135. return 1;
  136. }
  137. int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
  138. {
  139. if (!desc->size)
  140. return 0;
  141. *entry = desc->entry;
  142. if (update_tree_entry_gently(desc))
  143. return 0;
  144. return 1;
  145. }
  146. void setup_traverse_info(struct traverse_info *info, const char *base)
  147. {
  148. size_t pathlen = strlen(base);
  149. static struct traverse_info dummy;
  150. memset(info, 0, sizeof(*info));
  151. if (pathlen && base[pathlen-1] == '/')
  152. pathlen--;
  153. info->pathlen = pathlen ? pathlen + 1 : 0;
  154. info->name = base;
  155. info->namelen = pathlen;
  156. if (pathlen)
  157. info->prev = &dummy;
  158. }
  159. char *make_traverse_path(char *path, size_t pathlen,
  160. const struct traverse_info *info,
  161. const char *name, size_t namelen)
  162. {
  163. /* Always points to the end of the name we're about to add */
  164. size_t pos = st_add(info->pathlen, namelen);
  165. if (pos >= pathlen)
  166. BUG("too small buffer passed to make_traverse_path");
  167. path[pos] = 0;
  168. for (;;) {
  169. if (pos < namelen)
  170. BUG("traverse_info pathlen does not match strings");
  171. pos -= namelen;
  172. memcpy(path + pos, name, namelen);
  173. if (!pos)
  174. break;
  175. path[--pos] = '/';
  176. if (!info)
  177. BUG("traverse_info ran out of list items");
  178. name = info->name;
  179. namelen = info->namelen;
  180. info = info->prev;
  181. }
  182. return path;
  183. }
  184. void strbuf_make_traverse_path(struct strbuf *out,
  185. const struct traverse_info *info,
  186. const char *name, size_t namelen)
  187. {
  188. size_t len = traverse_path_len(info, namelen);
  189. strbuf_grow(out, len);
  190. make_traverse_path(out->buf + out->len, out->alloc - out->len,
  191. info, name, namelen);
  192. strbuf_setlen(out, out->len + len);
  193. }
  194. struct tree_desc_skip {
  195. struct tree_desc_skip *prev;
  196. const void *ptr;
  197. };
  198. struct tree_desc_x {
  199. struct tree_desc d;
  200. struct tree_desc_skip *skip;
  201. };
  202. static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
  203. {
  204. /*
  205. * The caller wants to pick *a* from a tree or nothing.
  206. * We are looking at *b* in a tree.
  207. *
  208. * (0) If a and b are the same name, we are trivially happy.
  209. *
  210. * There are three possibilities where *a* could be hiding
  211. * behind *b*.
  212. *
  213. * (1) *a* == "t", *b* == "ab" i.e. *b* sorts earlier than *a* no
  214. * matter what.
  215. * (2) *a* == "t", *b* == "t-2" and "t" is a subtree in the tree;
  216. * (3) *a* == "t-2", *b* == "t" and "t-2" is a blob in the tree.
  217. *
  218. * Otherwise we know *a* won't appear in the tree without
  219. * scanning further.
  220. */
  221. int cmp = name_compare(a, a_len, b, b_len);
  222. /* Most common case first -- reading sync'd trees */
  223. if (!cmp)
  224. return cmp;
  225. if (0 < cmp) {
  226. /* a comes after b; it does not matter if it is case (3)
  227. if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
  228. return 1;
  229. */
  230. return 1; /* keep looking */
  231. }
  232. /* b comes after a; are we looking at case (2)? */
  233. if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
  234. return 1; /* keep looking */
  235. return -1; /* a cannot appear in the tree */
  236. }
  237. /*
  238. * From the extended tree_desc, extract the first name entry, while
  239. * paying attention to the candidate "first" name. Most importantly,
  240. * when looking for an entry, if there are entries that sorts earlier
  241. * in the tree object representation than that name, skip them and
  242. * process the named entry first. We will remember that we haven't
  243. * processed the first entry yet, and in the later call skip the
  244. * entry we processed early when update_extended_entry() is called.
  245. *
  246. * E.g. if the underlying tree object has these entries:
  247. *
  248. * blob "t-1"
  249. * blob "t-2"
  250. * tree "t"
  251. * blob "t=1"
  252. *
  253. * and the "first" asks for "t", remember that we still need to
  254. * process "t-1" and "t-2" but extract "t". After processing the
  255. * entry "t" from this call, the caller will let us know by calling
  256. * update_extended_entry() that we can remember "t" has been processed
  257. * already.
  258. */
  259. static void extended_entry_extract(struct tree_desc_x *t,
  260. struct name_entry *a,
  261. const char *first,
  262. int first_len)
  263. {
  264. const char *path;
  265. int len;
  266. struct tree_desc probe;
  267. struct tree_desc_skip *skip;
  268. /*
  269. * Extract the first entry from the tree_desc, but skip the
  270. * ones that we already returned in earlier rounds.
  271. */
  272. while (1) {
  273. if (!t->d.size) {
  274. entry_clear(a);
  275. break; /* not found */
  276. }
  277. entry_extract(&t->d, a);
  278. for (skip = t->skip; skip; skip = skip->prev)
  279. if (a->path == skip->ptr)
  280. break; /* found */
  281. if (!skip)
  282. break;
  283. /* We have processed this entry already. */
  284. update_tree_entry(&t->d);
  285. }
  286. if (!first || !a->path)
  287. return;
  288. /*
  289. * The caller wants "first" from this tree, or nothing.
  290. */
  291. path = a->path;
  292. len = tree_entry_len(a);
  293. switch (check_entry_match(first, first_len, path, len)) {
  294. case -1:
  295. entry_clear(a);
  296. case 0:
  297. return;
  298. default:
  299. break;
  300. }
  301. /*
  302. * We need to look-ahead -- we suspect that a subtree whose
  303. * name is "first" may be hiding behind the current entry "path".
  304. */
  305. probe = t->d;
  306. while (probe.size) {
  307. entry_extract(&probe, a);
  308. path = a->path;
  309. len = tree_entry_len(a);
  310. switch (check_entry_match(first, first_len, path, len)) {
  311. case -1:
  312. entry_clear(a);
  313. case 0:
  314. return;
  315. default:
  316. update_tree_entry(&probe);
  317. break;
  318. }
  319. /* keep looking */
  320. }
  321. entry_clear(a);
  322. }
  323. static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
  324. {
  325. if (t->d.entry.path == a->path) {
  326. update_tree_entry(&t->d);
  327. } else {
  328. /* we have returned this entry early */
  329. struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
  330. skip->ptr = a->path;
  331. skip->prev = t->skip;
  332. t->skip = skip;
  333. }
  334. }
  335. static void free_extended_entry(struct tree_desc_x *t)
  336. {
  337. struct tree_desc_skip *p, *s;
  338. for (s = t->skip; s; s = p) {
  339. p = s->prev;
  340. free(s);
  341. }
  342. }
  343. static inline int prune_traversal(struct index_state *istate,
  344. struct name_entry *e,
  345. struct traverse_info *info,
  346. struct strbuf *base,
  347. int still_interesting)
  348. {
  349. if (!info->pathspec || still_interesting == 2)
  350. return 2;
  351. if (still_interesting < 0)
  352. return still_interesting;
  353. return tree_entry_interesting(istate, e, base,
  354. 0, info->pathspec);
  355. }
  356. int traverse_trees(struct index_state *istate,
  357. int n, struct tree_desc *t,
  358. struct traverse_info *info)
  359. {
  360. int error = 0;
  361. struct name_entry entry[MAX_TRAVERSE_TREES];
  362. int i;
  363. struct tree_desc_x tx[ARRAY_SIZE(entry)];
  364. struct strbuf base = STRBUF_INIT;
  365. int interesting = 1;
  366. char *traverse_path;
  367. if (n >= ARRAY_SIZE(entry))
  368. BUG("traverse_trees() called with too many trees (%d)", n);
  369. for (i = 0; i < n; i++) {
  370. tx[i].d = t[i];
  371. tx[i].skip = NULL;
  372. }
  373. if (info->prev) {
  374. strbuf_make_traverse_path(&base, info->prev,
  375. info->name, info->namelen);
  376. strbuf_addch(&base, '/');
  377. traverse_path = xstrndup(base.buf, base.len);
  378. } else {
  379. traverse_path = xstrndup(info->name, info->pathlen);
  380. }
  381. info->traverse_path = traverse_path;
  382. for (;;) {
  383. int trees_used;
  384. unsigned long mask, dirmask;
  385. const char *first = NULL;
  386. int first_len = 0;
  387. struct name_entry *e = NULL;
  388. int len;
  389. for (i = 0; i < n; i++) {
  390. e = entry + i;
  391. extended_entry_extract(tx + i, e, NULL, 0);
  392. }
  393. /*
  394. * A tree may have "t-2" at the current location even
  395. * though it may have "t" that is a subtree behind it,
  396. * and another tree may return "t". We want to grab
  397. * all "t" from all trees to match in such a case.
  398. */
  399. for (i = 0; i < n; i++) {
  400. e = entry + i;
  401. if (!e->path)
  402. continue;
  403. len = tree_entry_len(e);
  404. if (!first) {
  405. first = e->path;
  406. first_len = len;
  407. continue;
  408. }
  409. if (name_compare(e->path, len, first, first_len) < 0) {
  410. first = e->path;
  411. first_len = len;
  412. }
  413. }
  414. if (first) {
  415. for (i = 0; i < n; i++) {
  416. e = entry + i;
  417. extended_entry_extract(tx + i, e, first, first_len);
  418. /* Cull the ones that are not the earliest */
  419. if (!e->path)
  420. continue;
  421. len = tree_entry_len(e);
  422. if (name_compare(e->path, len, first, first_len))
  423. entry_clear(e);
  424. }
  425. }
  426. /* Now we have in entry[i] the earliest name from the trees */
  427. mask = 0;
  428. dirmask = 0;
  429. for (i = 0; i < n; i++) {
  430. if (!entry[i].path)
  431. continue;
  432. mask |= 1ul << i;
  433. if (S_ISDIR(entry[i].mode))
  434. dirmask |= 1ul << i;
  435. e = &entry[i];
  436. }
  437. if (!mask)
  438. break;
  439. interesting = prune_traversal(istate, e, info, &base, interesting);
  440. if (interesting < 0)
  441. break;
  442. if (interesting) {
  443. trees_used = info->fn(n, mask, dirmask, entry, info);
  444. if (trees_used < 0) {
  445. error = trees_used;
  446. if (!info->show_all_errors)
  447. break;
  448. }
  449. mask &= trees_used;
  450. }
  451. for (i = 0; i < n; i++)
  452. if (mask & (1ul << i))
  453. update_extended_entry(tx + i, entry + i);
  454. }
  455. for (i = 0; i < n; i++)
  456. free_extended_entry(tx + i);
  457. free(traverse_path);
  458. info->traverse_path = NULL;
  459. strbuf_release(&base);
  460. return error;
  461. }
  462. struct dir_state {
  463. void *tree;
  464. unsigned long size;
  465. struct object_id oid;
  466. };
  467. static int find_tree_entry(struct repository *r, struct tree_desc *t,
  468. const char *name, struct object_id *result,
  469. unsigned short *mode)
  470. {
  471. int namelen = strlen(name);
  472. while (t->size) {
  473. const char *entry;
  474. struct object_id oid;
  475. int entrylen, cmp;
  476. oidcpy(&oid, tree_entry_extract(t, &entry, mode));
  477. entrylen = tree_entry_len(&t->entry);
  478. update_tree_entry(t);
  479. if (entrylen > namelen)
  480. continue;
  481. cmp = memcmp(name, entry, entrylen);
  482. if (cmp > 0)
  483. continue;
  484. if (cmp < 0)
  485. break;
  486. if (entrylen == namelen) {
  487. oidcpy(result, &oid);
  488. return 0;
  489. }
  490. if (name[entrylen] != '/')
  491. continue;
  492. if (!S_ISDIR(*mode))
  493. break;
  494. if (++entrylen == namelen) {
  495. oidcpy(result, &oid);
  496. return 0;
  497. }
  498. return get_tree_entry(r, &oid, name + entrylen, result, mode);
  499. }
  500. return -1;
  501. }
  502. int get_tree_entry(struct repository *r,
  503. const struct object_id *tree_oid,
  504. const char *name,
  505. struct object_id *oid,
  506. unsigned short *mode)
  507. {
  508. int retval;
  509. void *tree;
  510. unsigned long size;
  511. struct object_id root;
  512. tree = read_object_with_reference(r, tree_oid, tree_type, &size, &root);
  513. if (!tree)
  514. return -1;
  515. if (name[0] == '\0') {
  516. oidcpy(oid, &root);
  517. free(tree);
  518. return 0;
  519. }
  520. if (!size) {
  521. retval = -1;
  522. } else {
  523. struct tree_desc t;
  524. init_tree_desc(&t, tree, size);
  525. retval = find_tree_entry(r, &t, name, oid, mode);
  526. }
  527. free(tree);
  528. return retval;
  529. }
  530. /*
  531. * This is Linux's built-in max for the number of symlinks to follow.
  532. * That limit, of course, does not affect git, but it's a reasonable
  533. * choice.
  534. */
  535. #define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
  536. /**
  537. * Find a tree entry by following symlinks in tree_sha (which is
  538. * assumed to be the root of the repository). In the event that a
  539. * symlink points outside the repository (e.g. a link to /foo or a
  540. * root-level link to ../foo), the portion of the link which is
  541. * outside the repository will be returned in result_path, and *mode
  542. * will be set to 0. It is assumed that result_path is uninitialized.
  543. * If there are no symlinks, or the end result of the symlink chain
  544. * points to an object inside the repository, result will be filled in
  545. * with the sha1 of the found object, and *mode will hold the mode of
  546. * the object.
  547. *
  548. * See the code for enum get_oid_result for a description of
  549. * the return values.
  550. */
  551. enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
  552. struct object_id *tree_oid, const char *name,
  553. struct object_id *result, struct strbuf *result_path,
  554. unsigned short *mode)
  555. {
  556. int retval = MISSING_OBJECT;
  557. struct dir_state *parents = NULL;
  558. size_t parents_alloc = 0;
  559. size_t i, parents_nr = 0;
  560. struct object_id current_tree_oid;
  561. struct strbuf namebuf = STRBUF_INIT;
  562. struct tree_desc t;
  563. int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
  564. init_tree_desc(&t, NULL, 0UL);
  565. strbuf_addstr(&namebuf, name);
  566. oidcpy(&current_tree_oid, tree_oid);
  567. while (1) {
  568. int find_result;
  569. char *first_slash;
  570. char *remainder = NULL;
  571. if (!t.buffer) {
  572. void *tree;
  573. struct object_id root;
  574. unsigned long size;
  575. tree = read_object_with_reference(r,
  576. &current_tree_oid,
  577. tree_type, &size,
  578. &root);
  579. if (!tree)
  580. goto done;
  581. ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
  582. parents[parents_nr].tree = tree;
  583. parents[parents_nr].size = size;
  584. oidcpy(&parents[parents_nr].oid, &root);
  585. parents_nr++;
  586. if (namebuf.buf[0] == '\0') {
  587. oidcpy(result, &root);
  588. retval = FOUND;
  589. goto done;
  590. }
  591. if (!size)
  592. goto done;
  593. /* descend */
  594. init_tree_desc(&t, tree, size);
  595. }
  596. /* Handle symlinks to e.g. a//b by removing leading slashes */
  597. while (namebuf.buf[0] == '/') {
  598. strbuf_remove(&namebuf, 0, 1);
  599. }
  600. /* Split namebuf into a first component and a remainder */
  601. if ((first_slash = strchr(namebuf.buf, '/'))) {
  602. *first_slash = 0;
  603. remainder = first_slash + 1;
  604. }
  605. if (!strcmp(namebuf.buf, "..")) {
  606. struct dir_state *parent;
  607. /*
  608. * We could end up with .. in the namebuf if it
  609. * appears in a symlink.
  610. */
  611. if (parents_nr == 1) {
  612. if (remainder)
  613. *first_slash = '/';
  614. strbuf_add(result_path, namebuf.buf,
  615. namebuf.len);
  616. *mode = 0;
  617. retval = FOUND;
  618. goto done;
  619. }
  620. parent = &parents[parents_nr - 1];
  621. free(parent->tree);
  622. parents_nr--;
  623. parent = &parents[parents_nr - 1];
  624. init_tree_desc(&t, parent->tree, parent->size);
  625. strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
  626. continue;
  627. }
  628. /* We could end up here via a symlink to dir/.. */
  629. if (namebuf.buf[0] == '\0') {
  630. oidcpy(result, &parents[parents_nr - 1].oid);
  631. retval = FOUND;
  632. goto done;
  633. }
  634. /* Look up the first (or only) path component in the tree. */
  635. find_result = find_tree_entry(r, &t, namebuf.buf,
  636. &current_tree_oid, mode);
  637. if (find_result) {
  638. goto done;
  639. }
  640. if (S_ISDIR(*mode)) {
  641. if (!remainder) {
  642. oidcpy(result, &current_tree_oid);
  643. retval = FOUND;
  644. goto done;
  645. }
  646. /* Descend the tree */
  647. t.buffer = NULL;
  648. strbuf_remove(&namebuf, 0,
  649. 1 + first_slash - namebuf.buf);
  650. } else if (S_ISREG(*mode)) {
  651. if (!remainder) {
  652. oidcpy(result, &current_tree_oid);
  653. retval = FOUND;
  654. } else {
  655. retval = NOT_DIR;
  656. }
  657. goto done;
  658. } else if (S_ISLNK(*mode)) {
  659. /* Follow a symlink */
  660. unsigned long link_len;
  661. size_t len;
  662. char *contents, *contents_start;
  663. struct dir_state *parent;
  664. enum object_type type;
  665. if (follows_remaining-- == 0) {
  666. /* Too many symlinks followed */
  667. retval = SYMLINK_LOOP;
  668. goto done;
  669. }
  670. /*
  671. * At this point, we have followed at a least
  672. * one symlink, so on error we need to report this.
  673. */
  674. retval = DANGLING_SYMLINK;
  675. contents = repo_read_object_file(r,
  676. &current_tree_oid, &type,
  677. &link_len);
  678. if (!contents)
  679. goto done;
  680. if (contents[0] == '/') {
  681. strbuf_addstr(result_path, contents);
  682. free(contents);
  683. *mode = 0;
  684. retval = FOUND;
  685. goto done;
  686. }
  687. if (remainder)
  688. len = first_slash - namebuf.buf;
  689. else
  690. len = namebuf.len;
  691. contents_start = contents;
  692. parent = &parents[parents_nr - 1];
  693. init_tree_desc(&t, parent->tree, parent->size);
  694. strbuf_splice(&namebuf, 0, len,
  695. contents_start, link_len);
  696. if (remainder)
  697. namebuf.buf[link_len] = '/';
  698. free(contents);
  699. }
  700. }
  701. done:
  702. for (i = 0; i < parents_nr; i++)
  703. free(parents[i].tree);
  704. free(parents);
  705. strbuf_release(&namebuf);
  706. return retval;
  707. }
  708. static int match_entry(const struct pathspec_item *item,
  709. const struct name_entry *entry, int pathlen,
  710. const char *match, int matchlen,
  711. enum interesting *never_interesting)
  712. {
  713. int m = -1; /* signals that we haven't called strncmp() */
  714. if (item->magic & PATHSPEC_ICASE)
  715. /*
  716. * "Never interesting" trick requires exact
  717. * matching. We could do something clever with inexact
  718. * matching, but it's trickier (and not to forget that
  719. * strcasecmp is locale-dependent, at least in
  720. * glibc). Just disable it for now. It can't be worse
  721. * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
  722. * pattern.
  723. */
  724. *never_interesting = entry_not_interesting;
  725. else if (*never_interesting != entry_not_interesting) {
  726. /*
  727. * We have not seen any match that sorts later
  728. * than the current path.
  729. */
  730. /*
  731. * Does match sort strictly earlier than path
  732. * with their common parts?
  733. */
  734. m = strncmp(match, entry->path,
  735. (matchlen < pathlen) ? matchlen : pathlen);
  736. if (m < 0)
  737. return 0;
  738. /*
  739. * If we come here even once, that means there is at
  740. * least one pathspec that would sort equal to or
  741. * later than the path we are currently looking at.
  742. * In other words, if we have never reached this point
  743. * after iterating all pathspecs, it means all
  744. * pathspecs are either outside of base, or inside the
  745. * base but sorts strictly earlier than the current
  746. * one. In either case, they will never match the
  747. * subsequent entries. In such a case, we initialized
  748. * the variable to -1 and that is what will be
  749. * returned, allowing the caller to terminate early.
  750. */
  751. *never_interesting = entry_not_interesting;
  752. }
  753. if (pathlen > matchlen)
  754. return 0;
  755. if (matchlen > pathlen) {
  756. if (match[pathlen] != '/')
  757. return 0;
  758. /*
  759. * Reject non-directories as partial pathnames, except
  760. * when match is a submodule with a trailing slash and
  761. * nothing else (to handle 'submod/' and 'submod'
  762. * uniformly).
  763. */
  764. if (!S_ISDIR(entry->mode) &&
  765. (!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1))
  766. return 0;
  767. }
  768. if (m == -1)
  769. /*
  770. * we cheated and did not do strncmp(), so we do
  771. * that here.
  772. */
  773. m = ps_strncmp(item, match, entry->path, pathlen);
  774. /*
  775. * If common part matched earlier then it is a hit,
  776. * because we rejected the case where path is not a
  777. * leading directory and is shorter than match.
  778. */
  779. if (!m)
  780. /*
  781. * match_entry does not check if the prefix part is
  782. * matched case-sensitively. If the entry is a
  783. * directory and part of prefix, it'll be rematched
  784. * eventually by basecmp with special treatment for
  785. * the prefix.
  786. */
  787. return 1;
  788. return 0;
  789. }
  790. /* :(icase)-aware string compare */
  791. static int basecmp(const struct pathspec_item *item,
  792. const char *base, const char *match, int len)
  793. {
  794. if (item->magic & PATHSPEC_ICASE) {
  795. int ret, n = len > item->prefix ? item->prefix : len;
  796. ret = strncmp(base, match, n);
  797. if (ret)
  798. return ret;
  799. base += n;
  800. match += n;
  801. len -= n;
  802. }
  803. return ps_strncmp(item, base, match, len);
  804. }
  805. static int match_dir_prefix(const struct pathspec_item *item,
  806. const char *base,
  807. const char *match, int matchlen)
  808. {
  809. if (basecmp(item, base, match, matchlen))
  810. return 0;
  811. /*
  812. * If the base is a subdirectory of a path which
  813. * was specified, all of them are interesting.
  814. */
  815. if (!matchlen ||
  816. base[matchlen] == '/' ||
  817. match[matchlen - 1] == '/')
  818. return 1;
  819. /* Just a random prefix match */
  820. return 0;
  821. }
  822. /*
  823. * Perform matching on the leading non-wildcard part of
  824. * pathspec. item->nowildcard_len must be greater than zero. Return
  825. * non-zero if base is matched.
  826. */
  827. static int match_wildcard_base(const struct pathspec_item *item,
  828. const char *base, int baselen,
  829. int *matched)
  830. {
  831. const char *match = item->match;
  832. /* the wildcard part is not considered in this function */
  833. int matchlen = item->nowildcard_len;
  834. if (baselen) {
  835. int dirlen;
  836. /*
  837. * Return early if base is longer than the
  838. * non-wildcard part but it does not match.
  839. */
  840. if (baselen >= matchlen) {
  841. *matched = matchlen;
  842. return !basecmp(item, base, match, matchlen);
  843. }
  844. dirlen = matchlen;
  845. while (dirlen && match[dirlen - 1] != '/')
  846. dirlen--;
  847. /*
  848. * Return early if base is shorter than the
  849. * non-wildcard part but it does not match. Note that
  850. * base ends with '/' so we are sure it really matches
  851. * directory
  852. */
  853. if (basecmp(item, base, match, baselen))
  854. return 0;
  855. *matched = baselen;
  856. } else
  857. *matched = 0;
  858. /*
  859. * we could have checked entry against the non-wildcard part
  860. * that is not in base and does similar never_interesting
  861. * optimization as in match_entry. For now just be happy with
  862. * base comparison.
  863. */
  864. return entry_interesting;
  865. }
  866. /*
  867. * Is a tree entry interesting given the pathspec we have?
  868. *
  869. * Pre-condition: either baselen == base_offset (i.e. empty path)
  870. * or base[baselen-1] == '/' (i.e. with trailing slash).
  871. */
  872. static enum interesting do_match(struct index_state *istate,
  873. const struct name_entry *entry,
  874. struct strbuf *base, int base_offset,
  875. const struct pathspec *ps,
  876. int exclude)
  877. {
  878. int i;
  879. int pathlen, baselen = base->len - base_offset;
  880. enum interesting never_interesting = ps->has_wildcard ?
  881. entry_not_interesting : all_entries_not_interesting;
  882. GUARD_PATHSPEC(ps,
  883. PATHSPEC_FROMTOP |
  884. PATHSPEC_MAXDEPTH |
  885. PATHSPEC_LITERAL |
  886. PATHSPEC_GLOB |
  887. PATHSPEC_ICASE |
  888. PATHSPEC_EXCLUDE |
  889. PATHSPEC_ATTR);
  890. if (!ps->nr) {
  891. if (!ps->recursive ||
  892. !(ps->magic & PATHSPEC_MAXDEPTH) ||
  893. ps->max_depth == -1)
  894. return all_entries_interesting;
  895. return within_depth(base->buf + base_offset, baselen,
  896. !!S_ISDIR(entry->mode),
  897. ps->max_depth) ?
  898. entry_interesting : entry_not_interesting;
  899. }
  900. pathlen = tree_entry_len(entry);
  901. for (i = ps->nr - 1; i >= 0; i--) {
  902. const struct pathspec_item *item = ps->items+i;
  903. const char *match = item->match;
  904. const char *base_str = base->buf + base_offset;
  905. int matchlen = item->len, matched = 0;
  906. if ((!exclude && item->magic & PATHSPEC_EXCLUDE) ||
  907. ( exclude && !(item->magic & PATHSPEC_EXCLUDE)))
  908. continue;
  909. if (baselen >= matchlen) {
  910. /* If it doesn't match, move along... */
  911. if (!match_dir_prefix(item, base_str, match, matchlen))
  912. goto match_wildcards;
  913. if (!ps->recursive ||
  914. !(ps->magic & PATHSPEC_MAXDEPTH) ||
  915. ps->max_depth == -1) {
  916. if (!item->attr_match_nr)
  917. return all_entries_interesting;
  918. else
  919. goto interesting;
  920. }
  921. if (within_depth(base_str + matchlen + 1,
  922. baselen - matchlen - 1,
  923. !!S_ISDIR(entry->mode),
  924. ps->max_depth))
  925. goto interesting;
  926. else
  927. return entry_not_interesting;
  928. }
  929. /* Either there must be no base, or the base must match. */
  930. if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
  931. if (match_entry(item, entry, pathlen,
  932. match + baselen, matchlen - baselen,
  933. &never_interesting))
  934. goto interesting;
  935. if (item->nowildcard_len < item->len) {
  936. if (!git_fnmatch(item, match + baselen, entry->path,
  937. item->nowildcard_len - baselen))
  938. goto interesting;
  939. /*
  940. * Match all directories. We'll try to
  941. * match files later on.
  942. */
  943. if (ps->recursive && S_ISDIR(entry->mode))
  944. return entry_interesting;
  945. /*
  946. * When matching against submodules with
  947. * wildcard characters, ensure that the entry
  948. * at least matches up to the first wild
  949. * character. More accurate matching can then
  950. * be performed in the submodule itself.
  951. */
  952. if (ps->recurse_submodules &&
  953. S_ISGITLINK(entry->mode) &&
  954. !ps_strncmp(item, match + baselen,
  955. entry->path,
  956. item->nowildcard_len - baselen))
  957. goto interesting;
  958. }
  959. continue;
  960. }
  961. match_wildcards:
  962. if (item->nowildcard_len == item->len)
  963. continue;
  964. if (item->nowildcard_len &&
  965. !match_wildcard_base(item, base_str, baselen, &matched))
  966. continue;
  967. /*
  968. * Concatenate base and entry->path into one and do
  969. * fnmatch() on it.
  970. *
  971. * While we could avoid concatenation in certain cases
  972. * [1], which saves a memcpy and potentially a
  973. * realloc, it turns out not worth it. Measurement on
  974. * linux-2.6 does not show any clear improvements,
  975. * partly because of the nowildcard_len optimization
  976. * in git_fnmatch(). Avoid micro-optimizations here.
  977. *
  978. * [1] if match_wildcard_base() says the base
  979. * directory is already matched, we only need to match
  980. * the rest, which is shorter so _in theory_ faster.
  981. */
  982. strbuf_add(base, entry->path, pathlen);
  983. if (!git_fnmatch(item, match, base->buf + base_offset,
  984. item->nowildcard_len)) {
  985. strbuf_setlen(base, base_offset + baselen);
  986. goto interesting;
  987. }
  988. /*
  989. * When matching against submodules with
  990. * wildcard characters, ensure that the entry
  991. * at least matches up to the first wild
  992. * character. More accurate matching can then
  993. * be performed in the submodule itself.
  994. */
  995. if (ps->recurse_submodules && S_ISGITLINK(entry->mode) &&
  996. !ps_strncmp(item, match, base->buf + base_offset,
  997. item->nowildcard_len)) {
  998. strbuf_setlen(base, base_offset + baselen);
  999. goto interesting;
  1000. }
  1001. strbuf_setlen(base, base_offset + baselen);
  1002. /*
  1003. * Match all directories. We'll try to match files
  1004. * later on.
  1005. * max_depth is ignored but we may consider support it
  1006. * in future, see
  1007. * https://lore.kernel.org/git/7vmxo5l2g4.fsf@alter.siamese.dyndns.org/
  1008. */
  1009. if (ps->recursive && S_ISDIR(entry->mode))
  1010. return entry_interesting;
  1011. continue;
  1012. interesting:
  1013. if (item->attr_match_nr) {
  1014. int ret;
  1015. /*
  1016. * Must not return all_entries_not_interesting
  1017. * prematurely. We do not know if all entries do not
  1018. * match some attributes with current attr API.
  1019. */
  1020. never_interesting = entry_not_interesting;
  1021. /*
  1022. * Consider all directories interesting (because some
  1023. * of those files inside may match some attributes
  1024. * even though the parent dir does not)
  1025. *
  1026. * FIXME: attributes _can_ match directories and we
  1027. * can probably return all_entries_interesting or
  1028. * all_entries_not_interesting here if matched.
  1029. */
  1030. if (S_ISDIR(entry->mode))
  1031. return entry_interesting;
  1032. strbuf_add(base, entry->path, pathlen);
  1033. ret = match_pathspec_attrs(istate, base->buf + base_offset,
  1034. base->len - base_offset, item);
  1035. strbuf_setlen(base, base_offset + baselen);
  1036. if (!ret)
  1037. continue;
  1038. }
  1039. return entry_interesting;
  1040. }
  1041. return never_interesting; /* No matches */
  1042. }
  1043. /*
  1044. * Is a tree entry interesting given the pathspec we have?
  1045. *
  1046. * Pre-condition: either baselen == base_offset (i.e. empty path)
  1047. * or base[baselen-1] == '/' (i.e. with trailing slash).
  1048. */
  1049. enum interesting tree_entry_interesting(struct index_state *istate,
  1050. const struct name_entry *entry,
  1051. struct strbuf *base, int base_offset,
  1052. const struct pathspec *ps)
  1053. {
  1054. enum interesting positive, negative;
  1055. positive = do_match(istate, entry, base, base_offset, ps, 0);
  1056. /*
  1057. * case | entry | positive | negative | result
  1058. * -----+-------+----------+----------+-------
  1059. * 1 | file | -1 | -1..2 | -1
  1060. * 2 | file | 0 | -1..2 | 0
  1061. * 3 | file | 1 | -1 | 1
  1062. * 4 | file | 1 | 0 | 1
  1063. * 5 | file | 1 | 1 | 0
  1064. * 6 | file | 1 | 2 | 0
  1065. * 7 | file | 2 | -1 | 2
  1066. * 8 | file | 2 | 0 | 1
  1067. * 9 | file | 2 | 1 | 0
  1068. * 10 | file | 2 | 2 | -1
  1069. * -----+-------+----------+----------+-------
  1070. * 11 | dir | -1 | -1..2 | -1
  1071. * 12 | dir | 0 | -1..2 | 0
  1072. * 13 | dir | 1 | -1 | 1
  1073. * 14 | dir | 1 | 0 | 1
  1074. * 15 | dir | 1 | 1 | 1 (*)
  1075. * 16 | dir | 1 | 2 | 0
  1076. * 17 | dir | 2 | -1 | 2
  1077. * 18 | dir | 2 | 0 | 1
  1078. * 19 | dir | 2 | 1 | 1 (*)
  1079. * 20 | dir | 2 | 2 | -1
  1080. *
  1081. * (*) An exclude pattern interested in a directory does not
  1082. * necessarily mean it will exclude all of the directory. In
  1083. * wildcard case, it can't decide until looking at individual
  1084. * files inside. So don't write such directories off yet.
  1085. */
  1086. if (!(ps->magic & PATHSPEC_EXCLUDE) ||
  1087. positive <= entry_not_interesting) /* #1, #2, #11, #12 */
  1088. return positive;
  1089. negative = do_match(istate, entry, base, base_offset, ps, 1);
  1090. /* #8, #18 */
  1091. if (positive == all_entries_interesting &&
  1092. negative == entry_not_interesting)
  1093. return entry_interesting;
  1094. /* #3, #4, #7, #13, #14, #17 */
  1095. if (negative <= entry_not_interesting)
  1096. return positive;
  1097. /* #15, #19 */
  1098. if (S_ISDIR(entry->mode) &&
  1099. positive >= entry_interesting &&
  1100. negative == entry_interesting)
  1101. return entry_interesting;
  1102. if ((positive == entry_interesting &&
  1103. negative >= entry_interesting) || /* #5, #6, #16 */
  1104. (positive == all_entries_interesting &&
  1105. negative == entry_interesting)) /* #9 */
  1106. return entry_not_interesting;
  1107. return all_entries_not_interesting; /* #10, #20 */
  1108. }