/* http-walker.c */
  1. #include "cache.h"
  2. #include "repository.h"
  3. #include "commit.h"
  4. #include "walker.h"
  5. #include "http.h"
  6. #include "list.h"
  7. #include "transport.h"
  8. #include "packfile.h"
  9. #include "object-store.h"
  /*
   * One location that objects can be fetched from.  Locations are chained
   * through `next`; the first entry is created in get_http_walker() and
   * further ones are appended by process_alternates_response().
   */
  struct alt_base {
      /* Base URL of this location; owned by the entry, freed in cleanup(). */
      char *base;
      /* Non-zero once fetch_indices() has obtained the pack list. */
      int got_indices;
      /* Pack list filled in through http_get_info_packs(). */
      struct packed_git *packs;
      /* Next location in the chain, or NULL for the last one. */
      struct alt_base *next;
  };
  /* Lifecycle of an entry in the object request queue. */
  enum object_request_state {
      /* Queued by prefetch(); no transfer has been started yet. */
      WAITING,
      /* start_object_request() could not create or start the transfer. */
      ABORTED,
      /* A transfer has been started and has not been processed yet. */
      ACTIVE,
      /* The response was processed, or the object was already present. */
      COMPLETE
  };
  22. struct object_request {
  23. struct walker *walker;
  24. struct object_id oid;
  25. struct alt_base *repo;
  26. enum object_request_state state;
  27. struct http_object_request *req;
  28. struct list_head node;
  29. };
  /*
   * State shared between fetch_alternates() and its response callback,
   * process_alternates_response().
   */
  struct alternates_request {
      /* Walker on whose behalf the alternates are fetched. */
      struct walker *walker;
      /* Base URL the alternates list belongs to. */
      const char *base;
      /* URL of the request; rewritten when the callback retries. */
      struct strbuf *url;
      /* Receives the response body. */
      struct strbuf *buffer;
      /* Slot carrying the request. */
      struct active_request_slot *slot;
      /*
       * Non-zero while objects/info/http-alternates is being requested,
       * zero once the callback has fallen back to objects/info/alternates.
       */
      int http_specific;
  };
  /* Per-walker private state, stored in walker->data. */
  struct walker_data {
      /* Not referenced by the code visible in this file. */
      const char *url;
      /*
       * Progress of the alternates lookup: -1 when not fetched (or the
       * fetch failed), 0 while a fetch is in flight, 1 once processed.
       */
      int got_alternates;
      /* Head of the list of locations to fetch from. */
      struct alt_base *alt;
  };
  /* Queue of object requests, linked through object_request.node. */
  static LIST_HEAD(object_queue_head);

  /*
   * Forward declarations: both are referenced by code that appears before
   * their definitions.
   */
  static void fetch_alternates(struct walker *walker, const char *base);
  static void process_object_response(void *callback_data);
  46. static void start_object_request(struct walker *walker,
  47. struct object_request *obj_req)
  48. {
  49. struct active_request_slot *slot;
  50. struct http_object_request *req;
  51. req = new_http_object_request(obj_req->repo->base, &obj_req->oid);
  52. if (req == NULL) {
  53. obj_req->state = ABORTED;
  54. return;
  55. }
  56. obj_req->req = req;
  57. slot = req->slot;
  58. slot->callback_func = process_object_response;
  59. slot->callback_data = obj_req;
  60. /* Try to get the request started, abort the request on error */
  61. obj_req->state = ACTIVE;
  62. if (!start_active_slot(slot)) {
  63. obj_req->state = ABORTED;
  64. release_http_object_request(req);
  65. return;
  66. }
  67. }
  68. static void finish_object_request(struct object_request *obj_req)
  69. {
  70. if (finish_http_object_request(obj_req->req))
  71. return;
  72. if (obj_req->req->rename == 0)
  73. walker_say(obj_req->walker, "got %s\n", oid_to_hex(&obj_req->oid));
  74. }
  75. static void process_object_response(void *callback_data)
  76. {
  77. struct object_request *obj_req =
  78. (struct object_request *)callback_data;
  79. struct walker *walker = obj_req->walker;
  80. struct walker_data *data = walker->data;
  81. struct alt_base *alt = data->alt;
  82. process_http_object_request(obj_req->req);
  83. obj_req->state = COMPLETE;
  84. normalize_curl_result(&obj_req->req->curl_result,
  85. obj_req->req->http_code,
  86. obj_req->req->errorstr,
  87. sizeof(obj_req->req->errorstr));
  88. /* Use alternates if necessary */
  89. if (missing_target(obj_req->req)) {
  90. fetch_alternates(walker, alt->base);
  91. if (obj_req->repo->next != NULL) {
  92. obj_req->repo =
  93. obj_req->repo->next;
  94. release_http_object_request(obj_req->req);
  95. start_object_request(walker, obj_req);
  96. return;
  97. }
  98. }
  99. finish_object_request(obj_req);
  100. }
  101. static void release_object_request(struct object_request *obj_req)
  102. {
  103. if (obj_req->req !=NULL && obj_req->req->localfile != -1)
  104. error("fd leakage in release: %d", obj_req->req->localfile);
  105. list_del(&obj_req->node);
  106. free(obj_req);
  107. }
  #ifdef USE_CURL_MULTI
  /*
   * Fill callback registered with add_fill_function() in get_http_walker().
   *
   * Walks the request queue and starts the first WAITING request whose
   * object is not present locally; WAITING requests whose object already
   * exists are marked COMPLETE along the way.
   *
   * The parameter is `void *` so that the definition has exactly the
   * `int (*)(void *)` type it is registered as; calling a function through
   * a pointer to an incompatible function type is undefined behaviour.
   * `data` is the walker passed at registration time.
   *
   * Returns 1 if a request was started, 0 if there was nothing to start.
   */
  static int fill_active_slot(void *data)
  {
      struct walker *walker = data;
      struct list_head *pos, *tmp, *head = &object_queue_head;

      list_for_each_safe(pos, tmp, head) {
          struct object_request *obj_req =
              list_entry(pos, struct object_request, node);

          if (obj_req->state != WAITING)
              continue;

          if (has_object_file(&obj_req->oid)) {
              obj_req->state = COMPLETE;
              continue;
          }

          start_object_request(walker, obj_req);
          return 1;
      }
      return 0;
  }
  #endif
  127. static void prefetch(struct walker *walker, unsigned char *sha1)
  128. {
  129. struct object_request *newreq;
  130. struct walker_data *data = walker->data;
  131. newreq = xmalloc(sizeof(*newreq));
  132. newreq->walker = walker;
  133. hashcpy(newreq->oid.hash, sha1);
  134. newreq->repo = data->alt;
  135. newreq->state = WAITING;
  136. newreq->req = NULL;
  137. http_is_verbose = walker->get_verbosely;
  138. list_add_tail(&newreq->node, &object_queue_head);
  139. #ifdef USE_CURL_MULTI
  140. fill_active_slots();
  141. step_active_slots();
  142. #endif
  143. }
  144. static int is_alternate_allowed(const char *url)
  145. {
  146. const char *protocols[] = {
  147. "http", "https", "ftp", "ftps"
  148. };
  149. int i;
  150. if (http_follow_config != HTTP_FOLLOW_ALWAYS) {
  151. warning("alternate disabled by http.followRedirects: %s", url);
  152. return 0;
  153. }
  154. for (i = 0; i < ARRAY_SIZE(protocols); i++) {
  155. const char *end;
  156. if (skip_prefix(url, protocols[i], &end) &&
  157. starts_with(end, "://"))
  158. break;
  159. }
  160. if (i >= ARRAY_SIZE(protocols)) {
  161. warning("ignoring alternate with unknown protocol: %s", url);
  162. return 0;
  163. }
  164. if (!is_transport_allowed(protocols[i], 0)) {
  165. warning("ignoring alternate with restricted protocol: %s", url);
  166. return 0;
  167. }
  168. return 1;
  169. }
  170. static void process_alternates_response(void *callback_data)
  171. {
  172. struct alternates_request *alt_req =
  173. (struct alternates_request *)callback_data;
  174. struct walker *walker = alt_req->walker;
  175. struct walker_data *cdata = walker->data;
  176. struct active_request_slot *slot = alt_req->slot;
  177. struct alt_base *tail = cdata->alt;
  178. const char *base = alt_req->base;
  179. const char null_byte = '\0';
  180. char *data;
  181. int i = 0;
  182. normalize_curl_result(&slot->curl_result, slot->http_code,
  183. curl_errorstr, sizeof(curl_errorstr));
  184. if (alt_req->http_specific) {
  185. if (slot->curl_result != CURLE_OK ||
  186. !alt_req->buffer->len) {
  187. /* Try reusing the slot to get non-http alternates */
  188. alt_req->http_specific = 0;
  189. strbuf_reset(alt_req->url);
  190. strbuf_addf(alt_req->url, "%s/objects/info/alternates",
  191. base);
  192. curl_easy_setopt(slot->curl, CURLOPT_URL,
  193. alt_req->url->buf);
  194. active_requests++;
  195. slot->in_use = 1;
  196. if (slot->finished != NULL)
  197. (*slot->finished) = 0;
  198. if (!start_active_slot(slot)) {
  199. cdata->got_alternates = -1;
  200. slot->in_use = 0;
  201. if (slot->finished != NULL)
  202. (*slot->finished) = 1;
  203. }
  204. return;
  205. }
  206. } else if (slot->curl_result != CURLE_OK) {
  207. if (!missing_target(slot)) {
  208. cdata->got_alternates = -1;
  209. return;
  210. }
  211. }
  212. fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer);
  213. alt_req->buffer->len--;
  214. data = alt_req->buffer->buf;
  215. while (i < alt_req->buffer->len) {
  216. int posn = i;
  217. while (posn < alt_req->buffer->len && data[posn] != '\n')
  218. posn++;
  219. if (data[posn] == '\n') {
  220. int okay = 0;
  221. int serverlen = 0;
  222. struct alt_base *newalt;
  223. if (data[i] == '/') {
  224. /*
  225. * This counts
  226. * http://git.host/pub/scm/linux.git/
  227. * -----------here^
  228. * so memcpy(dst, base, serverlen) will
  229. * copy up to "...git.host".
  230. */
  231. const char *colon_ss = strstr(base,"://");
  232. if (colon_ss) {
  233. serverlen = (strchr(colon_ss + 3, '/')
  234. - base);
  235. okay = 1;
  236. }
  237. } else if (!memcmp(data + i, "../", 3)) {
  238. /*
  239. * Relative URL; chop the corresponding
  240. * number of subpath from base (and ../
  241. * from data), and concatenate the result.
  242. *
  243. * The code first drops ../ from data, and
  244. * then drops one ../ from data and one path
  245. * from base. IOW, one extra ../ is dropped
  246. * from data than path is dropped from base.
  247. *
  248. * This is not wrong. The alternate in
  249. * http://git.host/pub/scm/linux.git/
  250. * to borrow from
  251. * http://git.host/pub/scm/linus.git/
  252. * is ../../linus.git/objects/. You need
  253. * two ../../ to borrow from your direct
  254. * neighbour.
  255. */
  256. i += 3;
  257. serverlen = strlen(base);
  258. while (i + 2 < posn &&
  259. !memcmp(data + i, "../", 3)) {
  260. do {
  261. serverlen--;
  262. } while (serverlen &&
  263. base[serverlen - 1] != '/');
  264. i += 3;
  265. }
  266. /* If the server got removed, give up. */
  267. okay = strchr(base, ':') - base + 3 <
  268. serverlen;
  269. } else if (alt_req->http_specific) {
  270. char *colon = strchr(data + i, ':');
  271. char *slash = strchr(data + i, '/');
  272. if (colon && slash && colon < data + posn &&
  273. slash < data + posn && colon < slash) {
  274. okay = 1;
  275. }
  276. }
  277. if (okay) {
  278. struct strbuf target = STRBUF_INIT;
  279. strbuf_add(&target, base, serverlen);
  280. strbuf_add(&target, data + i, posn - i);
  281. if (!strbuf_strip_suffix(&target, "objects")) {
  282. warning("ignoring alternate that does"
  283. " not end in 'objects': %s",
  284. target.buf);
  285. strbuf_release(&target);
  286. } else if (is_alternate_allowed(target.buf)) {
  287. warning("adding alternate object store: %s",
  288. target.buf);
  289. newalt = xmalloc(sizeof(*newalt));
  290. newalt->next = NULL;
  291. newalt->base = strbuf_detach(&target, NULL);
  292. newalt->got_indices = 0;
  293. newalt->packs = NULL;
  294. while (tail->next != NULL)
  295. tail = tail->next;
  296. tail->next = newalt;
  297. } else {
  298. strbuf_release(&target);
  299. }
  300. }
  301. }
  302. i = posn + 1;
  303. }
  304. cdata->got_alternates = 1;
  305. }
  306. static void fetch_alternates(struct walker *walker, const char *base)
  307. {
  308. struct strbuf buffer = STRBUF_INIT;
  309. struct strbuf url = STRBUF_INIT;
  310. struct active_request_slot *slot;
  311. struct alternates_request alt_req;
  312. struct walker_data *cdata = walker->data;
  313. /*
  314. * If another request has already started fetching alternates,
  315. * wait for them to arrive and return to processing this request's
  316. * curl message
  317. */
  318. #ifdef USE_CURL_MULTI
  319. while (cdata->got_alternates == 0) {
  320. step_active_slots();
  321. }
  322. #endif
  323. /* Nothing to do if they've already been fetched */
  324. if (cdata->got_alternates == 1)
  325. return;
  326. /* Start the fetch */
  327. cdata->got_alternates = 0;
  328. if (walker->get_verbosely)
  329. fprintf(stderr, "Getting alternates list for %s\n", base);
  330. strbuf_addf(&url, "%s/objects/info/http-alternates", base);
  331. /*
  332. * Use a callback to process the result, since another request
  333. * may fail and need to have alternates loaded before continuing
  334. */
  335. slot = get_active_slot();
  336. slot->callback_func = process_alternates_response;
  337. alt_req.walker = walker;
  338. slot->callback_data = &alt_req;
  339. curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
  340. curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
  341. curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);
  342. alt_req.base = base;
  343. alt_req.url = &url;
  344. alt_req.buffer = &buffer;
  345. alt_req.http_specific = 1;
  346. alt_req.slot = slot;
  347. if (start_active_slot(slot))
  348. run_active_slot(slot);
  349. else
  350. cdata->got_alternates = -1;
  351. strbuf_release(&buffer);
  352. strbuf_release(&url);
  353. }
  354. static int fetch_indices(struct walker *walker, struct alt_base *repo)
  355. {
  356. int ret;
  357. if (repo->got_indices)
  358. return 0;
  359. if (walker->get_verbosely)
  360. fprintf(stderr, "Getting pack list for %s\n", repo->base);
  361. switch (http_get_info_packs(repo->base, &repo->packs)) {
  362. case HTTP_OK:
  363. case HTTP_MISSING_TARGET:
  364. repo->got_indices = 1;
  365. ret = 0;
  366. break;
  367. default:
  368. repo->got_indices = 0;
  369. ret = -1;
  370. }
  371. return ret;
  372. }
  373. static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
  374. {
  375. struct packed_git *target;
  376. int ret;
  377. struct slot_results results;
  378. struct http_pack_request *preq;
  379. if (fetch_indices(walker, repo))
  380. return -1;
  381. target = find_sha1_pack(sha1, repo->packs);
  382. if (!target)
  383. return -1;
  384. close_pack_index(target);
  385. if (walker->get_verbosely) {
  386. fprintf(stderr, "Getting pack %s\n",
  387. hash_to_hex(target->hash));
  388. fprintf(stderr, " which contains %s\n",
  389. hash_to_hex(sha1));
  390. }
  391. preq = new_http_pack_request(target->hash, repo->base);
  392. if (preq == NULL)
  393. goto abort;
  394. preq->slot->results = &results;
  395. if (start_active_slot(preq->slot)) {
  396. run_active_slot(preq->slot);
  397. if (results.curl_result != CURLE_OK) {
  398. error("Unable to get pack file %s\n%s", preq->url,
  399. curl_errorstr);
  400. goto abort;
  401. }
  402. } else {
  403. error("Unable to start request");
  404. goto abort;
  405. }
  406. ret = finish_http_pack_request(preq);
  407. release_http_pack_request(preq);
  408. if (ret)
  409. return ret;
  410. http_install_packfile(target, &repo->packs);
  411. return 0;
  412. abort:
  413. return -1;
  414. }
  /*
   * Drop a queued object request that is no longer needed.  This is
   * currently the same as releasing it.
   */
  static void abort_object_request(struct object_request *obj_req)
  {
      release_object_request(obj_req);
  }
  419. static int fetch_object(struct walker *walker, unsigned char *hash)
  420. {
  421. char *hex = hash_to_hex(hash);
  422. int ret = 0;
  423. struct object_request *obj_req = NULL;
  424. struct http_object_request *req;
  425. struct list_head *pos, *head = &object_queue_head;
  426. list_for_each(pos, head) {
  427. obj_req = list_entry(pos, struct object_request, node);
  428. if (hasheq(obj_req->oid.hash, hash))
  429. break;
  430. }
  431. if (obj_req == NULL)
  432. return error("Couldn't find request for %s in the queue", hex);
  433. if (has_object_file(&obj_req->oid)) {
  434. if (obj_req->req != NULL)
  435. abort_http_object_request(obj_req->req);
  436. abort_object_request(obj_req);
  437. return 0;
  438. }
  439. #ifdef USE_CURL_MULTI
  440. while (obj_req->state == WAITING)
  441. step_active_slots();
  442. #else
  443. start_object_request(walker, obj_req);
  444. #endif
  445. /*
  446. * obj_req->req might change when fetching alternates in the callback
  447. * process_object_response; therefore, the "shortcut" variable, req,
  448. * is used only after we're done with slots.
  449. */
  450. while (obj_req->state == ACTIVE)
  451. run_active_slot(obj_req->req->slot);
  452. req = obj_req->req;
  453. if (req->localfile != -1) {
  454. close(req->localfile);
  455. req->localfile = -1;
  456. }
  457. normalize_curl_result(&req->curl_result, req->http_code,
  458. req->errorstr, sizeof(req->errorstr));
  459. if (obj_req->state == ABORTED) {
  460. ret = error("Request for %s aborted", hex);
  461. } else if (req->curl_result != CURLE_OK &&
  462. req->http_code != 416) {
  463. if (missing_target(req))
  464. ret = -1; /* Be silent, it is probably in a pack. */
  465. else
  466. ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
  467. req->errorstr, req->curl_result,
  468. req->http_code, hex);
  469. } else if (req->zret != Z_STREAM_END) {
  470. walker->corrupt_object_found++;
  471. ret = error("File %s (%s) corrupt", hex, req->url);
  472. } else if (!oideq(&obj_req->oid, &req->real_oid)) {
  473. ret = error("File %s has bad hash", hex);
  474. } else if (req->rename < 0) {
  475. struct strbuf buf = STRBUF_INIT;
  476. loose_object_path(the_repository, &buf, &req->oid);
  477. ret = error("unable to write sha1 filename %s", buf.buf);
  478. strbuf_release(&buf);
  479. }
  480. release_http_object_request(req);
  481. release_object_request(obj_req);
  482. return ret;
  483. }
  484. static int fetch(struct walker *walker, unsigned char *hash)
  485. {
  486. struct walker_data *data = walker->data;
  487. struct alt_base *altbase = data->alt;
  488. if (!fetch_object(walker, hash))
  489. return 0;
  490. while (altbase) {
  491. if (!http_fetch_pack(walker, altbase, hash))
  492. return 0;
  493. fetch_alternates(walker, data->alt->base);
  494. altbase = altbase->next;
  495. }
  496. return error("Unable to find %s under %s", hash_to_hex(hash),
  497. data->alt->base);
  498. }
  499. static int fetch_ref(struct walker *walker, struct ref *ref)
  500. {
  501. struct walker_data *data = walker->data;
  502. return http_fetch_ref(data->alt->base, ref);
  503. }
  504. static void cleanup(struct walker *walker)
  505. {
  506. struct walker_data *data = walker->data;
  507. struct alt_base *alt, *alt_next;
  508. if (data) {
  509. alt = data->alt;
  510. while (alt) {
  511. alt_next = alt->next;
  512. free(alt->base);
  513. free(alt);
  514. alt = alt_next;
  515. }
  516. free(data);
  517. walker->data = NULL;
  518. }
  519. }
  520. struct walker *get_http_walker(const char *url)
  521. {
  522. char *s;
  523. struct walker_data *data = xmalloc(sizeof(struct walker_data));
  524. struct walker *walker = xmalloc(sizeof(struct walker));
  525. data->alt = xmalloc(sizeof(*data->alt));
  526. data->alt->base = xstrdup(url);
  527. for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
  528. *s = 0;
  529. data->alt->got_indices = 0;
  530. data->alt->packs = NULL;
  531. data->alt->next = NULL;
  532. data->got_alternates = -1;
  533. walker->corrupt_object_found = 0;
  534. walker->fetch = fetch;
  535. walker->fetch_ref = fetch_ref;
  536. walker->prefetch = prefetch;
  537. walker->cleanup = cleanup;
  538. walker->data = data;
  539. #ifdef USE_CURL_MULTI
  540. add_fill_function(walker, (int (*)(void *)) fill_active_slot);
  541. #endif
  542. return walker;
  543. }