byterange_filter.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. /* Licensed to the Apache Software Foundation (ASF) under one or more
  2. * contributor license agreements. See the NOTICE file distributed with
  3. * this work for additional information regarding copyright ownership.
  4. * The ASF licenses this file to You under the Apache License, Version 2.0
  5. * (the "License"); you may not use this file except in compliance with
  6. * the License. You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * byterange_filter.c --- HTTP byterange filter and friends.
  18. */
  19. #include "apr.h"
  20. #include "apr_strings.h"
  21. #include "apr_buckets.h"
  22. #include "apr_lib.h"
  23. #include "apr_signal.h"
  24. #define APR_WANT_STDIO /* for sscanf */
  25. #define APR_WANT_STRFUNC
  26. #define APR_WANT_MEMFUNC
  27. #include "apr_want.h"
  28. #define CORE_PRIVATE
  29. #include "util_filter.h"
  30. #include "ap_config.h"
  31. #include "httpd.h"
  32. #include "http_config.h"
  33. #include "http_core.h"
  34. #include "http_protocol.h"
  35. #include "http_main.h"
  36. #include "http_request.h"
  37. #include "http_vhost.h"
  38. #include "http_log.h" /* For errors detected in basic auth common
  39. * support code... */
  40. #include "apr_date.h" /* For apr_date_parse_http and APR_DATE_BAD */
  41. #include "util_charset.h"
  42. #include "util_ebcdic.h"
  43. #include "util_time.h"
  44. #include "mod_core.h"
  45. #if APR_HAVE_STDARG_H
  46. #include <stdarg.h>
  47. #endif
  48. #if APR_HAVE_UNISTD_H
  49. #include <unistd.h>
  50. #endif
  51. static int ap_set_byterange(request_rec *r, apr_off_t clength,
  52. apr_array_header_t **indexes);
  53. /*
  54. * Here we try to be compatible with clients that want multipart/x-byteranges
  55. * instead of multipart/byteranges (also see above), as per HTTP/1.1. We
  56. * look for the Request-Range header (e.g. Netscape 2 and 3) as an indication
  57. * that the browser supports an older protocol. We also check User-Agent
  58. * for Microsoft Internet Explorer 3, which needs this as well.
  59. */
  60. static int use_range_x(request_rec *r)
  61. {
  62. const char *ua;
  63. return (apr_table_get(r->headers_in, "Request-Range")
  64. || ((ua = apr_table_get(r->headers_in, "User-Agent"))
  65. && ap_strstr_c(ua, "MSIE 3")));
  66. }
  /* printf-style format for "<first>-<last>/<total>", all apr_off_t values. */
  #define BYTERANGE_FMT \
      "%" APR_OFF_T_FMT "-%" APR_OFF_T_FMT "/%" APR_OFF_T_FMT
  68. static apr_status_t copy_brigade_range(apr_bucket_brigade *bb,
  69. apr_bucket_brigade *bbout,
  70. apr_off_t start,
  71. apr_off_t end)
  72. {
  73. apr_bucket *first = NULL, *last = NULL, *out_first = NULL, *e;
  74. apr_uint64_t pos = 0, off_first = 0, off_last = 0;
  75. apr_status_t rv;
  76. apr_uint64_t start64, end64;
  77. apr_off_t pofft = 0;
  78. /*
  79. * Once we know that start and end are >= 0 convert everything to apr_uint64_t.
  80. * See the comments in apr_brigade_partition why.
  81. * In short apr_off_t (for values >= 0)and apr_size_t fit into apr_uint64_t.
  82. */
  83. start64 = (apr_uint64_t)start;
  84. end64 = (apr_uint64_t)end;
  85. if (start < 0 || end < 0 || start64 > end64)
  86. return APR_EINVAL;
  87. for (e = APR_BRIGADE_FIRST(bb);
  88. e != APR_BRIGADE_SENTINEL(bb);
  89. e = APR_BUCKET_NEXT(e))
  90. {
  91. apr_uint64_t elen64;
  92. /* we know that no bucket has undefined length (-1) */
  93. AP_DEBUG_ASSERT(e->length != (apr_size_t)(-1));
  94. elen64 = (apr_uint64_t)e->length;
  95. if (!first && (elen64 + pos > start64)) {
  96. first = e;
  97. off_first = pos;
  98. }
  99. if (elen64 + pos > end64) {
  100. last = e;
  101. off_last = pos;
  102. break;
  103. }
  104. pos += elen64;
  105. }
  106. if (!first || !last)
  107. return APR_EINVAL;
  108. e = first;
  109. while (1)
  110. {
  111. apr_bucket *copy;
  112. AP_DEBUG_ASSERT(e != APR_BRIGADE_SENTINEL(bb));
  113. rv = apr_bucket_copy(e, &copy);
  114. if (rv != APR_SUCCESS) {
  115. apr_brigade_cleanup(bbout);
  116. return rv;
  117. }
  118. APR_BRIGADE_INSERT_TAIL(bbout, copy);
  119. if (e == first) {
  120. if (off_first != start64) {
  121. rv = apr_bucket_split(copy, (apr_size_t)(start64 - off_first));
  122. if (rv != APR_SUCCESS) {
  123. apr_brigade_cleanup(bbout);
  124. return rv;
  125. }
  126. out_first = APR_BUCKET_NEXT(copy);
  127. APR_BUCKET_REMOVE(copy);
  128. apr_bucket_destroy(copy);
  129. }
  130. else {
  131. out_first = copy;
  132. }
  133. }
  134. if (e == last) {
  135. if (e == first) {
  136. off_last += start64 - off_first;
  137. copy = out_first;
  138. }
  139. if (end64 - off_last != (apr_uint64_t)e->length) {
  140. rv = apr_bucket_split(copy, (apr_size_t)(end64 + 1 - off_last));
  141. if (rv != APR_SUCCESS) {
  142. apr_brigade_cleanup(bbout);
  143. return rv;
  144. }
  145. copy = APR_BUCKET_NEXT(copy);
  146. if (copy != APR_BRIGADE_SENTINEL(bbout)) {
  147. APR_BUCKET_REMOVE(copy);
  148. apr_bucket_destroy(copy);
  149. }
  150. }
  151. break;
  152. }
  153. e = APR_BUCKET_NEXT(e);
  154. }
  155. AP_DEBUG_ASSERT(APR_SUCCESS == apr_brigade_length(bbout, 1, &pofft));
  156. pos = (apr_uint64_t)pofft;
  157. AP_DEBUG_ASSERT(pos == end64 - start64 + 1);
  158. return APR_SUCCESS;
  159. }
  160. typedef struct indexes_t {
  161. apr_off_t start;
  162. apr_off_t end;
  163. } indexes_t;
  164. static apr_status_t send_416(ap_filter_t *f, apr_bucket_brigade *tmpbb)
  165. {
  166. apr_bucket *e;
  167. conn_rec *c = f->r->connection;
  168. ap_remove_output_filter(f);
  169. f->r->status = HTTP_OK;
  170. e = ap_bucket_error_create(HTTP_RANGE_NOT_SATISFIABLE, NULL,
  171. f->r->pool, c->bucket_alloc);
  172. APR_BRIGADE_INSERT_TAIL(tmpbb, e);
  173. e = apr_bucket_eos_create(c->bucket_alloc);
  174. APR_BRIGADE_INSERT_TAIL(tmpbb, e);
  175. return ap_pass_brigade(f->next, tmpbb);
  176. }
  177. AP_CORE_DECLARE_NONSTD(apr_status_t) ap_byterange_filter(ap_filter_t *f,
  178. apr_bucket_brigade *bb)
  179. {
  180. request_rec *r = f->r;
  181. conn_rec *c = r->connection;
  182. apr_bucket *e;
  183. apr_bucket_brigade *bsend;
  184. apr_bucket_brigade *tmpbb;
  185. apr_off_t range_start;
  186. apr_off_t range_end;
  187. apr_off_t clength = 0;
  188. apr_status_t rv;
  189. int found = 0;
  190. int num_ranges;
  191. char *boundary = NULL;
  192. char *bound_head = NULL;
  193. apr_array_header_t *indexes;
  194. indexes_t *idx;
  195. int i;
  196. int original_status;
  197. /*
  198. * Iterate through the brigade until reaching EOS or a bucket with
  199. * unknown length.
  200. */
  201. for (e = APR_BRIGADE_FIRST(bb);
  202. (e != APR_BRIGADE_SENTINEL(bb) && !APR_BUCKET_IS_EOS(e)
  203. && e->length != (apr_size_t)-1);
  204. e = APR_BUCKET_NEXT(e)) {
  205. clength += e->length;
  206. }
  207. /*
  208. * Don't attempt to do byte range work if this brigade doesn't
  209. * contain an EOS, or if any of the buckets has an unknown length;
  210. * this avoids the cases where it is expensive to perform
  211. * byteranging (i.e. may require arbitrary amounts of memory).
  212. */
  213. if (!APR_BUCKET_IS_EOS(e) || clength <= 0) {
  214. ap_remove_output_filter(f);
  215. return ap_pass_brigade(f->next, bb);
  216. }
  217. original_status = r->status;
  218. num_ranges = ap_set_byterange(r, clength, &indexes);
  219. /* We have nothing to do, get out of the way. */
  220. if (num_ranges == 0) {
  221. r->status = original_status;
  222. ap_remove_output_filter(f);
  223. return ap_pass_brigade(f->next, bb);
  224. }
  225. /* this brigade holds what we will be sending */
  226. bsend = apr_brigade_create(r->pool, c->bucket_alloc);
  227. if (num_ranges < 0)
  228. return send_416(f, bsend);
  229. if (num_ranges > 1) {
  230. /* Is ap_make_content_type required here? */
  231. const char *orig_ct = ap_make_content_type(r, r->content_type);
  232. boundary = apr_psprintf(r->pool, "%" APR_UINT64_T_HEX_FMT "%lx",
  233. (apr_uint64_t)r->request_time, c->id);
  234. ap_set_content_type(r, apr_pstrcat(r->pool, "multipart",
  235. use_range_x(r) ? "/x-" : "/",
  236. "byteranges; boundary=",
  237. boundary, NULL));
  238. if (strcasecmp(orig_ct, NO_CONTENT_TYPE)) {
  239. bound_head = apr_pstrcat(r->pool,
  240. CRLF "--", boundary,
  241. CRLF "Content-type: ",
  242. orig_ct,
  243. CRLF "Content-range: bytes ",
  244. NULL);
  245. }
  246. else {
  247. /* if we have no type for the content, do our best */
  248. bound_head = apr_pstrcat(r->pool,
  249. CRLF "--", boundary,
  250. CRLF "Content-range: bytes ",
  251. NULL);
  252. }
  253. ap_xlate_proto_to_ascii(bound_head, strlen(bound_head));
  254. }
  255. tmpbb = apr_brigade_create(r->pool, c->bucket_alloc);
  256. idx = (indexes_t *)indexes->elts;
  257. for (i = 0; i < indexes->nelts; i++, idx++) {
  258. range_start = idx->start;
  259. range_end = idx->end;
  260. rv = copy_brigade_range(bb, tmpbb, range_start, range_end);
  261. if (rv != APR_SUCCESS ) {
  262. ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
  263. "copy_brigade_range() failed [%" APR_OFF_T_FMT
  264. "-%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT "]",
  265. range_start, range_end, clength);
  266. continue;
  267. }
  268. found = 1;
  269. /*
  270. * For single range requests, we must produce Content-Range header.
  271. * Otherwise, we need to produce the multipart boundaries.
  272. */
  273. if (num_ranges == 1) {
  274. apr_table_setn(r->headers_out, "Content-Range",
  275. apr_psprintf(r->pool, "bytes " BYTERANGE_FMT,
  276. range_start, range_end, clength));
  277. }
  278. else {
  279. char *ts;
  280. e = apr_bucket_pool_create(bound_head, strlen(bound_head),
  281. r->pool, c->bucket_alloc);
  282. APR_BRIGADE_INSERT_TAIL(bsend, e);
  283. ts = apr_psprintf(r->pool, BYTERANGE_FMT CRLF CRLF,
  284. range_start, range_end, clength);
  285. ap_xlate_proto_to_ascii(ts, strlen(ts));
  286. e = apr_bucket_pool_create(ts, strlen(ts), r->pool,
  287. c->bucket_alloc);
  288. APR_BRIGADE_INSERT_TAIL(bsend, e);
  289. }
  290. APR_BRIGADE_CONCAT(bsend, tmpbb);
  291. if (i && !(i & 0x1F)) {
  292. /*
  293. * Every now and then, pass what we have down the filter chain.
  294. * In this case, the content-length filter cannot calculate and
  295. * set the content length and we must remove any Content-Length
  296. * header already present.
  297. */
  298. apr_table_unset(r->headers_out, "Content-Length");
  299. if ((rv = ap_pass_brigade(f->next, bsend)) != APR_SUCCESS)
  300. return rv;
  301. apr_brigade_cleanup(bsend);
  302. }
  303. }
  304. if (found == 0) {
  305. /* bsend is assumed to be empty if we get here. */
  306. return send_416(f, bsend);
  307. }
  308. if (num_ranges > 1) {
  309. char *end;
  310. /* add the final boundary */
  311. end = apr_pstrcat(r->pool, CRLF "--", boundary, "--" CRLF, NULL);
  312. ap_xlate_proto_to_ascii(end, strlen(end));
  313. e = apr_bucket_pool_create(end, strlen(end), r->pool, c->bucket_alloc);
  314. APR_BRIGADE_INSERT_TAIL(bsend, e);
  315. }
  316. e = apr_bucket_eos_create(c->bucket_alloc);
  317. APR_BRIGADE_INSERT_TAIL(bsend, e);
  318. /* we're done with the original content - all of our data is in bsend. */
  319. apr_brigade_cleanup(bb);
  320. apr_brigade_destroy(tmpbb);
  321. /* send our multipart output */
  322. return ap_pass_brigade(f->next, bsend);
  323. }
  324. static int ap_set_byterange(request_rec *r, apr_off_t clength,
  325. apr_array_header_t **indexes)
  326. {
  327. const char *range;
  328. const char *if_range;
  329. const char *match;
  330. const char *ct;
  331. char *cur;
  332. int num_ranges = 0, unsatisfiable = 0;
  333. apr_off_t sum_lengths = 0;
  334. indexes_t *idx;
  335. int ranges = 1;
  336. const char *it;
  337. if (r->assbackwards) {
  338. return 0;
  339. }
  340. /*
  341. * Check for Range request-header (HTTP/1.1) or Request-Range for
  342. * backwards-compatibility with second-draft Luotonen/Franks
  343. * byte-ranges (e.g. Netscape Navigator 2-3).
  344. *
  345. * We support this form, with Request-Range, and (farther down) we
  346. * send multipart/x-byteranges instead of multipart/byteranges for
  347. * Request-Range based requests to work around a bug in Netscape
  348. * Navigator 2-3 and MSIE 3.
  349. */
  350. if (!(range = apr_table_get(r->headers_in, "Range"))) {
  351. range = apr_table_get(r->headers_in, "Request-Range");
  352. }
  353. if (!range || strncasecmp(range, "bytes=", 6) || r->status != HTTP_OK) {
  354. return 0;
  355. }
  356. /* is content already a single range? */
  357. if (apr_table_get(r->headers_out, "Content-Range")) {
  358. return 0;
  359. }
  360. /* is content already a multiple range? */
  361. if ((ct = apr_table_get(r->headers_out, "Content-Type"))
  362. && (!strncasecmp(ct, "multipart/byteranges", 20)
  363. || !strncasecmp(ct, "multipart/x-byteranges", 22))) {
  364. return 0;
  365. }
  366. /*
  367. * Check the If-Range header for Etag or Date.
  368. * Note that this check will return false (as required) if either
  369. * of the two etags are weak.
  370. */
  371. if ((if_range = apr_table_get(r->headers_in, "If-Range"))) {
  372. if (if_range[0] == '"') {
  373. if (!(match = apr_table_get(r->headers_out, "Etag"))
  374. || (strcmp(if_range, match) != 0)) {
  375. return 0;
  376. }
  377. }
  378. else if (!(match = apr_table_get(r->headers_out, "Last-Modified"))
  379. || (strcmp(if_range, match) != 0)) {
  380. return 0;
  381. }
  382. }
  383. range += 6;
  384. it = range;
  385. while (*it) {
  386. if (*it++ == ',') {
  387. ranges++;
  388. }
  389. }
  390. it = range;
  391. *indexes = apr_array_make(r->pool, ranges, sizeof(indexes_t));
  392. while ((cur = ap_getword(r->pool, &range, ','))) {
  393. char *dash;
  394. char *errp;
  395. apr_off_t number, start, end;
  396. if (!*cur)
  397. break;
  398. /*
  399. * Per RFC 2616 14.35.1: If there is at least one syntactically invalid
  400. * byte-range-spec, we must ignore the whole header.
  401. */
  402. if (!(dash = strchr(cur, '-'))) {
  403. return 0;
  404. }
  405. if (dash == cur) {
  406. /* In the form "-5" */
  407. if (apr_strtoff(&number, dash+1, &errp, 10) || *errp) {
  408. return 0;
  409. }
  410. if (number < 1) {
  411. return 0;
  412. }
  413. start = clength - number;
  414. end = clength - 1;
  415. }
  416. else {
  417. *dash++ = '\0';
  418. if (apr_strtoff(&number, cur, &errp, 10) || *errp) {
  419. return 0;
  420. }
  421. start = number;
  422. if (*dash) {
  423. if (apr_strtoff(&number, dash, &errp, 10) || *errp) {
  424. return 0;
  425. }
  426. end = number;
  427. if (start > end) {
  428. return 0;
  429. }
  430. }
  431. else { /* "5-" */
  432. end = clength - 1;
  433. }
  434. }
  435. if (start < 0) {
  436. start = 0;
  437. }
  438. if (start >= clength) {
  439. unsatisfiable = 1;
  440. continue;
  441. }
  442. if (end >= clength) {
  443. end = clength - 1;
  444. }
  445. idx = (indexes_t *)apr_array_push(*indexes);
  446. idx->start = start;
  447. idx->end = end;
  448. sum_lengths += end - start + 1;
  449. /* new set again */
  450. num_ranges++;
  451. }
  452. if (num_ranges == 0 && unsatisfiable) {
  453. /* If all ranges are unsatisfiable, we should return 416 */
  454. return -1;
  455. }
  456. if (sum_lengths > clength) {
  457. ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
  458. "Sum of ranges larger than file, ignoring.");
  459. return 0;
  460. }
  461. r->status = HTTP_PARTIAL_CONTENT;
  462. r->range = it;
  463. return num_ranges;
  464. }