mod_negotiation.c 109 KB


  1. /* Licensed to the Apache Software Foundation (ASF) under one or more
  2. * contributor license agreements. See the NOTICE file distributed with
  3. * this work for additional information regarding copyright ownership.
  4. * The ASF licenses this file to You under the Apache License, Version 2.0
  5. * (the "License"); you may not use this file except in compliance with
  6. * the License. You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * mod_negotiation.c: keeps track of MIME types the client is willing to
  18. * accept, and contains code to handle type arbitration.
  19. *
  20. * rst
  21. */
  22. #include "apr.h"
  23. #include "apr_strings.h"
  24. #include "apr_file_io.h"
  25. #include "apr_lib.h"
  26. #define APR_WANT_STRFUNC
  27. #include "apr_want.h"
  28. #include "ap_config.h"
  29. #include "httpd.h"
  30. #include "http_config.h"
  31. #include "http_request.h"
  32. #include "http_protocol.h"
  33. #include "http_core.h"
  34. #include "http_log.h"
  35. #include "util_script.h"
  36. #define MAP_FILE_MAGIC_TYPE "application/x-type-map"
  37. /* Commands --- configuring document caching on a per (virtual?)
  38. * server basis...
  39. */
  40. typedef struct {
  41. int forcelangpriority;
  42. apr_array_header_t *language_priority;
  43. } neg_dir_config;
  44. /* forcelangpriority flags
  45. */
  46. #define FLP_UNDEF 0 /* Same as FLP_DEFAULT, but base overrides */
  47. #define FLP_NONE 1 /* Return 406, HTTP_NOT_ACCEPTABLE */
  48. #define FLP_PREFER 2 /* Use language_priority rather than MC */
  49. #define FLP_FALLBACK 4 /* Use language_priority rather than NA */
  50. #define FLP_DEFAULT FLP_PREFER
  51. /* env evaluation
  52. */
  53. #define DISCARD_ALL_ENCODINGS 1 /* no-gzip */
  54. #define DISCARD_ALL_BUT_HTML 2 /* gzip-only-text/html */
  55. module AP_MODULE_DECLARE_DATA negotiation_module;
  56. static void *create_neg_dir_config(apr_pool_t *p, char *dummy)
  57. {
  58. neg_dir_config *new = (neg_dir_config *) apr_palloc(p,
  59. sizeof(neg_dir_config));
  60. new->forcelangpriority = FLP_UNDEF;
  61. new->language_priority = NULL;
  62. return new;
  63. }
  64. static void *merge_neg_dir_configs(apr_pool_t *p, void *basev, void *addv)
  65. {
  66. neg_dir_config *base = (neg_dir_config *) basev;
  67. neg_dir_config *add = (neg_dir_config *) addv;
  68. neg_dir_config *new = (neg_dir_config *) apr_palloc(p,
  69. sizeof(neg_dir_config));
  70. /* give priority to the config in the subdirectory */
  71. new->forcelangpriority = (add->forcelangpriority != FLP_UNDEF)
  72. ? add->forcelangpriority
  73. : base->forcelangpriority;
  74. new->language_priority = add->language_priority
  75. ? add->language_priority
  76. : base->language_priority;
  77. return new;
  78. }
  79. static const char *set_language_priority(cmd_parms *cmd, void *n_,
  80. const char *lang)
  81. {
  82. neg_dir_config *n = n_;
  83. const char **langp;
  84. if (!n->language_priority)
  85. n->language_priority = apr_array_make(cmd->pool, 4, sizeof(char *));
  86. langp = (const char **) apr_array_push(n->language_priority);
  87. *langp = lang;
  88. return NULL;
  89. }
  90. static const char *set_force_priority(cmd_parms *cmd, void *n_, const char *w)
  91. {
  92. neg_dir_config *n = n_;
  93. if (!strcasecmp(w, "None")) {
  94. if (n->forcelangpriority & ~FLP_NONE) {
  95. return "Cannot combine ForceLanguagePriority options with None";
  96. }
  97. n->forcelangpriority = FLP_NONE;
  98. }
  99. else if (!strcasecmp(w, "Prefer")) {
  100. if (n->forcelangpriority & FLP_NONE) {
  101. return "Cannot combine ForceLanguagePriority options None and "
  102. "Prefer";
  103. }
  104. n->forcelangpriority |= FLP_PREFER;
  105. }
  106. else if (!strcasecmp(w, "Fallback")) {
  107. if (n->forcelangpriority & FLP_NONE) {
  108. return "Cannot combine ForceLanguagePriority options None and "
  109. "Fallback";
  110. }
  111. n->forcelangpriority |= FLP_FALLBACK;
  112. }
  113. else {
  114. return apr_pstrcat(cmd->pool, "Invalid ForceLanguagePriority option ",
  115. w, NULL);
  116. }
  117. return NULL;
  118. }
  119. static const char *cache_negotiated_docs(cmd_parms *cmd, void *dummy,
  120. int arg)
  121. {
  122. ap_set_module_config(cmd->server->module_config, &negotiation_module,
  123. (arg ? "Cache" : NULL));
  124. return NULL;
  125. }
  126. static int do_cache_negotiated_docs(server_rec *s)
  127. {
  128. return (ap_get_module_config(s->module_config,
  129. &negotiation_module) != NULL);
  130. }
  131. static const command_rec negotiation_cmds[] =
  132. {
  133. AP_INIT_FLAG("CacheNegotiatedDocs", cache_negotiated_docs, NULL, RSRC_CONF,
  134. "Either 'on' or 'off' (default)"),
  135. AP_INIT_ITERATE("LanguagePriority", set_language_priority, NULL,
  136. OR_FILEINFO,
  137. "space-delimited list of MIME language abbreviations"),
  138. AP_INIT_ITERATE("ForceLanguagePriority", set_force_priority, NULL,
  139. OR_FILEINFO,
  140. "Force LanguagePriority elections, either None, or "
  141. "Fallback and/or Prefer"),
  142. {NULL}
  143. };
  /*
   * One item of information about what the client accepts.
   *
   * Primarily describes a media type, but the same record is reused for
   * encodings and languages.
   */
  typedef struct accept_rec {
      /* item name; MUST be lowercase */
      char *name;
      /* quality value attached to the item */
      float quality;
      /* value of the "level" parameter */
      float level;
      /* value of the "charset" parameter; for content-type only */
      char *charset;
  } accept_rec;
  154. /*
  155. * Record of available info on a particular variant
  156. *
  157. * Note that a few of these fields are updated by the actual negotiation
  158. * code. These are:
  159. *
  160. * level_matched --- initialized to zero. Set to the value of level
  161. * if the client actually accepts this media type at that
  162. * level (and *not* if it got in on a wildcard). See level_cmp
  163. * below.
  164. * mime_stars -- initialized to zero. Set to the number of stars
  165. * present in the best matching Accept header element.
  166. * 1 for star/star, 2 for type/star and 3 for
  167. * type/subtype.
  168. *
  169. * definite -- initialized to 1. Set to 0 if there is a match which
  170. * makes the variant non-definite according to the rules
  171. * in rfc2296.
  172. */
  173. typedef struct var_rec {
  174. request_rec *sub_req; /* May be NULL (is, for map files) */
  175. const char *mime_type; /* MUST be lowercase */
  176. const char *file_name; /* Set to 'this' (for map file body content) */
  177. apr_off_t body; /* Only for map file body content */
  178. const char *content_encoding;
  179. apr_array_header_t *content_languages; /* list of lang. for this variant */
  180. const char *content_charset;
  181. const char *description;
  182. /* The next five items give the quality values for the dimensions
  183. * of negotiation for this variant. They are obtained from the
  184. * appropriate header lines, except for source_quality, which
  185. * is obtained from the variant itself (the 'qs' parameter value
  186. * from the variant's mime-type). Apart from source_quality,
  187. * these values are set when we find the quality for each variant
  188. * (see best_match()). source_quality is set from the 'qs' parameter
  189. * of the variant description or mime type: see set_mime_fields().
  190. */
  191. float lang_quality; /* quality of this variant's language */
  192. float encoding_quality; /* ditto encoding */
  193. float charset_quality; /* ditto charset */
  194. float mime_type_quality; /* ditto media type */
  195. float source_quality; /* source quality for this variant */
  196. /* Now some special values */
  197. float level; /* Auxiliary to content-type... */
  198. apr_off_t bytes; /* content length, if known */
  199. int lang_index; /* Index into LanguagePriority list */
  200. int is_pseudo_html; /* text/html, *or* the INCLUDES_MAGIC_TYPEs */
  201. /* Above are all written-once properties of the variant. The
  202. * three fields below are changed during negotiation:
  203. */
  204. float level_matched;
  205. int mime_stars;
  206. int definite;
  207. } var_rec;
  208. /* Something to carry around the state of negotiation (and to keep
  209. * all of this thread-safe)...
  210. */
  211. typedef struct {
  212. apr_pool_t *pool;
  213. request_rec *r;
  214. neg_dir_config *conf;
  215. char *dir_name;
  216. int accept_q; /* 1 if an Accept item has a q= param */
  217. float default_lang_quality; /* fiddle lang q for variants with no lang */
  218. /* the array pointers below are NULL if the corresponding accept
  219. * headers are not present
  220. */
  221. apr_array_header_t *accepts; /* accept_recs */
  222. apr_array_header_t *accept_encodings; /* accept_recs */
  223. apr_array_header_t *accept_charsets; /* accept_recs */
  224. apr_array_header_t *accept_langs; /* accept_recs */
  225. apr_array_header_t *avail_vars; /* available variants */
  226. int count_multiviews_variants; /* number of variants found on disk */
  227. int is_transparent; /* 1 if this resource is trans. negotiable */
  228. int dont_fiddle_headers; /* 1 if we may not fiddle with accept hdrs */
  229. int ua_supports_trans; /* 1 if ua supports trans negotiation */
  230. int send_alternates; /* 1 if we want to send an Alternates header */
  231. int may_choose; /* 1 if we may choose a variant for the client */
  232. int use_rvsa; /* 1 if we must use RVSA/1.0 negotiation algo */
  233. } negotiation_state;
  234. /* A few functions to manipulate var_recs.
  235. * Cleaning out the fields...
  236. */
  237. static void clean_var_rec(var_rec *mime_info)
  238. {
  239. mime_info->sub_req = NULL;
  240. mime_info->mime_type = "";
  241. mime_info->file_name = "";
  242. mime_info->body = 0;
  243. mime_info->content_encoding = NULL;
  244. mime_info->content_languages = NULL;
  245. mime_info->content_charset = "";
  246. mime_info->description = "";
  247. mime_info->is_pseudo_html = 0;
  248. mime_info->level = 0.0f;
  249. mime_info->level_matched = 0.0f;
  250. mime_info->bytes = -1;
  251. mime_info->lang_index = -1;
  252. mime_info->mime_stars = 0;
  253. mime_info->definite = 1;
  254. mime_info->charset_quality = 1.0f;
  255. mime_info->encoding_quality = 1.0f;
  256. mime_info->lang_quality = 1.0f;
  257. mime_info->mime_type_quality = 1.0f;
  258. mime_info->source_quality = 0.0f;
  259. }
  260. /* Initializing the relevant fields of a variant record from the
  261. * accept_info read out of its content-type, one way or another.
  262. */
  263. static void set_mime_fields(var_rec *var, accept_rec *mime_info)
  264. {
  265. var->mime_type = mime_info->name;
  266. var->source_quality = mime_info->quality;
  267. var->level = mime_info->level;
  268. var->content_charset = mime_info->charset;
  269. var->is_pseudo_html = (!strcmp(var->mime_type, "text/html")
  270. || !strcmp(var->mime_type, INCLUDES_MAGIC_TYPE)
  271. || !strcmp(var->mime_type, INCLUDES_MAGIC_TYPE3));
  272. }
  273. /* Create a variant list validator in r using info from vlistr. */
  274. static void set_vlist_validator(request_rec *r, request_rec *vlistr)
  275. {
  276. /* Calculating the variant list validator is similar to
  277. * calculating an etag for the source of the variant list
  278. * information, so we use ap_make_etag(). Note that this
  279. * validator can be 'weak' in extreme case.
  280. */
  281. ap_update_mtime(vlistr, vlistr->finfo.mtime);
  282. r->vlist_validator = ap_make_etag(vlistr, 0);
  283. /* ap_set_etag will later take r->vlist_validator into account
  284. * when creating the etag header
  285. */
  286. }
  287. /*****************************************************************
  288. *
  289. * Parsing (lists of) media types and their parameters, as seen in
  290. * HTTPD header lines and elsewhere.
  291. */
  292. /*
  293. * parse quality value. atof(3) is not well-usable here, because it
  294. * depends on the locale (argh).
  295. *
  296. * However, RFC 2616 states:
  297. * 3.9 Quality Values
  298. *
  299. * [...] HTTP/1.1 applications MUST NOT generate more than three digits
  300. * after the decimal point. User configuration of these values SHOULD also
  301. * be limited in this fashion.
  302. *
  303. * qvalue = ( "0" [ "." 0*3DIGIT ] )
  304. * | ( "1" [ "." 0*3("0") ] )
  305. *
  306. * This is quite easy. If the supplied string doesn't match the above
  307. * definition (loosely), we simply return 1 (same as if there's no qvalue)
  308. */
  309. static float atoq(const char *string)
  310. {
  311. if (!string || !*string) {
  312. return 1.0f;
  313. }
  314. while (*string && apr_isspace(*string)) {
  315. ++string;
  316. }
  317. /* be tolerant and accept qvalues without leading zero
  318. * (also for backwards compat, where atof() was in use)
  319. */
  320. if (*string != '.' && *string++ != '0') {
  321. return 1.0f;
  322. }
  323. if (*string == '.') {
  324. /* better only one division later, than dealing with fscking
  325. * IEEE format 0.1 factors ...
  326. */
  327. int i = 0;
  328. if (*++string >= '0' && *string <= '9') {
  329. i += (*string - '0') * 100;
  330. if (*++string >= '0' && *string <= '9') {
  331. i += (*string - '0') * 10;
  332. if (*++string > '0' && *string <= '9') {
  333. i += (*string - '0');
  334. }
  335. }
  336. }
  337. return (float)i / 1000.0f;
  338. }
  339. return 0.0f;
  340. }
  341. /*
  342. * Get a single mime type entry --- one media type and parameters;
  343. * enter the values we recognize into the argument accept_rec
  344. */
  345. static const char *get_entry(apr_pool_t *p, accept_rec *result,
  346. const char *accept_line)
  347. {
  348. result->quality = 1.0f;
  349. result->level = 0.0f;
  350. result->charset = "";
  351. /*
  352. * Note that this handles what I gather is the "old format",
  353. *
  354. * Accept: text/html text/plain moo/zot
  355. *
  356. * without any compatibility kludges --- if the token after the
  357. * MIME type begins with a semicolon, we know we're looking at parms,
  358. * otherwise, we know we aren't. (So why all the pissing and moaning
  359. * in the CERN server code? I must be missing something).
  360. */
  361. result->name = ap_get_token(p, &accept_line, 0);
  362. ap_str_tolower(result->name); /* You want case insensitive,
  363. * you'll *get* case insensitive.
  364. */
  365. /* KLUDGE!!! Default HTML to level 2.0 unless the browser
  366. * *explicitly* says something else.
  367. */
  368. if (!strcmp(result->name, "text/html") && (result->level == 0.0)) {
  369. result->level = 2.0f;
  370. }
  371. else if (!strcmp(result->name, INCLUDES_MAGIC_TYPE)) {
  372. result->level = 2.0f;
  373. }
  374. else if (!strcmp(result->name, INCLUDES_MAGIC_TYPE3)) {
  375. result->level = 3.0f;
  376. }
  377. while (*accept_line == ';') {
  378. /* Parameters ... */
  379. char *parm;
  380. char *cp;
  381. char *end;
  382. ++accept_line;
  383. parm = ap_get_token(p, &accept_line, 1);
  384. /* Look for 'var = value' --- and make sure the var is in lcase. */
  385. for (cp = parm; (*cp && !apr_isspace(*cp) && *cp != '='); ++cp) {
  386. *cp = apr_tolower(*cp);
  387. }
  388. if (!*cp) {
  389. continue; /* No '='; just ignore it. */
  390. }
  391. *cp++ = '\0'; /* Delimit var */
  392. while (*cp && (apr_isspace(*cp) || *cp == '=')) {
  393. ++cp;
  394. }
  395. if (*cp == '"') {
  396. ++cp;
  397. for (end = cp;
  398. (*end && *end != '\n' && *end != '\r' && *end != '\"');
  399. end++);
  400. }
  401. else {
  402. for (end = cp; (*end && !apr_isspace(*end)); end++);
  403. }
  404. if (*end) {
  405. *end = '\0'; /* strip ending quote or return */
  406. }
  407. ap_str_tolower(cp);
  408. if (parm[0] == 'q'
  409. && (parm[1] == '\0' || (parm[1] == 's' && parm[2] == '\0'))) {
  410. result->quality = atoq(cp);
  411. }
  412. else if (parm[0] == 'l' && !strcmp(&parm[1], "evel")) {
  413. result->level = (float)atoi(cp);
  414. }
  415. else if (!strcmp(parm, "charset")) {
  416. result->charset = cp;
  417. }
  418. }
  419. if (*accept_line == ',') {
  420. ++accept_line;
  421. }
  422. return accept_line;
  423. }
  424. /*****************************************************************
  425. *
  426. * Dealing with header lines ...
  427. *
  428. * Accept, Accept-Charset, Accept-Language and Accept-Encoding
  429. * are handled by do_header_line() - they all have the same
  430. * basic structure of a list of items of the format
  431. * name; q=N; charset=TEXT
  432. *
  433. * where charset is only valid in Accept.
  434. */
  435. static apr_array_header_t *do_header_line(apr_pool_t *p,
  436. const char *accept_line)
  437. {
  438. apr_array_header_t *accept_recs;
  439. if (!accept_line) {
  440. return NULL;
  441. }
  442. accept_recs = apr_array_make(p, 40, sizeof(accept_rec));
  443. while (*accept_line) {
  444. accept_rec *new = (accept_rec *) apr_array_push(accept_recs);
  445. accept_line = get_entry(p, new, accept_line);
  446. }
  447. return accept_recs;
  448. }
  449. /* Given the text of the Content-Languages: line from the var map file,
  450. * return an array containing the languages of this variant
  451. */
  452. static apr_array_header_t *do_languages_line(apr_pool_t *p,
  453. const char **lang_line)
  454. {
  455. apr_array_header_t *lang_recs = apr_array_make(p, 2, sizeof(char *));
  456. if (!lang_line) {
  457. return lang_recs;
  458. }
  459. while (**lang_line) {
  460. char **new = (char **) apr_array_push(lang_recs);
  461. *new = ap_get_token(p, lang_line, 0);
  462. ap_str_tolower(*new);
  463. if (**lang_line == ',' || **lang_line == ';') {
  464. ++(*lang_line);
  465. }
  466. }
  467. return lang_recs;
  468. }
  469. /*****************************************************************
  470. *
  471. * Handling header lines from clients...
  472. */
  473. static negotiation_state *parse_accept_headers(request_rec *r)
  474. {
  475. negotiation_state *new =
  476. (negotiation_state *) apr_pcalloc(r->pool, sizeof(negotiation_state));
  477. accept_rec *elts;
  478. apr_table_t *hdrs = r->headers_in;
  479. int i;
  480. new->pool = r->pool;
  481. new->r = r;
  482. new->conf = (neg_dir_config *)ap_get_module_config(r->per_dir_config,
  483. &negotiation_module);
  484. new->dir_name = ap_make_dirstr_parent(r->pool, r->filename);
  485. new->accepts = do_header_line(r->pool, apr_table_get(hdrs, "Accept"));
  486. /* calculate new->accept_q value */
  487. if (new->accepts) {
  488. elts = (accept_rec *) new->accepts->elts;
  489. for (i = 0; i < new->accepts->nelts; ++i) {
  490. if (elts[i].quality < 1.0) {
  491. new->accept_q = 1;
  492. }
  493. }
  494. }
  495. new->accept_encodings =
  496. do_header_line(r->pool, apr_table_get(hdrs, "Accept-Encoding"));
  497. new->accept_langs =
  498. do_header_line(r->pool, apr_table_get(hdrs, "Accept-Language"));
  499. new->accept_charsets =
  500. do_header_line(r->pool, apr_table_get(hdrs, "Accept-Charset"));
  501. /* This is possibly overkill for some servers, heck, we have
  502. * only 33 index.html variants in docs/docroot (today).
  503. * Make this configurable?
  504. */
  505. new->avail_vars = apr_array_make(r->pool, 40, sizeof(var_rec));
  506. return new;
  507. }
  508. static void parse_negotiate_header(request_rec *r, negotiation_state *neg)
  509. {
  510. const char *negotiate = apr_table_get(r->headers_in, "Negotiate");
  511. char *tok;
  512. /* First, default to no TCN, no Alternates, and the original Apache
  513. * negotiation algorithm with fiddles for broken browser configs.
  514. *
  515. * To save network bandwidth, we do not configure to send an
  516. * Alternates header to the user agent by default. User
  517. * agents that want an Alternates header for agent-driven
  518. * negotiation will have to request it by sending an
  519. * appropriate Negotiate header.
  520. */
  521. neg->ua_supports_trans = 0;
  522. neg->send_alternates = 0;
  523. neg->may_choose = 1;
  524. neg->use_rvsa = 0;
  525. neg->dont_fiddle_headers = 0;
  526. if (!negotiate)
  527. return;
  528. if (strcmp(negotiate, "trans") == 0) {
  529. /* Lynx 2.7 and 2.8 send 'negotiate: trans' even though they
  530. * do not support transparent content negotiation, so for Lynx we
  531. * ignore the negotiate header when its contents are exactly "trans".
  532. * If future versions of Lynx ever need to say 'negotiate: trans',
  533. * they can send the equivalent 'negotiate: trans, trans' instead
  534. * to avoid triggering the workaround below.
  535. */
  536. const char *ua = apr_table_get(r->headers_in, "User-Agent");
  537. if (ua && (strncmp(ua, "Lynx", 4) == 0))
  538. return;
  539. }
  540. neg->may_choose = 0; /* An empty Negotiate would require 300 response */
  541. while ((tok = ap_get_list_item(neg->pool, &negotiate)) != NULL) {
  542. if (strcmp(tok, "trans") == 0 ||
  543. strcmp(tok, "vlist") == 0 ||
  544. strcmp(tok, "guess-small") == 0 ||
  545. apr_isdigit(tok[0]) ||
  546. strcmp(tok, "*") == 0) {
  547. /* The user agent supports transparent negotiation */
  548. neg->ua_supports_trans = 1;
  549. /* Send-alternates could be configurable, but note
  550. * that it must be 1 if we have 'vlist' in the
  551. * negotiate header.
  552. */
  553. neg->send_alternates = 1;
  554. if (strcmp(tok, "1.0") == 0) {
  555. /* we may use the RVSA/1.0 algorithm, configure for it */
  556. neg->may_choose = 1;
  557. neg->use_rvsa = 1;
  558. neg->dont_fiddle_headers = 1;
  559. }
  560. else if (tok[0] == '*') {
  561. /* we may use any variant selection algorithm, configure
  562. * to use the Apache algorithm
  563. */
  564. neg->may_choose = 1;
  565. /* We disable header fiddles on the assumption that a
  566. * client sending Negotiate knows how to send correct
  567. * headers which don't need fiddling.
  568. */
  569. neg->dont_fiddle_headers = 1;
  570. }
  571. }
  572. }
  573. #ifdef NEG_DEBUG
  574. ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL,
  575. "dont_fiddle_headers=%d use_rvsa=%d ua_supports_trans=%d "
  576. "send_alternates=%d, may_choose=%d",
  577. neg->dont_fiddle_headers, neg->use_rvsa,
  578. neg->ua_supports_trans, neg->send_alternates, neg->may_choose);
  579. #endif
  580. }
  581. /* Sometimes clients will give us no Accept info at all; this routine sets
  582. * up the standard default for that case, and also arranges for us to be
  583. * willing to run a CGI script if we find one. (In fact, we set up to
  584. * dramatically prefer CGI scripts in cases where that's appropriate,
  585. * e.g., POST or when URI includes query args or extra path info).
  586. */
  587. static void maybe_add_default_accepts(negotiation_state *neg,
  588. int prefer_scripts)
  589. {
  590. accept_rec *new_accept;
  591. if (!neg->accepts) {
  592. neg->accepts = apr_array_make(neg->pool, 4, sizeof(accept_rec));
  593. new_accept = (accept_rec *) apr_array_push(neg->accepts);
  594. new_accept->name = "*/*";
  595. new_accept->quality = 1.0f;
  596. new_accept->level = 0.0f;
  597. }
  598. new_accept = (accept_rec *) apr_array_push(neg->accepts);
  599. new_accept->name = CGI_MAGIC_TYPE;
  600. if (neg->use_rvsa) {
  601. new_accept->quality = 0;
  602. }
  603. else {
  604. new_accept->quality = prefer_scripts ? 2.0f : 0.001f;
  605. }
  606. new_accept->level = 0.0f;
  607. }
  608. /*****************************************************************
  609. *
  610. * Parsing type-map files, in Roy's meta/http format augmented with
  611. * #-comments.
  612. */
  613. /* Reading RFC822-style header lines, ignoring #-comments and
  614. * handling continuations.
  615. */
  /* Result of trying to read one header line from a type map. */
  enum header_state {
      header_eof,     /* no further line could be read */
      header_seen,    /* a header line is available in the buffer */
      header_sep      /* a blank line was read */
  };
  619. static enum header_state get_header_line(char *buffer, int len, apr_file_t *map)
  620. {
  621. char *buf_end = buffer + len;
  622. char *cp;
  623. char c;
  624. /* Get a noncommented line */
  625. do {
  626. if (apr_file_gets(buffer, MAX_STRING_LEN, map) != APR_SUCCESS) {
  627. return header_eof;
  628. }
  629. } while (buffer[0] == '#');
  630. /* If blank, just return it --- this ends information on this variant */
  631. for (cp = buffer; (*cp && apr_isspace(*cp)); ++cp) {
  632. continue;
  633. }
  634. if (*cp == '\0') {
  635. return header_sep;
  636. }
  637. /* If non-blank, go looking for header lines, but note that we still
  638. * have to treat comments specially...
  639. */
  640. cp += strlen(cp);
  641. /* We need to shortcut the rest of this block following the Body:
  642. * tag - we will not look for continutation after this line.
  643. */
  644. if (!strncasecmp(buffer, "Body:", 5))
  645. return header_seen;
  646. while (apr_file_getc(&c, map) != APR_EOF) {
  647. if (c == '#') {
  648. /* Comment line */
  649. while (apr_file_getc(&c, map) != APR_EOF && c != '\n') {
  650. continue;
  651. }
  652. }
  653. else if (apr_isspace(c)) {
  654. /* Leading whitespace. POSSIBLE continuation line
  655. * Also, possibly blank --- if so, we ungetc() the final newline
  656. * so that we will pick up the blank line the next time 'round.
  657. */
  658. while (c != '\n' && apr_isspace(c)) {
  659. if(apr_file_getc(&c, map) != APR_SUCCESS)
  660. break;
  661. }
  662. apr_file_ungetc(c, map);
  663. if (c == '\n') {
  664. return header_seen; /* Blank line */
  665. }
  666. /* Continuation */
  667. while ( cp < buf_end - 2
  668. && (apr_file_getc(&c, map)) != APR_EOF
  669. && c != '\n') {
  670. *cp++ = c;
  671. }
  672. *cp++ = '\n';
  673. *cp = '\0';
  674. }
  675. else {
  676. /* Line beginning with something other than whitespace */
  677. apr_file_ungetc(c, map);
  678. return header_seen;
  679. }
  680. }
  681. return header_seen;
  682. }
  683. static apr_off_t get_body(char *buffer, apr_size_t *len, const char *tag,
  684. apr_file_t *map)
  685. {
  686. char *endbody;
  687. int bodylen;
  688. int taglen;
  689. apr_off_t pos;
  690. taglen = strlen(tag);
  691. *len -= taglen;
  692. /* We are at the first character following a body:tag\n entry
  693. * Suck in the body, then backspace to the first char after the
  694. * closing tag entry. If we fail to read, find the tag or back
  695. * up then we have a hosed file, so give up already
  696. */
  697. if (apr_file_read(map, buffer, len) != APR_SUCCESS) {
  698. return -1;
  699. }
  700. /* put a copy of the tag *after* the data read from the file
  701. * so that strstr() will find something with no reliance on
  702. * terminating '\0'
  703. */
  704. memcpy(buffer + *len, tag, taglen);
  705. endbody = strstr(buffer, tag);
  706. if (endbody == buffer + *len) {
  707. return -1;
  708. }
  709. bodylen = endbody - buffer;
  710. endbody += taglen;
  711. /* Skip all the trailing cruft after the end tag to the next line */
  712. while (*endbody) {
  713. if (*endbody == '\n') {
  714. ++endbody;
  715. break;
  716. }
  717. ++endbody;
  718. }
  719. pos = -(apr_off_t)(*len - (endbody - buffer));
  720. if (apr_file_seek(map, APR_CUR, &pos) != APR_SUCCESS) {
  721. return -1;
  722. }
  723. /* Give the caller back the actual body's file offset and length */
  724. *len = bodylen;
  725. return pos - (endbody - buffer);
  726. }
  /*
   * Blank out RFC822 style (comments) in a header value, in place.
   *
   * Every character of a parenthesized comment, the parentheses
   * included, is overwritten with a space, so the length of the string
   * does not change.  A comment without closing parenthesis extends to
   * the end of the string.
   *
   * Text inside double quotes is left alone: the scan continues after
   * the closing quote.  When a quoted string is never closed, the rest
   * of the header is left untouched.
   *
   * Known limitations: nested comments and backslash escapes are not
   * handled.
   */
  static void strip_paren_comments(char *hdr)
  {
      while (*hdr) {
          if (*hdr == '"') {
              /* search from the character after the opening quote; a
               * search starting at hdr would find the opening quote itself
               */
              hdr = strchr(hdr + 1, '"');
              if (hdr == NULL) {
                  return;
              }
              ++hdr;
          }
          else if (*hdr == '(') {
              while (*hdr && *hdr != ')') {
                  *hdr++ = ' ';
              }

              if (*hdr) {
                  *hdr++ = ' ';   /* the closing parenthesis */
              }
          }
          else {
              ++hdr;
          }
      }
  }
  753. /* Getting to a header body from the header */
  754. static char *lcase_header_name_return_body(char *header, request_rec *r)
  755. {
  756. char *cp = header;
  757. for ( ; *cp && *cp != ':' ; ++cp) {
  758. *cp = apr_tolower(*cp);
  759. }
  760. if (!*cp) {
  761. ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
  762. "Syntax error in type map, no ':' in %s for header %s",
  763. r->filename, header);
  764. return NULL;
  765. }
  766. do {
  767. ++cp;
  768. } while (*cp && apr_isspace(*cp));
  769. if (!*cp) {
  770. ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
  771. "Syntax error in type map --- no header body: %s for %s",
  772. r->filename, header);
  773. return NULL;
  774. }
  775. return cp;
  776. }
  777. static int read_type_map(apr_file_t **map, negotiation_state *neg,
  778. request_rec *rr)
  779. {
  780. request_rec *r = neg->r;
  781. apr_file_t *map_ = NULL;
  782. apr_status_t status;
  783. char buffer[MAX_STRING_LEN];
  784. enum header_state hstate;
  785. struct var_rec mime_info;
  786. int has_content;
  787. if (!map)
  788. map = &map_;
  789. /* We are not using multiviews */
  790. neg->count_multiviews_variants = 0;
  791. if ((status = apr_file_open(map, rr->filename, APR_READ | APR_BUFFERED,
  792. APR_OS_DEFAULT, neg->pool)) != APR_SUCCESS) {
  793. ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r,
  794. "cannot access type map file: %s", rr->filename);
  795. if (APR_STATUS_IS_ENOTDIR(status) || APR_STATUS_IS_ENOENT(status)) {
  796. return HTTP_NOT_FOUND;
  797. }
  798. else {
  799. return HTTP_FORBIDDEN;
  800. }
  801. }
  802. clean_var_rec(&mime_info);
  803. has_content = 0;
  804. do {
  805. hstate = get_header_line(buffer, MAX_STRING_LEN, *map);
  806. if (hstate == header_seen) {
  807. char *body1 = lcase_header_name_return_body(buffer, neg->r);
  808. const char *body;
  809. if (body1 == NULL) {
  810. return HTTP_INTERNAL_SERVER_ERROR;
  811. }
  812. strip_paren_comments(body1);
  813. body = body1;
  814. if (!strncmp(buffer, "uri:", 4)) {
  815. mime_info.file_name = ap_get_token(neg->pool, &body, 0);
  816. }
  817. else if (!strncmp(buffer, "content-type:", 13)) {
  818. struct accept_rec accept_info;
  819. get_entry(neg->pool, &accept_info, body);
  820. set_mime_fields(&mime_info, &accept_info);
  821. has_content = 1;
  822. }
  823. else if (!strncmp(buffer, "content-length:", 15)) {
  824. char *errp;
  825. apr_off_t number;
  826. if (apr_strtoff(&number, body, &errp, 10)
  827. || *errp || number < 0) {
  828. ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
  829. "Parse error in type map, Content-Length: "
  830. "'%s' in %s is invalid.",
  831. body, r->filename);
  832. break;
  833. }
  834. mime_info.bytes = number;
  835. has_content = 1;
  836. }
  837. else if (!strncmp(buffer, "content-language:", 17)) {
  838. mime_info.content_languages = do_languages_line(neg->pool,
  839. &body);
  840. has_content = 1;
  841. }
  842. else if (!strncmp(buffer, "content-encoding:", 17)) {
  843. mime_info.content_encoding = ap_get_token(neg->pool, &body, 0);
  844. has_content = 1;
  845. }
  846. else if (!strncmp(buffer, "description:", 12)) {
  847. char *desc = apr_pstrdup(neg->pool, body);
  848. char *cp;
  849. for (cp = desc; *cp; ++cp) {
  850. if (*cp=='\n') *cp=' ';
  851. }
  852. if (cp>desc) *(cp-1)=0;
  853. mime_info.description = desc;
  854. }
  855. else if (!strncmp(buffer, "body:", 5)) {
  856. char *tag = apr_pstrdup(neg->pool, body);
  857. char *eol = strchr(tag, '\0');
  858. apr_size_t len = MAX_STRING_LEN;
  859. while (--eol >= tag && apr_isspace(*eol))
  860. *eol = '\0';
  861. if ((mime_info.body = get_body(buffer, &len, tag, *map)) < 0) {
  862. ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
  863. "Syntax error in type map, no end tag '%s'"
  864. "found in %s for Body: content.",
  865. tag, r->filename);
  866. break;
  867. }
  868. mime_info.bytes = len;
  869. mime_info.file_name = apr_filepath_name_get(rr->filename);
  870. }
  871. }
  872. else {
  873. if (*mime_info.file_name && has_content) {
  874. void *new_var = apr_array_push(neg->avail_vars);
  875. memcpy(new_var, (void *) &mime_info, sizeof(var_rec));
  876. }
  877. clean_var_rec(&mime_info);
  878. has_content = 0;
  879. }
  880. } while (hstate != header_eof);
  881. if (map_)
  882. apr_file_close(map_);
  883. set_vlist_validator(r, rr);
  884. return OK;
  885. }
  886. /* Sort function used by read_types_multi. */
  887. static int variantsortf(var_rec *a, var_rec *b) {
  888. /* First key is the source quality, sort in descending order. */
  889. /* XXX: note that we currently implement no method of setting the
  890. * source quality for multiviews variants, so we are always comparing
  891. * 1.0 to 1.0 for now
  892. */
  893. if (a->source_quality < b->source_quality)
  894. return 1;
  895. if (a->source_quality > b->source_quality)
  896. return -1;
  897. /* Second key is the variant name */
  898. return strcmp(a->file_name, b->file_name);
  899. }
  900. /*****************************************************************
  901. *
  902. * Same as read_type_map, except we use a filtered directory listing
  903. * as the map...
  904. */
  905. static int read_types_multi(negotiation_state *neg)
  906. {
  907. request_rec *r = neg->r;
  908. char *filp;
  909. int prefix_len;
  910. apr_dir_t *dirp;
  911. apr_finfo_t dirent;
  912. apr_status_t status;
  913. struct var_rec mime_info;
  914. struct accept_rec accept_info;
  915. void *new_var;
  916. int anymatch = 0;
  917. clean_var_rec(&mime_info);
  918. if (r->proxyreq || !r->filename
  919. || !ap_os_is_path_absolute(neg->pool, r->filename)) {
  920. return DECLINED;
  921. }
  922. /* Only absolute paths here */
  923. if (!(filp = strrchr(r->filename, '/'))) {
  924. return DECLINED;
  925. }
  926. ++filp;
  927. prefix_len = strlen(filp);
  928. if ((status = apr_dir_open(&dirp, neg->dir_name,
  929. neg->pool)) != APR_SUCCESS) {
  930. ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r,
  931. "cannot read directory for multi: %s", neg->dir_name);
  932. return HTTP_FORBIDDEN;
  933. }
  934. while (apr_dir_read(&dirent, APR_FINFO_DIRENT, dirp) == APR_SUCCESS) {
  935. apr_array_header_t *exception_list;
  936. request_rec *sub_req;
  937. /* Do we have a match? */
  938. #ifdef CASE_BLIND_FILESYSTEM
  939. if (strncasecmp(dirent.name, filp, prefix_len)) {
  940. #else
  941. if (strncmp(dirent.name, filp, prefix_len)) {
  942. #endif
  943. continue;
  944. }
  945. if (dirent.name[prefix_len] != '.') {
  946. continue;
  947. }
  948. /* Don't negotiate directories and other unusual files
  949. * Really shouldn't see anything but DIR/LNK/REG here,
  950. * and we aught to discover if the LNK was interesting.
  951. *
  952. * Of course, this only helps platforms that capture the
  953. * the filetype in apr_dir_read(), which most can once
  954. * they are optimized with some magic [it's known to the
  955. * dirent, not associated to the inode, on most FS's.]
  956. */
  957. if ((dirent.valid & APR_FINFO_TYPE) && (dirent.filetype == APR_DIR))
  958. continue;
  959. /* Ok, something's here. Maybe nothing useful. Remember that
  960. * we tried, if we completely fail, so we can reject the request!
  961. */
  962. anymatch = 1;
  963. /* See if it's something which we have access to, and which
  964. * has a known type and encoding (as opposed to something
  965. * which we'll be slapping default_type on later).
  966. */
  967. sub_req = ap_sub_req_lookup_dirent(&dirent, r, AP_SUBREQ_MERGE_ARGS,
  968. NULL);
  969. /* Double check, we still don't multi-resolve non-ordinary files
  970. */
  971. if (sub_req->finfo.filetype != APR_REG)
  972. continue;
  973. /* If it has a handler, we'll pretend it's a CGI script,
  974. * since that's a good indication of the sort of thing it
  975. * might be doing.
  976. */
  977. if (sub_req->handler && !sub_req->content_type) {
  978. ap_set_content_type(sub_req, CGI_MAGIC_TYPE);
  979. }
  980. /*
  981. * mod_mime will _always_ provide us the base name in the
  982. * ap-mime-exception-list, if it processed anything. If
  983. * this list is empty, give up immediately, there was
  984. * nothing interesting. For example, looking at the files
  985. * readme.txt and readme.foo, we will throw away .foo if
  986. * it's an insignificant file (e.g. did not identify a
  987. * language, charset, encoding, content type or handler,)
  988. */
  989. exception_list =
  990. (apr_array_header_t *)apr_table_get(sub_req->notes,
  991. "ap-mime-exceptions-list");
  992. if (!exception_list) {
  993. ap_destroy_sub_req(sub_req);
  994. continue;
  995. }
  996. /* Each unregonized bit better match our base name, in sequence.
  997. * A test of index.html.foo will match index.foo or index.html.foo,
  998. * but it will never transpose the segments and allow index.foo.html
  999. * because that would introduce too much CPU consumption. Better that
  1000. * we don't attempt a many-to-many match here.
  1001. */
  1002. {
  1003. int nexcept = exception_list->nelts;
  1004. char **cur_except = (char**)exception_list->elts;
  1005. char *segstart = filp, *segend, saveend;
  1006. while (*segstart && nexcept) {
  1007. if (!(segend = strchr(segstart, '.')))
  1008. segend = strchr(segstart, '\0');
  1009. saveend = *segend;
  1010. *segend = '\0';
  1011. #ifdef CASE_BLIND_FILESYSTEM
  1012. if (strcasecmp(segstart, *cur_except) == 0) {
  1013. #else
  1014. if (strcmp(segstart, *cur_except) == 0) {
  1015. #endif
  1016. --nexcept;
  1017. ++cur_except;
  1018. }
  1019. if (!saveend)
  1020. break;
  1021. *segend = saveend;
  1022. segstart = segend + 1;
  1023. }
  1024. if (nexcept) {
  1025. /* Something you don't know is, something you don't know...
  1026. */
  1027. ap_destroy_sub_req(sub_req);
  1028. continue;
  1029. }
  1030. }
  1031. /*
  1032. * ###: be warned, the _default_ content type is already
  1033. * picked up here! If we failed the subrequest, or don't
  1034. * know what we are serving, then continue.
  1035. */
  1036. if (sub_req->status != HTTP_OK || (!sub_req->content_type)) {
  1037. ap_destroy_sub_req(sub_req);
  1038. continue;
  1039. }
  1040. /* If it's a map file, we use that instead of the map
  1041. * we're building...
  1042. */
  1043. if (((sub_req->content_type) &&
  1044. !strcmp(sub_req->content_type, MAP_FILE_MAGIC_TYPE)) ||
  1045. ((sub_req->handler) &&
  1046. !strcmp(sub_req->handler, "type-map"))) {
  1047. apr_dir_close(dirp);
  1048. neg->avail_vars->nelts = 0;
  1049. if (sub_req->status != HTTP_OK) {
  1050. return sub_req->status;
  1051. }
  1052. return read_type_map(NULL, neg, sub_req);
  1053. }
  1054. /* Have reasonable variant --- gather notes. */
  1055. mime_info.sub_req = sub_req;
  1056. mime_info.file_name = apr_pstrdup(neg->pool, dirent.name);
  1057. if (sub_req->content_encoding) {
  1058. mime_info.content_encoding = sub_req->content_encoding;
  1059. }
  1060. if (sub_req->content_languages) {
  1061. mime_info.content_languages = sub_req->content_languages;
  1062. }
  1063. get_entry(neg->pool, &accept_info, sub_req->content_type);
  1064. set_mime_fields(&mime_info, &accept_info);
  1065. new_var = apr_array_push(neg->avail_vars);
  1066. memcpy(new_var, (void *) &mime_info, sizeof(var_rec));
  1067. neg->count_multiviews_variants++;
  1068. clean_var_rec(&mime_info);
  1069. }
  1070. apr_dir_close(dirp);
  1071. /* We found some file names that matched. None could be served.
  1072. * Rather than fall out to autoindex or some other mapper, this
  1073. * request must die.
  1074. */
  1075. if (anymatch && !neg->avail_vars->nelts) {
  1076. ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
  1077. "Negotiation: discovered file(s) matching request: %s"
  1078. " (None could be negotiated).",
  1079. r->filename);
  1080. return HTTP_NOT_FOUND;
  1081. }
  1082. set_vlist_validator(r, r);
  1083. /* Sort the variants into a canonical order. The negotiation
  1084. * result sometimes depends on the order of the variants. By
  1085. * sorting the variants into a canonical order, rather than using
  1086. * the order in which readdir() happens to return them, we ensure
  1087. * that the negotiation result will be consistent over filesystem
  1088. * backup/restores and over all mirror sites.
  1089. */
  1090. qsort((void *) neg->avail_vars->elts, neg->avail_vars->nelts,
  1091. sizeof(var_rec), (int (*)(const void *, const void *)) variantsortf);
  1092. return OK;
  1093. }
  1094. /*****************************************************************
  1095. * And now for the code you've been waiting for... actually
  1096. * finding a match to the client's requirements.
  1097. */
  1098. /* Matching MIME types ... the star/star and foo/star commenting conventions
  1099. * are implemented here. (You know what I mean by star/star, but just
  1100. * try mentioning those three characters in a C comment). Using strcmp()
  1101. * is legit, because everything has already been smashed to lowercase.
  1102. *
  1103. * Note also that if we get an exact match on the media type, we update
  1104. * level_matched for use in level_cmp below...
  1105. *
  1106. * We also give a value for mime_stars, which is used later. It should
  1107. * be 1 for star/star, 2 for type/star and 3 for type/subtype.
  1108. */
  1109. static int mime_match(accept_rec *accept_r, var_rec *avail)
  1110. {
  1111. const char *accept_type = accept_r->name;
  1112. const char *avail_type = avail->mime_type;
  1113. int len = strlen(accept_type);
  1114. if (accept_type[0] == '*') { /* Anything matches star/star */
  1115. if (avail->mime_stars < 1) {
  1116. avail->mime_stars = 1;
  1117. }
  1118. return 1;
  1119. }
  1120. else if ((accept_type[len - 1] == '*') &&
  1121. !strncmp(accept_type, avail_type, len - 2)) {
  1122. if (avail->mime_stars < 2) {
  1123. avail->mime_stars = 2;
  1124. }
  1125. return 1;
  1126. }
  1127. else if (!strcmp(accept_type, avail_type)
  1128. || (!strcmp(accept_type, "text/html")
  1129. && (!strcmp(avail_type, INCLUDES_MAGIC_TYPE)
  1130. || !strcmp(avail_type, INCLUDES_MAGIC_TYPE3)))) {
  1131. if (accept_r->level >= avail->level) {
  1132. avail->level_matched = avail->level;
  1133. avail->mime_stars = 3;
  1134. return 1;
  1135. }
  1136. }
  1137. return OK;
  1138. }
  1139. /* This code implements a piece of the tie-breaking algorithm between
  1140. * variants of equal quality. This piece is the treatment of variants
  1141. * of the same base media type, but different levels. What we want to
  1142. * return is the variant at the highest level that the client explicitly
  1143. * claimed to accept.
  1144. *
  1145. * If all the variants available are at a higher level than that, or if
  1146. * the client didn't say anything specific about this media type at all
  1147. * and these variants just got in on a wildcard, we prefer the lowest
  1148. * level, on grounds that that's the one that the client is least likely
  1149. * to choke on.
  1150. *
  1151. * (This is all motivated by treatment of levels in HTML --- we only
  1152. * want to give level 3 to browsers that explicitly ask for it; browsers
  1153. * that don't, including HTTP/0.9 browsers that only get the implicit
  1154. * "Accept: * / *" [space added to avoid confusing cpp --- no, that
  1155. * syntax doesn't really work] should get HTML2 if available).
  1156. *
  1157. * (Note that this code only comes into play when we are choosing among
  1158. * variants of equal quality, where the draft standard gives us a fair
  1159. * bit of leeway about what to do. It ain't specified by the standard;
  1160. * rather, it is a choice made by this server about what to do in cases
  1161. * where the standard does not specify a unique course of action).
  1162. */
  1163. static int level_cmp(var_rec *var1, var_rec *var2)
  1164. {
  1165. /* Levels are only comparable between matching media types */
  1166. if (var1->is_pseudo_html && !var2->is_pseudo_html) {
  1167. return 0;
  1168. }
  1169. if (!var1->is_pseudo_html && strcmp(var1->mime_type, var2->mime_type)) {
  1170. return 0;
  1171. }
  1172. /* The result of the above if statements is that, if we get to
  1173. * here, both variants have the same mime_type or both are
  1174. * pseudo-html.
  1175. */
  1176. /* Take highest level that matched, if either did match. */
  1177. if (var1->level_matched > var2->level_matched) {
  1178. return 1;
  1179. }
  1180. if (var1->level_matched < var2->level_matched) {
  1181. return -1;
  1182. }
  1183. /* Neither matched. Take lowest level, if there's a difference. */
  1184. if (var1->level < var2->level) {
  1185. return 1;
  1186. }
  1187. if (var1->level > var2->level) {
  1188. return -1;
  1189. }
  1190. /* Tied */
  1191. return 0;
  1192. }
  1193. /* Finding languages. The main entry point is set_language_quality()
  1194. * which is called for each variant. It sets two elements in the
  1195. * variant record:
  1196. * language_quality - the 'q' value of the 'best' matching language
  1197. * from Accept-Language: header (HTTP/1.1)
  1198. * lang_index - Non-negotiated language priority, using
  1199. * position of language on the Accept-Language:
  1200. * header, if present, else LanguagePriority
  1201. * directive order.
  1202. *
  1203. * When we do the variant checking for best variant, we use language
  1204. * quality first, and if a tie, lang_index next (this only applies
  1205. * when _not_ using the RVSA/1.0 algorithm). If using the RVSA/1.0
  1206. * algorithm, lang_index is never used.
  1207. *
  1208. * set_language_quality() calls find_lang_index() and find_default_index()
  1209. * to set lang_index.
  1210. */
  1211. static int find_lang_index(apr_array_header_t *accept_langs, char *lang)
  1212. {
  1213. const char **alang;
  1214. int i;
  1215. if (!lang || !accept_langs) {
  1216. return -1;
  1217. }
  1218. alang = (const char **) accept_langs->elts;
  1219. for (i = 0; i < accept_langs->nelts; ++i) {
  1220. if (!strncmp(lang, *alang, strlen(*alang))) {
  1221. return i;
  1222. }
  1223. alang += (accept_langs->elt_size / sizeof(char*));
  1224. }
  1225. return -1;
  1226. }
  1227. /* set_default_lang_quality() sets the quality we apply to variants
  1228. * which have no language assigned to them. If none of the variants
  1229. * have a language, we are not negotiating on language, so all are
  1230. * acceptable, and we set the default q value to 1.0. However if
  1231. * some of the variants have languages, we set this default to 0.0001.
  1232. * The value of this default will be applied to all variants with
  1233. * no explicit language -- which will have the effect of making them
  1234. * acceptable, but only if no variants with an explicit language
  1235. * are acceptable. The default q value set here is assigned to variants
  1236. * with no language type in set_language_quality().
  1237. *
  1238. * Note that if using the RVSA/1.0 algorithm, we don't use this
  1239. * fiddle.
  1240. */
  1241. static void set_default_lang_quality(negotiation_state *neg)
  1242. {
  1243. var_rec *avail_recs = (var_rec *) neg->avail_vars->elts;
  1244. int j;
  1245. if (!neg->dont_fiddle_headers) {
  1246. for (j = 0; j < neg->avail_vars->nelts; ++j) {
  1247. var_rec *variant = &avail_recs[j];
  1248. if (variant->content_languages &&
  1249. variant->content_languages->nelts) {
  1250. neg->default_lang_quality = 0.0001f;
  1251. return;
  1252. }
  1253. }
  1254. }
  1255. neg->default_lang_quality = 1.0f;
  1256. }
  1257. /* Set the language_quality value in the variant record. Also
  1258. * assigns lang_index for ForceLanguagePriority.
  1259. *
  1260. * To find the language_quality value, we look for the 'q' value
  1261. * of the 'best' matching language on the Accept-Language
  1262. * header. The 'best' match is the language on Accept-Language
  1263. * header which matches the language of this variant either fully,
  1264. * or as far as the prefix marker (-). If two or more languages
  1265. * match, use the longest string from the Accept-Language header
  1266. * (see HTTP/1.1 [14.4])
  1267. *
  1268. * When a variant has multiple languages, we find the 'best'
  1269. * match for each variant language tag as above, then select the
  1270. * one with the highest q value. Because both the accept-header
  1271. * and variant can have multiple languages, we now have a hairy
  1272. * loop-within-a-loop here.
  1273. *
  1274. * If the variant has no language and we have no Accept-Language
  1275. * items, leave the quality at 1.0 and return.
  1276. *
  1277. * If the variant has no language, we use the default as set by
  1278. * set_default_lang_quality() (1.0 if we are not negotiating on
  1279. * language, 0.0001 if we are).
  1280. *
  1281. * Following the setting of the language quality, we drop through to
  1282. * set the old 'lang_index'. This is set based on either the order
  1283. * of the languages on the Accept-Language header, or the
  1284. * order on the LanguagePriority directive. This is only used
  1285. * in the negotiation if the language qualities tie.
  1286. */
  1287. static void set_language_quality(negotiation_state *neg, var_rec *variant)
  1288. {
  1289. int forcepriority = neg->conf->forcelangpriority;
  1290. if (forcepriority == FLP_UNDEF) {
  1291. forcepriority = FLP_DEFAULT;
  1292. }
  1293. if (!variant->content_languages || !variant->content_languages->nelts) {
  1294. /* This variant has no content-language, so use the default
  1295. * quality factor for variants with no content-language
  1296. * (previously set by set_default_lang_quality()).
  1297. * Leave the factor alone (it remains at 1.0) when we may not fiddle
  1298. * with the headers.
  1299. */
  1300. if (!neg->dont_fiddle_headers) {
  1301. variant->lang_quality = neg->default_lang_quality;
  1302. }
  1303. if (!neg->accept_langs) {
  1304. return; /* no accept-language header */
  1305. }
  1306. return;
  1307. }
  1308. else {
  1309. /* Variant has one (or more) languages. Look for the best
  1310. * match. We do this by going through each language on the
  1311. * variant description looking for a match on the
  1312. * Accept-Language header. The best match is the longest
  1313. * matching language on the header. The final result is the
  1314. * best q value from all the languages on the variant
  1315. * description.
  1316. */
  1317. if (!neg->accept_langs) {
  1318. /* no accept-language header makes the variant indefinite */
  1319. variant->definite = 0;
  1320. }
  1321. else { /* There is an accept-language with 0 or more items */
  1322. accept_rec *accs = (accept_rec *) neg->accept_langs->elts;
  1323. accept_rec *best = NULL, *star = NULL;
  1324. accept_rec *bestthistag;
  1325. char *lang, *p;
  1326. float fiddle_q = 0.0f;
  1327. int any_match_on_star = 0;
  1328. int i, j;
  1329. apr_size_t alen, longest_lang_range_len;
  1330. for (j = 0; j < variant->content_languages->nelts; ++j) {
  1331. p = NULL;
  1332. bestthistag = NULL;
  1333. longest_lang_range_len = 0;
  1334. alen = 0;
  1335. /* lang is the variant's language-tag, which is the one
  1336. * we are allowed to use the prefix of in HTTP/1.1
  1337. */
  1338. lang = ((char **) (variant->content_languages->elts))[j];
  1339. /* now find the best (i.e. longest) matching
  1340. * Accept-Language header language. We put the best match
  1341. * for this tag in bestthistag. We cannot update the
  1342. * overall best (based on q value) because the best match
  1343. * for this tag is the longest language item on the accept
  1344. * header, not necessarily the highest q.
  1345. */
  1346. for (i = 0; i < neg->accept_langs->nelts; ++i) {
  1347. if (!strcmp(accs[i].name, "*")) {
  1348. if (!star) {
  1349. star = &accs[i];
  1350. }
  1351. continue;
  1352. }
  1353. /* Find language. We match if either the variant
  1354. * language tag exactly matches the language range
  1355. * from the accept header, or a prefix of the variant
  1356. * language tag up to a '-' character matches the
  1357. * whole of the language range in the Accept-Language
  1358. * header. Note that HTTP/1.x allows any number of
  1359. * '-' characters in a tag or range, currently only
  1360. * tags with zero or one '-' characters are defined
  1361. * for general use (see rfc1766).
  1362. *
  1363. * We only use language range in the Accept-Language
  1364. * header the best match for the variant language tag
  1365. * if it is longer than the previous best match.
  1366. */
  1367. alen = strlen(accs[i].name);
  1368. if ((strlen(lang) >= alen) &&
  1369. !strncmp(lang, accs[i].name, alen) &&
  1370. ((lang[alen] == 0) || (lang[alen] == '-')) ) {
  1371. if (alen > longest_lang_range_len) {
  1372. longest_lang_range_len = alen;
  1373. bestthistag = &accs[i];
  1374. }
  1375. }
  1376. if (!bestthistag && !neg->dont_fiddle_headers) {
  1377. /* The next bit is a fiddle. Some browsers might
  1378. * be configured to send more specific language
  1379. * ranges than desirable. For example, an
  1380. * Accept-Language of en-US should never match
  1381. * variants with languages en or en-GB. But US
  1382. * English speakers might pick en-US as their
  1383. * language choice. So this fiddle checks if the
  1384. * language range has a prefix, and if so, it
  1385. * matches variants which match that prefix with a
  1386. * priority of 0.001. So a request for en-US would
  1387. * match variants of types en and en-GB, but at
  1388. * much lower priority than matches of en-US
  1389. * directly, or of any other language listed on
  1390. * the Accept-Language header. Note that this
  1391. * fiddle does not handle multi-level prefixes.
  1392. */
  1393. if ((p = strchr(accs[i].name, '-'))) {
  1394. int plen = p - accs[i].name;
  1395. if (!strncmp(lang, accs[i].name, plen)) {
  1396. fiddle_q = 0.001f;
  1397. }
  1398. }
  1399. }
  1400. }
  1401. /* Finished looking at Accept-Language headers, the best
  1402. * (longest) match is in bestthistag, or NULL if no match
  1403. */
  1404. if (!best ||
  1405. (bestthistag && bestthistag->quality > best->quality)) {
  1406. best = bestthistag;
  1407. }
  1408. /* See if the tag matches on a * in the Accept-Language
  1409. * header. If so, record this fact for later use
  1410. */
  1411. if (!bestthistag && star) {
  1412. any_match_on_star = 1;
  1413. }
  1414. }
  1415. /* If one of the language tags of the variant matched on *, we
  1416. * need to see if its q is better than that of any non-* match
  1417. * on any other tag of the variant. If so the * match takes
  1418. * precedence and the overall match is not definite.
  1419. */
  1420. if ( any_match_on_star &&
  1421. ((best && star->quality > best->quality) ||
  1422. (!best)) ) {
  1423. best = star;
  1424. variant->definite = 0;
  1425. }
  1426. variant->lang_quality = best ? best->quality : fiddle_q;
  1427. }
  1428. }
  1429. /* Handle the ForceDefaultLanguage overrides, based on the best match
  1430. * to LanguagePriority order. The best match is the lowest index of
  1431. * any LanguagePriority match.
  1432. */
  1433. if (((forcepriority & FLP_PREFER)
  1434. && (variant->lang_index < 0))
  1435. || ((forcepriority & FLP_FALLBACK)
  1436. && !variant->lang_quality))
  1437. {
  1438. int bestidx = -1;
  1439. int j;
  1440. for (j = 0; j < variant->content_languages->nelts; ++j)
  1441. {
  1442. /* lang is the variant's language-tag, which is the one
  1443. * we are allowed to use the prefix of in HTTP/1.1
  1444. */
  1445. char *lang = ((char **) (variant->content_languages->elts))[j];
  1446. int idx = -1;
  1447. /* If we wish to fallback or
  1448. * we use our own LanguagePriority index.
  1449. */
  1450. idx = find_lang_index(neg->conf->language_priority, lang);
  1451. if ((idx >= 0) && ((bestidx == -1) || (idx < bestidx))) {
  1452. bestidx = idx;
  1453. }
  1454. }
  1455. if (bestidx >= 0) {
  1456. if (variant->lang_quality) {
  1457. if (forcepriority & FLP_PREFER) {
  1458. variant->lang_index = bestidx;
  1459. }
  1460. }
  1461. else {
  1462. if (forcepriority & FLP_FALLBACK) {
  1463. variant->lang_index = bestidx;
  1464. variant->lang_quality = .0001f;
  1465. variant->definite = 0;
  1466. }
  1467. }
  1468. }
  1469. }
  1470. return;
  1471. }
  1472. /* Determining the content length --- if the map didn't tell us,
  1473. * we have to do a stat() and remember for next time.
  1474. */
  1475. static apr_off_t find_content_length(negotiation_state *neg, var_rec *variant)
  1476. {
  1477. apr_finfo_t statb;
  1478. if (variant->bytes < 0) {
  1479. if ( variant->sub_req
  1480. && (variant->sub_req->finfo.valid & APR_FINFO_SIZE)) {
  1481. variant->bytes = variant->sub_req->finfo.size;
  1482. }
  1483. else {
  1484. char *fullname = ap_make_full_path(neg->pool, neg->dir_name,
  1485. variant->file_name);
  1486. if (apr_stat(&statb, fullname,
  1487. APR_FINFO_SIZE, neg->pool) == APR_SUCCESS) {
  1488. variant->bytes = statb.size;
  1489. }
  1490. }
  1491. }
  1492. return variant->bytes;
  1493. }
  1494. /* For a given variant, find the best matching Accept: header
  1495. * and assign the Accept: header's quality value to the
  1496. * mime_type_quality field of the variant, for later use in
  1497. * determining the best matching variant.
  1498. */
  1499. static void set_accept_quality(negotiation_state *neg, var_rec *variant)
  1500. {
  1501. int i;
  1502. accept_rec *accept_recs;
  1503. float q = 0.0f;
  1504. int q_definite = 1;
  1505. /* if no Accept: header, leave quality alone (will
  1506. * remain at the default value of 1)
  1507. *
  1508. * XXX: This if is currently never true because of the effect of
  1509. * maybe_add_default_accepts().
  1510. */
  1511. if (!neg->accepts) {
  1512. if (variant->mime_type && *variant->mime_type)
  1513. variant->definite = 0;
  1514. return;
  1515. }
  1516. accept_recs = (accept_rec *) neg->accepts->elts;
  1517. /*
  1518. * Go through each of the ranges on the Accept: header,
  1519. * looking for the 'best' match with this variant's
  1520. * content-type. We use the best match's quality
  1521. * value (from the Accept: header) for this variant's
  1522. * mime_type_quality field.
  1523. *
  1524. * The best match is determined like this:
  1525. * type/type is better than type/ * is better than * / *
  1526. * if match is type/type, use the level mime param if available
  1527. */
  1528. for (i = 0; i < neg->accepts->nelts; ++i) {
  1529. accept_rec *type = &accept_recs[i];
  1530. int prev_mime_stars;
  1531. prev_mime_stars = variant->mime_stars;
  1532. if (!mime_match(type, variant)) {
  1533. continue; /* didn't match the content type at all */
  1534. }
  1535. else {
  1536. /* did match - see if there were less or more stars than
  1537. * in previous match
  1538. */
  1539. if (prev_mime_stars == variant->mime_stars) {
  1540. continue; /* more stars => not as good a match */
  1541. }
  1542. }
  1543. /* If we are allowed to mess with the q-values
  1544. * and have no explicit q= parameters in the accept header,
  1545. * make wildcards very low, so we have a low chance
  1546. * of ending up with them if there's something better.
  1547. */
  1548. if (!neg->dont_fiddle_headers && !neg->accept_q &&
  1549. variant->mime_stars == 1) {
  1550. q = 0.01f;
  1551. }
  1552. else if (!neg->dont_fiddle_headers && !neg->accept_q &&
  1553. variant->mime_stars == 2) {
  1554. q = 0.02f;
  1555. }
  1556. else {
  1557. q = type->quality;
  1558. }
  1559. q_definite = (variant->mime_stars == 3);
  1560. }
  1561. variant->mime_type_quality = q;
  1562. variant->definite = variant->definite && q_definite;
  1563. }
  1564. /* For a given variant, find the 'q' value of the charset given
  1565. * on the Accept-Charset line. If no charsets are listed,
  1566. * assume value of '1'.
  1567. */
  1568. static void set_charset_quality(negotiation_state *neg, var_rec *variant)
  1569. {
  1570. int i;
  1571. accept_rec *accept_recs;
  1572. const char *charset = variant->content_charset;
  1573. accept_rec *star = NULL;
  1574. /* if no Accept-Charset: header, leave quality alone (will
  1575. * remain at the default value of 1)
  1576. */
  1577. if (!neg->accept_charsets) {
  1578. if (charset && *charset)
  1579. variant->definite = 0;
  1580. return;
  1581. }
  1582. accept_recs = (accept_rec *) neg->accept_charsets->elts;
  1583. if (charset == NULL || !*charset) {
  1584. /* Charset of variant not known */
  1585. /* if not a text / * type, leave quality alone */
  1586. if (!(!strncmp(variant->mime_type, "text/", 5)
  1587. || !strcmp(variant->mime_type, INCLUDES_MAGIC_TYPE)
  1588. || !strcmp(variant->mime_type, INCLUDES_MAGIC_TYPE3)
  1589. ))
  1590. return;
  1591. /* Don't go guessing if we are in strict header mode,
  1592. * e.g. when running the rvsa, as any guess won't be reflected
  1593. * in the variant list or content-location headers.
  1594. */
  1595. if (neg->dont_fiddle_headers)
  1596. return;
  1597. charset = "iso-8859-1"; /* The default charset for HTTP text types */
  1598. }
  1599. /*
  1600. * Go through each of the items on the Accept-Charset header,
  1601. * looking for a match with this variant's charset. If none
  1602. * match, charset is unacceptable, so set quality to 0.
  1603. */
  1604. for (i = 0; i < neg->accept_charsets->nelts; ++i) {
  1605. accept_rec *type = &accept_recs[i];
  1606. if (!strcmp(type->name, charset)) {
  1607. variant->charset_quality = type->quality;
  1608. return;
  1609. }
  1610. else if (strcmp(type->name, "*") == 0) {
  1611. star = type;
  1612. }
  1613. }
  1614. /* No explicit match */
  1615. if (star) {
  1616. variant->charset_quality = star->quality;
  1617. variant->definite = 0;
  1618. return;
  1619. }
  1620. /* If this variant is in charset iso-8859-1, the default is 1.0 */
  1621. if (strcmp(charset, "iso-8859-1") == 0) {
  1622. variant->charset_quality = 1.0f;
  1623. }
  1624. else {
  1625. variant->charset_quality = 0.0f;
  1626. }
  1627. }
  /* is_identity_encoding is included for back-compat, but does anyone
   * use 7bit, 8bit or binary in their var files??
   *
   * Returns 1 when enc is NULL, empty, "7bit", "8bit" or "binary",
   * and 0 for anything else.
   */
  static int is_identity_encoding(const char *enc)
  {
      if (enc == NULL || *enc == '\0') {
          return 1;
      }

      return strcmp(enc, "7bit") == 0
          || strcmp(enc, "8bit") == 0
          || strcmp(enc, "binary") == 0;
  }
  1636. /*
  1637. * set_encoding_quality determines whether the encoding for a particular
  1638. * variant is acceptable for the user-agent.
  1639. *
  1640. * The rules for encoding are that if the user-agent does not supply
  1641. * any Accept-Encoding header, then all encodings are allowed but a
  1642. * variant with no encoding should be preferred.
  1643. * If there is an empty Accept-Encoding header, then no encodings are
  1644. * acceptable. If there is a non-empty Accept-Encoding header, then
  1645. * any of the listed encodings are acceptable, as well as no encoding
  1646. * unless the "identity" encoding is specifically excluded.
  1647. */
  1648. static void set_encoding_quality(negotiation_state *neg, var_rec *variant)
  1649. {
  1650. accept_rec *accept_recs;
  1651. const char *enc = variant->content_encoding;
  1652. accept_rec *star = NULL;
  1653. float value_if_not_found = 0.0f;
  1654. int i;
  1655. if (!neg->accept_encodings) {
  1656. /* We had no Accept-Encoding header, assume that all
  1657. * encodings are acceptable with a low quality,
  1658. * but we prefer no encoding if available.
  1659. */
  1660. if (!enc || is_identity_encoding(enc))
  1661. variant->encoding_quality = 1.0f;
  1662. else
  1663. variant->encoding_quality = 0.5f;
  1664. return;
  1665. }
  1666. if (!enc || is_identity_encoding(enc)) {
  1667. enc = "identity";
  1668. value_if_not_found = 0.0001f;
  1669. }
  1670. accept_recs = (accept_rec *) neg->accept_encodings->elts;
  1671. /* Go through each of the encodings on the Accept-Encoding: header,
  1672. * looking for a match with our encoding. x- prefixes are ignored.
  1673. */
  1674. if (enc[0] == 'x' && enc[1] == '-') {
  1675. enc += 2;
  1676. }
  1677. for (i = 0; i < neg->accept_encodings->nelts; ++i) {
  1678. char *name = accept_recs[i].name;
  1679. if (name[0] == 'x' && name[1] == '-') {
  1680. name += 2;
  1681. }
  1682. if (!strcmp(name, enc)) {
  1683. variant->encoding_quality = accept_recs[i].quality;
  1684. return;
  1685. }
  1686. if (strcmp(name, "*") == 0) {
  1687. star = &accept_recs[i];
  1688. }
  1689. }
  1690. /* No explicit match */
  1691. if (star) {
  1692. variant->encoding_quality = star->quality;
  1693. return;
  1694. }
  1695. /* Encoding not found on Accept-Encoding: header, so it is
  1696. * _not_ acceptable unless it is the identity (no encoding)
  1697. */
  1698. variant->encoding_quality = value_if_not_found;
  1699. }
  /*************************************************************
   * Outcomes the variant selection algorithm can report
   */
  enum algorithm_results {
      alg_choice = 1,     /* a best variant was found: choose it */
      alg_list = 2        /* no best variant: list the variants */
  };
  1707. /* Below is the 'best_match' function. It returns an int, which has
  1708. * one of the two values alg_choice or alg_list, which give the result
  1709. * of the variant selection algorithm. alg_list means that no best
  1710. * variant was found by the algorithm, alg_choice means that a best
  1711. * variant was found and should be returned. The list/choice
  1712. * terminology comes from TCN (rfc2295), but is used in a more generic
  1713. * way here. The best variant is returned in *pbest. best_match has
  1714. * two possible algorithms for determining the best variant: the
  1715. * RVSA/1.0 algorithm (from RFC2296), and the standard Apache
  1716. * algorithm. These are split out into separate functions
  1717. * (is_variant_better_rvsa() and is_variant_better()). Selection of
  1718. * one is through the neg->use_rvsa flag.
  1719. *
  1720. * The call to best_match also creates full information, including
  1721. * language, charset, etc quality for _every_ variant. This is needed
  1722. * for generating a correct Vary header, and can be used for the
  1723. * Alternates header, the human-readable list responses and 406 errors.
  1724. */
  1725. /* Firstly, the RVSA/1.0 (HTTP Remote Variant Selection Algorithm
  1726. * v1.0) from rfc2296. This is the algorithm that goes together with
  1727. * transparent content negotiation (TCN).
  1728. */
  1729. static int is_variant_better_rvsa(negotiation_state *neg, var_rec *variant,
  1730. var_rec *best, float *p_bestq)
  1731. {
  1732. float bestq = *p_bestq, q;
  1733. /* TCN does not cover negotiation on content-encoding. For now,
  1734. * we ignore the encoding unless it was explicitly excluded.
  1735. */
  1736. if (variant->encoding_quality == 0.0f)
  1737. return 0;
  1738. q = variant->mime_type_quality *
  1739. variant->source_quality *
  1740. variant->charset_quality *
  1741. variant->lang_quality;
  1742. /* RFC 2296 calls for the result to be rounded to 5 decimal places,
  1743. * but we don't do that because it serves no useful purpose other
  1744. * than to ensure that a remote algorithm operates on the same
  1745. * precision as ours. That is silly, since what we obviously want
  1746. * is for the algorithm to operate on the best available precision
  1747. * regardless of who runs it. Since the above calculation may
  1748. * result in significant variance at 1e-12, rounding would be bogus.
  1749. */
  1750. #ifdef NEG_DEBUG
  1751. ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL,
  1752. "Variant: file=%s type=%s lang=%s sourceq=%1.3f "
  1753. "mimeq=%1.3f langq=%1.3f charq=%1.3f encq=%1.3f "
  1754. "q=%1.5f definite=%d",
  1755. (variant->file_name ? variant->file_name : ""),
  1756. (variant->mime_type ? variant->mime_type : ""),
  1757. (variant->content_languages
  1758. ? apr_array_pstrcat(neg->pool, variant->content_languages, ',')
  1759. : ""),
  1760. variant->source_quality,
  1761. variant->mime_type_quality,
  1762. variant->lang_quality,
  1763. variant->charset_quality,
  1764. variant->encoding_quality,
  1765. q,
  1766. variant->definite);
  1767. #endif
  1768. if (q <= 0.0f) {
  1769. return 0;
  1770. }
  1771. if (q > bestq) {
  1772. *p_bestq = q;
  1773. return 1;
  1774. }
  1775. if (q == bestq) {
  1776. /* If the best variant's encoding is of lesser quality than
  1777. * this variant, then we prefer this variant
  1778. */
  1779. if (variant->encoding_quality > best->encoding_quality) {
  1780. *p_bestq = q;
  1781. return 1;
  1782. }
  1783. }
  1784. return 0;
  1785. }
  1786. /* Negotiation algorithm as used by previous versions of Apache
  1787. * (just about).
  1788. */
  1789. static int is_variant_better(negotiation_state *neg, var_rec *variant,
  1790. var_rec *best, float *p_bestq)
  1791. {
  1792. float bestq = *p_bestq, q;
  1793. int levcmp;
  1794. /* For non-transparent negotiation, server can choose how
  1795. * to handle the negotiation. We'll use the following in
  1796. * order: content-type, language, content-type level, charset,
  1797. * content encoding, content length.
  1798. *
  1799. * For each check, we have three possible outcomes:
  1800. * This variant is worse than current best: return 0
  1801. * This variant is better than the current best:
  1802. * assign this variant's q to *p_bestq, and return 1
  1803. * This variant is just as desirable as the current best:
  1804. * drop through to the next test.
  1805. *
  1806. * This code is written in this long-winded way to allow future
  1807. * customisation, either by the addition of additional
  1808. * checks, or to allow the order of the checks to be determined
  1809. * by configuration options (e.g. we might prefer to check
  1810. * language quality _before_ content type).
  1811. */
  1812. /* First though, eliminate this variant if it is not
  1813. * acceptable by type, charset, encoding or language.
  1814. */
  1815. #ifdef NEG_DEBUG
  1816. ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL,
  1817. "Variant: file=%s type=%s lang=%s sourceq=%1.3f "
  1818. "mimeq=%1.3f langq=%1.3f langidx=%d charq=%1.3f encq=%1.3f ",
  1819. (variant->file_name ? variant->file_name : ""),
  1820. (variant->mime_type ? variant->mime_type : ""),
  1821. (variant->content_languages
  1822. ? apr_array_pstrcat(neg->pool, variant->content_languages, ',')
  1823. : ""),
  1824. variant->source_quality,
  1825. variant->mime_type_quality,
  1826. variant->lang_quality,
  1827. variant->lang_index,
  1828. variant->charset_quality,
  1829. variant->encoding_quality);
  1830. #endif
  1831. if (variant->encoding_quality == 0.0f ||
  1832. variant->lang_quality == 0.0f ||
  1833. variant->source_quality == 0.0f ||
  1834. variant->charset_quality == 0.0f ||
  1835. variant->mime_type_quality == 0.0f) {
  1836. return 0; /* don't consider unacceptables */
  1837. }
  1838. q = variant->mime_type_quality * variant->source_quality;
  1839. if (q == 0.0 || q < bestq) {
  1840. return 0;
  1841. }
  1842. if (q > bestq || !best) {
  1843. *p_bestq = q;
  1844. return 1;
  1845. }
  1846. /* language */
  1847. if (variant->lang_quality < best->lang_quality) {
  1848. return 0;
  1849. }
  1850. if (variant->lang_quality > best->lang_quality) {
  1851. *p_bestq = q;
  1852. return 1;
  1853. }
  1854. /* if language qualities were equal, try the LanguagePriority stuff */
  1855. if (best->lang_index != -1 &&
  1856. (variant->lang_index == -1 || variant->lang_index > best->lang_index)) {
  1857. return 0;
  1858. }
  1859. if (variant->lang_index != -1 &&
  1860. (best->lang_index == -1 || variant->lang_index < best->lang_index)) {
  1861. *p_bestq = q;
  1862. return 1;
  1863. }
  1864. /* content-type level (sometimes used with text/html, though we
  1865. * support it on other types too)
  1866. */
  1867. levcmp = level_cmp(variant, best);
  1868. if (levcmp == -1) {
  1869. return 0;
  1870. }
  1871. if (levcmp == 1) {
  1872. *p_bestq = q;
  1873. return 1;
  1874. }
  1875. /* charset */
  1876. if (variant->charset_quality < best->charset_quality) {
  1877. return 0;
  1878. }
  1879. /* If the best variant's charset is ISO-8859-1 and this variant has
  1880. * the same charset quality, then we prefer this variant
  1881. */
  1882. if (variant->charset_quality > best->charset_quality ||
  1883. ((variant->content_charset != NULL &&
  1884. *variant->content_charset != '\0' &&
  1885. strcmp(variant->content_charset, "iso-8859-1") != 0) &&
  1886. (best->content_charset == NULL ||
  1887. *best->content_charset == '\0' ||
  1888. strcmp(best->content_charset, "iso-8859-1") == 0))) {
  1889. *p_bestq = q;
  1890. return 1;
  1891. }
  1892. /* Prefer the highest value for encoding_quality.
  1893. */
  1894. if (variant->encoding_quality < best->encoding_quality) {
  1895. return 0;
  1896. }
  1897. if (variant->encoding_quality > best->encoding_quality) {
  1898. *p_bestq = q;
  1899. return 1;
  1900. }
  1901. /* content length if all else equal */
  1902. if (find_content_length(neg, variant) >= find_content_length(neg, best)) {
  1903. return 0;
  1904. }
  1905. /* ok, to get here means every thing turned out equal, except
  1906. * we have a shorter content length, so use this variant
  1907. */
  1908. *p_bestq = q;
  1909. return 1;
  1910. }
  1911. /* figure out, whether a variant is in a specific language
  1912. * it returns also false, if the variant has no language.
  1913. */
  1914. static int variant_has_language(var_rec *variant, const char *lang)
  1915. {
  1916. int j, max;
  1917. /* fast exit */
  1918. if ( !lang
  1919. || !variant->content_languages
  1920. || !(max = variant->content_languages->nelts)) {
  1921. return 0;
  1922. }
  1923. for (j = 0; j < max; ++j) {
  1924. if (!strcmp(lang,
  1925. ((char **) (variant->content_languages->elts))[j])) {
  1926. return 1;
  1927. }
  1928. }
  1929. return 0;
  1930. }
  1931. /* check for environment variables 'no-gzip' and
  1932. * 'gzip-only-text/html' to get a behaviour similiar
  1933. * to mod_deflate
  1934. */
  1935. static int discard_variant_by_env(var_rec *variant, int discard)
  1936. {
  1937. if ( is_identity_encoding(variant->content_encoding)
  1938. || !strcmp(variant->content_encoding, "identity")) {
  1939. return 0;
  1940. }
  1941. return ( (discard == DISCARD_ALL_ENCODINGS)
  1942. || (discard == DISCARD_ALL_BUT_HTML
  1943. && (!variant->mime_type
  1944. || strncmp(variant->mime_type, "text/html", 9))));
  1945. }
  1946. static int best_match(negotiation_state *neg, var_rec **pbest)
  1947. {
  1948. int j;
  1949. var_rec *best;
  1950. float bestq = 0.0f;
  1951. enum algorithm_results algorithm_result;
  1952. int may_discard = 0;
  1953. var_rec *avail_recs = (var_rec *) neg->avail_vars->elts;
  1954. /* fetch request dependent variables
  1955. * prefer-language: prefer a certain language.
  1956. */
  1957. const char *preferred_language = apr_table_get(neg->r->subprocess_env,
  1958. "prefer-language");
  1959. /* no-gzip: do not send encoded documents */
  1960. if (apr_table_get(neg->r->subprocess_env, "no-gzip")) {
  1961. may_discard = DISCARD_ALL_ENCODINGS;
  1962. }
  1963. /* gzip-only-text/html: send encoded documents only
  1964. * if they are text/html. (no-gzip has a higher priority).
  1965. */
  1966. else {
  1967. const char *env_value = apr_table_get(neg->r->subprocess_env,
  1968. "gzip-only-text/html");
  1969. if (env_value && !strcmp(env_value, "1")) {
  1970. may_discard = DISCARD_ALL_BUT_HTML;
  1971. }
  1972. }
  1973. set_default_lang_quality(neg);
  1974. /*
  1975. * Find the 'best' variant
  1976. * We run the loop possibly twice: if "prefer-language"
  1977. * environment variable is set but we did not find an appropriate
  1978. * best variant. In that case forget the preferred language and
  1979. * negotiate over all variants.
  1980. */
  1981. do {
  1982. best = NULL;
  1983. for (j = 0; j < neg->avail_vars->nelts; ++j) {
  1984. var_rec *variant = &avail_recs[j];
  1985. /* if this variant is encoded somehow and there are special
  1986. * variables set, we do not negotiate it. see above.
  1987. */
  1988. if ( may_discard
  1989. && discard_variant_by_env(variant, may_discard)) {
  1990. continue;
  1991. }
  1992. /* if a language is preferred, but the current variant
  1993. * is not in that language, then drop it for now
  1994. */
  1995. if ( preferred_language
  1996. && !variant_has_language(variant, preferred_language)) {
  1997. continue;
  1998. }
  1999. /* Find all the relevant 'quality' values from the
  2000. * Accept... headers, and store in the variant. This also
  2001. * prepares for sending an Alternates header etc so we need to
  2002. * do it even if we do not actually plan to find a best
  2003. * variant.
  2004. */
  2005. set_accept_quality(neg, variant);
  2006. /* accept the preferred language, even when it's not listed within
  2007. * the Accept-Language header
  2008. */
  2009. if (preferred_language) {
  2010. variant->lang_quality = 1.0f;
  2011. variant->definite = 1;
  2012. }
  2013. else {
  2014. set_language_quality(neg, variant);
  2015. }
  2016. set_encoding_quality(neg, variant);
  2017. set_charset_quality(neg, variant);
  2018. /* Only do variant selection if we may actually choose a
  2019. * variant for the client
  2020. */
  2021. if (neg->may_choose) {
  2022. /* Now find out if this variant is better than the current
  2023. * best, either using the RVSA/1.0 algorithm, or Apache's
  2024. * internal server-driven algorithm. Presumably other
  2025. * server-driven algorithms are possible, and could be
  2026. * implemented here.
  2027. */
  2028. if (neg->use_rvsa) {
  2029. if (is_variant_better_rvsa(neg, variant, best, &bestq)) {
  2030. best = variant;
  2031. }
  2032. }
  2033. else {
  2034. if (is_variant_better(neg, variant, best, &bestq)) {
  2035. best = variant;
  2036. }
  2037. }
  2038. }
  2039. }
  2040. /* We now either have a best variant, or no best variant */
  2041. if (neg->use_rvsa) {
  2042. /* calculate result for RVSA/1.0 algorithm:
  2043. * only a choice response if the best variant has q>0
  2044. * and is definite
  2045. */
  2046. algorithm_result = (best && best->definite) && (bestq > 0) ?
  2047. alg_choice : alg_list;
  2048. }
  2049. else {
  2050. /* calculate result for Apache negotiation algorithm */
  2051. algorithm_result = bestq > 0 ? alg_choice : alg_list;
  2052. }
  2053. /* run the loop again, if the "prefer-language" got no clear result */
  2054. if (preferred_language && (!best || algorithm_result != alg_choice)) {
  2055. preferred_language = NULL;
  2056. continue;
  2057. }
  2058. break;
  2059. } while (1);
  2060. /* Returning a choice response with a non-neighboring variant is a
  2061. * protocol security error in TCN (see rfc2295). We do *not*
  2062. * verify here that the variant and URI are neighbors, even though
  2063. * we may return alg_choice. We depend on the environment (the
  2064. * caller) to only declare the resource transparently negotiable if
  2065. * all variants are neighbors.
  2066. */
  2067. *pbest = best;
  2068. return algorithm_result;
  2069. }
  2070. /* Sets response headers for a negotiated response.
  2071. * neg->is_transparent determines whether a transparently negotiated
  2072. * response or a plain `server driven negotiation' response is
  2073. * created. Applicable headers are Alternates, Vary, and TCN.
  2074. *
  2075. * The Vary header we create is sometimes longer than is required for
  2076. * the correct caching of negotiated results by HTTP/1.1 caches. For
  2077. * example if we have 3 variants x.html, x.ps.en and x.ps.nl, and if
  2078. * the Accept: header assigns a 0 quality to .ps, then the results of
  2079. * the two server-side negotiation algorithms we currently implement
  2080. * will never depend on Accept-Language so we could return `Vary:
  2081. * negotiate, accept' instead of the longer 'Vary: negotiate, accept,
  2082. * accept-language' which the code below will return. A routine for
  2083. * computing the exact minimal Vary header would be a huge pain to code
  2084. * and maintain though, especially because we need to take all possible
  2085. * twiddles in the server-side negotiation algorithms into account.
  2086. */
  2087. static void set_neg_headers(request_rec *r, negotiation_state *neg,
  2088. int alg_result)
  2089. {
  2090. apr_table_t *hdrs;
  2091. var_rec *avail_recs = (var_rec *) neg->avail_vars->elts;
  2092. const char *sample_type = NULL;
  2093. const char *sample_language = NULL;
  2094. const char *sample_encoding = NULL;
  2095. const char *sample_charset = NULL;
  2096. char *lang;
  2097. char *qstr;
  2098. apr_off_t len;
  2099. apr_array_header_t *arr;
  2100. int max_vlist_array = (neg->avail_vars->nelts * 21);
  2101. int first_variant = 1;
  2102. int vary_by_type = 0;
  2103. int vary_by_language = 0;
  2104. int vary_by_charset = 0;
  2105. int vary_by_encoding = 0;
  2106. int j;
  2107. /* In order to avoid O(n^2) memory copies in building Alternates,
  2108. * we preallocate a apr_table_t with the maximum substrings possible,
  2109. * fill it with the variant list, and then concatenate the entire array.
  2110. * Note that if you change the number of substrings pushed, you also
  2111. * need to change the calculation of max_vlist_array above.
  2112. */
  2113. if (neg->send_alternates && neg->avail_vars->nelts)
  2114. arr = apr_array_make(r->pool, max_vlist_array, sizeof(char *));
  2115. else
  2116. arr = NULL;
  2117. /* Put headers into err_headers_out, since send_http_header()
  2118. * outputs both headers_out and err_headers_out.
  2119. */
  2120. hdrs = r->err_headers_out;
  2121. for (j = 0; j < neg->avail_vars->nelts; ++j) {
  2122. var_rec *variant = &avail_recs[j];
  2123. if (variant->content_languages && variant->content_languages->nelts) {
  2124. lang = apr_array_pstrcat(r->pool, variant->content_languages, ',');
  2125. }
  2126. else {
  2127. lang = NULL;
  2128. }
  2129. /* Calculate Vary by looking for any difference between variants */
  2130. if (first_variant) {
  2131. sample_type = variant->mime_type;
  2132. sample_charset = variant->content_charset;
  2133. sample_language = lang;
  2134. sample_encoding = variant->content_encoding;
  2135. }
  2136. else {
  2137. if (!vary_by_type &&
  2138. strcmp(sample_type ? sample_type : "",
  2139. variant->mime_type ? variant->mime_type : "")) {
  2140. vary_by_type = 1;
  2141. }
  2142. if (!vary_by_charset &&
  2143. strcmp(sample_charset ? sample_charset : "",
  2144. variant->content_charset ?
  2145. variant->content_charset : "")) {
  2146. vary_by_charset = 1;
  2147. }
  2148. if (!vary_by_language &&
  2149. strcmp(sample_language ? sample_language : "",
  2150. lang ? lang : "")) {
  2151. vary_by_language = 1;
  2152. }
  2153. if (!vary_by_encoding &&
  2154. strcmp(sample_encoding ? sample_encoding : "",
  2155. variant->content_encoding ?
  2156. variant->content_encoding : "")) {
  2157. vary_by_encoding = 1;
  2158. }
  2159. }
  2160. first_variant = 0;
  2161. if (!neg->send_alternates)
  2162. continue;
  2163. /* Generate the string components for this Alternates entry */
  2164. *((const char **) apr_array_push(arr)) = "{\"";
  2165. *((const char **) apr_array_push(arr)) = ap_escape_path_segment(r->pool, variant->file_name);
  2166. *((const char **) apr_array_push(arr)) = "\" ";
  2167. qstr = (char *) apr_palloc(r->pool, 6);
  2168. apr_snprintf(qstr, 6, "%1.3f", variant->source_quality);
  2169. /* Strip trailing zeros (saves those valuable network bytes) */
  2170. if (qstr[4] == '0') {
  2171. qstr[4] = '\0';
  2172. if (qstr[3] == '0') {
  2173. qstr[3] = '\0';
  2174. if (qstr[2] == '0') {
  2175. qstr[1] = '\0';
  2176. }
  2177. }
  2178. }
  2179. *((const char **) apr_array_push(arr)) = qstr;
  2180. if (variant->mime_type && *variant->mime_type) {
  2181. *((const char **) apr_array_push(arr)) = " {type ";
  2182. *((const char **) apr_array_push(arr)) = variant->mime_type;
  2183. *((const char **) apr_array_push(arr)) = "}";
  2184. }
  2185. if (variant->content_charset && *variant->content_charset) {
  2186. *((const char **) apr_array_push(arr)) = " {charset ";
  2187. *((const char **) apr_array_push(arr)) = variant->content_charset;
  2188. *((const char **) apr_array_push(arr)) = "}";
  2189. }
  2190. if (lang) {
  2191. *((const char **) apr_array_push(arr)) = " {language ";
  2192. *((const char **) apr_array_push(arr)) = lang;
  2193. *((const char **) apr_array_push(arr)) = "}";
  2194. }
  2195. if (variant->content_encoding && *variant->content_encoding) {
  2196. /* Strictly speaking, this is non-standard, but so is TCN */
  2197. *((const char **) apr_array_push(arr)) = " {encoding ";
  2198. *((const char **) apr_array_push(arr)) = variant->content_encoding;
  2199. *((const char **) apr_array_push(arr)) = "}";
  2200. }
  2201. /* Note that the Alternates specification (in rfc2295) does
  2202. * not require that we include {length x}, so we could omit it
  2203. * if determining the length is too expensive. We currently
  2204. * always include it though.
  2205. *
  2206. * If the variant is a CGI script, find_content_length would
  2207. * return the length of the script, not the output it
  2208. * produces, so we check for the presence of a handler and if
  2209. * there is one we don't add a length.
  2210. *
  2211. * XXX: TODO: This check does not detect a CGI script if we
  2212. * get the variant from a type map. This needs to be fixed
  2213. * (without breaking things if the type map specifies a
  2214. * content-length, which currently leads to the correct result).
  2215. */
  2216. if (!(variant->sub_req && variant->sub_req->handler)
  2217. && (len = find_content_length(neg, variant)) >= 0) {
  2218. *((const char **) apr_array_push(arr)) = " {length ";
  2219. *((const char **) apr_array_push(arr)) = apr_off_t_toa(r->pool,
  2220. len);
  2221. *((const char **) apr_array_push(arr)) = "}";
  2222. }
  2223. *((const char **) apr_array_push(arr)) = "}";
  2224. *((const char **) apr_array_push(arr)) = ", "; /* trimmed below */
  2225. }
  2226. if (neg->send_alternates && neg->avail_vars->nelts) {
  2227. arr->nelts--; /* remove last comma */
  2228. apr_table_mergen(hdrs, "Alternates",
  2229. apr_array_pstrcat(r->pool, arr, '\0'));
  2230. }
  2231. if (neg->is_transparent || vary_by_type || vary_by_language ||
  2232. vary_by_language || vary_by_charset || vary_by_encoding) {
  2233. apr_table_mergen(hdrs, "Vary", 2 + apr_pstrcat(r->pool,
  2234. neg->is_transparent ? ", negotiate" : "",
  2235. vary_by_type ? ", accept" : "",
  2236. vary_by_language ? ", accept-language" : "",
  2237. vary_by_charset ? ", accept-charset" : "",
  2238. vary_by_encoding ? ", accept-encoding" : "", NULL));
  2239. }
  2240. if (neg->is_transparent) { /* Create TCN response header */
  2241. apr_table_setn(hdrs, "TCN",
  2242. alg_result == alg_list ? "list" : "choice");
  2243. }
  2244. }
  2245. /**********************************************************************
  2246. *
  2247. * Return an HTML list of variants. This is output as part of the
  2248. * choice response or 406 status body.
  2249. */
  2250. static char *make_variant_list(request_rec *r, negotiation_state *neg)
  2251. {
  2252. apr_array_header_t *arr;
  2253. int i;
  2254. int max_vlist_array = (neg->avail_vars->nelts * 15) + 2;
  2255. /* In order to avoid O(n^2) memory copies in building the list,
  2256. * we preallocate a apr_table_t with the maximum substrings possible,
  2257. * fill it with the variant list, and then concatenate the entire array.
  2258. */
  2259. arr = apr_array_make(r->pool, max_vlist_array, sizeof(char *));
  2260. *((const char **) apr_array_push(arr)) = "Available variants:\n<ul>\n";
  2261. for (i = 0; i < neg->avail_vars->nelts; ++i) {
  2262. var_rec *variant = &((var_rec *) neg->avail_vars->elts)[i];
  2263. const char *filename = variant->file_name ? variant->file_name : "";
  2264. apr_array_header_t *languages = variant->content_languages;
  2265. const char *description = variant->description
  2266. ? variant->description
  2267. : "";
  2268. /* The format isn't very neat, and it would be nice to make
  2269. * the tags human readable (eg replace 'language en' with 'English').
  2270. * Note that if you change the number of substrings pushed, you also
  2271. * need to change the calculation of max_vlist_array above.
  2272. */
  2273. *((const char **) apr_array_push(arr)) = "<li><a href=\"";
  2274. *((const char **) apr_array_push(arr)) = ap_escape_path_segment(r->pool, filename);
  2275. *((const char **) apr_array_push(arr)) = "\">";
  2276. *((const char **) apr_array_push(arr)) = ap_escape_html(r->pool, filename);
  2277. *((const char **) apr_array_push(arr)) = "</a> ";
  2278. *((const char **) apr_array_push(arr)) = description;
  2279. if (variant->mime_type && *variant->mime_type) {
  2280. *((const char **) apr_array_push(arr)) = ", type ";
  2281. *((const char **) apr_array_push(arr)) = variant->mime_type;
  2282. }
  2283. if (languages && languages->nelts) {
  2284. *((const char **) apr_array_push(arr)) = ", language ";
  2285. *((const char **) apr_array_push(arr)) = apr_array_pstrcat(r->pool,
  2286. languages, ',');
  2287. }
  2288. if (variant->content_charset && *variant->content_charset) {
  2289. *((const char **) apr_array_push(arr)) = ", charset ";
  2290. *((const char **) apr_array_push(arr)) = variant->content_charset;
  2291. }
  2292. if (variant->content_encoding) {
  2293. *((const char **) apr_array_push(arr)) = ", encoding ";
  2294. *((const char **) apr_array_push(arr)) = variant->content_encoding;
  2295. }
  2296. *((const char **) apr_array_push(arr)) = "</li>\n";
  2297. }
  2298. *((const char **) apr_array_push(arr)) = "</ul>\n";
  2299. return apr_array_pstrcat(r->pool, arr, '\0');
  2300. }
  2301. static void store_variant_list(request_rec *r, negotiation_state *neg)
  2302. {
  2303. if (r->main == NULL) {
  2304. apr_table_setn(r->notes, "variant-list", make_variant_list(r, neg));
  2305. }
  2306. else {
  2307. apr_table_setn(r->main->notes, "variant-list",
  2308. make_variant_list(r->main, neg));
  2309. }
  2310. }
  2311. /* Called if we got a "Choice" response from the variant selection algorithm.
  2312. * It checks the result of the chosen variant to see if it
  2313. * is itself negotiated (if so, return error HTTP_VARIANT_ALSO_VARIES).
  2314. * Otherwise, add the appropriate headers to the current response.
  2315. */
  2316. static int setup_choice_response(request_rec *r, negotiation_state *neg,
  2317. var_rec *variant)
  2318. {
  2319. request_rec *sub_req;
  2320. const char *sub_vary;
  2321. if (!variant->sub_req) {
  2322. int status;
  2323. sub_req = ap_sub_req_lookup_file(variant->file_name, r, NULL);
  2324. status = sub_req->status;
  2325. if (status != HTTP_OK &&
  2326. !apr_table_get(sub_req->err_headers_out, "TCN")) {
  2327. ap_destroy_sub_req(sub_req);
  2328. return status;
  2329. }
  2330. variant->sub_req = sub_req;
  2331. }
  2332. else {
  2333. sub_req = variant->sub_req;
  2334. }
  2335. /* The variant selection algorithm told us to return a "Choice"
  2336. * response. This is the normal variant response, with
  2337. * some extra headers. First, ensure that the chosen
  2338. * variant did or will not itself engage in transparent negotiation.
  2339. * If not, set the appropriate headers, and fall through to
  2340. * the normal variant handling
  2341. */
  2342. /* This catches the error that a transparent type map selects a
  2343. * transparent multiviews resource as the best variant.
  2344. *
  2345. * XXX: We do not signal an error if a transparent type map
  2346. * selects a _non_transparent multiviews resource as the best
  2347. * variant, because we can generate a legal negotiation response
  2348. * in this case. In this case, the vlist_validator of the
  2349. * nontransparent subrequest will be lost however. This could
  2350. * lead to cases in which a change in the set of variants or the
  2351. * negotiation algorithm of the nontransparent resource is never
  2352. * propagated up to a HTTP/1.1 cache which interprets Vary. To be
  2353. * completely on the safe side we should return HTTP_VARIANT_ALSO_VARIES
  2354. * for this type of recursive negotiation too.
  2355. */
  2356. if (neg->is_transparent &&
  2357. apr_table_get(sub_req->err_headers_out, "TCN")) {
  2358. return HTTP_VARIANT_ALSO_VARIES;
  2359. }
  2360. /* This catches the error that a transparent type map recursively
  2361. * selects, as the best variant, another type map which itself
  2362. * causes transparent negotiation to be done.
  2363. *
  2364. * XXX: Actually, we catch this error by catching all cases of
  2365. * type map recursion. There are some borderline recursive type
  2366. * map arrangements which would not produce transparent
  2367. * negotiation protocol errors or lack of cache propagation
  2368. * problems, but such arrangements are very hard to detect at this
  2369. * point in the control flow, so we do not bother to single them
  2370. * out.
  2371. *
  2372. * Recursive type maps imply a recursive arrangement of negotiated
  2373. * resources which is visible to outside clients, and this is not
  2374. * supported by the transparent negotiation caching protocols, so
  2375. * if we are to have generic support for recursive type maps, we
  2376. * have to create some configuration setting which makes all type
  2377. * maps non-transparent when recursion is enabled. Also, if we
  2378. * want recursive type map support which ensures propagation of
  2379. * type map changes into HTTP/1.1 caches that handle Vary, we
  2380. * would have to extend the current mechanism for generating
  2381. * variant list validators.
  2382. */
  2383. if (sub_req->handler && strcmp(sub_req->handler, "type-map") == 0) {
  2384. return HTTP_VARIANT_ALSO_VARIES;
  2385. }
  2386. /* This adds an appropriate Variant-Vary header if the subrequest
  2387. * is a multiviews resource.
  2388. *
  2389. * XXX: TODO: Note that this does _not_ handle any Vary header
  2390. * returned by a CGI if sub_req is a CGI script, because we don't
  2391. * see that Vary header yet at this point in the control flow.
  2392. * This won't cause any cache consistency problems _unless_ the
  2393. * CGI script also returns a Cache-Control header marking the
  2394. * response as cachable. This needs to be fixed, also there are
  2395. * problems if a CGI returns an Etag header which also need to be
  2396. * fixed.
  2397. */
  2398. if ((sub_vary = apr_table_get(sub_req->err_headers_out, "Vary")) != NULL) {
  2399. apr_table_setn(r->err_headers_out, "Variant-Vary", sub_vary);
  2400. /* Move the subreq Vary header into the main request to
  2401. * prevent having two Vary headers in the response, which
  2402. * would be legal but strange.
  2403. */
  2404. apr_table_setn(r->err_headers_out, "Vary", sub_vary);
  2405. apr_table_unset(sub_req->err_headers_out, "Vary");
  2406. }
  2407. apr_table_setn(r->err_headers_out, "Content-Location",
  2408. ap_escape_path_segment(r->pool, variant->file_name));
  2409. set_neg_headers(r, neg, alg_choice); /* add Alternates and Vary */
  2410. /* Still to do by caller: add Expires */
  2411. return 0;
  2412. }
  2413. /****************************************************************
  2414. *
  2415. * Executive...
  2416. */
  2417. static int do_negotiation(request_rec *r, negotiation_state *neg,
  2418. var_rec **bestp, int prefer_scripts)
  2419. {
  2420. var_rec *avail_recs = (var_rec *) neg->avail_vars->elts;
  2421. int alg_result; /* result of variant selection algorithm */
  2422. int res;
  2423. int j;
  2424. /* Decide if resource is transparently negotiable */
  2425. /* GET or HEAD? (HEAD has same method number as GET) */
  2426. if (r->method_number == M_GET) {
  2427. /* maybe this should be configurable, see also the comment
  2428. * about recursive type maps in setup_choice_response()
  2429. */
  2430. neg->is_transparent = 1;
  2431. /* We can't be transparent if we are a map file in the middle
  2432. * of the request URI.
  2433. */
  2434. if (r->path_info && *r->path_info)
  2435. neg->is_transparent = 0;
  2436. for (j = 0; j < neg->avail_vars->nelts; ++j) {
  2437. var_rec *variant = &avail_recs[j];
  2438. /* We can't be transparent, because of internal
  2439. * assumptions in best_match(), if there is a
  2440. * non-neighboring variant. We can have a non-neighboring
  2441. * variant when processing a type map.
  2442. */
  2443. if (ap_strchr_c(variant->file_name, '/'))
  2444. neg->is_transparent = 0;
  2445. /* We can't be transparent, because of the behavior
  2446. * of variant typemap bodies.
  2447. */
  2448. if (variant->body) {
  2449. neg->is_transparent = 0;
  2450. }
  2451. }
  2452. }
  2453. if (neg->is_transparent) {
  2454. parse_negotiate_header(r, neg);
  2455. }
  2456. else { /* configure negotiation on non-transparent resource */
  2457. neg->may_choose = 1;
  2458. }
  2459. maybe_add_default_accepts(neg, prefer_scripts);
  2460. alg_result = best_match(neg, bestp);
  2461. /* alg_result is one of
  2462. * alg_choice: a best variant is chosen
  2463. * alg_list: no best variant is chosen
  2464. */
  2465. if (alg_result == alg_list) {
  2466. /* send a list response or HTTP_NOT_ACCEPTABLE error response */
  2467. neg->send_alternates = 1; /* always include Alternates header */
  2468. set_neg_headers(r, neg, alg_result);
  2469. store_variant_list(r, neg);
  2470. if (neg->is_transparent && neg->ua_supports_trans) {
  2471. /* XXX todo: expires? cachability? */
  2472. /* Some HTTP/1.0 clients are known to choke when they get
  2473. * a 300 (multiple choices) response without a Location
  2474. * header. However the 300 code response we are are about
  2475. * to generate will only reach 1.0 clients which support
  2476. * transparent negotiation, and they should be OK. The
  2477. * response should never reach older 1.0 clients, even if
  2478. * we have CacheNegotiatedDocs enabled, because no 1.0
  2479. * proxy cache (we know of) will cache and return 300
  2480. * responses (they certainly won't if they conform to the
  2481. * HTTP/1.0 specification).
  2482. */
  2483. return HTTP_MULTIPLE_CHOICES;
  2484. }
  2485. if (!*bestp) {
  2486. ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
  2487. "no acceptable variant: %s", r->filename);
  2488. return HTTP_NOT_ACCEPTABLE;
  2489. }
  2490. }
  2491. /* Variant selection chose a variant */
  2492. /* XXX todo: merge the two cases in the if statement below */
  2493. if (neg->is_transparent) {
  2494. if ((res = setup_choice_response(r, neg, *bestp)) != 0) {
  2495. return res; /* return if error */
  2496. }
  2497. }
  2498. else {
  2499. set_neg_headers(r, neg, alg_result);
  2500. }
  2501. /* Make sure caching works - Vary should handle HTTP/1.1, but for
  2502. * HTTP/1.0, we can't allow caching at all.
  2503. */
  2504. /* XXX: Note that we only set r->no_cache to 1, which causes
  2505. * Expires: <now> to be added, when responding to a HTTP/1.0
  2506. * client. If we return the response to a 1.1 client, we do not
  2507. * add Expires <now>, because doing so would degrade 1.1 cache
  2508. * performance by preventing re-use of the response without prior
  2509. * revalidation. On the other hand, if the 1.1 client is a proxy
  2510. * which was itself contacted by a 1.0 client, or a proxy cache
  2511. * which can be contacted later by 1.0 clients, then we currently
  2512. * rely on this 1.1 proxy to add the Expires: <now> when it
  2513. * forwards the response.
  2514. *
  2515. * XXX: TODO: Find out if the 1.1 spec requires proxies and
  2516. * tunnels to add Expires: <now> when forwarding the response to
  2517. * 1.0 clients. I (kh) recall it is rather vague on this point.
  2518. * Testing actual 1.1 proxy implementations would also be nice. If
  2519. * Expires: <now> is not added by proxies then we need to always
  2520. * include Expires: <now> ourselves to ensure correct caching, but
  2521. * this would degrade HTTP/1.1 cache efficiency unless we also add
  2522. * Cache-Control: max-age=N, which we currently don't.
  2523. *
  2524. * Roy: No, we are not going to screw over HTTP future just to
  2525. * ensure that people who can't be bothered to upgrade their
  2526. * clients will always receive perfect server-side negotiation.
  2527. * Hell, those clients are sending bogus accept headers anyway.
  2528. *
  2529. * Manual setting of cache-control/expires always overrides this
  2530. * automated kluge, on purpose.
  2531. */
  2532. if ((!do_cache_negotiated_docs(r->server)
  2533. && (r->proto_num < HTTP_VERSION(1,1)))
  2534. && neg->count_multiviews_variants != 1) {
  2535. r->no_cache = 1;
  2536. }
  2537. return OK;
  2538. }
  2539. static int handle_map_file(request_rec *r)
  2540. {
  2541. negotiation_state *neg;
  2542. apr_file_t *map;
  2543. var_rec *best;
  2544. int res;
  2545. char *udir;
  2546. const char *new_req;
  2547. if(strcmp(r->handler,MAP_FILE_MAGIC_TYPE) && strcmp(r->handler,"type-map"))
  2548. return DECLINED;
  2549. neg = parse_accept_headers(r);
  2550. if ((res = read_type_map(&map, neg, r))) {
  2551. return res;
  2552. }
  2553. res = do_negotiation(r, neg, &best, 0);
  2554. if (res != 0) return res;
  2555. if (best->body)
  2556. {
  2557. conn_rec *c = r->connection;
  2558. apr_bucket_brigade *bb;
  2559. apr_bucket *e;
  2560. ap_allow_standard_methods(r, REPLACE_ALLOW, M_GET, M_OPTIONS,
  2561. M_POST, -1);
  2562. /* XXX: ?
  2563. * if (r->method_number == M_OPTIONS) {
  2564. * return ap_send_http_options(r);
  2565. *}
  2566. */
  2567. if (r->method_number != M_GET && r->method_number != M_POST) {
  2568. return HTTP_METHOD_NOT_ALLOWED;
  2569. }
  2570. /* ### These may be implemented by adding some 'extra' info
  2571. * of the file offset onto the etag
  2572. * ap_update_mtime(r, r->finfo.mtime);
  2573. * ap_set_last_modified(r);
  2574. * ap_set_etag(r);
  2575. */
  2576. apr_table_setn(r->headers_out, "Accept-Ranges", "bytes");
  2577. ap_set_content_length(r, best->bytes);
  2578. /* set MIME type and charset as negotiated */
  2579. if (best->mime_type && *best->mime_type) {
  2580. if (best->content_charset && *best->content_charset) {
  2581. ap_set_content_type(r, apr_pstrcat(r->pool,
  2582. best->mime_type,
  2583. "; charset=",
  2584. best->content_charset,
  2585. NULL));
  2586. }
  2587. else {
  2588. ap_set_content_type(r, apr_pstrdup(r->pool, best->mime_type));
  2589. }
  2590. }
  2591. /* set Content-language(s) as negotiated */
  2592. if (best->content_languages && best->content_languages->nelts) {
  2593. r->content_languages = apr_array_copy(r->pool,
  2594. best->content_languages);
  2595. }
  2596. /* set Content-Encoding as negotiated */
  2597. if (best->content_encoding && *best->content_encoding) {
  2598. r->content_encoding = apr_pstrdup(r->pool,
  2599. best->content_encoding);
  2600. }
  2601. if ((res = ap_meets_conditions(r)) != OK) {
  2602. return res;
  2603. }
  2604. if ((res = ap_discard_request_body(r)) != OK) {
  2605. return res;
  2606. }
  2607. bb = apr_brigade_create(r->pool, c->bucket_alloc);
  2608. e = apr_bucket_file_create(map, best->body,
  2609. (apr_size_t)best->bytes, r->pool,
  2610. c->bucket_alloc);
  2611. APR_BRIGADE_INSERT_TAIL(bb, e);
  2612. e = apr_bucket_eos_create(c->bucket_alloc);
  2613. APR_BRIGADE_INSERT_TAIL(bb, e);
  2614. return ap_pass_brigade(r->output_filters, bb);
  2615. }
  2616. if (r->path_info && *r->path_info) {
  2617. /* remove any path_info from the end of the uri before trying
  2618. * to change the filename. r->path_info from the original
  2619. * request is passed along on the redirect.
  2620. */
  2621. r->uri[ap_find_path_info(r->uri, r->path_info)] = '\0';
  2622. }
  2623. udir = ap_make_dirstr_parent(r->pool, r->uri);
  2624. udir = ap_escape_uri(r->pool, udir);
  2625. if (r->args) {
  2626. if (r->path_info) {
  2627. new_req = apr_pstrcat(r->pool, udir, best->file_name,
  2628. r->path_info, "?", r->args, NULL);
  2629. }
  2630. else {
  2631. new_req = apr_pstrcat(r->pool, udir, best->file_name,
  2632. "?", r->args, NULL);
  2633. }
  2634. }
  2635. else {
  2636. new_req = apr_pstrcat(r->pool, udir, best->file_name,
  2637. r->path_info, NULL);
  2638. }
  2639. ap_internal_redirect(new_req, r);
  2640. return OK;
  2641. }
  2642. static int handle_multi(request_rec *r)
  2643. {
  2644. negotiation_state *neg;
  2645. var_rec *best, *avail_recs;
  2646. request_rec *sub_req;
  2647. int res;
  2648. int j;
  2649. if (r->finfo.filetype != APR_NOFILE
  2650. || !(ap_allow_options(r) & OPT_MULTI)) {
  2651. return DECLINED;
  2652. }
  2653. neg = parse_accept_headers(r);
  2654. if ((res = read_types_multi(neg))) {
  2655. return_from_multi:
  2656. /* free all allocated memory from subrequests */
  2657. avail_recs = (var_rec *) neg->avail_vars->elts;
  2658. for (j = 0; j < neg->avail_vars->nelts; ++j) {
  2659. var_rec *variant = &avail_recs[j];
  2660. if (variant->sub_req) {
  2661. ap_destroy_sub_req(variant->sub_req);
  2662. }
  2663. }
  2664. return res;
  2665. }
  2666. if (neg->avail_vars->nelts == 0) {
  2667. return DECLINED;
  2668. }
  2669. res = do_negotiation(r, neg, &best,
  2670. (r->method_number != M_GET) || r->args ||
  2671. (r->path_info && *r->path_info));
  2672. if (res != 0)
  2673. goto return_from_multi;
  2674. if (!(sub_req = best->sub_req)) {
  2675. /* We got this out of a map file, so we don't actually have
  2676. * a sub_req structure yet. Get one now.
  2677. */
  2678. sub_req = ap_sub_req_lookup_file(best->file_name, r, NULL);
  2679. if (sub_req->status != HTTP_OK) {
  2680. res = sub_req->status;
  2681. ap_destroy_sub_req(sub_req);
  2682. goto return_from_multi;
  2683. }
  2684. }
  2685. if (sub_req->args == NULL) {
  2686. sub_req->args = r->args;
  2687. }
  2688. /* now do a "fast redirect" ... promotes the sub_req into the main req */
  2689. ap_internal_fast_redirect(sub_req, r);
  2690. /* give no advise for time on this subrequest. Perhaps we
  2691. * should tally the last mtime amoung all variants, and date
  2692. * the most recent, but that could confuse the proxies.
  2693. */
  2694. r->mtime = 0;
  2695. /* clean up all but our favorite variant, since that sub_req
  2696. * is now merged into the main request!
  2697. */
  2698. avail_recs = (var_rec *) neg->avail_vars->elts;
  2699. for (j = 0; j < neg->avail_vars->nelts; ++j) {
  2700. var_rec *variant = &avail_recs[j];
  2701. if (variant != best && variant->sub_req) {
  2702. ap_destroy_sub_req(variant->sub_req);
  2703. }
  2704. }
  2705. return OK;
  2706. }
  2707. /**********************************************************************
  2708. * There is a problem with content-encoding, as some clients send and
  2709. * expect an x- token (e.g. x-gzip) while others expect the plain token
  2710. * (i.e. gzip). To try and deal with this as best as possible we do
  2711. * the following: if the client sent an Accept-Encoding header and it
  2712. * contains a plain token corresponding to the content encoding of the
  2713. * response, then set content encoding using the plain token. Else if
  2714. * the A-E header contains the x- token use the x- token in the C-E
  2715. * header. Else don't do anything.
  2716. *
  2717. * Note that if no A-E header was sent, or it does not contain a token
  2718. * compatible with the final content encoding, then the token in the
  2719. * C-E header will be whatever was specified in the AddEncoding
  2720. * directive.
  2721. */
  2722. static int fix_encoding(request_rec *r)
  2723. {
  2724. const char *enc = r->content_encoding;
  2725. char *x_enc = NULL;
  2726. apr_array_header_t *accept_encodings;
  2727. accept_rec *accept_recs;
  2728. int i;
  2729. if (!enc || !*enc) {
  2730. return DECLINED;
  2731. }
  2732. if (enc[0] == 'x' && enc[1] == '-') {
  2733. enc += 2;
  2734. }
  2735. if ((accept_encodings = do_header_line(r->pool,
  2736. apr_table_get(r->headers_in, "Accept-Encoding"))) == NULL) {
  2737. return DECLINED;
  2738. }
  2739. accept_recs = (accept_rec *) accept_encodings->elts;
  2740. for (i = 0; i < accept_encodings->nelts; ++i) {
  2741. char *name = accept_recs[i].name;
  2742. if (!strcmp(name, enc)) {
  2743. r->content_encoding = name;
  2744. return OK;
  2745. }
  2746. if (name[0] == 'x' && name[1] == '-' && !strcmp(name+2, enc)) {
  2747. x_enc = name;
  2748. }
  2749. }
  2750. if (x_enc) {
  2751. r->content_encoding = x_enc;
  2752. return OK;
  2753. }
  2754. return DECLINED;
  2755. }
  2756. static void register_hooks(apr_pool_t *p)
  2757. {
  2758. ap_hook_fixups(fix_encoding,NULL,NULL,APR_HOOK_MIDDLE);
  2759. ap_hook_type_checker(handle_multi,NULL,NULL,APR_HOOK_FIRST);
  2760. ap_hook_handler(handle_map_file,NULL,NULL,APR_HOOK_MIDDLE);
  2761. }
  2762. module AP_MODULE_DECLARE_DATA negotiation_module =
  2763. {
  2764. STANDARD20_MODULE_STUFF,
  2765. create_neg_dir_config, /* dir config creator */
  2766. merge_neg_dir_configs, /* dir merger --- default is to override */
  2767. NULL, /* server config */
  2768. NULL, /* merge server config */
  2769. negotiation_cmds, /* command apr_table_t */
  2770. register_hooks /* register hooks */
  2771. };