// PersonalityProvider.jsm (13 KB)
  1. /* This Source Code Form is subject to the terms of the Mozilla Public
  2. * License, v. 2.0. If a copy of the MPL was not distributed with this
  3. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  4. "use strict";
  5. const {RemoteSettings} = ChromeUtils.import("resource://services-settings/remote-settings.js");
  6. const {actionCreators: ac} = ChromeUtils.import("resource://activity-stream/common/Actions.jsm");
  7. ChromeUtils.defineModuleGetter(this, "perfService", "resource://activity-stream/common/PerfService.jsm");
  8. const {NaiveBayesTextTagger} = ChromeUtils.import("resource://activity-stream/lib/NaiveBayesTextTagger.jsm");
  9. const {NmfTextTagger} = ChromeUtils.import("resource://activity-stream/lib/NmfTextTagger.jsm");
  10. const {RecipeExecutor} = ChromeUtils.import("resource://activity-stream/lib/RecipeExecutor.jsm");
  11. ChromeUtils.defineModuleGetter(this, "NewTabUtils",
  12. "resource://gre/modules/NewTabUtils.jsm");
  13. const {Services} = ChromeUtils.import("resource://gre/modules/Services.jsm");
  14. const {XPCOMUtils} = ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
  15. ChromeUtils.defineModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
  16. XPCOMUtils.defineLazyGlobalGetters(this, ["fetch"]);
  17. XPCOMUtils.defineLazyGetter(this, "gTextDecoder", () => new TextDecoder());
  // Lazily defined `baseAttachmentsURL`. The getter callback is async, so the
  // value is a promise that consumers must await. It asks the configured
  // remote settings server for its info document and yields the attachments
  // base URL advertised under `capabilities.attachments.base_url`.
  XPCOMUtils.defineLazyGetter(this, "baseAttachmentsURL", async () => {
    const settingsServer = Services.prefs.getCharPref("services.settings.server");
    const response = await fetch(`${settingsServer}/`, {credentials: "omit"});
    const serverInfo = await response.json();
    return serverInfo.capabilities.attachments.base_url;
  });

  // Local directory (inside the local profile dir) where downloaded
  // attachments are stored.
  const PERSONALITY_PROVIDER_DIR = OS.Path.join(
    OS.Constants.Path.localProfileDir,
    "personality-provider"
  );

  // Names of the remote settings collections holding the recipe and the models.
  const RECIPE_NAME = "personality-provider-recipe";
  const MODELS_NAME = "personality-provider-models";
  /**
   * Hashes a string with SHA-256 and returns the digest as a lowercase
   * hexadecimal string (two hex digits per digest byte).
   *
   * @param {string} aStr The text to hash.
   * @returns {string} Hex encoded SHA-256 digest.
   */
  function getHash(aStr) {
    const cryptoHash = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash);
    cryptoHash.init(Ci.nsICryptoHash.SHA256);

    const inputStream = Cc["@mozilla.org/io/string-input-stream;1"].createInstance(Ci.nsIStringInputStream);
    inputStream.data = aStr;
    cryptoHash.updateFromStream(inputStream, -1);

    // `false` asks for the raw digest rather than a base64 encoded one.
    const rawDigest = cryptoHash.finish(false);

    // Render each digest byte as exactly two hex digits.
    const hexPairs = [...rawDigest].map((unused, position) => {
      const byteValue = rawDigest.charCodeAt(position);
      return ("0" + byteValue.toString(16)).slice(-2);
    });
    return hexPairs.join("").toLowerCase();
  }
  /**
   * V2 provider builds and ranks an interest profile (also called an "interest vector") off the browse history.
   * This allows Firefox to classify pages into topics, by examining the text found on the page.
   * It does this by looking at the history text content, title, and description.
   */
  this.PersonalityProvider = class PersonalityProvider {
    /**
     * Stores the configuration and subscribes to "sync" events of the recipe
     * and models remote settings collections.
     *
     * @param timeSegments Stored as-is and returned by getAffinities().
     * @param parameterSets Stored as-is and returned by getAffinities().
     * @param maxHistoryQueryResults Stored as-is and returned by getAffinities().
     * @param version Stored as-is and returned by getAffinities().
     * @param {object} [scores] Previously computed state; `interestConfig` and
     *        `interestVector` are reused by init() when present.
     * @param {object} [v2Params] Optional `dispatch` function (defaults to a
     *        no-op) and `modelKeys` list of record keys of the models to use.
     */
    constructor(
      timeSegments,
      parameterSets,
      maxHistoryQueryResults,
      version,
      scores,
      v2Params) {
      this.v2Params = v2Params || {};
      this.dispatch = this.v2Params.dispatch || (() => {});
      this.modelKeys = this.v2Params.modelKeys;
      this.timeSegments = timeSegments;
      this.parameterSets = parameterSets;
      this.maxHistoryQueryResults = maxHistoryQueryResults;
      this.version = version;
      this.scores = scores || {};
      this.interestConfig = this.scores.interestConfig;
      this.interestVector = this.scores.interestVector;
      // Bound once so the same function can be handed to every collection.
      this.onSync = this.onSync.bind(this);
      this.setupSyncAttachment(RECIPE_NAME);
      this.setupSyncAttachment(MODELS_NAME);
    }

    /**
     * Handler for remote settings "sync" events. Deletes the local files of
     * removed records and of the old versions of updated records, then
     * downloads the attachments of created records and of the new versions of
     * updated records.
     *
     * @param {object} event Sync event carrying `data.created`, `data.updated`
     *        (entries with `old` and `new` records) and `data.deleted`.
     */
    async onSync(event) {
      const {
        data: {created, updated, deleted},
      } = event;

      // Remove every removed attachment.
      const toRemove = deleted.concat(updated.map(u => u.old));
      await Promise.all(toRemove.map(record => this.deleteAttachment(record)));

      // Download every new/updated attachment.
      const toDownload = created.concat(updated.map(u => u.new));
      await Promise.all(toDownload.map(record => this.maybeDownloadAttachment(record)));
    }

    /**
     * Registers onSync as the "sync" listener of a remote settings collection.
     *
     * @param {string} collection Name of the remote settings collection.
     */
    setupSyncAttachment(collection) {
      RemoteSettings(collection).on("sync", this.onSync);
    }

    /**
     * Downloads the attachment to disk assuming the dir already exists
     * and any existing files matching the filename are clobbered.
     * A non-OK HTTP response is reported through Cu.reportError and nothing
     * is written.
     *
     * @param {object} record Record whose `attachment` has `location` and `filename`.
     */
    async _downloadAttachment(record) {
      const {attachment: {location, filename}} = record;
      const remoteFilePath = (await baseAttachmentsURL) + location;
      const localFilePath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
      const headers = new Headers();
      headers.set("Accept-Encoding", "gzip");
      const resp = await fetch(remoteFilePath, {headers, credentials: "omit"});
      if (!resp.ok) {
        Cu.reportError(`Failed to fetch ${remoteFilePath}: ${resp.status}`);
        return;
      }
      const buffer = await resp.arrayBuffer();
      const bytes = new Uint8Array(buffer);
      await OS.File.writeAtomic(localFilePath, bytes, {tmpPath: `${localFilePath}.tmp`});
    }

    /**
     * Attempts to download the attachment, but only if it doesn't already exist.
     * A local copy counts as existing when the file is present, its size equals
     * the record's `size` and its SHA-256 hash equals the record's `hash`.
     * At most `retries` downloads are attempted; the file written by the last
     * attempt is not verified again.
     *
     * @param {object} record Record whose `attachment` has `filename`, `hash` and `size`.
     * @param {number} [retries=3] Maximum number of download attempts.
     */
    async maybeDownloadAttachment(record, retries = 3) {
      const {attachment: {filename, hash, size}} = record;
      await OS.File.makeDir(PERSONALITY_PROVIDER_DIR);
      const localFilePath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);

      for (let attempt = 0; attempt < retries; attempt++) {
        // Cheapest checks first; the hash is only computed when the file
        // exists and has the expected size.
        const isValid =
          await OS.File.exists(localFilePath) &&
          (await OS.File.stat(localFilePath)).size === size &&
          getHash(await this._getFileStr(localFilePath)) === hash;
        if (isValid) {
          return;
        }
        await this._downloadAttachment(record);
      }
    }

    /**
     * Removes the local file of a record's attachment and then asks for the
     * storage directory to be removed via OS.File.removeEmptyDir.
     *
     * @param {object} record Record whose `attachment` has a `filename`.
     * @returns {Promise} The promise returned by OS.File.removeEmptyDir.
     */
    async deleteAttachment(record) {
      const {attachment: {filename}} = record;
      await OS.File.makeDir(PERSONALITY_PROVIDER_DIR);
      const path = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);

      await OS.File.remove(path, {ignoreAbsent: true});
      return OS.File.removeEmptyDir(PERSONALITY_PROVIDER_DIR, {ignoreAbsent: true});
    }

    /**
     * Gets contents of the attachment if it already exists on file,
     * and if not attempts to download it.
     *
     * @param {object} record Record whose `attachment` has a `filename`.
     * @returns {Promise<object>} The parsed JSON content of the attachment, or
     *          an empty object when downloading, reading or parsing fails
     *          (the failure is reported through Cu.reportError).
     */
    async getAttachment(record) {
      const {attachment: {filename}} = record;
      const filepath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);

      try {
        await this.maybeDownloadAttachment(record);
        return JSON.parse(await this._getFileStr(filepath));
      } catch (error) {
        Cu.reportError(`Failed to load ${filepath}: ${error.message}`);
      }
      return {};
    }

    // A helper function to read and decode a file, it isn't a stand alone function.
    // If you use this, ensure you check the file exists and you have a try catch.
    async _getFileStr(filepath) {
      const binaryData = await OS.File.read(filepath);
      return gTextDecoder.decode(binaryData);
    }

    /**
     * Prepares the provider: resolves the recipe, builds the recipe executor
     * and computes the interest vector, reusing `interestConfig` and
     * `interestVector` when they were supplied through `scores`. When a step
     * yields nothing, the matching *_ERROR perf event is dispatched and
     * initialization stops without calling `callback`.
     *
     * @param {Function} [callback] Called without arguments once initialized.
     */
    async init(callback) {
      const perfStart = perfService.absNow();
      this.interestConfig = this.interestConfig || await this.getRecipe();
      if (!this.interestConfig) {
        this.dispatch(ac.PerfEvent({event: "PERSONALIZATION_V2_GET_RECIPE_ERROR"}));
        return;
      }
      this.recipeExecutor = await this.generateRecipeExecutor();
      if (!this.recipeExecutor) {
        this.dispatch(ac.PerfEvent({event: "PERSONALIZATION_V2_GENERATE_RECIPE_EXECUTOR_ERROR"}));
        return;
      }
      this.interestVector = this.interestVector || await this.createInterestVector();
      if (!this.interestVector) {
        this.dispatch(ac.PerfEvent({event: "PERSONALIZATION_V2_CREATE_INTEREST_VECTOR_ERROR"}));
        return;
      }

      this.dispatch(ac.PerfEvent({
        event: "PERSONALIZATION_V2_TOTAL_DURATION",
        value: Math.round(perfService.absNow() - perfStart),
      }));

      this.initialized = true;
      if (callback) {
        callback();
      }
    }

    /**
     * Loads every record of a remote settings collection together with the
     * content of its attachment.
     *
     * @param {string} name Name of the remote settings collection.
     * @returns {Promise<object[]>} One object per record: the attachment
     *          content (empty when it could not be loaded) plus `recordKey`
     *          set to the record's `key`.
     */
    async getFromRemoteSettings(name) {
      const result = await RemoteSettings(name).get();
      return Promise.all(result.map(async record => ({...await this.getAttachment(record), recordKey: record.key})));
    }

    /**
     * Returns a Recipe from remote settings to be consumed by a RecipeExecutor.
     * A Recipe is a set of instructions on how to processes a RecipeExecutor.
     * The fetched recipes are cached on the instance; only the first is returned.
     */
    async getRecipe() {
      if (!this.recipes || !this.recipes.length) {
        const start = perfService.absNow();
        this.recipes = await this.getFromRemoteSettings(RECIPE_NAME);
        this.dispatch(ac.PerfEvent({
          event: "PERSONALIZATION_V2_GET_RECIPE_DURATION",
          value: Math.round(perfService.absNow() - start),
        }));
      }
      return this.recipes[0];
    }

    /**
     * Returns a Recipe Executor.
     * A Recipe Executor is a set of actions that can be consumed by a Recipe.
     * The Recipe determines the order and specifics of which the actions are called.
     *
     * @returns {Promise<RecipeExecutor|null>} null when no model keys were
     *          configured or when the models collection yields no models.
     */
    async generateRecipeExecutor() {
      if (!this.taggers) {
        // v2Params is optional in the constructor, so modelKeys may be unset.
        // Without it no model can be selected; report "no executor" instead
        // of throwing on `.includes` below.
        if (!this.modelKeys) {
          return null;
        }

        const startTaggers = perfService.absNow();
        let nbTaggers = [];
        let nmfTaggers = {};
        const models = await this.getFromRemoteSettings(MODELS_NAME);

        if (models.length === 0) {
          return null;
        }

        for (let model of models) {
          // Only models explicitly enabled through modelKeys are used.
          if (!this.modelKeys.includes(model.recordKey)) {
            continue;
          }
          if (model.model_type === "nb") {
            nbTaggers.push(new NaiveBayesTextTagger(model));
          } else if (model.model_type === "nmf") {
            nmfTaggers[model.parent_tag] = new NmfTextTagger(model);
          }
        }
        this.dispatch(ac.PerfEvent({
          event: "PERSONALIZATION_V2_TAGGERS_DURATION",
          value: Math.round(perfService.absNow() - startTaggers),
        }));
        this.taggers = {nbTaggers, nmfTaggers};
      }
      const startRecipeExecutor = perfService.absNow();
      const recipeExecutor = new RecipeExecutor(this.taggers.nbTaggers, this.taggers.nmfTaggers);
      this.dispatch(ac.PerfEvent({
        event: "PERSONALIZATION_V2_RECIPE_EXECUTOR_DURATION",
        value: Math.round(perfService.absNow() - startRecipeExecutor),
      }));
      return recipeExecutor;
    }

    /**
     * Grabs a slice of browse history for building a interest vector
     *
     * @param {string[]} columns Column names that must be non-null and
     *        non-empty in every returned row. They are interpolated into the
     *        SQL text as-is, so they must be trusted identifiers.
     * @param {number} beginTimeSecs Inclusive lower bound, in seconds.
     * @param {number} endTimeSecs Exclusive upper bound, in seconds.
     * @returns {Promise} Result of NewTabUtils' executePlacesQuery.
     */
    async fetchHistory(columns, beginTimeSecs, endTimeSecs) {
      // The bounds are multiplied by 1000000 before being compared with
      // last_visit_date.
      let sql = `SELECT url, title, visit_count, frecency, last_visit_date, description
      FROM moz_places
      WHERE last_visit_date >= ${beginTimeSecs * 1000000}
      AND last_visit_date < ${endTimeSecs * 1000000}`;
      columns.forEach(requiredColumn => {
        sql += ` AND IFNULL(${requiredColumn}, "") <> ""`;
      });
      sql += " LIMIT 30000";

      const {activityStreamProvider} = NewTabUtils;
      const history = await activityStreamProvider.executePlacesQuery(sql, {
        columns,
        params: {},
      });

      return history;
    }

    /**
     * Examines the user's browse history and returns an interest vector that
     * describes the topics the user frequently browses.
     *
     * @returns {Promise<object|null>} The finalized interest vector, or null
     *          when the combiner recipe returns null for a history item.
     */
    async createInterestVector() {
      let interestVector = {};
      let endTimeSecs = ((new Date()).getTime() / 1000);
      let beginTimeSecs = endTimeSecs - this.interestConfig.history_limit_secs;
      let history = await this.fetchHistory(this.interestConfig.history_required_fields, beginTimeSecs, endTimeSecs);

      this.dispatch(ac.PerfEvent({
        event: "PERSONALIZATION_V2_HISTORY_SIZE",
        value: history.length,
      }));

      const start = perfService.absNow();
      for (let historyRec of history) {
        let ivItem = this.recipeExecutor.executeRecipe(
          historyRec,
          this.interestConfig.history_item_builder);
        // Items the builder recipe returns null for are skipped.
        if (ivItem === null) {
          continue;
        }
        interestVector = this.recipeExecutor.executeCombinerRecipe(
          interestVector,
          ivItem,
          this.interestConfig.interest_combiner);
        if (interestVector === null) {
          return null;
        }
      }

      const finalResult = this.recipeExecutor.executeRecipe(
        interestVector,
        this.interestConfig.interest_finalizer);

      this.dispatch(ac.PerfEvent({
        event: "PERSONALIZATION_V2_CREATE_INTEREST_VECTOR_DURATION",
        value: Math.round(perfService.absNow() - start),
      }));
      return finalResult;
    }

    /**
     * Calculates a score of a Pocket item when compared to the user's interest
     * vector. Returns the score. Higher scores are better. Assumes this.interestVector
     * is populated.
     *
     * @param {object} pocketItem The item to score.
     * @returns {number} `pocketItem.item_score || 1` while the provider is not
     *          initialized, -1 when a recipe returns null for the item,
     *          otherwise the `score` of the ranked vector.
     */
    calculateItemRelevanceScore(pocketItem) {
      if (!this.initialized) {
        return pocketItem.item_score || 1;
      }
      let scorableItem = this.recipeExecutor.executeRecipe(
        pocketItem,
        this.interestConfig.item_to_rank_builder);
      if (scorableItem === null) {
        return -1;
      }

      // Work on a JSON round-trip copy so the stored interest vector is not
      // modified by the merge below or by the ranker recipe.
      let rankingVector = JSON.parse(JSON.stringify(this.interestVector));

      Object.keys(scorableItem).forEach(key => {
        rankingVector[key] = scorableItem[key];
      });

      rankingVector = this.recipeExecutor.executeRecipe(
        rankingVector,
        this.interestConfig.item_ranker);

      if (rankingVector === null) {
        return -1;
      }
      return rankingVector.score;
    }

    /**
     * Returns an object holding the settings and affinity scores of this provider instance.
     */
    getAffinities() {
      return {
        timeSegments: this.timeSegments,
        parameterSets: this.parameterSets,
        maxHistoryQueryResults: this.maxHistoryQueryResults,
        version: this.version,
        scores: {
          interestConfig: this.interestConfig,
          interestVector: this.interestVector,
          taggers: this.taggers,
        },
      };
    }
  };
  // Names this module makes available to code that imports it.
  const EXPORTED_SYMBOLS = [
    "PersonalityProvider",
  ];