UserDomainAffinityProvider.jsm 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. /* This Source Code Form is subject to the terms of the Mozilla Public
  2. * License, v. 2.0. If a copy of the MPL was not distributed with this
  3. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  4. "use strict";
  5. const {Services} = ChromeUtils.import("resource://gre/modules/Services.jsm");
  6. ChromeUtils.defineModuleGetter(this, "PlacesUtils",
  7. "resource://gre/modules/PlacesUtils.jsm");
  8. const DEFAULT_TIME_SEGMENTS = [
  9. {"id": "hour", "startTime": 3600, "endTime": 0, "weightPosition": 1},
  10. {"id": "day", "startTime": 86400, "endTime": 3600, "weightPosition": 0.75},
  11. {"id": "week", "startTime": 604800, "endTime": 86400, "weightPosition": 0.5},
  12. {"id": "weekPlus", "startTime": 0, "endTime": 604800, "weightPosition": 0.25},
  13. {"id": "alltime", "startTime": 0, "endTime": 0, "weightPosition": 0.25},
  14. ];
  15. const DEFAULT_PARAMETER_SETS = {
  16. "linear-frequency": {
  17. "recencyFactor": 0.4,
  18. "frequencyFactor": 0.5,
  19. "combinedDomainFactor": 0.5,
  20. "perfectFrequencyVisits": 10,
  21. "perfectCombinedDomainScore": 2,
  22. "multiDomainBoost": 0.1,
  23. "itemScoreFactor": 0,
  24. },
  25. };
  26. const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000;
  27. function merge(...args) {
  28. return Object.assign.apply(this, args);
  29. }
  30. /**
  31. * Provides functionality to personalize content recommendations by calculating
  32. * user domain affinity scores. These scores are used to calculate relevance
  33. * scores for items/recs/stories that have domain affinities.
  34. *
  35. * The algorithm works as follows:
  36. *
  37. * - The recommendation endpoint returns a settings object containing
  38. * timeSegments and parametersets.
  39. *
  40. * - For every time segment we calculate the corresponding domain visit counts,
  41. * yielding result objects of the following structure: {"mozilla.org": 12,
  42. * "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits)
  43. *
  44. * - These visit counts are transformed to domain affinity scores for all
  45. * provided parameter sets: {"mozilla.org": {"paramSet1": 0.8,
  46. * "paramSet2": 0.9}, "mozilla.org": {"paramSet1": 1, "paramSet2": 0.9}}
  47. * (see UserDomainAffinityProvider#calculateScoresForParameterSets)
  48. *
  49. * - The parameter sets provide factors for weighting which allows for
  50. * flexible targeting. The functionality to calculate final scores can
  51. * be seen in UserDomainAffinityProvider#calculateScores
  52. *
  53. * - The user domain affinity scores are summed up across all time segments
  54. * see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores
  55. *
  56. * - An item's domain affinities are matched to the user's domain affinity
  57. * scores by calculating an item relevance score
  58. * (see UserDomainAffinityProvider#calculateItemRelevanceScore)
  59. *
  60. * - The item relevance scores are used to sort items (see TopStoriesFeed for
  61. * more details)
  62. *
  63. * - The data structure was chosen to allow for fast cache lookups during
  64. * relevance score calculation. While user domain affinities are calculated
  65. * infrequently (i.e. only once a day), the item relevance score (potentially)
  66. * needs to be calculated every time the feed updates. Therefore allowing cache
  67. * lookups of scores[domain][parameterSet] is beneficial
  68. */
  69. this.UserDomainAffinityProvider = class UserDomainAffinityProvider {
  70. constructor(
  71. timeSegments = DEFAULT_TIME_SEGMENTS,
  72. parameterSets = DEFAULT_PARAMETER_SETS,
  73. maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS,
  74. version,
  75. scores) {
  76. this.timeSegments = timeSegments;
  77. this.maxHistoryQueryResults = maxHistoryQueryResults;
  78. this.version = version;
  79. if (scores) {
  80. this.parameterSets = parameterSets;
  81. this.scores = scores;
  82. } else {
  83. this.parameterSets = this.prepareParameterSets(parameterSets);
  84. this.scores = this.calculateAllUserDomainAffinityScores();
  85. }
  86. }
  87. /**
  88. * Adds dynamic parameters to the given parameter sets that need to be
  89. * computed based on time segments.
  90. *
  91. * @param ps The parameter sets
  92. * @return Updated parameter sets with additional fields (i.e. timeSegmentWeights)
  93. */
  94. prepareParameterSets(ps) {
  95. return Object
  96. .keys(ps)
  97. // Add timeSegmentWeight fields to param sets e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...}
  98. .map(k => ({[k]: merge(ps[k], {timeSegmentWeights: this.calculateTimeSegmentWeights(ps[k].recencyFactor)})}))
  99. .reduce((acc, cur) => merge(acc, cur));
  100. }
  101. /**
  102. * Calculates a time segment weight based on the provided recencyFactor.
  103. *
  104. * @param recencyFactor The recency factor indicating how to weigh recency
  105. * @return An object containing time segment weights: {"hour": 0.987, "day": 1}
  106. */
  107. calculateTimeSegmentWeights(recencyFactor) {
  108. return this.timeSegments
  109. .reduce((acc, cur) => merge(acc, ({[cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor)})), {});
  110. }
  111. /**
  112. * Calculates user domain affinity scores based on browsing history and the
  113. * available times segments and parameter sets.
  114. */
  115. calculateAllUserDomainAffinityScores() {
  116. return this.timeSegments
  117. // Calculate parameter set specific domain scores for each time segment
  118. // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...]
  119. .map(ts => this.calculateUserDomainAffinityScores(ts))
  120. // Keep format, but reduce to single object, with combined scores across all time segments
  121. // => "{a.com":{"ps1":2,"ps2":2}, "b.com":{"ps1":3,"ps2":3}}""
  122. .reduce((acc, cur) => this._combineScores(acc, cur));
  123. }
  124. /**
  125. * Calculates the user domain affinity scores for the given time segment.
  126. *
  127. * @param ts The time segment
  128. * @return The parameter specific scores for all domains with visits in
  129. * this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...}
  130. */
  131. calculateUserDomainAffinityScores(ts) {
  132. // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2}
  133. let visits = this.queryVisits(ts);
  134. return Object
  135. .keys(visits)
  136. .reduce((acc, d) => merge(acc, {[d]: this.calculateScoresForParameterSets(ts, visits[d])}), {});
  137. }
  138. /**
  139. * Calculates the scores for all parameter sets for the given time segment
  140. * and domain visit count.
  141. *
  142. * @param ts The time segment
  143. * @param vc The domain visit count in the given time segment
  144. * @return The parameter specific scores for the visit count in
  145. * this time segment: {"ps1": 12, "ps2": 34}
  146. */
  147. calculateScoresForParameterSets(ts, vc) {
  148. return Object
  149. .keys(this.parameterSets)
  150. .reduce((acc, ps) => merge(acc, {[ps]: this.calculateScoreForParameterSet(ts, vc, this.parameterSets[ps])}), {});
  151. }
  152. /**
  153. * Calculates the final affinity score in the given time segment for the given parameter set
  154. *
  155. * @param timeSegment The time segment
  156. * @param visitCount The domain visit count in the given time segment
  157. * @param parameterSet The parameter set to use for scoring
  158. * @return The final score
  159. */
  160. calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) {
  161. return this.calculateScore(
  162. visitCount * parameterSet.timeSegmentWeights[timeSegment.id],
  163. parameterSet.perfectFrequencyVisits,
  164. parameterSet.frequencyFactor);
  165. }
  166. /**
  167. * Keeps the same format, but reduces the two objects to a single object, with
  168. * combined scores across all time segments => {a.com":{"ps1":2,"ps2":2},
  169. * "b.com":{"ps1":3,"ps2":3}}
  170. */
  171. _combineScores(a, b) {
  172. // Merge both score objects so we get a combined object holding all domains.
  173. // This is so we can combine them without missing domains that are in a and not in b and vice versa.
  174. const c = merge({}, a, b);
  175. return Object.keys(c).reduce((acc, d) => merge(acc, this._combine(a, b, c, d)), {});
  176. }
  177. _combine(a, b, c, d) {
  178. return Object
  179. .keys(c[d])
  180. // Summing up the parameter set specific scores of each domain
  181. .map(ps => ({[d]: {[ps]: Math.min(1, ((a[d] && a[d][ps]) || 0) + ((b[d] && b[d][ps]) || 0))}}))
  182. // Reducing from an array of objects with a single parameter set to a single object
  183. // [{"a.com":{"ps1":11}}, {"a.com: {"ps2":12}}] => {"a.com":{"ps1":11,"ps2":12}}
  184. .reduce((acc, cur) => ({[d]: merge(acc[d], cur[d])}));
  185. }
  186. /**
  187. * Calculates a value on the curve described by the provided parameters. The curve we're using is
  188. * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp
  189. *
  190. * @param {number} score A value between 0 and maxScore, representing x.
  191. * @param {number} maxScore Highest possible score.
  192. * @param {number} factor The slope describing the curve to get to maxScore. A low slope value
  193. * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve,
  194. * a slope of exactly 0.5 is linear.
  195. * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum
  196. * effect of the slope can be. This represents b in the formula above.
  197. * @return {number} the final score
  198. */
  199. calculateScore(score, maxScore, factor, ease = 2) {
  200. let a = 0;
  201. let x = Math.max(0, score / maxScore);
  202. if (x >= 1) {
  203. return 1;
  204. }
  205. if (factor === 0.5) {
  206. return x;
  207. }
  208. if (factor < 0.5) {
  209. // We want a log-shaped curve so we scale "a" between 0 and .99
  210. a = (factor / 0.5) * 0.49;
  211. } else if (factor > 0.5) {
  212. // We want an exp-shaped curve so we scale "a" between 1.01 and 10
  213. a = 1 + (factor - 0.5) / 0.5 * 9;
  214. }
  215. return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1);
  216. }
  217. /**
  218. * Queries the visit counts in the given time segment.
  219. *
  220. * @param ts the time segment
  221. * @return the visit count object: {"a.com": 1, "b.com": 2}
  222. */
  223. queryVisits(ts) {
  224. const visitCounts = {};
  225. const query = PlacesUtils.history.getNewQuery();
  226. const wwwRegEx = /^www\./;
  227. query.beginTimeReference = query.TIME_RELATIVE_NOW;
  228. query.beginTime = (ts.startTime && ts.startTime !== 0) ? -(ts.startTime * 1000 * 1000) : -(Date.now() * 1000);
  229. query.endTimeReference = query.TIME_RELATIVE_NOW;
  230. query.endTime = (ts.endTime && ts.endTime !== 0) ? -(ts.endTime * 1000 * 1000) : 0;
  231. const options = PlacesUtils.history.getNewQueryOptions();
  232. options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING;
  233. options.maxResults = this.maxHistoryQueryResults;
  234. const {root} = PlacesUtils.history.executeQuery(query, options);
  235. root.containerOpen = true;
  236. for (let i = 0; i < root.childCount; i++) {
  237. let node = root.getChild(i);
  238. let host = Services.io.newURI(node.uri).host.replace(wwwRegEx, "");
  239. if (!visitCounts[host]) {
  240. visitCounts[host] = 0;
  241. }
  242. visitCounts[host] += node.accessCount;
  243. }
  244. root.containerOpen = false;
  245. return visitCounts;
  246. }
  247. /**
  248. * Calculates an item's relevance score.
  249. *
  250. * @param item the item (story), must contain domain affinities, otherwise a
  251. * score of 1 is returned.
  252. * @return the calculated item's score or 1 if item has no domain_affinities
  253. * or references an unknown parameter set.
  254. */
  255. calculateItemRelevanceScore(item) {
  256. const params = this.parameterSets[item.parameter_set];
  257. if (!item.domain_affinities || !params) {
  258. return item.item_score;
  259. }
  260. const scores = Object
  261. .keys(item.domain_affinities)
  262. .reduce((acc, d) => {
  263. let userDomainAffinityScore = this.scores[d] ? this.scores[d][item.parameter_set] : false;
  264. if (userDomainAffinityScore) {
  265. acc.combinedDomainScore += userDomainAffinityScore * item.domain_affinities[d];
  266. acc.matchingDomainsCount++;
  267. }
  268. return acc;
  269. }, {combinedDomainScore: 0, matchingDomainsCount: 0});
  270. // Boost the score as configured in the provided parameter set
  271. const boostedCombinedDomainScore = scores.combinedDomainScore *
  272. Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount);
  273. // Calculate what the score would be if the item score is ignored
  274. const normalizedCombinedDomainScore = this.calculateScore(boostedCombinedDomainScore,
  275. params.perfectCombinedDomainScore,
  276. params.combinedDomainFactor);
  277. // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor
  278. // allows weighting the item score in relation to the normalizedCombinedDomainScore:
  279. // An itemScoreFactor of 1 results in the item score and ignores the combined domain score
  280. // An itemScoreFactor of 0.5 results in the the average of item score and combined domain score
  281. // An itemScoreFactor of 0 results in the combined domain score and ignores the item score
  282. return params.itemScoreFactor * (item.item_score - normalizedCombinedDomainScore) + normalizedCombinedDomainScore;
  283. }
  284. /**
  285. * Returns an object holding the settings and affinity scores of this provider instance.
  286. */
  287. getAffinities() {
  288. return {
  289. timeSegments: this.timeSegments,
  290. parameterSets: this.parameterSets,
  291. maxHistoryQueryResults: this.maxHistoryQueryResults,
  292. version: this.version,
  293. scores: this.scores,
  294. };
  295. }
  296. };
  297. const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"];