123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331 |
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- "use strict";
- const {Services} = ChromeUtils.import("resource://gre/modules/Services.jsm");
- ChromeUtils.defineModuleGetter(this, "PlacesUtils",
- "resource://gre/modules/PlacesUtils.jsm");
// Default history time segments. startTime/endTime are offsets in seconds
// before "now"; queryVisits treats a value of 0 as "no bound" (beginning of
// history for startTime, the present moment for endTime). weightPosition is
// the x-position fed into calculateScore when deriving the segment's weight.
const DEFAULT_TIME_SEGMENTS = [
  {id: "hour", startTime: 60 * 60, endTime: 0, weightPosition: 1},
  {id: "day", startTime: 24 * 60 * 60, endTime: 60 * 60, weightPosition: 0.75},
  {id: "week", startTime: 7 * 24 * 60 * 60, endTime: 24 * 60 * 60, weightPosition: 0.5},
  {id: "weekPlus", startTime: 0, endTime: 7 * 24 * 60 * 60, weightPosition: 0.25},
  {id: "alltime", startTime: 0, endTime: 0, weightPosition: 0.25},
];

// Default scoring parameters, keyed by parameter set name. The individual
// factors are consumed by UserDomainAffinityProvider when computing domain
// affinity and item relevance scores.
const DEFAULT_PARAMETER_SETS = {
  "linear-frequency": {
    recencyFactor: 0.4,
    frequencyFactor: 0.5,
    combinedDomainFactor: 0.5,
    perfectFrequencyVisits: 10,
    perfectCombinedDomainScore: 2,
    multiDomainBoost: 0.1,
    itemScoreFactor: 0,
  },
};

// Upper bound on the number of history results requested per time segment.
const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000;
/**
 * Shallow-merges the given objects, left to right, into the first one.
 *
 * This is a thin wrapper around Object.assign: the FIRST argument is mutated
 * and returned, and later arguments win on key conflicts. Pass a fresh `{}`
 * as the first argument when the inputs must stay untouched.
 *
 * @param {...Object} args Target object followed by any number of sources
 * @return {Object} The (mutated) first argument
 */
function merge(...args) {
  return Object.assign(...args);
}
/**
 * Provides functionality to personalize content recommendations by calculating
 * user domain affinity scores. These scores are used to calculate relevance
 * scores for items/recs/stories that have domain affinities.
 *
 * The algorithm works as follows:
 *
 * - The recommendation endpoint returns a settings object containing
 *   timeSegments and parameterSets.
 *
 * - For every time segment we calculate the corresponding domain visit counts,
 *   yielding result objects of the following structure: {"mozilla.org": 12,
 *   "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits)
 *
 * - These visit counts are transformed to domain affinity scores for all
 *   provided parameter sets: {"mozilla.org": {"paramSet1": 0.8,
 *   "paramSet2": 0.9}, "mozilla.com": {"paramSet1": 1, "paramSet2": 0.9}}
 *   (see UserDomainAffinityProvider#calculateScoresForParameterSets)
 *
 * - The parameter sets provide factors for weighting which allows for
 *   flexible targeting. The functionality to calculate final scores can
 *   be seen in UserDomainAffinityProvider#calculateScore
 *
 * - The user domain affinity scores are summed up across all time segments
 *   (see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores)
 *
 * - An item's domain affinities are matched to the user's domain affinity
 *   scores by calculating an item relevance score
 *   (see UserDomainAffinityProvider#calculateItemRelevanceScore)
 *
 * - The item relevance scores are used to sort items (see TopStoriesFeed for
 *   more details)
 *
 * - The data structure was chosen to allow for fast cache lookups during
 *   relevance score calculation. While user domain affinities are calculated
 *   infrequently (i.e. only once a day), the item relevance score (potentially)
 *   needs to be calculated every time the feed updates. Therefore allowing cache
 *   lookups of scores[domain][parameterSet] is beneficial
 */
this.UserDomainAffinityProvider = class UserDomainAffinityProvider {
  /**
   * @param timeSegments The time segments to score, each with id, startTime,
   *                     endTime and weightPosition
   * @param parameterSets The parameter sets, keyed by name
   * @param maxHistoryQueryResults Max number of history results per time segment
   * @param version Opaque version value, stored and returned by getAffinities
   * @param scores Optional previously calculated scores. When provided, the
   *               scores and parameterSets are stored as given (parameterSets
   *               are presumably already prepared, e.g. taken from
   *               getAffinities) and browsing history is not queried.
   */
  constructor(
    timeSegments = DEFAULT_TIME_SEGMENTS,
    parameterSets = DEFAULT_PARAMETER_SETS,
    maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS,
    version,
    scores) {
    this.timeSegments = timeSegments;
    this.maxHistoryQueryResults = maxHistoryQueryResults;
    this.version = version;
    if (scores) {
      this.parameterSets = parameterSets;
      this.scores = scores;
    } else {
      this.parameterSets = this.prepareParameterSets(parameterSets);
      this.scores = this.calculateAllUserDomainAffinityScores();
    }
  }

  /**
   * Adds dynamic parameters to the given parameter sets that need to be
   * computed based on time segments.
   *
   * The provided parameter sets are not modified; each one is shallow-copied
   * before the computed fields are added, so shared defaults stay pristine.
   *
   * @param ps The parameter sets
   * @return Updated parameter sets with additional fields (i.e. timeSegmentWeights),
   *         or an empty object if no parameter sets were provided
   */
  prepareParameterSets(ps) {
    return Object
      .keys(ps)
      // Add timeSegmentWeight fields to param sets e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...}
      .reduce((acc, k) => merge(acc, {
        [k]: merge({}, ps[k], {timeSegmentWeights: this.calculateTimeSegmentWeights(ps[k].recencyFactor)}),
      }), {});
  }

  /**
   * Calculates a time segment weight based on the provided recencyFactor.
   *
   * @param recencyFactor The recency factor indicating how to weigh recency
   * @return An object containing time segment weights: {"hour": 0.987, "day": 1}
   */
  calculateTimeSegmentWeights(recencyFactor) {
    return this.timeSegments
      .reduce((acc, cur) => merge(acc, {[cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor)}), {});
  }

  /**
   * Calculates user domain affinity scores based on browsing history and the
   * available time segments and parameter sets.
   *
   * @return The combined scores across all time segments:
   *         {"a.com": {"ps1": 1, "ps2": 0.5}, ...}; an empty object if there
   *         are no time segments.
   */
  calculateAllUserDomainAffinityScores() {
    return this.timeSegments
      // Calculate parameter set specific domain scores for each time segment
      // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...]
      .map(ts => this.calculateUserDomainAffinityScores(ts))
      // Keep format, but reduce to single object, with combined scores across all time segments
      // => {"a.com":{"ps1":2,"ps2":2}, "b.com":{"ps1":3,"ps2":3}}
      // The explicit initial value keeps this from throwing when there are no segments.
      .reduce((acc, cur) => this._combineScores(acc, cur), {});
  }

  /**
   * Calculates the user domain affinity scores for the given time segment.
   *
   * @param ts The time segment
   * @return The parameter specific scores for all domains with visits in
   *         this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...}
   */
  calculateUserDomainAffinityScores(ts) {
    // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2}
    const visits = this.queryVisits(ts);
    return Object
      .keys(visits)
      .reduce((acc, d) => merge(acc, {[d]: this.calculateScoresForParameterSets(ts, visits[d])}), {});
  }

  /**
   * Calculates the scores for all parameter sets for the given time segment
   * and domain visit count.
   *
   * @param ts The time segment
   * @param vc The domain visit count in the given time segment
   * @return The parameter specific scores for the visit count in
   *         this time segment: {"ps1": 12, "ps2": 34}
   */
  calculateScoresForParameterSets(ts, vc) {
    return Object
      .keys(this.parameterSets)
      .reduce((acc, ps) => merge(acc, {[ps]: this.calculateScoreForParameterSet(ts, vc, this.parameterSets[ps])}), {});
  }

  /**
   * Calculates the final affinity score in the given time segment for the given parameter set
   *
   * @param timeSegment The time segment
   * @param visitCount The domain visit count in the given time segment
   * @param parameterSet The parameter set to use for scoring
   * @return The final score
   */
  calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) {
    return this.calculateScore(
      visitCount * parameterSet.timeSegmentWeights[timeSegment.id],
      parameterSet.perfectFrequencyVisits,
      parameterSet.frequencyFactor);
  }

  /**
   * Keeps the same format, but reduces the two objects to a single object, with
   * combined scores across all time segments => {"a.com":{"ps1":2,"ps2":2},
   * "b.com":{"ps1":3,"ps2":3}}
   *
   * @param a Scores object: {"a.com": {"ps1": 0.2}, ...}
   * @param b Scores object of the same shape
   * @return A new scores object holding every domain of a and b
   */
  _combineScores(a, b) {
    // Merge both score objects so we get a combined object holding all domains.
    // This is so we can combine them without missing domains that are in a and not in b and vice versa.
    const c = merge({}, a, b);
    return Object.keys(c).reduce((acc, d) => merge(acc, this._combine(a, b, c, d)), {});
  }

  /**
   * Sums the parameter set specific scores of domain d from a and b, capping
   * each sum at 1.
   *
   * @param a First scores object
   * @param b Second scores object
   * @param c Merged view of a and b; c[d] supplies the parameter set names
   * @param d The domain to combine
   * @return A single-domain object: {"a.com": {"ps1": 0.7, "ps2": 1}}. The
   *         inner object is empty if the domain has no parameter set scores.
   */
  _combine(a, b, c, d) {
    const combined = {};
    for (const ps of Object.keys(c[d])) {
      // A missing (or zero/NaN) score on either side counts as 0
      const fromA = (a[d] && a[d][ps]) || 0;
      const fromB = (b[d] && b[d][ps]) || 0;
      combined[ps] = Math.min(1, fromA + fromB);
    }
    return {[d]: combined};
  }

  /**
   * Calculates a value on the curve described by the provided parameters. The curve we're using is
   * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp
   *
   * @param {number} score A value between 0 and maxScore, representing x.
   * @param {number} maxScore Highest possible score.
   * @param {number} factor The slope describing the curve to get to maxScore. A low slope value
   *   [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve,
   *   a slope of exactly 0.5 is linear.
   * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum
   *   effect of the slope can be. This represents b in the formula above.
   * @return {number} the final score; 1 whenever score reaches or exceeds maxScore
   */
  calculateScore(score, maxScore, factor, ease = 2) {
    let a = 0;
    const x = Math.max(0, score / maxScore);

    if (x >= 1) {
      return 1;
    }

    if (factor === 0.5) {
      return x;
    }

    if (factor < 0.5) {
      // We want a log-shaped curve so we scale "a" between 0 and 0.49
      a = (factor / 0.5) * 0.49;
    } else if (factor > 0.5) {
      // We want an exp-shaped curve so we scale "a" between 1 and 10
      a = 1 + (factor - 0.5) / 0.5 * 9;
    }

    return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1);
  }

  /**
   * Queries the visit counts in the given time segment.
   *
   * Hosts are normalized by stripping a leading "www." so that e.g.
   * www.a.com and a.com are counted together.
   *
   * @param ts the time segment
   * @return the visit count object: {"a.com": 1, "b.com": 2}
   */
  queryVisits(ts) {
    const visitCounts = {};
    const wwwRegEx = /^www\./;

    // Segment bounds are multiplied up from seconds (see the default segments)
    // and expressed as negative offsets relative to now. A missing/zero
    // startTime reaches back by the full Date.now() value, a missing/zero
    // endTime means "now".
    const query = PlacesUtils.history.getNewQuery();
    query.beginTimeReference = query.TIME_RELATIVE_NOW;
    query.beginTime = ts.startTime ? -(ts.startTime * 1000 * 1000) : -(Date.now() * 1000);
    query.endTimeReference = query.TIME_RELATIVE_NOW;
    query.endTime = ts.endTime ? -(ts.endTime * 1000 * 1000) : 0;

    const options = PlacesUtils.history.getNewQueryOptions();
    options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING;
    options.maxResults = this.maxHistoryQueryResults;

    const {root} = PlacesUtils.history.executeQuery(query, options);
    root.containerOpen = true;
    try {
      for (let i = 0; i < root.childCount; i++) {
        const node = root.getChild(i);
        const host = Services.io.newURI(node.uri).host.replace(wwwRegEx, "");
        if (!visitCounts[host]) {
          visitCounts[host] = 0;
        }
        visitCounts[host] += node.accessCount;
      }
    } finally {
      // Always close the result container, even if reading a node throws
      root.containerOpen = false;
    }
    return visitCounts;
  }

  /**
   * Calculates an item's relevance score.
   *
   * @param item the item (story), must contain domain affinities and reference
   *             a known parameter set, otherwise the item's own item_score is
   *             returned unchanged.
   * @return the calculated item's score, or item.item_score if the item has no
   *         domain_affinities or references an unknown parameter set.
   */
  calculateItemRelevanceScore(item) {
    const params = this.parameterSets[item.parameter_set];
    if (!item.domain_affinities || !params) {
      return item.item_score;
    }

    const scores = Object
      .keys(item.domain_affinities)
      .reduce((acc, d) => {
        const userDomainAffinityScore = this.scores[d] ? this.scores[d][item.parameter_set] : false;
        // Domains the user has no (non-zero) affinity for do not contribute
        if (userDomainAffinityScore) {
          acc.combinedDomainScore += userDomainAffinityScore * item.domain_affinities[d];
          acc.matchingDomainsCount++;
        }
        return acc;
      }, {combinedDomainScore: 0, matchingDomainsCount: 0});

    // Boost the score as configured in the provided parameter set
    const boostedCombinedDomainScore = scores.combinedDomainScore *
      Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount);

    // Calculate what the score would be if the item score is ignored
    const normalizedCombinedDomainScore = this.calculateScore(boostedCombinedDomainScore,
      params.perfectCombinedDomainScore,
      params.combinedDomainFactor);

    // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor
    // allows weighting the item score in relation to the normalizedCombinedDomainScore:
    // An itemScoreFactor of 1 results in the item score and ignores the combined domain score
    // An itemScoreFactor of 0.5 results in the average of item score and combined domain score
    // An itemScoreFactor of 0 results in the combined domain score and ignores the item score
    return params.itemScoreFactor * (item.item_score - normalizedCombinedDomainScore) + normalizedCombinedDomainScore;
  }

  /**
   * Returns an object holding the settings and affinity scores of this provider instance.
   *
   * @return {timeSegments, parameterSets, maxHistoryQueryResults, version, scores}
   */
  getAffinities() {
    return {
      timeSegments: this.timeSegments,
      parameterSets: this.parameterSets,
      maxHistoryQueryResults: this.maxHistoryQueryResults,
      version: this.version,
      scores: this.scores,
    };
  }
};
- const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"];
|