NmfTextTagger.jsm 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. /* This Source Code Form is subject to the terms of the Mozilla Public
  2. * License, v. 2.0. If a copy of the MPL was not distributed with this
  3. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  4. "use strict";
  5. const {toksToTfIdfVector} = ChromeUtils.import("resource://activity-stream/lib/Tokenize.jsm");
  6. this.NmfTextTagger = class NmfTextTagger {
  7. constructor(model) {
  8. this.model = model;
  9. }
  10. /**
  11. * A multiclass classifier that scores tokenized text against several classes through
  12. * inference of a nonnegative matrix factorization of TF-IDF vectors and
  13. * class labels. Returns a map of class labels as string keys to scores.
  14. * (Higher is more confident.) All classes get scored, so it is up to
  15. * consumer of this data determine what classes are most valuable.
  16. */
  17. tagTokens(tokens) {
  18. let fv = toksToTfIdfVector(tokens, this.model.vocab_idfs);
  19. let fve = Object.values(fv);
  20. // normalize by the sum of the vector
  21. let sum = 0.0;
  22. for (let pair of fve) {
  23. // eslint-disable-next-line prefer-destructuring
  24. sum += pair[1];
  25. }
  26. for (let i = 0; i < fve.length; i++) {
  27. // eslint-disable-next-line prefer-destructuring
  28. fve[i][1] /= sum;
  29. }
  30. // dot the document with each topic vector so that we can transform it into
  31. // the latent space
  32. let toksInLatentSpace = [];
  33. for (let topicVect of this.model.topic_word) {
  34. let fvDotTwv = 0;
  35. // dot fv with each topic word vector
  36. for (let pair of fve) {
  37. let [termId, tfidf] = pair;
  38. fvDotTwv += tfidf * topicVect[termId];
  39. }
  40. toksInLatentSpace.push(fvDotTwv);
  41. }
  42. // now project toksInLatentSpace back into class space
  43. let predictions = {};
  44. Object.keys(this.model.document_topic).forEach(topic => {
  45. let score = 0;
  46. for (let i = 0; i < toksInLatentSpace.length; i++) {
  47. score += toksInLatentSpace[i] * this.model.document_topic[topic][i];
  48. }
  49. predictions[topic] = score;
  50. });
  51. return predictions;
  52. }
  53. };
  54. const EXPORTED_SYMBOLS = ["NmfTextTagger"];