NaiveBayesTextTagger.jsm 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. /* This Source Code Form is subject to the terms of the Mozilla Public
  2. * License, v. 2.0. If a copy of the MPL was not distributed with this
  3. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  4. "use strict";
  5. const {toksToTfIdfVector} = ChromeUtils.import("resource://activity-stream/lib/Tokenize.jsm");
  /**
   * Binary naive Bayes text tagger.
   *
   * The model object supplied to the constructor is read for the following
   * fields: `vocab_idfs` (handed to toksToTfIdfVector), `classes` (an array whose
   * entries carry `log_prior` and `feature_log_probs`), `positive_class_id`,
   * `positive_class_label` and `positive_class_threshold_log_prob`.
   */
  this.NaiveBayesTextTagger = class NaiveBayesTextTagger {
    /**
     * @param {Object} model The trained model; stored as-is on `this.model`.
     */
    constructor(model) {
      this.model = model;
    }

    /**
     * Determines if the tokenized text belongs to class according to binary
     * naive Bayes classifier.
     *
     * @param {Array} tokens Tokenized text, passed through to toksToTfIdfVector.
     * @returns {Object} An object with three fields:
     *   - "label": the positive class label when the positive class is the most
     *     likely one, otherwise null.
     *   - "logProb": the normalized log probability of the most likely class.
     *   - "confident": true only when the positive class won AND its normalized
     *     log probability exceeds the model's positive class threshold.
     */
    tagTokens(tokens) {
      const fv = toksToTfIdfVector(tokens, this.model.vocab_idfs);

      const classLogProbs = [];
      let bestLogProb = null;
      let bestClassId = -1;

      for (let classId = 0; classId < this.model.classes.length; classId++) {
        const classModel = this.model.classes[classId];
        let classLogProb = classModel.log_prior;

        // dot fv with the class model; each fv value is a [termId, tfidf] pair
        for (const [termId, tfidf] of Object.values(fv)) {
          classLogProb += tfidf * classModel.feature_log_probs[termId];
        }

        if (bestLogProb === null || classLogProb > bestLogProb) {
          bestLogProb = classLogProb;
          bestClassId = classId;
        }
        classLogProbs.push(classLogProb);
      }

      // Normalize by P(x) = sum over classes of exp(classLogProb), in log space.
      // Exponentiating the raw scores underflows to 0 for long documents, which
      // turns log P(x) into -Infinity and the result into +Infinity. Shifting
      // every score by the maximum keeps the largest term at exp(0) = 1:
      //   log P(x) = best + log(sum(exp(score - best)))
      // so the normalized best score is simply -log(sum(exp(score - best))).
      let shiftedSum = 0.0;
      for (const classLogProb of classLogProbs) {
        shiftedSum += Math.exp(classLogProb - bestLogProb);
      }
      // "0 - x" rather than "-x" so that a lone class yields 0 and not -0.
      const normalizedLogProb = 0 - Math.log(shiftedSum);

      const isPositive = bestClassId === this.model.positive_class_id;

      return {
        "label": isPositive ? this.model.positive_class_label : null,
        "logProb": normalizedLogProb,
        "confident": isPositive &&
          normalizedLogProb > this.model.positive_class_threshold_log_prob,
      };
    }
  };
  54. const EXPORTED_SYMBOLS = ["NaiveBayesTextTagger"];