2 Commits 887beb0096 ... 6e7107b490

Author SHA1 Message Date
  Laura Stewart 6e7107b490 Being sure not to pass pyenchant empty strings. 7 years ago
  Laura Stewart ed6622470c Removing print which was used for debugging purposes. 8 years ago
2 changed files with 26 additions and 9 deletions
  1. 14 3
      idiolectalyzer.py
  2. 12 6
      tests/test_idiolectalyzer.py

+ 14 - 3
idiolectalyzer.py

@@ -112,7 +112,6 @@ def countFunctionWords ( textSample ):
     functionWords = enchant.request_pwl_dict((localpath+"/wordLists/englishFunctionWords.txt"))
     wordList = textSample.split(" ")
     wordList = filter(None, wordList) #remove any empty strings from list. Have made this unecessary in my fork of PyEnchant
-    print wordList
     functionWordCount = 0
     for word in wordList:
         if functionWords.check(word):
@@ -205,8 +204,9 @@ def findCommonMisspellings ( textSample, req='count' ):
     wordList = textSample.split(" ")
     commonMisspellingsInSample = []
     for word in wordList:
-        if commonMisspellings.check(word):
-            commonMisspellingsInSample.append(word)
+        if len(word)>0: 
+            if commonMisspellings.check(word):
+                commonMisspellingsInSample.append(word)
     #only proceed if the list is not empty
     if commonMisspellingsInSample == []:
         return
@@ -240,6 +240,17 @@ class textData(object):
         self.linebreakratio = float(0)
         self.repeatwordcounts = {}
         self.commonmisspellingscounts = {}
+    def fill(self, textSample):  # compute each metric below from textSample and store it on this object
+        self.strippedSample = stripData(textSample)  # reused by the stripped-density, repeat-word and misspelling metrics
+        self.samplewordcount = countWordsIn(textSample)
+        self.rawlexicaldensity = calculateLexicalDensity(textSample)
+        self.strippedlexicaldensity = calculateLexicalDensity(self.strippedSample)
+        self.lowercasepercentage = checkStructureMarkers(textSample,'lowercase')
+        self.doublespacingratio = checkStructureMarkers(textSample,'doublespace')
+        self.unusualspacingratio = checkStructureMarkers(textSample,'unusualspacing')
+        self.linebreakratio = checkStructureMarkers(textSample,'linebreak')
+        self.repeatwordcounts = findRepeatWords(self.strippedSample, 2)
+        self.commonmisspellingscounts = findCommonMisspellings(self.strippedSample, 'count')  # NOTE(review): is None (not {}) when no misspellings are found
                    
 if __name__ == '__main__':
     print "Idiolectalyzer 0.1\n\n"

+ 12 - 6
tests/test_idiolectalyzer.py

@@ -131,14 +131,20 @@ class testTextAnalysis(unittest.TestCase):
         self.assertEqual(expectedFrequency,frequency)
         
     def testTextDataObjectCreate(self):
-        testTextFile="mockdata/251words.txt"
+        testTextFile="mockdata/lotsofpunctuation.txt"
         testText = readTestText(testTextFile)
         strippedText = idiolectalyzer.stripData(testText)
         testDataObject = idiolectalyzer.textData()
-        testDataObject.samplewordcount = idiolectalyzer.countWordsIn(testText)
-        testDataObject.rawlexicaldensity = idiolectalyzer.calculateLexicalDensity(testText)
-        testDataObject.strippedlexicaldensity = idiolectalyzer.calculateLexicalDensity(strippedText)
-        
-    
+        testDataObject.fill(testText)
+        print testDataObject.samplewordcount
+        print testDataObject.rawlexicaldensity
+        print testDataObject.strippedlexicaldensity
+        print testDataObject.lowercasepercentage
+        print testDataObject.doublespacingratio
+        print testDataObject.unusualspacingratio
+        print testDataObject.linebreakratio
+        print testDataObject.repeatwordcounts
+        print testDataObject.commonmisspellingscounts
+                  
 if __name__ == '__main__':
     unittest.main()