SHA1
--- a/idiolectalyzer.py
+++ b/idiolectalyzer.py
@@ -112,7 +112,6 @@ def countFunctionWords ( textSample ):
 
				     functionWords = enchant.request_pwl_dict((localpath+"/wordLists/englishFunctionWords.txt"))
			
 
				     wordList = textSample.split(" ")
			
 
				     wordList = filter(None, wordList) #remove any empty strings from list. Have made this unnecessary in my fork of PyEnchant
			
 
				-    print wordList
			
 
				     functionWordCount = 0
			
 
				     for word in wordList:
			
 
				         if functionWords.check(word):
			
@@ -205,8 +204,9 @@ def findCommonMisspellings ( textSample, req='count' ):
 
				     wordList = textSample.split(" ")
			
 
				     commonMisspellingsInSample = []
			
 
				     for word in wordList:
			
 
				-        if commonMisspellings.check(word):
			
 
				-            commonMisspellingsInSample.append(word)
			
 
				+        if len(word)>0: 
			
 
				+            if commonMisspellings.check(word):
			
 
				+                commonMisspellingsInSample.append(word)
			
 
				     #only proceed if the list is not empty
			
 
				     if commonMisspellingsInSample == []:
			
 
				         return
			
@@ -240,6 +240,17 @@ class textData(object):
 
				         self.linebreakratio = float(0)
			
 
				         self.repeatwordcounts = {}
			
 
				         self.commonmisspellingscounts = {}
			
 
				+    def fill(self, textSample):
			
 
				+        self.strippedSample = stripData(textSample)
			
 
				+        self.samplewordcount = countWordsIn(textSample)
			
 
				+        self.rawlexicaldensity = calculateLexicalDensity(textSample)
			
 
				+        self.strippedlexicaldensity = calculateLexicalDensity(self.strippedSample)
			
 
				+        self.lowercasepercentage = checkStructureMarkers(textSample,'lowercase')
			
 
				+        self.doublespacingratio = checkStructureMarkers(textSample,'doublespace')
			
 
				+        self.unusualspacingratio = checkStructureMarkers(textSample,'unusualspacing')
			
 
				+        self.linebreakratio = checkStructureMarkers(textSample,'linebreak')
			
 
				+        self.repeatwordcounts = findRepeatWords(self.strippedSample, 2)
			
 
				+        self.commonmisspellingscounts = findCommonMisspellings(self.strippedSample, 'count')
			
 
				                    
			
 
				 if __name__ == '__main__':
			
 
				     print "Idiolectalyzer 0.1\n\n"
			
--- a/tests/test_idiolectalyzer.py
+++ b/tests/test_idiolectalyzer.py
@@ -131,14 +131,20 @@ class testTextAnalysis(unittest.TestCase):
 
				         self.assertEqual(expectedFrequency,frequency)
			
 
				         
			
 
				     def testTextDataObjectCreate(self):
			
 
				-        testTextFile="mockdata/251words.txt"
			
 
				+        testTextFile="mockdata/lotsofpunctuation.txt"
			
 
				         testText = readTestText(testTextFile)
			
 
				         strippedText = idiolectalyzer.stripData(testText)
			
 
				         testDataObject = idiolectalyzer.textData()
			
 
				-        testDataObject.samplewordcount = idiolectalyzer.countWordsIn(testText)
			
 
				-        testDataObject.rawlexicaldensity = idiolectalyzer.calculateLexicalDensity(testText)
			
 
				-        testDataObject.strippedlexicaldensity = idiolectalyzer.calculateLexicalDensity(strippedText)
			
 
				-        
			
 
				-    
			
 
				+        testDataObject.fill(testText)
			
 
				+        print testDataObject.samplewordcount
			
 
				+        print testDataObject.rawlexicaldensity
			
 
				+        print testDataObject.strippedlexicaldensity
			
 
				+        print testDataObject.lowercasepercentage
			
 
				+        print testDataObject.doublespacingratio
			
 
				+        print testDataObject.unusualspacingratio
			
 
				+        print testDataObject.linebreakratio
			
 
				+        print testDataObject.repeatwordcounts
			
 
				+        print testDataObject.commonmisspellingscounts
			
 
				+                  
			
 
				 if __name__ == '__main__':
			
 
				     unittest.main()
作者	SHA1 メッセージ	日付
Laura Stewart	6e7107b490 Being sure not to pass pyenchant empty strings.	8 年前
Laura Stewart	ed6622470c Removing print which was used for debugging purposes.	8 年前