|
@@ -112,7 +112,6 @@ def countFunctionWords ( textSample ):
|
|
|
functionWords = enchant.request_pwl_dict((localpath+"/wordLists/englishFunctionWords.txt"))
|
|
|
wordList = textSample.split(" ")
|
|
|
wordList = filter(None, wordList) #remove any empty strings from list. Have made this unnecessary in my fork of PyEnchant
|
|
|
- print wordList
|
|
|
functionWordCount = 0
|
|
|
for word in wordList:
|
|
|
if functionWords.check(word):
|
|
@@ -205,8 +204,9 @@ def findCommonMisspellings ( textSample, req='count' ):
|
|
|
wordList = textSample.split(" ")
|
|
|
commonMisspellingsInSample = []
|
|
|
for word in wordList:
|
|
|
- if commonMisspellings.check(word):
|
|
|
- commonMisspellingsInSample.append(word)
|
|
|
+ if len(word)>0:
|
|
|
+ if commonMisspellings.check(word):
|
|
|
+ commonMisspellingsInSample.append(word)
|
|
|
#only proceed if the list is not empty
|
|
|
if commonMisspellingsInSample == []:
|
|
|
return
|
|
@@ -240,6 +240,17 @@ class textData(object):
|
|
|
self.linebreakratio = float(0)
|
|
|
self.repeatwordcounts = {}
|
|
|
self.commonmisspellingscounts = {}
|
|
|
+ def fill(self, textSample):  # Compute the metrics below from textSample and store each one on this instance.
|
|
|
+ self.strippedSample = stripData(textSample)  # stripped copy is kept on the instance; the stripped-text metrics below reuse it
|
|
|
+ self.samplewordcount = countWordsIn(textSample)  # counted on the raw sample, not the stripped one
|
|
|
+ self.rawlexicaldesnity = calculateLexicalDensity(textSample)  # NOTE(review): "desnity" looks like a misspelling of "density" (cf. strippedlexicaldensity) - confirm against the attribute name set in __init__ before renaming
|
|
|
+ self.strippedlexicaldensity = calculateLexicalDensity(self.strippedSample)  # same helper as above, applied to the stripped text
|
|
|
+ self.lowercasepercentage = checkStructureMarkers(textSample,'lowercase')  # checkStructureMarkers is called once per marker name, always on the raw sample
|
|
|
+ self.doublespacingratio = checkStructureMarkers(textSample,'doublespace')
|
|
|
+ self.unusualspacingratio = checkStructureMarkers(textSample,'unusualspacing')
|
|
|
+ self.linebreakratio = checkStructureMarkers(textSample,'linebreak')
|
|
|
+ self.repeatwordcounts = findRepeatWords(self.strippedSample, 2)  # TODO confirm the meaning of the literal 2 against findRepeatWords; its definition is not visible here
|
|
|
+ self.commonmisspellingscounts = findCommonMisspellings(self.strippedSample, 'count')  # NOTE(review): findCommonMisspellings does a bare return when it finds nothing, so this can become None instead of the {} default - verify callers cope
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
print "Idiolectalyzer 0.1\n\n"
|