|
@@ -10,6 +10,7 @@ import re
|
|
|
import os
|
|
|
import collections
|
|
|
import string
|
|
|
+import fileinput
|
|
|
localpath = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
|
|
def countWordsIn( textSample ):
|
|
@@ -123,12 +124,27 @@ def findRepeatwords ( findRepeatsSample ):
|
|
|
print "Finding repeat words not yet implimented"
|
|
|
return
|
|
|
|
|
|
-def howCommonIs ( word, context='all_google' ):
|
|
|
- #this will become dependant on context.
|
|
|
- frequencyFile = localpath+"/wordLists/frequency_allgooglengrams.txt"
|
|
|
- print frequencyFile
|
|
|
- #rankedWordList = enchant.request_pwl_dict()
|
|
|
- print "Finding the use rank of words not yet implimented"
|
|
|
def howCommonIs ( word, context='all_google' ):
    """Look up the rank of `word` in one of the word-frequency lists.

    The lists live in the wordLists/ directory next to this module
    (`localpath`).  The rank is the 1-based number of the line that equals
    `word` once surrounding whitespace has been stripped from the line.
    NOTE(review): presumably the files are ordered most-common first, so a
    lower rank means a more common word -- confirm against the data files.

    Arguments:
        word    -- the word to look up.
        context -- which list to consult: 'all_google' (default) or
                   'google_1965'.

    Returns:
        False    if `context` is not a supported name (a message listing
                 the available options is printed first);
        'unique' if the enchant personal-word-list dictionary built from
                 the selected file does not accept `word`;
        an int   rank if a line of the file matches `word` exactly;
        None     if the dictionary accepts `word` but no line matches
                 exactly (a diagnostic message is printed in that case).
    """
    if (context == 'all_google'):
        frequencyFile = localpath+"/wordLists/frequency_allgooglengrams.txt"
    elif (context == 'google_1965'):
        frequencyFile = localpath+"/wordLists/frequency_googlengramssince1965.txt"
    else:
        print("Invalid context. Available options: all_google, google_1965")
        return False

    # Cheap membership test first, so the file is only scanned line by line
    # for words the dictionary already knows.
    rankedWordList = enchant.request_pwl_dict(frequencyFile)
    if not rankedWordList.check(word):
        return 'unique'

    # Use a context manager rather than the module-global fileinput state:
    # the file is closed on every exit path (match, no match, exception),
    # and a failure here cannot leave fileinput "already active" for the
    # next caller.
    with open(frequencyFile) as rankedWords:
        for rank, line in enumerate(rankedWords, 1):
            if line.strip() == word:
                return rank

    # Reached when the dictionary accepted the word but no line equals it.
    # NOTE(review): presumably enchant's check tolerates variants (such as
    # different capitalisation) of listed words -- confirm, then decide
    # what this case should return.
    print("I should not be printed")
    return
|
|
|
|
|
|
def findCommonMisspellings ( textSample, req='none' ):
|
|
@@ -158,10 +174,9 @@ def findCommonMisspellings ( textSample, req='none' ):
|
|
|
#should be raise error
|
|
|
return 'invalid req argument (list|count|none)'
|
|
|
return
|
|
|
-
|
|
|
-print "Idiolectalyzer 0.1\n"
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
+ print "Idiolectalyzer 0.1\n"
|
|
|
|
|
|
textSample1 = getTextSample()
|
|
|
#textSample2 = getTextSample()
|