SHA1
--- a/idiolectalyzer.py
+++ b/idiolectalyzer.py
@@ -9,25 +9,28 @@ Idiolectalyzer 0.1
 
				 import enchant
			
 
				 import re
			
 
				 import os
			
 
				-localpath = os.path.dirname(os.path.realpath(__file__))
			
 
				+localpath = os.path.dirname(os.path.realpath(__file__)) 
			
 
				+#from idiolectalyzerclasses import *
			
 
				 
			
 
				-def countWordsIn( countWordsSample ):
			
 
				-    a = len( countWordsSample.split() )
			
 
				+def countWordsIn( textSample ):
			
 
				+    a = len( textSample.split() )
			
 
				     return a
			
 
				 
			
 
				-def checkWordCount ( checkCountSample ):
			
 
				+def checkWordCount ( textSample ):
			
 
				     goodCount = 0
			
 
				-    sampleWordcount = countWordsIn(checkCountSample)
			
 
				+    sampleWordcount = countWordsIn(textSample)
			
 
				     if sampleWordcount > 1000:
			
 
				         goodCount = 1
			
 
				     return goodCount
			
 
				 
			
 
				-def stripData( stripSample ):
			
 
				+def stripData( textSample ):
			
 
				     #Join elements to string if necessary
			
 
				-    stripSample = ''.join(stripSample)
			
 
				-    stripSample = stripSample.replace("\n",' ')
			
 
				-    stripSample = re.sub('  +',' ', stripSample)
			
 
				-    return stripSample
			
 
				+    textSample = ''.join(textSample)
			
 
				+    textSample = textSample.replace("\n",' ')
			
 
				+    textSample = re.sub('  +',' ', textSample)
			
 
				+    pattern = re.compile('[\W ]+')
			
 
				+    toAlphanumeric=pattern.sub(' ', textSample)
			
 
				+    return toAlphanumeric
			
 
				 
			
 
				 def getTextSample():
			
 
				     done = 0
			
@@ -54,19 +57,19 @@ def checkStructureMarkers(textSample):
 
				     lineBreakCount = textSample.count('\n')
			
 
				     return
			
 
				 
			
 
				-def countFunctionWords ( functionWordsSample ):
			
 
				+def countFunctionWords ( textSample ):
			
 
				     functionWords = enchant.request_pwl_dict((localpath+"/wordLists/englishFunctionWords.txt"))
			
 
				-    wordList = functionWordsSample.split(" ")
			
 
				+    wordList = textSample.split(" ")
			
 
				     functionWordCount = 0
			
 
				     for word in wordList:
			
 
				         if functionWords.check(word.strip()):
			
 
				             functionWordCount +=1
			
 
				     return functionWordCount
			
 
				 
			
 
				-def calculateLexicalDensity( lexicalSample ):
			
 
				-    functionWordCount = countFunctionWords( lexicalSample )
			
 
				+def calculateLexicalDensity( textSample ):
			
 
				+    functionWordCount = countFunctionWords( textSample )
			
 
				     print "functionwordcount", functionWordCount
			
 
				-    totalWordCount = countWordsIn( lexicalSample )
			
 
				+    totalWordCount = countWordsIn( textSample )
			
 
				     print "totalwordcount", totalWordCount
			
 
				     rawLexicalDensity = ((totalWordCount-functionWordCount)*100/totalWordCount)
			
 
				     print "rawlexicaldensity", rawLexicalDensity
			
@@ -84,8 +87,13 @@ def howCommonIs ( commonIsSample ):
 
				     print "Finding the use rank of words not yet implimented"
			
 
				     return
			
 
				 
			
 
				-def findCommonMispellings ( commonMisspellingsSample ):
			
 
				-    print "Identifying commonly misspelled words in string not yet implimented"
			
 
				+def findCommonMisspellings ( textSample ):
			
 
				+    commonMisspellings = enchant.request_pwl_dict((localpath+"/wordLists/commonMisspellingsOxford"))
			
 
				+    wordList = textSample.split(" ")
			
 
				+    containsCommonMisspelling = 0
			
 
				+    for word in wordList:
			
 
				+        if commonMisspellings.check(word.strip()):
			
 
				+            print word
			
 
				     return
			
 
				     
			
 
				 print "Idiolectalyzer 0.1\n"
			
--- a/tests/mockdata/withspellingerrors.txt
+++ b/tests/mockdata/withspellingerrors.txt
@@ -0,0 +1,3 @@
 
				+I need to get out agression with an assasination attempt on my chauffer. 
			
 
				+
			
 
				+That might seem bizzare, but mind your own buisness.
			
--- a/tests/test_idiolectalyzer.py
+++ b/tests/test_idiolectalyzer.py
@@ -32,8 +32,15 @@ class testTextAnalysis(unittest.TestCase):
 
				         
			
 
				         lineBreaks = testText.count('\n')
			
 
				         doubleSpaces = len(re.findall('  [^ ]', testText))
			
 
				+        commas = testText.count('\.')
			
 
				         self.assertEqual(lineBreaks,0)
			
 
				         self.assertEqual(doubleSpaces,0)
			
 
				+        self.assertEqual(commas,0)
			
 
				+    
			
 
				+    def testCommonMisspellings(self):
			
 
				+        testTextFile="mockdata/withspellingerrors.txt"
			
 
				+        testText = readTestText(testTextFile)
			
 
				+        idiolectalyzer.findCommonMisspellings(testText)
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				     unittest.main()
Autor	SHA1 Mensagem	Data
Laura Stewart	6143e13ebf Test additional data stripping	8 anos atrás
Laura Stewart	7e81152a3d Data file with common spelling errors.	8 anos atrás
Laura Stewart	fc87657d9b Refactoring unnecessarily complicated names	8 anos atrás