diff --git a/Makefile b/Makefile new file mode 100755 index 0000000..65605d3 --- /dev/null +++ b/Makefile @@ -0,0 +1,19 @@ +#Makefile + +EXECUTABLE := __init__ + +SOURCES := *.py + +EXT := py +CC := python + +0: + $(CC) $(SOURCES) + $(CC) $(EXECUTABLE).$(EXT) 0 + +1: + $(CC) $(SOURCES) + $(CC) $(EXECUTABLE).$(EXT) 1 + + +# this line required by make - don't delete diff --git a/README.md b/README.md index a002203..00873f8 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,24 @@ -# ReadMe-bot - -a bot to check github readme for spelling and grammer errors and create a pull request with fixes and details. +# ReadMe-bot +a bot to check github readme for spelling and grammar errors and create a pull request with fixes and details. + + +Spell-Check +=========== + +Spell Checker in Python + +Use +---- +Cloning and Running Program +cd Spell-Check +make 0 or make 1 + +Removing .pyc files if needed +
make realclean
+
+Note: When using word-generated mistakes, recurring words or letters may appear. This is because random numbers aren't always completely random when generated repeatedly.
+
+
+# Contributors
+Manas-kashyap
+Xeon-xolt
\ No newline at end of file
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..f0601de
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,11 @@
+from spellcheck import *
+import sys
+
+def main():  # Script entry point: build a SpellCheck over the system word list and run the requested mode.
+ spellchk = SpellCheck('/usr/share/dict/words')  # dictionary path is hard-coded
+ spellchk.run(sys.argv[1])  # mode is the first CLI argument (run() compares it to '0' and '1'); NOTE(review): raises IndexError when no argument is given
+
+
+
+if __name__ == "__main__":  # only start the checker when executed as a script
+ main()
\ No newline at end of file
diff --git a/misspell.py b/misspell.py
new file mode 100644
index 0000000..e195e76
--- /dev/null
+++ b/misspell.py
@@ -0,0 +1,49 @@
+import re, random
+class Misspell:  # Produces misspelled variants of words drawn from a supplied word list.
+ # Takes the list of words to misspell; genWord() indexes into it by position.
+ def __init__(self, wordList):
+ self.wList = wordList
+
+ def genWord(self):  # Return a misspelling of one randomly chosen entry of self.wList.
+ return self.misspelled(self.wList[random.randint(0,len(self.wList)-1)])  # randint includes both endpoints, hence len-1
+
+ def misspelled(self, word):  # Return a misspelled variant of `word`, built character by character with random edits.
+ if len(word) == 1:  # single-character words are returned unchanged; NOTE(review): an empty string would fail at word[0] below
+ return word
+ vowels = 'aeiouy'  # pool used for vowel substitution (rNum == 3)
+ consonants = 'bcdfghjklmnpqrstvwxyz'  # NOTE(review): not referenced anywhere else in this method
+ if len(word) < 9:  # the starting value of `mistakes` grows with word length: 1, 2, 3 or 4
+ mistakes = 1
+ elif len(word) < 12:
+ mistakes = 2
+ elif len(word) < 17:
+ mistakes = 3
+ else:
+ mistakes = 4
+ newWord = word[0]  # output starts as the first character
+ prev = word[0]  # previously visited source character, used by the swap case
+ for i in word[1:]:
+ if mistakes != 0:
+ rNum = random.randint(1,10)  # 2, 3 and 4 select an edit; every other value copies the character
+ else:
+ rNum = 5  # counter at zero: force the plain-copy branch
+ if rNum == 2:
+ newWord = newWord[:len(newWord)-2] + i + prev  # emit current char before the previous one; NOTE(review): slicing at len-2 removes two chars before appending two, so a char is lost once newWord is longer than 1 - confirm whether len-1 was intended
+ elif rNum == 3:
+ if i in vowels:
+ c = vowels[random.randint(0, len(vowels)-1)]
+ while i == c:  # re-draw until the replacement differs from the original vowel
+ c = vowels[random.randint(0, len(vowels)-1)]
+ else:
+ c = i  # characters not in `vowels` are copied unchanged
+ newWord += c
+ elif rNum == 4:
+ newWord += i + i  # double the character
+ else:
+ newWord += i  # plain copy
+ prev = i  # NOTE(review): indentation is flattened in this diff, so the nesting of this line and the next cannot be confirmed from here
+ mistakes -= 1
+ if newWord == word:  # result identical to the input
+ newWord += prev  # append `prev` so the returned string differs from `word`
+ return newWord
+
diff --git a/misspell.pyc b/misspell.pyc
new file mode 100644
index 0000000..ed8cf0f
Binary files /dev/null and b/misspell.pyc differ
diff --git a/spellcheck.py b/spellcheck.py
new file mode 100755
index 0000000..8f21c86
--- /dev/null
+++ b/spellcheck.py
@@ -0,0 +1,73 @@
+import re, collections, sys
+from misspell import Misspell
+class SpellCheck:
+
+ alphabet = 'abcdefghijklmnopqrstuvwxyz'
+
+ def __init__(self, path):  # Remember the dictionary file path; nothing is opened here.
+ self.dictPath = path  # read later by run()
+
+ def words(self, text):  # Return every maximal run of letters a-z in `text`, lower-cased, in order of appearance.
+ return re.findall('[a-z]+', text.lower())  # anything outside a-z acts as a separator
+
+ def train(self, words):  # Build occurrence counts grouped by first letter: {letter: {word: count}}.
+ occurences = {}
+ for l in self.alphabet:  # one bucket per letter of the class-level alphabet
+ occurences[l] = collections.defaultdict(lambda: 1)  # unseen words default to 1, so a word seen once is stored as 2
+ for w in words:
+ occurences[w[0]][w] += 1  # bump the count of w in its first-letter bucket; KeyError if w[0] is not in self.alphabet
+ return occurences
+
+ def edits1(self, word):  # Return the set of strings one edit (delete, transpose, replace, insert) away from `word`.
+ splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]  # every (prefix, suffix) cut, including the empty ends
+ deletes = [a + b[1:] for a, b in splits if b]  # drop one character
+ transposes = [a + b[1] + b[0] + b[2:] for a, b in splits if len(b)>1]  # swap two adjacent characters
+ replaces = [a + c + b[1:] for a, b in splits for c in self.alphabet if b]  # substitute one character with each alphabet letter (may reproduce `word` itself)
+ inserts = [a + c + b for a, b in splits for c in self.alphabet]  # insert each alphabet letter at every position
+ return set(deletes + transposes + replaces + inserts)  # set() removes duplicates
+
+ def known_edits2(self, word, wDict):  # Return the strings two edits away from `word` that are present in wDict.
+ return set(e2 for e1 in self.edits1(word) for e2 in self.edits1(e1) if e2 in wDict)  # applies edits1 to every edits1 result and keeps only members of wDict
+
+ def known(self, word, wDict):  # Despite the singular name, `word` is an iterable of candidate strings; returns the set of those present in wDict.
+ return set(w for w in word if w in wDict)  # membership test only; counts are not consulted
+
+ def correct(self, word, wDict):
+ candidates = self.known([word], wDict[word[0]]) or self.known(self.edits1(word), wDict[word[0]]) or self.known_edits2(word, wDict[word[0]]) or [word]
+ return max(candidates, key=wDict.get) # returning the element of the set with the highest probability of being the correct word
+
+
+
+ def run(self, option):
+ lWords = self.words(file(self.dictPath).read())
+ try:
+ if option == '0':
+ lWords = self.train(lWords)
+ while True:
+ word = raw_input('>')
+ if not word.isalpha():
+ continue
+ spellchk = self.correct(word.lower(), lWords)
+ if spellchk == word and spellchk not in lWords[word[0]]:
+ print 'NO SUGGESTION'
+ else:
+ print spellchk
+ print #'\n'
+ elif option == '1':
+ misspell = Misspell(lWords)
+ lWords = self.train(lWords)
+ while True:
+ word = misspell.genWord()
+ print 'Incorrect -', word
+ spellchk = self.correct(word, lWords)
+ if spellchk == word and spellchk not in lWords[word[0]]:
+ print 'NO SUGGESTION'
+ else:
+ print 'Correct -',spellchk
+ print #'\n'
+ raw_input('