diff --git a/Makefile b/Makefile new file mode 100755 index 0000000..65605d3 --- /dev/null +++ b/Makefile @@ -0,0 +1,19 @@ +#Makefile + +EXECUTABLE := __init__ + +SOURCES := *.py + +EXT := py +CC := python + +0: + $(CC) $(SOURCES) + $(CC) $(EXECUTABLE).$(EXT) 0 + +1: + $(CC) $(SOURCES) + $(CC) $(EXECUTABLE).$(EXT) 1 + + +# this line required by make - don't delete diff --git a/README.md b/README.md index a002203..00873f8 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,24 @@ -# ReadMe-bot - -a bot to check github readme for spelling and grammer errors and create a pull request with fixes and details. +# ReadMe-bot +a bot to check github readme for spelling and grammar errors and create a pull request with fixes and details. + + +Spell-Check +=========== + +Spell Checker in Python + +Use +---- +Cloning and Running Program +cd Spell-Check +make 0 or make 1 + +Removing .pyc files if needed +
make realclean
"""Spell-Check: a dictionary-based spelling corrector and misspelling generator.

Contents:

* ``main``       - command-line entry point; the first argument selects the
                   mode (``0`` = interactive prompt, ``1`` = self-test demo).
* ``Misspell``   - turns dictionary words into plausible misspellings.
* ``SpellCheck`` - suggests the most frequent known word within two edits.

Note (from the project README): when generated mistakes are used, recurring
words or letters may appear, because successive pseudo-random draws can
repeat.

Contributors: Manas-kashyap, Xeon-xolt
"""
import collections
import io
import random
import re
import sys

try:  # Python 2 spells the line-reading builtin ``raw_input``.
    _read_line = raw_input
except NameError:  # Python 3
    _read_line = input


def main():
    """Run the spell checker in the mode given as the first CLI argument.

    Returns:
        0 after the selected mode finishes, or 2 when no mode was supplied
        (a usage line is written to stderr instead of raising IndexError).
    """
    if len(sys.argv) < 2:
        sys.stderr.write('usage: python __init__.py 0|1\n')
        return 2
    spellchk = SpellCheck('/usr/share/dict/words')
    spellchk.run(sys.argv[1])
    return 0


class Misspell(object):
    """Generate misspelled variants of words taken from a word list."""

    VOWELS = 'aeiouy'

    def __init__(self, wordList, rng=None):
        """Store the words to misspell.

        Args:
            wordList: sequence of words to draw from.
            rng: optional object offering ``randint`` and ``choice`` (for
                example ``random.Random(seed)``) for reproducible output;
                defaults to the ``random`` module.
        """
        self.wList = wordList
        self._rng = random if rng is None else rng

    def genWord(self):
        """Return a misspelling of a randomly chosen word from the list."""
        return self.misspelled(self._rng.choice(self.wList))

    @staticmethod
    def _mistake_budget(length):
        """Return how many mistakes a word of *length* letters may receive."""
        if length < 9:
            return 1
        if length < 12:
            return 2
        if length < 17:
            return 3
        return 4

    def misspelled(self, word):
        """Return *word* with a few random spelling mistakes.

        Each letter after the first has a 3-in-10 chance of being transposed
        with its predecessor, replaced by another vowel (vowels only), or
        doubled, until the length-based mistake budget is used up.  Words of
        length 0 or 1 are returned unchanged; any longer word is guaranteed
        to differ from the input.
        """
        if len(word) <= 1:
            return word

        budget = self._mistake_budget(len(word))
        rng = self._rng
        result = word[0]
        prev = word[0]
        for letter in word[1:]:
            # Stop rolling once the budget is spent so the remaining letters
            # are copied verbatim.
            roll = rng.randint(1, 10) if budget > 0 else None
            if roll == 2:
                # Transpose: swap only the last emitted letter with the
                # current one; everything emitted before it is kept.
                result = result[:-1] + letter + prev
                if letter != prev:  # swapping equal letters changes nothing
                    budget -= 1
            elif roll == 3 and letter in self.VOWELS:
                others = [v for v in self.VOWELS if v != letter]
                result += rng.choice(others)
                budget -= 1
            elif roll == 4:
                result += letter + letter
                budget -= 1
            else:
                result += letter
            prev = letter

        if result == word:
            # No effective mistake was made: double the final letter.
            result += prev
        return result


class SpellCheck(object):
    """Suggest corrections for words using a frequency-ranked dictionary."""

    alphabet = 'abcdefghijklmnopqrstuvwxyz'

    def __init__(self, path):
        """Remember *path*, the text file that ``run`` reads as dictionary."""
        self.dictPath = path

    def words(self, text):
        """Return every run of ASCII letters in *text*, lower-cased."""
        return re.findall('[a-z]+', text.lower())

    @staticmethod
    def _new_bucket():
        """Return a counter whose unseen words start at 1 (smoothing)."""
        return collections.defaultdict(lambda: 1)

    def train(self, words):
        """Count word occurrences, grouped by first letter.

        Returns:
            dict mapping each first letter to a ``defaultdict`` of
            ``word -> count``.  Counts start from 1, so a word seen *n*
            times is stored as ``n + 1``.  Look words up with ``in`` or
            ``.get``; indexing a missing word would insert it.
        """
        occurences = dict((l, self._new_bucket()) for l in self.alphabet)
        for w in words:
            if not w:
                continue
            if w[0] not in occurences:
                occurences[w[0]] = self._new_bucket()
            occurences[w[0]][w] += 1
        return occurences

    def edits1(self, word):
        """Return the set of strings one edit away from *word*.

        An edit is a single deletion, adjacent transposition, replacement
        or insertion using the letters of ``alphabet``.
        """
        splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
        deletes = [a + b[1:] for a, b in splits if b]
        transposes = [a + b[1] + b[0] + b[2:] for a, b in splits if len(b) > 1]
        replaces = [a + c + b[1:] for a, b in splits if b for c in self.alphabet]
        inserts = [a + c + b for a, b in splits for c in self.alphabet]
        return set(deletes + transposes + replaces + inserts)

    def known_edits2(self, word, wDict):
        """Return the strings two edits away from *word* found in *wDict*."""
        return set(e2 for e1 in self.edits1(word)
                   for e2 in self.edits1(e1) if e2 in wDict)

    def known(self, word, wDict):
        """Return the subset of the iterable *word* that is in *wDict*."""
        return set(w for w in word if w in wDict)

    def correct(self, word, wDict):
        """Return the most likely correction of *word*.

        Args:
            word: lower-case word to correct.
            wDict: the mapping produced by ``train``.

        Candidates are tried in order: the word itself, known words one edit
        away, known words two edits away, and finally the word unchanged.
        Only the bucket for the word's first letter is searched, so a mistake
        in the first letter cannot be corrected.  Among candidates, the one
        with the highest count wins; ties resolve alphabetically so the
        result is deterministic.
        """
        if not word:
            return word
        # .get keeps words with an unexpected first character from raising.
        bucket = wDict.get(word[0], {})
        candidates = (self.known([word], bucket)
                      or self.known(self.edits1(word), bucket)
                      or self.known_edits2(word, bucket)
                      or [word])
        # Rank by the count stored in the bucket (the outer dict is keyed by
        # letters, so it cannot be used as the ranking key).
        return max(sorted(candidates), key=lambda w: bucket.get(w, 0))

    def _suggestion(self, word, wDict):
        """Return the correction for lower-case *word*, or None if none."""
        best = self.correct(word, wDict)
        if best == word and best not in wDict.get(word[:1], {}):
            return None
        return best

    def _interactive(self, model):
        """Prompt for words forever, printing a suggestion for each."""
        while True:
            word = _read_line('>')
            if not word.isalpha():
                continue
            suggestion = self._suggestion(word.lower(), model)
            print('NO SUGGESTION' if suggestion is None else suggestion)
            print('')

    def _demo(self, misspell, model):
        """Generate misspellings forever and show the correction for each."""
        while True:
            word = misspell.genWord()
            print('Incorrect - %s' % word)
            suggestion = self._suggestion(word, model)
            if suggestion is None:
                print('NO SUGGESTION')
            else:
                print('Correct - %s' % suggestion)
            print('')
            _read_line('\n')  # wait for Enter before the next word

    def run(self, option):
        """Load the dictionary and run the selected mode until interrupted.

        Args:
            option: ``'0'`` for the interactive prompt, ``'1'`` for the
                generated-misspelling demo.  Any other value writes a
                message to stderr and returns.

        Ctrl-C or end-of-input ends either mode quietly.
        """
        if option not in ('0', '1'):
            sys.stderr.write("unknown option %r; expected '0' or '1'\n"
                             % (option,))
            return
        # Undecodable bytes are replaced rather than aborting the load; the
        # letter regex in words() discards them anyway.
        with io.open(self.dictPath, encoding='utf-8',
                     errors='replace') as handle:
            lWords = self.words(handle.read())
        model = self.train(lWords)
        try:
            if option == '0':
                self._interactive(model)
            else:
                self._demo(Misspell(lWords), model)
        except (KeyboardInterrupt, EOFError):
            pass  # the loops never end on their own; this is the normal exit


if __name__ == "__main__":
    sys.exit(main())