#!/usr/bin/python """ Lexicon: A class for dealing with NIPyLL lexicons. Lexicons can be created, primed from file, loaded from and saved to file. Sentences can be added. Random sentences can be generated according to the follow rules recorded by the lexicon. Depends on NIPyLL Sentence class. Includes self-test. You are free to use, modify or distribute this program under the terms of the GNU General Public License: http://www.gnu.org/copyleft/gpl.html Author: Chris Reece See also: http://www.jessies.org/~car/projects/nipyll/ """ import random import re import Sentence class Lexicon: fileName = '' followSets = {'_start': ['hello'], 'hello': ['_end']} followSetsRE = re.compile('^\s*followSets\s*=\s*{') primed = False sentenceDelimiterRE = re.compile ('[.!?]+') def __init__(self, fileName = None): self.fileName = fileName self.followSets = Lexicon.followSets self.primed = False self.prime(fileName) def __del__(self): self.save() def __str__(self): return str(self.followSets) def __repr__(self): return 'followSets = ' + str(self.followSets) def load(self, fileName = None): if fileName == None: fileName = self.fileName if not fileName == None: for line in open(fileName, 'r'): if Lexicon.followSetsRE.match(line): exec line in self.__dict__ break else: raise IOError def prime(self, fileName): if fileName == None: return if not self.primed: try: self.load(fileName) except IOError: pass else: self.primed = True def save(self, fileName = None): if fileName == None: fileName = self.fileName if not fileName == None: outFile = open(fileName, 'w') outFile.write(repr(self) + '\n') outFile.close() def talk(self): sentence = Sentence.Sentence() lastWord = '_start' while True: # mend broken data structure on the fly if not lastWord in self.followSets: self.followSets[lastWord] = ['_end'] nextWord = random.choice(self.followSets[lastWord]) if nextWord == '_end': break sentence += nextWord lastWord = nextWord return sentence def listen(self, input): for text in Lexicon.sentenceDelimiterRE.split(input): sentence = Sentence.Sentence(text) lastWord = '_start' for nextWord in sentence: self.followSets[lastWord] = [nextWord] + self.followSets.get(lastWord, []) lastWord = nextWord # ignore the degenerate "no input" case if not lastWord == '_start': self.followSets[lastWord] = ['_end'] + self.followSets.get(lastWord, []) random.seed() if __name__ == '__main__': import os testFile = 'test.niplex' primeFile = 'parrot.niplex' for i in xrange(3): print 'SELF-TEST LOOP', i l = Lexicon(testFile) print '__class__\n\t', print l.__class__ print '__str__\n\t', print str(l) print '__repr__\n\t', print repr(l) print 'listen()\n\t', l.listen('This is a TEST!') print str(l) print 'talk()\n\t', print l.talk() print 'prime()\n\t', l.prime(primeFile) print str(l) del l os.remove(testFile)