Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

""" 

Write a poem based on themes and keywords 

""" 

 

import random 

import sys 

import re 

from nltk import pos_tag, word_tokenize 

from nltk.corpus import wordnet as wn 

from nltk.wsd import lesk 

import hunspell 

import pyphen 

from metaphone import doublemetaphone 

#from pattern import sentiment, parsetree 

from pattern.en import sentiment, parsetree 

from pattern.web import plaintext 

from pattern.web import Twitter, Bing 

#from pattern import search 

from pattern.search import search 

 

def get_vocabulary_from_theme(theme): 

    """ Get vocabulary from a theme, the theme being a single word. 

        Searches Twitter and Bing for metaphor-shaped phrases built around 

        the theme (e.g. "<theme> is like ...") and returns the de-duplicated 

        raw snippets found -- whole lines/phrases, not individual words. """ 

    # Candidate lines harvested from the web; may contain noise and duplicates.
    vocabulary = [] 

    twitter = Twitter(language='en') 

    # Metaphor templates searched verbatim right after the theme word.
    metaphor_patterns = ["is like", "feels like", "is more important than"] 

    for metaphor_source in metaphor_patterns: 

        # Exact-phrase Twitter search, e.g. "bed is like"; cached=False
        # forces a live query each run.
        for tweet in twitter.search('"' + theme + " " + metaphor_source + 

                                    '"', cached=False): 

            # plaintext() strips HTML/entities; then drop "RT" markers.
            # NOTE(review): .encode('ascii', 'ignore') yields bytes on
            # Python 3 -- this file appears to target Python 2 (print
            # statements below); confirm before porting.
            cleaned_tweet = re.sub("RT", "", 

                                   plaintext(tweet.text).encode('ascii', 'ignore')) 

            # Remove @mentions.
            cleaned_tweet = re.sub(r"@\w+", "", cleaned_tweet) 

            vocabulary.append(cleaned_tweet) 

 

        # Same query against Bing web search (first 50 results).
        result = Bing().search(theme + " " + metaphor_source, start=1, count=50) 

        for searchresult in result: 

            s = searchresult.text.lower() 

            s = plaintext(s) 

            s = parsetree(s) 

            # pattern.search pattern: noun phrase, the metaphor connective,
            # noun phrase. NOTE(review): there is no space before the second
            # {NP} -- presumably pattern.search tolerates this; confirm
            # against the pattern-search documentation.
            p = '{NP} ' + metaphor_source + '{NP}' 

            for m in search(p, s): 

                # group(2) is the second {NP} capture: the metaphor target.
                vocabulary.append(m.group(2).string) 

 

    # De-duplicate; ordering is not preserved.
    return list(set(vocabulary)) 

# http://www.clips.ua.ac.be/pages/pattern-search 

#get related vocabulary from theme : Pattern 

 

def get_disambiguation(phrase, word):
    """Return the WordNet gloss of *word* as used in *phrase*.

    Word-sense disambiguation via NLTK's Lesk algorithm
    (see also https://github.com/alvations/pywsd).

    :param phrase: context sentence containing the ambiguous word.
    :param word: the ambiguous word to disambiguate.
    :return: the definition string of the selected synset.
    """
    chosen_sense = lesk(context_sentence=phrase, ambiguous_word=word)
    return chosen_sense.definition()

 

def find_synonyms(word):
    """Collect WordNet synonyms (and similar-to lemmas) for *word*.

    :param word: word to look up.
    :return: de-duplicated list of readable lemma names (underscores
        replaced by spaces); order is unspecified.
    """
    def readable(synset):
        # First component of the synset name, with underscores made spaces.
        return synset.name().split(".")[0].replace('_', ' ')

    collected = []
    for synset in wn.synsets(word):
        collected.append(readable(synset))
        collected.extend(readable(similar) for similar in synset.similar_tos())
    return list(set(collected))

# http://stackoverflow.com/questions/5534926/to-find-synonyms-defintions-and-example-sentences-using-wordnet 

 

def count_syllables(phrase):
    """Estimate the number of syllables in a piece of text.

    Uses Pyphen hyphenation points as a proxy for syllable boundaries,
    so the count is approximate (hyphenation != true syllabification).

    :param phrase: text to count (str); empty input yields 0.
    :return: int syllable estimate.
    """
    # Build the hyphenation dictionary once and cache it on the function:
    # pyphen.Pyphen() reloads its data file on every construction, and this
    # function is called repeatedly from loops elsewhere in this file.
    dic = getattr(count_syllables, "_dic", None)
    if dic is None:
        dic = pyphen.Pyphen(lang='en_US')
        count_syllables._dic = dic
    hyphenated = dic.inserted(phrase)
    # Every fragment between hyphen/space boundaries counts as one syllable.
    return len(re.findall(r"[\w']+", hyphenated))

#from nltk_contrib.readability.textanalyzer import syllables_en 

#print syllables_en.count("potatoes ") 

# http://image.slidesharecdn.com/nltk-110105180423-phpapp02/95/nltk-natural-language-processing-in-python-22-728.jpg?cb=1309726267 

# using PyPhen instead 

 

def words_rhymes(word1, word2):
    """Rough rhyme test: do the words' Double Metaphone codes end alike?

    Only the primary metaphone code of each word is compared (the
    secondary code is ignored, as the original implementation noted), so
    this is a phonetic approximation rather than true rhyme detection.

    :return: True when the primary codes share their final symbol, False
        otherwise -- including when either code is empty (the original
        raised IndexError in that case).
    """
    primary1 = doublemetaphone(word1)[0]
    primary2 = doublemetaphone(word2)[0]
    # Guard: doublemetaphone can return an empty primary code (e.g. for
    # punctuation-only or empty input).
    if not primary1 or not primary2:
        return False
    return primary1[-1] == primary2[-1]

 

def syllables_matching_words_from_list(list_of_words, word):
    """Find words from a list with the same syllable count as *word*.

    :param list_of_words: candidate words/phrases (iterable of str).
    :param word: reference word whose syllable count must be matched.
    :return: list of matching candidates, in input order.
    """
    # Hoist the loop-invariant target count: count_syllables() does a full
    # hyphenation pass per call, so the original recomputed it N times.
    target = count_syllables(word)
    return [candidate for candidate in list_of_words
            if count_syllables(candidate) == target]

 

def rhyming_words_from_list(list_of_words, word):
    """Find words from a list rhyming with a given word.

    Rhyming is decided by the words_rhymes() metaphone heuristic.

    :param list_of_words: candidate words (iterable of str).
    :param word: reference word to rhyme against.
    :return: list of rhyming candidates, in input order.
    """
    return [candidate for candidate in list_of_words
            if words_rhymes(candidate, word)]

 

def spellcheck(text):
    """Spell-check *text* against the system Hunspell en_US dictionary.

    :param text: text to check; words are extracted with the same
        pattern used by count_syllables().
    :return: dict mapping each unrecognised word to Hunspell's
        suggestion list (last occurrence wins for repeated words).
    """
    checker = hunspell.HunSpell('/usr/share/hunspell/en_US.dic', '/usr/share/hunspell/en_US.aff')
    words = re.findall(r"[\w']+", text)
    return {w: checker.suggest(w) for w in words if not checker.spell(w)}

 

def estimate_sentiment(text):
    """Thin wrapper around pattern.en.sentiment().

    For the meaning of the returned values see
    http://www.clips.ua.ac.be/pages/pattern-en#sentiment
    """
    return sentiment(text)

 

def replace_tag(text, tag, replacement):
    """Replace the first word carrying POS tag *tag* in *text*.

    Example: replace_tag("I want to eat an apple.", "NN", "orange")

    :param text: sentence to edit.
    :param tag: Penn Treebank POS tag to look for (e.g. "NN", "NNS").
    :param replacement: word substituted for the first matching token.
    :return: the edited text, or *text* unchanged when no token matches.
    """
    tokens = word_tokenize(text)
    for token, token_tag in pos_tag(tokens):
        if token_tag == tag:
            # re.escape: the token is literal text, not a regex pattern --
            # the original passed it raw and misbehaved on tokens
            # containing metacharacters (e.g. "." or "$").
            # count=1: replace only the first occurrence, matching the
            # documented "replace the first found tag" intent (the
            # original replaced every occurrence of the token).
            return re.sub(re.escape(token), replacement, text, count=1)
    return text

 

def respect_structure(text, structure): 

    """Check that *text* (a list of verse strings) fits *structure*.

    *structure* is a list of (syllable_count, rhyme_index) pairs, one per
    line: syllable_count is the required count for that line, rhyme_index
    is the index of another line this one must rhyme with, or None for
    "no rhyme required".

    :return: True when every constraint holds, else False; the failing
        constraint is printed but not returned (see trailing note).
    """
    # text is not a string but a list of verses 

    if not len(text) == len(structure): 

        print 'failed number of lines' 

        return False 

    for i, line in enumerate(text): 

        # Syllable-count constraint for line i.
        if not count_syllables(line) == structure[i][0]: 

                    # should be able to handle None too when syllable count does not matter 

            print 'failed syllables' 

            print i 

            return False 

        # Rhyme constraint; None disables the check for this line.
        if not structure[i][1] == None: 

            # NOTE(review): words_rhymes() is applied to the WHOLE line,
            # comparing metaphone codes of the full phrases -- presumably
            # the line endings were intended; confirm.
            if not words_rhymes(line, text[structure[i][1]]): 

                                # redundant test at corresponding later line 

                print 'failed rhyming' 

                print i 

                return False 

    return True 

    # poor return value, unable to tell what failed 

 

#should become a grammar respecting function 

def respect_grammar(text, grammar):
    """Check whether the POS-tag sequence of *text* equals *grammar*.

    :param text: sentence to tag (str).
    :param grammar: expected list of POS tags, e.g. ['PRP', 'VBP'].
    :return: True when the observed tag sequence matches exactly.
    """
    # http://image.slidesharecdn.com/nltk-110105180423-phpapp02/95/nltk-natural-language-processing-in-python-22-728.jpg?cb=1309726267
    tagged = pos_tag(word_tokenize(text))
    observed_tags = [pair[-1] for pair in tagged]
    return observed_tags == grammar

 

if __name__ == '__main__': 

    # Extra test words may be passed on the command line; currently
    # collected but never used below.
    testwords = [] 

    for arg in sys.argv[1:]: 

        testwords.append(arg) 

 

    themes = ['Valentines', 'Relationship'] 

    nicknames = ['gingersnap', 'Beau bear'] 

    names = ['Alison', 'Beau'] 

    themes += ['Thirteenth'] 

 

    # NOTE(review): the assignments below silently discard the
    # themes/names/nicknames set just above -- presumably leftover test
    # data; confirm which set is intended.
    themes = ['bed', 'under', 'quest'] 

    names = ['Fabien'] 

    nicknames = ['Miss Moles'] 

 

    # include nicknames as potential synonym for each name 

    print 'generating for :' 

    print 'themes' + str(themes) 

    print 'names' + str(names) + ' aka ' + str(nicknames) 

    print '-------------------------------------------------' 

 

    print "test words from the command line:" 

 

    # Poem structures: name -> list of (syllable_count, rhyme_index) per line.
    structures = {} 

    structures['haiku3'] = [(5, None), (7, None), (5, None)] 

    #structures['rhymetest'] = [(2,1),(2,0)] 

                                # redundant test 

    # structures have : 

    #   metres or syllables 

    #   stanza or lines or verses 

    #   rhyme_form (e.g. current line rhymes with line 3) 

    #               Python indexing, line 1 is in fact index 0 

    # WARNING : better overdefine multiples versions of one structure 

    # than handle strange open cases! 

    #           no free poetry but N different lines with N different rhymes 

    #           kaiku3 haiku5 haiku7 for the different number of verses 

    #           etc 

 

 

    # Harvest web vocabulary once per theme (network I/O; can be slow).
    vocabulary_from_theme = {} 

    for current_theme in themes: 

        vocabulary_from_theme[current_theme] = get_vocabulary_from_theme(current_theme) 

    #print 'vocabulary_from_theme' + str(vocabulary_from_theme) 

 

    poems = [] 

    for structure in structures: 

        poem = [] 

        pickedthemes = [] 

        print structure 

        print "theoretical number of lines = " + str(len(structures[structure])) 

 

        # Total syllable budget for the whole poem.
        syllables = 0 

        for line in structures[structure]: 

            syllables += line[0] 

        print "theoretical number of syllables = " + str(syllables) 

 

        # looping to adjust until 

            # change to synonyms, names and nicknames 

            # rhymes work 

            # syllable count work 

            # splitting in appropriate lines  

        for i in range(1, 30): 

            picked_text = random.choice(vocabulary_from_theme[random.choice(themes)]) 

            # Substitute a name for the first plural noun, then a nickname
            # for the first singular noun.
            picked_text = replace_tag(picked_text, "NNS", random.choice(names)) 

            picked_text = replace_tag(picked_text, "NN", random.choice(nicknames)) 

            # NOTE(review): the inner condition is always true whenever the
            # outer one is (x < syllables implies x < syllables + 3), so the
            # else branch is unreachable -- presumably a comparison was
            # meant the other way around; confirm intent.
            if count_syllables(picked_text) < syllables: 

                if count_syllables(picked_text) < syllables + 3: 

                    picked_text += random.choice(vocabulary_from_theme[random.choice(themes)]) 

                    picked_text = replace_tag(picked_text, "NNS", random.choice(names)) 

                else: 

                    print "should get a synonym for a name or adjective (JJ*)" 

                    print "but not implemented yet" 

                    exit() 

 

            # cut in 5 7 5 

            #print picked_text 

            dic = pyphen.Pyphen(lang='en_US') 

            sentence = dic.inserted(picked_text) 

            #print sentence 

            # Hyphenation fragments used as a syllable stream to slice the
            # text into lines of the structure's syllable counts.
            allsyllables = re.findall(r"[\w']+", sentence) 

            #print allsyllables 

            #exit() 

 

            poem_by_line = [] 

            for line in structures[structure]: 

                syllables_added = 0 

                newline = "" 

                # NOTE(review): <= takes one syllable more than line[0];
                # and if allsyllables is empty on entry, ending_syllable
                # below is unbound (NameError) -- confirm.
                while syllables_added <= line[0] and len(allsyllables) > 0: 

                    #print allsyllables 

                    ending_syllable = allsyllables.pop(0) 

                    newline += ending_syllable  + " " 

                    syllables_added += 1 

                # NOTE(review): ending_syllable+"?" makes the last CHARACTER
                # of the syllable optional in the regex -- presumably not
                # intended; the author flags this as "not working well".
                poem_cut = re.sub(ending_syllable+"?", ending_syllable+"\n", 

                                  picked_text) 

                poem_by_line.append(newline) 

            #print picked_text 

            print poem_by_line 

            #print poem_cut  

            #not working well so far 

        # NOTE(review): this exit() makes everything below unreachable;
        # the remaining loop looks like an older line-by-line approach.
        exit() 

 

        for line_number in range(0, len(structures[structure])): 

            #should pop themes to insure diversity 

            picked_text = random.choice(vocabulary_from_theme[random.choice(themes)]) 

            if line_number > 0: 

                if not structures[structure][line_number][-1] == None: 

                    print "should check if it rhymes" 

            #should pop names and nicknames to insure diversity 

            picked_text = replace_tag(picked_text, "NNS", random.choice(names)) 

            if not structures[structure][line_number][0] == None: 

                print "syllables ok: " + str(count_syllables(picked_text) == structures[structure][line_number][0]) 

            poem.append(picked_text) 

        for line in poem: 

            print line 

 

    #https://github.com/lekhakpadmanabh/Summarizer 

    #summarize a text that is too long 

 

    #Named Entity Recognition (NEs) in http://www.nltk.org/book/ch07.html