人工智能已逐渐走入日常生活:从对联生成器到古诗生成器,从智能翻译到机器写作。有时候看UC头条的新闻,会发现很多逻辑混乱的文章,它们有可能是机器写作的,也有可能是机器翻译出来的。不管怎么样,文本写作已经成为人工智能的一个重要应用场景。

《Keras深度学习实战》一书中有一个基于《爱丽丝梦游仙境》的文本生成例子。本文不仅仅是原封不动地还原代码,也包括自己对代码的理解和总结。虽然还不成熟,但起码做了思考,希望下个章节能够改进一下算法,或者支持中文写作。

代码示例

  1. # -*- coding: utf-8 -*-

  2. from __future__ import print_function

  3. from keras.layers.recurrent import SimpleRNN

  4. from keras.models import Sequential

  5. from keras.layers import Dense, Activation

  6. import numpy as np

  # Plain-text copy of "Alice's Adventures in Wonderland" obtained from the
  # web. The path is relative, so the file is looked up in the current
  # working directory.
  INPUT_FILE = "alice.txt"

  # Extract and clean the text: every line is stripped and lower-cased, bytes
  # that are not ASCII are dropped, empty lines are skipped, and the remaining
  # lines are joined with single spaces into one long string.
  print("Extracting text from input...")
  lines = []
  # The context manager closes the file even if reading or decoding raises.
  with open(INPUT_FILE, 'rb') as fin:
      for line in fin:
          line = line.strip().lower()
          line = line.decode("ascii", "ignore")
          if not line:
              continue
          lines.append(line)
  text = " ".join(lines)
  print(text)
  # In the author's run the cleaned text was 142773 characters long and
  # started like this:
  # chapter i down the rabbit-hole chapter i down the rabbit-hole alice was beginning to get very tired of sitting by h

  # Build the lookup tables.
  # `chars` is the vocabulary: the set of distinct characters in the text.
  # In the author's run it held 41 characters: a-z, the space, and
  # ; _ ` ' ( ) ? * ! " - , : .
  chars = set(text)
  nb_chars = len(chars)

  # Indices are assigned over a sorted copy of the vocabulary. Iterating the
  # bare set would give an order that can change between interpreter runs
  # (string hashing is randomized per process), so indices learned in one run
  # would not match the next one. With sorting, the 41-character vocabulary
  # above maps ' ' -> 0, '!' -> 1, ... 'a' -> 15, ... 'z' -> 40.
  sorted_chars = sorted(chars)

  # Character -> index, used to one-hot encode inputs and labels.
  char2index = {c: i for i, c in enumerate(sorted_chars)}
  # Index -> character, used to turn a predicted index back into text.
  index2char = dict(enumerate(sorted_chars))

  # Build the input and label text.
  # STEP is the stride, in characters, between consecutive windows (1 here).
  # SEQLEN is the length of each input window (10 here).
  # Assuming an input text "The sky was falling", the first five
  # (input window -> label character) pairs would be:
  #   "The sky wa" -> "s"
  #   "he sky was" -> " "
  #   "e sky was " -> "f"
  #   " sky was f" -> "a"
  #   "sky was fa" -> "l"
  print("Creating input and label text...")
  SEQLEN = 10
  STEP = 1

  # Start offset of every window; the last SEQLEN characters are excluded so
  # that each window still has a following character to use as its label.
  window_starts = range(0, len(text) - SEQLEN, STEP)

  # input_chars[k] is a SEQLEN-character slice of the text and label_chars[k]
  # is the single character right after it, e.g. (from the author's run)
  #   "gging her " -> "s"
  #   "ging her s" -> "h"
  #   "ing her sh" -> "a"
  input_chars = [text[start:start + SEQLEN] for start in window_starts]
  label_chars = [text[start + SEQLEN] for start in window_starts]

  # Vectorize the input and label text.
  # Each row of the RNN input corresponds to one input window built earlier.
  # A window has SEQLEN (10) characters; the vocabulary size is nb_chars (41
  # in the author's run), and every character is represented as a one-hot
  # vector of size nb_chars. One input sample is therefore a
  # (SEQLEN, nb_chars) slice.
  # A label is a single character, so it is encoded the same way: one one-hot
  # vector of size nb_chars per sample.
  print("Vectorizing input and label text...")

  # The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
  # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
  # X has shape (number of windows, SEQLEN, nb_chars) -- (142763, 10, 41) in
  # the author's run -- and starts out all False.
  X = np.zeros((len(input_chars), SEQLEN, nb_chars), dtype=bool)
  # y has shape (number of windows, nb_chars) -- (142763, 41) in the author's
  # run -- and starts out all False.
  y = np.zeros((len(input_chars), nb_chars), dtype=bool)

  # Walk over every input window together with its label character.
  for i, (input_char, label_char) in enumerate(zip(input_chars, label_chars)):
      # Walk over every character of the window and switch on the cell
      # (sample i, position j, index of that character).
      for j, ch in enumerate(input_char):
          X[i, j, char2index[ch]] = 1
      # Switch on the cell (sample i, index of the label character).
      y[i, char2index[label_char]] = 1

  # Build the model.
  # Hyper-parameters. An earlier experiment in this listing used
  # NUM_ITERATIONS = 25 with the same BATCH_SIZE.
  HIDDEN_SIZE = 128
  BATCH_SIZE = 128
  NUM_ITERATIONS = 50
  NUM_EPOCHS_PER_ITERATION = 1
  NUM_PREDS_PER_EPOCH = 100

  model = Sequential([
      # Recurrent layer with an output dimension of HIDDEN_SIZE (128).
      # Author's note: if this value is too small the model lacks the capacity
      # to generate good text and produces long runs of repeated characters or
      # phrases; if it is too large the model has too many parameters and
      # needs much more data to train effectively.
      SimpleRNN(
          HIDDEN_SIZE,
          return_sequences=False,
          input_shape=(SEQLEN, nb_chars),
          unroll=True,
      ),
      # Fully connected layer with nb_chars units: one score per character of
      # the vocabulary.
      Dense(nb_chars),
      # Softmax normalizes the scores into probabilities; the character with
      # the highest probability becomes the predicted character.
      Activation("softmax"),
  ])

  # Categorical cross-entropy is used as the loss because the output is a
  # choice among categories (characters).
  model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

  # Train for NUM_ITERATIONS rounds (50 with the settings in this listing).
  # After every round, generate a short sample so progress can be inspected.
  for iteration in range(NUM_ITERATIONS):
      print("=" * 50)
      print("Iteration #: %d" % (iteration))
      model.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)

      # Test the model: pick one of the training windows at random and use it
      # as the seed text.
      test_idx = np.random.randint(len(input_chars))
      test_chars = input_chars[test_idx]
      # Alternative kept from the original listing: a fixed seed.
      # test_chars = 'alice'

      # Print the randomly chosen seed characters.
      print("Generating from seed: %s" % (test_chars))
      print('result=',test_chars, end="")

      # Generate and print NUM_PREDS_PER_EPOCH (100) characters, one at a
      # time. The loop variables here are deliberately distinct: the original
      # reused `i` for both the generation loop and the encoding loop.
      for _ in range(NUM_PREDS_PER_EPOCH):
          # One-hot encode the current window as a batch of one sample.
          Xtest = np.zeros((1, SEQLEN, nb_chars))
          for pos, ch in enumerate(test_chars):
              Xtest[0, pos, char2index[ch]] = 1
          pred = model.predict(Xtest, verbose=0)[0]
          # argmax always takes the single most probable character.
          ypred = index2char[np.argmax(pred)]
          print(ypred, end="")
          # Move forward: drop the first character and append the prediction.
          test_chars = test_chars[1:] + ypred
      print()

  162. #BATCH_SIZE = 10000

  163. #NUM_ITERATIONS = 100

  164. #test_chars=random char

  165. # he lory, w

  166. # ck turtle, ao ho he oo ho he oo ho he oo ho he oo ho he oo ho he oo ho he oo ho he oo

  167. # ust begun the ao ae ao ae ao ae ao ae ao ae ao ae ao ae ao ae ao ae ao ae ao ae ao ae

  168. # next peep ao the ao the ao the ao the ao the ao the ao the ao the ao the ao the ao the ao the ao

  169. # g. `and ju the ao he ao he ao he ao he ao he ao he ao he ao he ao he ao he ao he ao he

  170. # den you ev the ao he ao ao he ao ao he ao ao he ao ao he ao ao he ao ao he ao ao he ao

  171. # gerly, for the the the the the the the the the the the the the the the the the the the the the the the the the

  172. # the rabbit an the he the the the the the the the the the the the the the the the the the the the the the the

  173. # queer ind the aid the sard the sard the sard the sard the sard the sard the sard the sard the sard the sard

  174. # ; but she said the said the said the said the said the said the said the said the said the said the said the s

  175. # y until it and the sare the sare the sare the sare the sare the sare the sare the sare the sare the sare the s

  176. # wn the lithe sout and the sout the was ing the are the sout the was ing the are the sout the was ing the ar

  177. # and quietling the said the said the said the said the said the said the said the said the said the said the sa

  178. # ers in the wast and the sare the sare the sare the sare the sare the sare the sare the sare the sare the sare

  179. # here, bill the said the sard the sar said the sard the sar said the sard the sar said the sard the sar said t

  180. # tering ove the southe sald the sare the sare the sare the sare the sare the sare the sare the sare the sare th

  181. # said alice sas ing the was in the was in the was in the was in the was in the was in the was in the was in th

  182. # swam near and the sare the sar she cat se the was in the said the said the said the said the said the said th

  183. # `of course for the the said the more the said the more the said the more the said the more the said the more t

  184. # its head the wast ou the the said the wast ou the the said the wast ou the the said the wast ou the the said

  185. # d the moon the said the sallere the said the sallere the said the sallere the said the sallere the said the sa

  186. # d'; and she said the said the said the said the said the said the said the said the said the said the said the

  187. # sure whethe wast and the wast oure and the wast oure and the wast oure and the wast oure and the wast oure and

  188. # to her usus the said the sald the said the sald the said the sald the said the sald the said the sald the said

  189. # n to look the she her alice sad the growe the said the dore the she said the dore the she said the dore the sh

  190. # s the stuped an the coust of at ing tore the mouthe said the more the said the more the said the more the said

  191. # as going of the southe said the more the said the more the said the more the said the more the said the more t

  192. # list of she sound he sere the said the mack to the was in the was in the was in the was in the was in the was

  193. # poor alice sad the coust on the the harded the harded the harded the harded the harded the harded the harded

  194. # ner!' cried an the said the mack ture said the mack ture said the mack ture said the mack ture said the mack t

  195. # be shutting tore the mouthe said the mack turele the was the was the was the was the was the was the was the

  196. # her, about the growe the the grithe the grithe the grithe the grithe the grithe the grithe the grithe the grit

  197. # y over alice sad the d at she sad she don the d at she sad she don the d at she sad she don the d at she sad s

  198. # self, `if the said the donge for and the donge for and the donge for and the donge for and the donge for and t

  199. # me that fire the mast the said the mack turele the said the mack turele the said the mack turele the said the

  200. # ome, there the said the dore the she the donge said the dore the she the donge said the dore the she the donge

  201. # as an unuthe sad the dored so she was so the king the was so the king the was so the king the was so the king

  202. # ' `so they wish as all to the marte the marte the marte the marte the marte the marte the marte the marte the

  203. # ke to piece to the rither all the dong the said the dore the mouthe the said the dore the mouthe the said the

  204. # f i shall the said the harded and alice said the harded and alice said the harded and alice said the harded an

  205. # ell, at and the donge said the donge said the donge said the donge said the donge said the donge said the dong

  206. # e queen! the wast of the harded and the growe the growe the growe the growe the growe the growe the growe the

  207. # reason of the said the mack turele the was ous the was ous the was ous the was ous the was ous the was ous th

  208. # d still whe the har she was the way her alice sad the cares in the harded the har her all the was of the har s

  209. # le. i wond and the was of the said the could and the was of the said the could and the was of the said the cou

  210. # ' said alice said the mouthe said the mouthe said the mouthe said the mouthe said the mouthe said the mouthe s

  211. # th, and it and the wast of the harded and the moust of the harded and the moust of the harded and the moust of

  212. # ny rate, the was of the mous the mous the mous the mous the mous the mous the mous the mous the mous the mous

  213. # cheered, and the growe the growe the growe the growe the growe the growe the growe the growe the growe the gr

  214. # g merrily an the could be the said the dored she had the dored she had the dored she had the dored she had the

  215. # uld go thry har said the cat out the was of the had and the gat has look to the donde say the way har alice sa

  216. # d the grean the could the could the could the could the could the could the could the could the could the coul

  217. # thought alice said the gryphon inge the gring the said the gryphon inge the gring the said the gryphon inge th

  218. # und an oppeat it on the could be the could be the could be the could be the could be the could be the could be

  219. # as in the said to the rabbet it all the was of the said to the rabbet it all the was of the said to the rabbet

  220. # shock of be the said the donge said the donge said the donge said the donge said the donge said the donge said

  221. # phon. `we the there the mast the cate the mouse fore the madd the mack to the rabbet at it and the wast of the

  222. # hness?' `realice and all as it a the har her alice all at it a the har her alice all at it a the har her alice

  223. # nk it was, the came the marte to the dored and the dore the mores in the said to the dored and the dore the mo

  224. # no time she had all the harded and the growe the hat her the harded and the growe the hat her the harded and

  225. # n minutes the was of the said the mack to the rouse she was of the said the mack to the rouse she was of the s

  226. # d ordered. `in the say said to the donge wat on the madde was a ling on the said the mack to the roush she was

  227. # the cook the marth the mart the mad the mack turtle the dond on the donge the gring the was of the dorse the

  228. # ou are all the mouth and the doon the mad the mouse sa done with the king the mouse sa done with the king the

  229. # sidering her alice the mouse said the mack turtle the was of the moust on the was of the moust on the was of t

  230. # e of the donge wat the way so the don't and the doon the way so the don't and the doon the way so the don't an

  231. # thought alice, `in's a down the gat hourd and the gryphon it of chast her was of the harded to her all the wa

  232. # it tricksen the gat hourden the was of the could be the cat out the was of the could be the cat out the was o

  233. # read-knifer the said the dores it was the dores it was the dores it was the dores it was the dores it was the

  234. # way up as it as and the say the was erought the harte for the said the dore the mores it a could her to the d

  235. # `but i mustle to the dorse the cat of the sare the could and the dorse the cat of the sare the could and the d

  236. # tle girl of the soof hing to see the hard the could be the could be the could be the could be the could be the

  237. # ed at it grown the was of the said the doon the marther said the doon the marther said the doon the marther sa

  238. # rot away quite and all as and the catere the could be the catere the could be the catere the could be the cate

  239. # vered his mouse be the say the way or the could and the soof anowe to the rouse said to herself, `i don't the

  240. # t by the erought the harter the har she sad the dore the could bet mo the har her the har she sad the dore the

  241. # by producing the dorse for the was of the sare the dorse for the was of the sare the dorse for the was of the

  242. # at the nuther a done said the dock turtle the harded the harded the harded the harded the harded the harded t

  243. # nother snat it all the catt at the say said the mack to the rabbet mast the cat of the could and the say said

  244. # trial's be the say the was of the mores it said the dorme the har she was not leat do she her for the was the

  245. # d see, whe the king to the dore the could be the catt reat her the harde for the mack to the dore the could be

  246. # nocking,' said the could be the said the could be the said the could be the said the could be the said the cou

  247. # ok his head the could at and the say the was the was the was the was the was the was the was the was the was t

  248. # ll you, you know she last down the say so for and the say so for and the say so for and the say so for and the

  249. # their curly had for the harded to the dorme the harded to the dorme the harded to the dorme the harded to the

  250. # l whether said to herself to the dore of the marth urse the marte said to herself to the dore of the marth urs

  251. # y wrote do goon the mouse betone of the could be the cat of and the king to the king to the king to the king t

  252. # nt to stay she was not mean she said the gryphon the say she was not mean she said the gryphon the say she was

  253. # more puzzled the mouse so gat in a the said the mouse so gat in a the said the mouse so gat in a the said the

  254. # d better not of the some to the say she was a look an the could all was the harter and her alice the gat here

  255. # low with it the dormouse the could be the said the dormouse the could be the said the dormouse the could be th

  256. # ily on slack of the mouse say the gat it and the dores it was the door as the could an the came the marth a do

  257. # fter them!' said the cat of the har she said the cat of the har she said the cat of the har she said the cat o

  258. # nd she, oh the the could all whing to she cat of chart which was the could all whing to she cat of chart which

  259. # t, being mo ding the mouse of the mouse of the mouse of the mouse of the mouse of the mouse of the mouse of th

  260. # much conter of the soof a atile the parce to the dormouse the could at the could at the could at the could at

  261. # ing up in a long the mares it was the was the mack turtle the sabe to the mares it was the was the mack turtle

  262. # y it ran and whing to she said the mouse beto doon the marth a dong on the sare the could be the say the way t

  263. # with one extly soon the marth a mone that it was the mart the marth a mone that it was the mart the marth a mo

  264. # han't! you know the mouse sad the was the growh her her had for the harde was the growh her her had for the ha

  265. #BATCH_SIZE = 128

  266. #NUM_ITERATIONS = 50

  267. #test_chars=random char

  268. #id alice; `ind he wall the wall the wall the wall the wall the wall the wall the wall the wall the wall the wa

  269. #at all. `be the was the was the was the was the was the was the was the was the was the was the was the was th

  270. # on each so the rould all the tore the rould all the tore the rould all the tore the rould all the tore the ro

  271. #id so, and was in the had her said the mouthe to the say her same the mouthe to the say her same the mouthe to

  272. #hapter v and it a a mance said the gryphon in a and the sait the mack the gryphon in a and the sait the mack t

  273. #ant to go see the mock turtle sear the rabbed and the satter a marce the was she was not in the was she was no

  274. #the hatter the read the gryphon said the gryphon said the gryphon said the gryphon said the gryphon said the g

  275. #t, jumping the dore with the gryphon a little so mack to the gryphon a little so mack to the gryphon a little

  276. #, that alice a don't be a little she was nother the rabbit was the ery, and the mouse was she said the caterpi

  277. #ously replied in a little and the sore of the sore of the sore of the sore of the sore of the sore of the sore

  278. #ked, with her head a soup of a little got the dormouse for a little got the dormouse for a little got the dorm

  279. #could, if a to har she said the mock turtle said to the gryphon, and the mock turtle said to the gryphon, and

  280. #ad said the mors the dormouse the mors the dormouse the mors the dormouse the mors the dormouse the mors the d

  281. #ossed the rabbit haster a with the rabbit haster a with the rabbit haster a with the rabbit haster a with the

  282. #seven jogg not in a was not in a was not in a was not in a was not in a was not in a was not in a was not in a

  283. #o my boy, and the mock turtle some of the errout for the mock turtle some of the errout for the mock turtle so

  284. #l, it must be a mame the hatter was a look a marter the rabbit and the mouse the mouse the mouse the mouse the

  285. #ed alice. `it's a little she was the read the round. `they were the erreat of the words all reading her all th

  286. #t about it was not in the rabbit was the rabbit was the rabbit was the rabbit was the rabbit was the rabbit wa

  287. #their face in the look the looked at the mock turtle the last the tone, and the dormouse the dormouse the dorm

  288. # `i know what it was a great one on the way in a very the jury and the white rabbit and said to the plose to t

  289. # direction in the dormouse don't be a little be the cat on the court to be a marking that the dormouse don't b

  290. #d of mine--on it must be off the right in a very some of the dormouse the mock turtle in a little she went on

  291. #g,' said the caterpillar the caterpillar the caterpillar the caterpillar the caterpillar the caterpillar the c

  292. #te much accuritule the way the wase that she was no door of the hatter with the hatter with the hatter with th

  293. # like it, and she was to see of the long and alice a can in a little shrilking about the sout very good to the

  294. #nt,' and she seant which she her fand him all the rabbit here all the stood here with a don't be a marked and

  295. #into the cook and the words with the rabbit alice to the room, and they were the mock turtle and the words wit

  296. #deal frightened to her hands, and was not a mant with the white rabbit some the hatter with the white rabbit s

  297. # memory, and she said to herself in a mort to be a little sharing down and great such a can't the room, and sh

  298. # the duchess were like the poor as the door as the door as the door as the door as the door as the door as the

  299. #with. alice thought the dormouse the door the rabbit was of the table. `now it was a little that the rabbit wa

  300. #ls in its down to herself to her hands and was very croquetting in the wase that her eyes a little got to the

  301. #tone. `praided to here and the words with the gryphon, and the words with the gryphon, and the words with the

  302. #nly know her lifting a tone on the end of the tires in the right all the right all the right all the right all

  303. #y it too: she was not as she said to herself, `i don't be a little sharppean wall be trees would be a minute o

  304. #le for it, i don't be a minute on the door as mouth a down the commoute of the tame off and the cook again, an

  305. # very sorrout for a great had the rabbit has hearts in a large all have had not a large all have had not a lar

  306. #ds; `so not to the door a looking and shouted the door a looking and shouted the door a looking and shouted th

  307. #igently to go and the gryphon ander with the gryphon ander with the gryphon ander with the gryphon ander with

  308. #rink me' beauting of the sort as she car and the white rabbit had been that she had not the bolkn't be so mark

  309. #know all the king out of the stalles were said to herself, `on the story, and she had for the hatter, and the

  310. #get into the words of the mouse they were all round a dear face and the caterpillar. `i'm to the roots and she

  311. #se?' said the door as she spoke that the looked at the things it was a door what you have to the tame the door

  312. #ar off to the things it was a little shrough of the were to the right and she seaning the white rabbit asked i

  313. #ed very poor she little got to the first to be a marted to the footman the mouse to the mock turtle said, `it

  314. # anything the gryphon with a she was sore and she was sore and she was sore and she was sore and she was sore

  315. #y, it fillow the king, and the white rabbit had been to herself, and the white rabbit had been to herself, and

  316. # herself had and the dormouse in a very curious to see hor she was some of the bolkn't me sire of the gryphon,

  317. #nches high it was the dormouse in the mock turtle in a low the least to the mock turtle in a low the least to

关于深度学习系列笔记二(爱丽丝梦游仙境)_字符串