Neural Networks and Deep Learning (邱锡鹏), Programming Exercise 6: RNN Addition-with-Carry Experiment (Jupyter export)

Addition-with-Carry Experiment

This is a fill-in-the-blank exercise; the code to fill in is:
def call(self, num1, num2):
    num1_emb = self.embed_layer(num1)  # shape (b_sz, len, emb_sz)
    num2_emb = self.embed_layer(num2)  # shape (b_sz, len, emb_sz)
    inp_emb = tf.concat([num1_emb, num2_emb], axis=-1)
    rnn_out = self.rnn_layer(inp_emb)
    logits = self.dense(rnn_out)
    return logits

import numpy as np
import tensorflow as tf
import collections
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets
import os,sys,tqdm

Data generation

We randomly sample integer pairs (num1, num2) from the interval start → end and use the sum num1 + num2 as the supervision signal.

  • First, convert each number into a list of its digits with convertNum2Digits
  • Reverse the digit lists, so the least significant digit comes first and the RNN can propagate the carry forward as it reads the sequence
  • Pad the digit lists to a common length with pad2len (a worked example of the whole pipeline follows the helper functions below)
def gen_data_batch(batch_size, start, end):
    '''Sample one batch of integer pairs from the interval (start, end).
    Args:
        batch_size: number of samples in the batch
        start: lower bound of the sampling range
        end: upper bound of the sampling range
    '''
    numbers_1 = np.random.randint(start, end, batch_size)
    numbers_2 = np.random.randint(start, end, batch_size)
    results = numbers_1 + numbers_2
    return numbers_1, numbers_2, results

def convertNum2Digits(Num):
    '''Convert an integer into a list of its digits, e.g. 133412 ==> [1, 3, 3, 4, 1, 2]
    '''
    strNum = str(Num)
    digitNums = [int(o) for o in strNum]
    return digitNums

def convertDigits2Num(Digits):
    '''Convert a list of digits back into an integer, e.g. [1, 3, 3, 4, 1, 2] ==> 133412
    '''
    digitStrs = [str(o) for o in Digits]
    numStr = ''.join(digitStrs)
    Num = int(numStr)
    return Num

def pad2len(lst, length, pad=0):
    '''Pad a list with `pad` up to length `length`, e.g. pad2len([1, 3, 2, 3], 6, pad=0) ==> [1, 3, 2, 3, 0, 0]
    '''
    lst+=[pad]*(length - len(lst))
    return lst

def results_converter(res_lst):
    '''Convert a batch of predicted digit lists back into the original integers.
    Args:
        res_lst: shape(b_sz, len(digits))
    '''
    res = [reversed(digits) for digits in res_lst]
    return [convertDigits2Num(digits) for digits in res]

def prepare_batch(Nums1, Nums2, results, maxlen):
    '''Prepare one batch: convert the integers into reversed digit lists and pad them to a fixed length.
    Args:
        Nums1: shape(batch_size,)
        Nums2: shape(batch_size,)
        results: shape(batch_size,)
        maxlen: type(int)
    Returns:
        Nums1: shape(batch_size, maxlen)
        Nums2: shape(batch_size, maxlen)
        results: shape(batch_size, maxlen)
    '''
    Nums1 = [convertNum2Digits(o) for o in Nums1]
    Nums2 = [convertNum2Digits(o) for o in Nums2]
    results = [convertNum2Digits(o) for o in results]
    
    Nums1 = [list(reversed(o)) for o in Nums1]
    Nums2 = [list(reversed(o)) for o in Nums2]
    results = [list(reversed(o)) for o in results]
    
    Nums1 = [pad2len(o, maxlen) for o in Nums1]
    Nums2 = [pad2len(o, maxlen) for o in Nums2]
    results = [pad2len(o, maxlen) for o in results]
    
    return Nums1, Nums2, results
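
To make the preprocessing concrete, here is a small worked example (illustrative only; the numbers are arbitrary) showing how 85 + 17 = 102 is encoded:

n1, n2, res = prepare_batch([85], [17], [102], maxlen=4)
print(n1)   # [[5, 8, 0, 0]]  digits reversed (least significant first), then zero-padded
print(n2)   # [[7, 1, 0, 0]]
print(res)  # [[2, 0, 1, 0]]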

Model construction: complete the model as shown in the diagram.

class myRNNModel(keras.Model):
    def __init__(self):
        super(myRNNModel, self).__init__()
        # Shared embedding: maps each digit 0-9 to a 32-dimensional vector
        self.embed_layer = tf.keras.layers.Embedding(10, 32, batch_input_shape=[None, None])
        self.rnncell = tf.keras.layers.SimpleRNNCell(64)
        # Unrolled RNN over the digit positions, returning the hidden state at every step
        self.rnn_layer = tf.keras.layers.RNN(self.rnncell, return_sequences=True)
        # Per-position classifier over the 10 possible output digits
        self.dense = tf.keras.layers.Dense(10)
        
    @tf.function
    def call(self, num1, num2):
        '''Implement the model shown in the figure: embed both digit sequences,
        concatenate them, run the RNN over the positions, and predict the output
        digit at each position.
        '''
        num1_emb = self.embed_layer(num1) # shape(b_sz, len, emb_sz)
        num2_emb = self.embed_layer(num2) # shape(b_sz, len, emb_sz)
        inp_emb = tf.concat([num1_emb, num2_emb], axis=-1)
        rnn_out = self.rnn_layer(inp_emb)
        logits = self.dense(rnn_out)
        
        return logits
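
A minimal shape check one can run after defining the class (an illustrative sketch, assuming the layer sizes above): a dummy batch of two 11-digit inputs should produce one 10-way logit vector per digit position.

m = myRNNModel()
dummy = tf.zeros([2, 11], dtype=tf.int32)   # batch of 2 sequences, 11 digit positions, all zeros
print(m(dummy, dummy).shape)                # expected: (2, 11, 10)
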
@tf.function
def compute_loss(logits, labels):
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels)
    return tf.reduce_mean(losses)
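
Because the logits have shape (b_sz, len, 10) and the labels shape (b_sz, len), the sparse cross-entropy is computed per digit position and averaged over both the batch and the sequence. As a quick sanity check (illustrative sketch), uniform logits should give a loss of ln(10) ≈ 2.303, close to the step-0 loss reported below.

dummy_logits = tf.zeros([2, 11, 10])              # uniform scores over the 10 digit classes
dummy_labels = tf.zeros([2, 11], dtype=tf.int32)
print(compute_loss(dummy_logits, dummy_labels).numpy())  # ≈ 2.3026 = ln(10)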

@tf.function
def train_one_step(model, optimizer, x, y, label):
    with tf.GradientTape() as tape:
        logits = model(x, y)
        loss = compute_loss(logits, label)

    # compute gradient
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

def train(steps, model, optimizer):
    loss = 0.0
    accuracy = 0.0
    for step in range(steps):
        datas = gen_data_batch(batch_size=200, start=0, end=555555555)
        Nums1, Nums2, results = prepare_batch(*datas, maxlen=11)
        loss = train_one_step(model, optimizer, tf.constant(Nums1, dtype=tf.int32), 
                              tf.constant(Nums2, dtype=tf.int32),
                              tf.constant(results, dtype=tf.int32))
        if step%50 == 0:
            print('step', step, ': loss', loss.numpy())

    return loss

def evaluate(model):
    datas = gen_data_batch(batch_size=2000, start=555555555, end=999999999)
    Nums1, Nums2, results = prepare_batch(*datas, maxlen=11)
    logits = model(tf.constant(Nums1, dtype=tf.int32), tf.constant(Nums2, dtype=tf.int32))
    logits = logits.numpy()
    pred = np.argmax(logits, axis=-1)
    res = results_converter(pred)
    for o in list(zip(datas[2], res))[:20]:
        print(o[0], o[1], o[0]==o[1])

    print('accuracy is: %g' % np.mean([o[0]==o[1] for o in zip(datas[2], res)]))
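
Building on evaluate, a small helper along these lines (a sketch, assuming the trained model and the helpers defined above) turns the model into an adder for a single pair of integers:

def predict_sum(model, a, b, maxlen=11):
    # The true sum a + b is passed only to satisfy prepare_batch's signature;
    # it is never fed to the model.
    n1, n2, _ = prepare_batch([a], [b], [a + b], maxlen=maxlen)
    logits = model(tf.constant(n1, dtype=tf.int32), tf.constant(n2, dtype=tf.int32))
    pred_digits = np.argmax(logits.numpy(), axis=-1)   # predicted digit at every position
    return results_converter(pred_digits)[0]

# e.g. after training: predict_sum(model, 123456789, 987654321)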

optimizer = optimizers.Adam(0.001)
model = myRNNModel()
train(3000, model, optimizer)
evaluate(model)
step 0 : loss 2.3128169
step 50 : loss 1.9332728
step 100 : loss 1.901959
step 150 : loss 1.8834128
step 200 : loss 1.8941866
step 250 : loss 1.883983
step 300 : loss 1.8795818
step 350 : loss 1.8715074
step 400 : loss 1.8778303
step 450 : loss 1.8824102
step 500 : loss 1.8784071
step 550 : loss 1.8804839
step 600 : loss 1.8770535
step 650 : loss 1.8731002
step 700 : loss 1.883957
step 750 : loss 1.8866007
step 800 : loss 1.8686253
step 850 : loss 1.8691077
step 900 : loss 1.8789036
step 950 : loss 1.8719782
step 1000 : loss 1.8767223
step 1050 : loss 1.8807548
step 1100 : loss 1.8698553
step 1150 : loss 1.863222
step 1200 : loss 1.8729354
step 1250 : loss 1.8697383
step 1300 : loss 1.863727
step 1350 : loss 1.8565942
step 1400 : loss 1.823668
step 1450 : loss 1.7782799
step 1500 : loss 1.6455835
step 1550 : loss 1.4902543
step 1600 : loss 1.3107812
step 1650 : loss 1.1358132
step 1700 : loss 0.971002
step 1750 : loss 0.8325506
step 1800 : loss 0.7205786
step 1850 : loss 0.6340592
step 1900 : loss 0.55104315
step 1950 : loss 0.49578613
step 2000 : loss 0.43124878
step 2050 : loss 0.37480894
step 2100 : loss 0.32941413
step 2150 : loss 0.2885746
step 2200 : loss 0.24742316
step 2250 : loss 0.21270446
step 2300 : loss 0.18246596
step 2350 : loss 0.15749024
step 2400 : loss 0.1375851
step 2450 : loss 0.120020166
step 2500 : loss 0.105072536
step 2550 : loss 0.092671186
step 2600 : loss 0.081815556
step 2650 : loss 0.071697846
step 2700 : loss 0.06214186
step 2750 : loss 0.055456445
step 2800 : loss 0.050086357
step 2850 : loss 0.04434098
step 2900 : loss 0.040631484
step 2950 : loss 0.037673675
1667667009 1667667009 True
1789950734 1789950734 True
1710655485 1710655485 True
1663521507 1663521507 True
1896681877 1896681877 True
1554165075 1554165075 True
1578702243 1578702243 True
1645886796 1645886796 True
1267851483 1267851483 True
1543259935 1543259935 True
1621680881 1621680881 True
1887850516 1887850516 True
1416221863 1416221863 True
1293380770 1293380770 True
1421167341 1421167341 True
1418403242 1418403242 True
1327906642 1327906642 True
1419538600 1419538600 True
1532859597 1532859597 True
1531071162 1531071162 True
accuracy is: 1

Source: https://www.cnblogs.com/hbuwyg/p/16354360.html
