📜  CNTK - Training a Neural Network (1)


CNTK - Training a Neural Network

CNTK (the Microsoft Cognitive Toolkit) is an open-source deep learning framework for quickly building, training, and testing neural network models. It offers good scalability and efficient performance across multiple platforms. Below are the basic components we use to train a neural network model:

Reading the data

First we need to prepare the dataset. Taking a SPAM/HAM classifier as an example, suppose we have a dataset of 5,000 emails, each labeled either "SPAM" or "HAM". To train a neural network model we have to read these emails into memory and convert them into numeric features that the algorithm can process.
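
How the emails are turned into numeric features is up to us; a common choice is a bag-of-words vector of word counts over a fixed vocabulary. A minimal sketch, assuming a precomputed vocabulary dict that maps each word to an index smaller than the input dimension (both the helper and the vocabulary are illustrative, not part of CNTK):

from collections import Counter

def email_to_bag_of_words(text, vocabulary):
    # count how often each known word occurs in the email
    counts = Counter(word for word in text.lower().split() if word in vocabulary)
    # return sparse (index, count) pairs referring to positions in the feature vector
    return [(vocabulary[word], count) for word, count in counts.items()]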

import cntk as C
import os

# specify the path of the data
data_path = os.path.join(os.getcwd(), "data")

# define the input data dimension
input_dim = 10000

# define the size of the sparse input
sparse_input_dim = 10000

# define the number of output classes
num_output_classes = 2

# define the minibatch size
minibatch_size = 64

# define the number of training epochs
num_epochs = 10

# define the learning rate
learning_rate = 0.05

# define the momentum
momentum = 0.99

# define the number of hidden layers
num_hidden_layers = 2

# define the size of the hidden layers
hidden_layers_dim = 200

# define the dropout rate
dropout_rate = 0.5

# define the activation function
activation_function = C.relu

# define the loss function
loss_function = C.cross_entropy_with_softmax

# define the network
input_var = C.input_variable(shape=input_dim, is_sparse=True)
label_var = C.input_variable(num_output_classes)
prev_output = input_var
for i in range(num_hidden_layers):
    prev_output = C.layers.Dense(hidden_layers_dim, activation=activation_function)(prev_output)
    prev_output = C.layers.Dropout(dropout_rate)(prev_output)
z = C.layers.Dense(num_output_classes, activation=None)(prev_output)
ce = loss_function(z, label_var)
pe = C.classification_error(z, label_var)

# define the trainer (momentum SGD learner)
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
momentum_schedule = C.momentum_schedule(momentum)
learner = C.momentum_sgd(z.parameters, lr_schedule, momentum_schedule)
trainer = C.Trainer(z, (ce, pe), [learner])

# define the data reader (one sweep over the data; fresh readers are created for every epoch below)
def create_reader(path, is_training):
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            features=C.io.StreamDef(field='features', shape=sparse_input_dim, is_sparse=True),
            labels=C.io.StreamDef(field='labels', shape=num_output_classes))),
        randomize=is_training, max_sweeps=1)

# train the network
for epoch in range(num_epochs):
    # a reader with max_sweeps=1 is exhausted after one pass, so create fresh readers each epoch
    train_reader = create_reader(os.path.join(data_path, "train.ctf"), True)
    test_reader = create_reader(os.path.join(data_path, "test.ctf"), False)
    train_input_map = {input_var: train_reader.streams.features, label_var: train_reader.streams.labels}
    test_input_map = {input_var: test_reader.streams.features, label_var: test_reader.streams.labels}

    # training pass
    epoch_training_loss = 0
    epoch_training_error = 0
    num_minibatches = 0
    while True:
        minibatch = train_reader.next_minibatch(minibatch_size, input_map=train_input_map)
        if not minibatch:
            break
        trainer.train_minibatch(minibatch)
        epoch_training_loss += trainer.previous_minibatch_loss_average
        epoch_training_error += trainer.previous_minibatch_evaluation_average
        num_minibatches += 1
    epoch_training_loss /= num_minibatches
    epoch_training_error /= num_minibatches

    # evaluation pass over the test set
    epoch_test_error = 0
    num_test_samples = 0
    while True:
        minibatch = test_reader.next_minibatch(minibatch_size, input_map=test_input_map)
        if not minibatch:
            break
        samples_in_minibatch = minibatch[label_var].num_samples
        epoch_test_error += trainer.test_minibatch(minibatch) * samples_in_minibatch
        num_test_samples += samples_in_minibatch
    epoch_test_error /= num_test_samples

    print("Epoch %d: training loss = %.6f, training accuracy = %.6f, test error = %.6f, test accuracy = %.6f"
          % (epoch + 1, epoch_training_loss, 1 - epoch_training_error, epoch_test_error, 1 - epoch_test_error))
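
The CTFDeserializer in the reader above expects the data in the CNTK text format: one sample per line, with each stream introduced by a |name marker. The sparse features stream holds index:count pairs and the dense labels stream lists the one-hot values, so train.ctf would contain lines roughly like these (the indices and counts are only illustrative):

|features 12:1 47:2 309:1 2941:1 |labels 1 0
|features 5:3 88:1 1001:2 |labels 0 1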
Network structure

In CNTK we can define our own network structure with either the Sequential API or the functional (graph) API. In the snippet below we use Dense layers to build a fully connected network with two hidden layers, with ReLU as the hidden-layer activation. The output layer is left linear, because the cross_entropy_with_softmax loss applies the softmax internally; softmax is only applied explicitly when making predictions. An equivalent definition without Sequential is sketched after the snippet.

import cntk as C

# define the input and output layers
input_dim = 784
output_dim = 10
input_var = C.input_variable(shape=(input_dim,))
label_var = C.input_variable(shape=(output_dim,))

# define the number of hidden layers
num_hidden_layers = 2

# define the size of the hidden layers
hidden_layers_dim = 128

# define the activation function
activation_function = C.relu

# define the output non-linearity (applied only when making predictions;
# cross_entropy_with_softmax below already includes the softmax)
output_function = C.softmax

# define the network
model = C.layers.Sequential([
    C.layers.Dense(hidden_layers_dim, activation=activation_function),
    C.layers.Dense(hidden_layers_dim, activation=activation_function),
    C.layers.Dense(output_dim, activation=None),
])(input_var)

# define the loss function
loss_function = C.cross_entropy_with_softmax(model, label_var)

# define the performance metric
metric = C.classification_error(model, label_var)
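
For completeness, the same model can also be written in the functional style mentioned above by applying each layer explicitly. A minimal sketch reusing the names defined in the snippet (the helper create_model and model_alt are illustrative):

# equivalent definition without Sequential: apply each Dense layer explicitly
def create_model(features):
    hidden1 = C.layers.Dense(hidden_layers_dim, activation=activation_function)(features)
    hidden2 = C.layers.Dense(hidden_layers_dim, activation=activation_function)(hidden1)
    return C.layers.Dense(output_dim, activation=None)(hidden2)

model_alt = create_model(input_var)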
Training the model

Once we have the data and the network architecture, we can train the model with CNTK. The snippet below uses the Python API to do so.

import cntk as C
import os

# specify the path of the data
data_path = os.path.join(os.getcwd(), "data")

# define the minibatch size
minibatch_size = 64

# define the number of training epochs
num_epochs = 10

# define the learning rate
learning_rate = 0.05

# define the momentum
momentum = 0.99

# define the trainer
lr_schedule = C.learning_parameter_schedule(learning_rate)
momentum_schedule = C.momentum_schedule(momentum)
learner = C.momentum_sgd(model.parameters, lr_schedule, momentum_schedule)
trainer = C.Trainer(model, (loss_function, metric), [learner])

# define the data reader (one sweep over the data; fresh readers are created for every epoch below)
def create_reader(path, is_training):
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            features=C.io.StreamDef(field='features', shape=input_dim),
            labels=C.io.StreamDef(field='labels', shape=output_dim))),
        randomize=is_training, max_sweeps=1)

# train the model
for epoch in range(num_epochs):
    # a reader with max_sweeps=1 is exhausted after one pass, so create fresh readers each epoch
    train_reader = create_reader(os.path.join(data_path, "train.ctf"), True)
    test_reader = create_reader(os.path.join(data_path, "test.ctf"), False)
    train_input_map = {input_var: train_reader.streams.features, label_var: train_reader.streams.labels}
    test_input_map = {input_var: test_reader.streams.features, label_var: test_reader.streams.labels}

    # training pass
    epoch_training_loss = 0
    epoch_training_error = 0
    num_minibatches = 0
    while True:
        minibatch = train_reader.next_minibatch(minibatch_size, input_map=train_input_map)
        if not minibatch:
            break
        trainer.train_minibatch(minibatch)
        epoch_training_loss += trainer.previous_minibatch_loss_average
        epoch_training_error += trainer.previous_minibatch_evaluation_average
        num_minibatches += 1
    epoch_training_loss /= num_minibatches
    epoch_training_error /= num_minibatches

    # evaluation pass over the test set
    epoch_test_error = 0
    num_test_samples = 0
    while True:
        minibatch = test_reader.next_minibatch(minibatch_size, input_map=test_input_map)
        if not minibatch:
            break
        samples_in_minibatch = minibatch[label_var].num_samples
        epoch_test_error += trainer.test_minibatch(minibatch) * samples_in_minibatch
        num_test_samples += samples_in_minibatch
    epoch_test_error /= num_test_samples

    print("Epoch %d: training loss = %.6f, training accuracy = %.6f, test error = %.6f, test accuracy = %.6f"
          % (epoch + 1, epoch_training_loss, 1 - epoch_training_error, epoch_test_error, 1 - epoch_test_error))

When the Python code above is run, the program loads the data from the given path, builds the model that corresponds to the network structure, and during training computes the error and accuracy and prints them for every epoch.
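
After training we usually want to keep the model and use it for predictions. A minimal sketch building on the model and output_function defined above; the file name and the input sample are only illustrative:

import numpy as np

# save the trained model to disk
model.save("classifier.model")

# reload it later and score a single sample
loaded_model = C.Function.load("classifier.model")
scorer = output_function(loaded_model)                  # apply softmax to get class probabilities
sample = np.random.rand(784).astype(np.float32)         # placeholder for a real 784-dim feature vector
probabilities = scorer.eval([sample])
print("Predicted class:", np.argmax(probabilities))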