blog_20160811_1_8471332 (64 lines, Python)
Raw
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
from __future__ import print_function

__docformat__ = 'restructedtext en'

import six.moves.cPickle as pickle
import gzip
import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T

class SegLoader(object):
    """Builds a tiny in-memory 2-feature toy dataset (6 samples) and wraps it
    in Theano shared variables, mimicking the interface of the classic MNIST
    loader: [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)].
    """

    # (x0, x1, label) rows; label is 1 iff the point lies on the line y == x.
    _SAMPLES = (
        (1.0, 1.0, 1),
        (2.0, 2.0, 1),
        (3.0, 3.0, 1),
        (1.5, 2.0, 0),
        (2.5, 4.0, 0),
        (3.5, 7.0, 0),
    )

    def load_data(self, dataset):
        """Return the three (x, y) shared-variable splits.

        `dataset` is accepted for interface compatibility with the MNIST
        loader but is ignored: the data is generated in memory, and the
        validation and test splits are plain copies of the training split.
        """
        samples_number = 6
        features = 2
        # numpy.zeros instead of numpy.ndarray: ndarray(...) returns
        # uninitialized memory and only worked because prepare_dataset
        # happened to overwrite every cell.
        train_set = (
            numpy.zeros((samples_number, features), dtype=numpy.float32),
            numpy.zeros((samples_number,), dtype=int),
        )
        self.prepare_dataset(train_set)
        valid_set = (train_set[0].copy(), train_set[1].copy())
        test_set = (train_set[0].copy(), train_set[1].copy())
        test_set_x, test_set_y = self.shared_dataset(test_set)
        valid_set_x, valid_set_y = self.shared_dataset(valid_set)
        train_set_x, train_set_y = self.shared_dataset(train_set)
        rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
                (test_set_x, test_set_y)]
        return rval

    def shared_dataset(self, data_xy, borrow=True):
        """Load (x, y) into Theano shared variables.

        Both arrays are stored as floatX (required for GPU storage); the
        labels are then cast back to int32 symbolically for indexing.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')

    def prepare_dataset(self, dataset):
        """Fill dataset = (X, y) in place with the hard-coded samples."""
        xs, ys = dataset
        for i, (a, b, label) in enumerate(self._SAMPLES):
            xs[i][0] = a
            xs[i][1] = b
            ys[i] = label
blog_20160811_2_6584742 (149 lines, Python)
Raw
   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
from __future__ import print_function

__docformat__ = 'restructedtext en'

import six.moves.cPickle as pickle
import gzip
import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T
from logistic_regression import LogisticRegression
from seg_loader import SegLoader

class LrSegEngine(object):
    """Trains and runs a 2-class logistic-regression segmenter on the toy
    SegLoader dataset, using minibatch SGD with early stopping on the
    validation set (the standard deep-learning-tutorial training loop).
    """

    def __init__(self):
        print("Logistic Regression MNIST Engine")
        self.learning_rate = 0.13  # SGD step size
        self.n_epochs = 1000       # upper bound; early stopping usually ends sooner
        self.batch_size = 1  # 600
        # Passed to SegLoader.load_data for interface compatibility;
        # SegLoader currently ignores it and generates data in memory.
        self.dataset = 'mnist.pkl.gz'

    def train(self):
        """Run minibatch SGD with early stopping and pickle the best model
        (by validation error) to 'best_model.pkl'.
        """
        print("Yantao:train the model")
        loader = SegLoader()
        datasets = loader.load_data(self.dataset)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] // self.batch_size
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // self.batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] // self.batch_size
        index = T.lscalar()   # minibatch index
        x = T.matrix('x')     # input features
        y = T.ivector('y')    # integer class labels
        # in:x,y out: 1 in y=x otherwise 0
        classifier = LogisticRegression(input=x, n_in=2, n_out=2)
        cost = classifier.negative_log_likelihood(y)
        # Compiled functions: evaluate zero-one loss on one minibatch of the
        # test / validation split, selected by `index` through `givens`.
        test_model = theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * self.batch_size: (index + 1) * self.batch_size],
                y: test_set_y[index * self.batch_size: (index + 1) * self.batch_size]
            }
        )
        validate_model = theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * self.batch_size: (index + 1) * self.batch_size],
                y: valid_set_y[index * self.batch_size: (index + 1) * self.batch_size]
            }
        )
        # Plain gradient-descent updates on W and b.
        g_W = T.grad(cost=cost, wrt=classifier.W)
        g_b = T.grad(cost=cost, wrt=classifier.b)
        updates = [(classifier.W, classifier.W - self.learning_rate * g_W),
                   (classifier.b, classifier.b - self.learning_rate * g_b)]
        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * self.batch_size: (index + 1) * self.batch_size],
                y: train_set_y[index * self.batch_size: (index + 1) * self.batch_size]
            }
        )
        # Early-stopping bookkeeping: look at this many minibatches before
        # giving up; stretch the patience when validation improves enough.
        patience = 5000
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = min(n_train_batches, patience // 2)
        best_validation_loss = numpy.inf
        test_score = 0.
        start_time = timeit.default_timer()
        done_looping = False
        epoch = 0
        while (epoch < self.n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):
                minibatch_avg_cost = train_model(minibatch_index)
                # iteration number
                iter = (epoch - 1) * n_train_batches + minibatch_index
                if (iter + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = [validate_model(i)
                                         for i in range(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    print(
                        'epoch %i, minibatch %i/%i, validation error %f %%' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100.
                        )
                    )
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_validation_loss = this_validation_loss
                        # test it on the test set
                        test_losses = [test_model(i)
                                       for i in range(n_test_batches)]
                        test_score = numpy.mean(test_losses)
                        print(
                            (
                                '     epoch %i, minibatch %i/%i, test error of'
                                ' best model %f %%'
                            ) %
                            (
                                epoch,
                                minibatch_index + 1,
                                n_train_batches,
                                test_score * 100.
                            )
                        )
                        # save the best model
                        with open('best_model.pkl', 'wb') as f:
                            pickle.dump(classifier, f)
                if patience <= iter:
                    done_looping = True
                    break
        end_time = timeit.default_timer()
        print(
            (
                'Optimization complete with best validation score of %f %%,'
                'with test performance %f %%'
            )
            % (best_validation_loss * 100., test_score * 100.)
        )
        print('The code run for %d epochs, with %f epochs/sec' % (
            epoch, 1. * epoch / (end_time - start_time)))
        print(('The code for file ' +
               os.path.split(__file__)[1] +
               ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)

    def run(self, data):
        """Load the pickled best model and print its predictions for `data`
        (a list/array of 2-feature rows).
        """
        print("run the model")
        # Context manager so the file handle is closed deterministically;
        # the original pickle.load(open(...)) leaked the handle.
        with open('best_model.pkl', 'rb') as f:
            classifier = pickle.load(f)
        predict_model = theano.function(
            inputs=[classifier.input],
            outputs=classifier.y_pred
        )
        rst = predict_model(data)
        print(rst)
blog_20160811_3_3934783 (52 lines, Python)
Raw
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
from __future__ import print_function

__docformat__ = 'restructedtext en'

import six.moves.cPickle as pickle
import gzip
import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T

class LogisticRegression(object):
    """Multi-class logistic-regression layer: softmax(input . W + b)."""

    def __init__(self, input, n_in, n_out):
        """Build the symbolic graph for the layer.

        :param input: symbolic matrix, one sample per row (batch, n_in)
        :param n_in: number of input features
        :param n_out: number of classes
        """
        # W and b start at zero — a standard initialization for this
        # convex model (no symmetry to break).
        self.W = theano.shared(
            value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='W',
            borrow=True
        )
        self.b = theano.shared(
            value=numpy.zeros((n_out,), dtype=theano.config.floatX),
            name='b',
            borrow=True
        )
        # Class-membership probabilities, one row per sample.
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        # Predicted class = index of the most probable class.
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.params = [self.W, self.b]
        # Keep the symbolic input so a pickled model can rebuild a
        # prediction function later (see LrSegEngine.run).
        self.input = input

    def negative_log_likelihood(self, y):
        """Mean negative log-probability of the correct labels `y`."""
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        """Mean zero-one loss of y_pred against the integer labels `y`."""
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        if y.dtype.startswith('int'):
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()
blog_20160811_4_5188519 (22 lines, Python)
Raw
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
from __future__ import print_function

__docformat__ = 'restructedtext en'

import six.moves.cPickle as pickle
import gzip
import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T

from logistic_regression import LogisticRegression
from seg_loader import SegLoader
from lr_seg_engine import LrSegEngine

if __name__ == '__main__':
    # Entry point: build the engine and run the full training procedure.
    trainer = LrSegEngine()
    trainer.train()
blog_20160811_5_4839072 (9 lines, Python)
Raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
from seg_loader import SegLoader
from lr_seg_engine import LrSegEngine

if __name__ == '__main__':
    print("test program v1.0")
    # One 2-feature sample lying on the y == x line, fed to the trained model.
    sample = [[2.0, 2.0]]
    print(sample)
    LrSegEngine().run(sample)