# This notebook shows how to:
# - implement the forward pass (evaluation) of a deep, fully connected neural network in a few lines of Python
# - do that efficiently using batches
# - illustrate the results for randomly initialized neural networks
# cell 01
from numpy import array, zeros, exp, random, dot, shape, reshape, meshgrid, linspace
import matplotlib.pyplot as plt # for plotting
import matplotlib
matplotlib.rcParams['figure.dpi']=300 # highres display
# cell 2
N0=3 # input layer size
N1=2 # output layer size
# initialize random weights: array dimensions N1xN0
w=random.uniform(low=-1,high=+1,size=(N1,N0))
# initialize random biases: N1 vector
b=random.uniform(low=-1,high=+1,size=N1)
# cell 4
y_in=array([0.2,0.4,-0.1])
# cell 5
z=dot(w,y_in)+b # result: the vector of 'z' values, length N1
y_out=1/(1+exp(-z)) # the 'sigmoid' function (applied elementwise)
# cell 6
print("network input y_in:", y_in)
print("weights w:", w)
print("bias vector b:", b)
print("linear superposition z:", z)
print("network output y_out:", y_out)
# cell 7
def apply_net(y_in):
    global w, b
    z=dot(w,y_in)+b
    return(1/(1+exp(-z)))
# cell 8
N0=2 # input layer size
N1=1 # output layer size
w=random.uniform(low=-10,high=+10,size=(N1,N0)) # random weights: N1xN0
b=random.uniform(low=-1,high=+1,size=N1) # biases: N1 vector
# cell 9
print(apply_net([0.8,0.3])) # a simple test
M=50 # will create picture of size MxM
y_out=zeros([M,M]) # array MxM, to hold the result
for j1 in range(M):
    for j2 in range(M):
        # out of these integer indices, generate
        # two values in the range -0.5...0.5
        # and then apply the network to those two
        # input values
        value0=float(j1)/M-0.5
        value1=float(j2)/M-0.5
        y_out[j1,j2]=apply_net([value0,value1])[0]
# cell 10
# display image
plt.imshow(y_out,origin='lower',extent=(-0.5,0.5,-0.5,0.5))
plt.colorbar()
plt.title("NN output as a function of input values")
plt.xlabel("y_2")
plt.ylabel("y_1")
plt.show()
# cell 11
def apply_layer(y_in,w,b):
    z=dot(w,y_in)+b
    return(1/(1+exp(-z)))
# cell 12
N0=2 # input layer size
N1=30 # hidden layer size
N2=1 # output layer size
# weights and biases from input layer to hidden layer:
w1=random.uniform(low=-10,high=+10,size=(N1,N0)) # random weights: N1xN0
b1=random.uniform(low=-1,high=+1,size=N1) # biases: N1 vector
# weights+biases from hidden layer to output layer:
w2=random.uniform(low=-10,high=+10,size=(N2,N1)) # random weights
b2=random.uniform(low=-1,high=+1,size=N2) # biases
# cell 13
# evaluate the network by subsequently evaluating the two steps (input to hidden and hidden to output)
def apply_net(y_in):
    global w1,b1,w2,b2
    y1=apply_layer(y_in,w1,b1)
    y2=apply_layer(y1,w2,b2)
    return(y2)
# cell 14
# obtain values for a range of inputs
M=50 # will create picture of size MxM
y_out=zeros([M,M]) # array MxM, to hold the result
for j1 in range(M):
    for j2 in range(M):
        value0=float(j1)/M-0.5
        value1=float(j2)/M-0.5
        y_out[j1,j2]=apply_net([value0,value1])[0]
# cell 15
# display image
plt.imshow(y_out,origin='lower',extent=(-0.5,0.5,-0.5,0.5))
plt.colorbar()
plt.title("NN output as a function of input values")
plt.xlabel("y_2")
plt.ylabel("y_1")
plt.show()
# Obviously, the shape of the output is already more 'complex' than that of a simple network without a hidden layer!
# Goal: apply network to many samples in parallel (no 'for' loops!)
# cell 16
# See how the dot product works:
# 'contracts' (sums over) the innermost index
W=zeros([7,8])
y=zeros([8,30])
# here '30' would stand for the number of samples
# in our envisaged network applications
print(shape(dot(W,y))) # prints (7, 30): the index of size 8 has been summed over
# cell 17
# now try to add the bias vector entries,
# in the most naive way (beware!)
B=zeros(7)
result=dot(W,y)+B # will produce an error: shapes (7,30) and (7,) cannot be broadcast together
# cell 18
# But with a re-ordering of indices, this works!
# So, we take the dimension of size 30 to be the very first one:
y=zeros([30,8])
W=zeros([8,7])
print(shape(dot(y,W))) # prints (30, 7)
# cell 19
# now add the bias vector again, in the same naive way
B=zeros(7)
result=dot(y,W)+B
# will give the desired result, because B is 'broadcast' to shape (30,7)
print(shape(result)) # prints (30, 7)
# We now set up for batch processing, i.e. parallel evaluation of many input samples!
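# Below is a minimal sketch (added for illustration, not part of the original notebook)
# of how such a batched evaluation could look, using the index ordering established
# above: samples along the first axis, weights of shape (N_in,N_out), so that
# dot(y,w)+b broadcasts the bias vector over all samples. The names
# apply_layer_batch and apply_net_batch are illustrative choices, and the weight
# arrays from cell 12 are transposed to match the new index convention.
def apply_layer_batch(y_in,w,b):
    # y_in: (batchsize,N_in), w: (N_in,N_out), b: (N_out,)
    z=dot(y_in,w)+b # b is broadcast over the batch dimension
    return(1/(1+exp(-z)))

def apply_net_batch(y_in,w1,b1,w2,b2):
    y1=apply_layer_batch(y_in,w1,b1) # input layer -> hidden layer
    return(apply_layer_batch(y1,w2,b2)) # hidden layer -> output layer

# example: evaluate the 2-30-1 network from cell 12 on the full MxM grid
# in a single call, without any 'for' loops (same grid as in cell 14)
v0,v1=meshgrid(linspace(-0.5,0.5,M,endpoint=False),linspace(-0.5,0.5,M,endpoint=False),indexing='ij')
y_batch=zeros([M*M,2])
y_batch[:,0]=v0.flatten()
y_batch[:,1]=v1.flatten()
y_out=reshape(apply_net_batch(y_batch,w1.T,b1,w2.T,b2),[M,M])
# y_out can now be displayed exactly as in cell 15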