Tutorial 3: Representations in continuous space#
Week 2, Day 2: Neuro-Symbolic Methods
By Neuromatch Academy
Content creators: P. Michael Furlong, Chris Eliasmith
Content reviewers: Hlib Solodzhuk, Patrick Mineault, Aakash Agrawal, Alish Dipani, Hossein Rezaei, Yousef Ghanbari, Mostafa Abdollahi
Production editors: Konstantine Tsafatinos, Ella Batty, Spiros Chavlis, Samuele Bolotta, Hlib Solodzhuk
Tutorial Objectives#
Estimated timing of tutorial: 40 minutes
In this tutorial, you will observe how VSA methods can be applied to structured representations and continuous environments to enable efficient generalization.
Setup#
Install and import feedback gadget#
# @title Install and import feedback gadget
!pip install --quiet jupyter numpy matplotlib ipywidgets scikit-learn tqdm vibecheck

from vibecheck import DatatopsContentReviewContainer

def content_review(notebook_section: str):
    return DatatopsContentReviewContainer(
        "",  # No text prompt
        notebook_section,
        {
            "url": "https://pmyvdlilci.execute-api.us-east-1.amazonaws.com/klab",
            "name": "neuromatch_neuroai",
            "user_key": "wb2cxze8",
        },
    ).render()

feedback_prefix = "W2D2_T3"
Note that exactly the `neuromatch` branch of `sspspace` must be installed! Otherwise, some of the functionality (like the `optimize` parameter in the `DiscreteSPSpace` initialization) won't work.
Install dependencies#
# @title Install dependencies
# Install sspspace
!pip install git+https://github.com/neuromatch/sspspace@neuromatch --quiet
Imports#
# @title Imports
# working with data
import numpy as np

# plotting
import matplotlib.pyplot as plt
import logging

# interactive display
import ipywidgets as widgets

# modeling
import sspspace
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
Figure settings#
# @title Figure settings
logging.getLogger('matplotlib.font_manager').disabled = True
%matplotlib inline
%config InlineBackend.figure_format = 'retina'  # perform high-definition rendering for images and plots
plt.style.use("https://raw.githubusercontent.com/NeuromatchAcademy/course-content/main/nma.mplstyle")
Plotting functions#
# @title Plotting functions
def plot_3d_function(X, Y, zs, titles):
    """Plot 3D function.
    Inputs:
    - X (list): list of np.ndarray of x-values.
    - Y (list): list of np.ndarray of y-values.
    - zs (list): list of np.ndarray of z-values.
    - titles (list): list of titles of the plot.
    """
    with plt.xkcd():
        fig = plt.figure(figsize=(8, 8))
        for index, (x, y, z) in enumerate(zip(X, Y, zs)):
            fig.add_subplot(1, len(X), index + 1, projection='3d')
            plt.gca().plot_surface(x, y, z.reshape(x.shape), cmap='plasma', antialiased=False, linewidth=0)
            plt.xlabel(r'$x_{1}$')
            plt.ylabel(r'$x_{2}$')
            plt.gca().set_zlabel(r'$f(\mathbf{x})$')
            plt.title(titles[index])
        plt.show()
def plot_performance(bound_performance, bundle_performance, training_samples, title):
    """Plot RMSE values for two different representations of the input data.
    Inputs:
    - bound_performance (list): list of RMSE for bound representation.
    - bundle_performance (list): list of RMSE for bundle representation.
    - training_samples (list): x-axis.
    - title (str): title of the plot.
    """
    with plt.xkcd():
        plt.plot(training_samples, bound_performance, label='Bound Representation')
        plt.plot(training_samples, bundle_performance, label='Bundling Representation', ls='--')
        plt.legend()
        plt.title(title)
        plt.ylabel('RMSE (a.u.)')
        plt.xlabel('# Training samples')
def plot_2d_similarity(sims, obj_names, size, title_argmax=False):
    """
    Plot 2D similarity between query points (grid) and the ones associated with the objects.
    Inputs:
    - sims (list): list of similarity values for each of the objects.
    - obj_names (list): list of object names.
    - size (tuple): shape used to reshape the similarities.
    - title_argmax (bool, default = False): if True, the title shows the grid point with the maximum similarity.
    """
    ticks = [0, 24, 49, 74, 99]
    ticklabels = [-5, -2, 0, 2, 5]
    with plt.xkcd():
        for obj_idx, obj in enumerate(obj_names):
            plt.subplot(1, len(obj_names), 1 + obj_idx)
            plt.imshow(np.array(sims[obj_idx].reshape(size)), origin='lower', vmin=-1, vmax=1)
            plt.gca().set_xticks(ticks)
            plt.gca().set_xticklabels(ticklabels)
            if obj_idx == 0:
                plt.gca().set_yticks(ticks)
                plt.gca().set_yticklabels(ticklabels)
            else:
                plt.gca().set_yticks([])
            if not title_argmax:
                plt.title(f'{obj}, {positions[obj_idx]}')  # uses the global `positions`
            else:
                plt.title(f'{obj}, {query_xs[sims[obj_idx].argmax()]}')  # uses the global `query_xs`
def plot_unbinding_objects_map(sims, positions, query_xs, size):
    """
    Plot 2D similarity between query points (grid) and the map with the objects unbound from it.
    Inputs:
    - sims (np.ndarray): similarity values for each of the query points with the map.
    - positions (np.ndarray): positions of the objects.
    - query_xs (np.ndarray): grid points.
    - size (tuple): shape used to reshape the similarities.
    """
    ticks = [0, 24, 49, 74, 99]
    ticklabels = [-5, -2, 0, 2, 5]
    with plt.xkcd():
        plt.imshow(sims.reshape(size), origin='lower')
        for idx, marker in enumerate(['o', 's', '^']):
            plt.scatter(*get_coordinate(positions[idx, :], query_xs, size), marker=marker, s=100)
        plt.gca().set_xticks(ticks)
        plt.gca().set_xticklabels(ticklabels)
        plt.gca().set_yticks(ticks)
        plt.gca().set_yticklabels(ticklabels)
        plt.title('All Object Locations')
        plt.show()
def plot_unbinding_positions_map(sims, positions, obj_names):
    """
    Plot similarity between the objects and the map with the test positions unbound from it.
    Inputs:
    - sims (np.ndarray): similarity values of each object with the queried map.
    - positions (np.ndarray): test positions to query.
    - obj_names (list): names of the objects for labels.
    """
    with plt.xkcd():
        plt.figure(figsize=(8, 4))
        for pos_idx, pos in enumerate(positions):
            plt.subplot(1, len(positions), 1 + pos_idx)
            plt.bar([1, 2, 3], sims[pos_idx])
            plt.ylim([-0.3, 1.05])
            plt.gca().set_xticks([1, 2, 3])
            plt.gca().set_xticklabels(obj_names, rotation=90)
            if pos_idx != 0:
                plt.gca().set_yticks([])
            plt.title(f'Symbols at\n{pos}')
        plt.show()
Set random seed#
# @title Set random seed
import random
import numpy as np

def set_seed(seed=None):
    if seed is None:
        seed = np.random.choice(2 ** 32)
    random.seed(seed)
    np.random.seed(seed)

set_seed(seed=42)
Helper functions#
# @title Helper functions

def get_model(xs, ys, train_size):
    """Fit linear regression to the given data.
    Inputs:
    - xs (np.ndarray): input data.
    - ys (np.ndarray): output data.
    - train_size (float): fraction of data to use for training.
    """
    X_train, _, y_train, _ = train_test_split(xs, ys, random_state=1, train_size=train_size)
    return LinearRegression().fit(X_train, y_train)

def get_coordinate(x, positions, target_shape):
    """Return the closest column and row coordinates for the given position.
    Inputs:
    - x (np.ndarray): query position.
    - positions (np.ndarray): all positions.
    - target_shape (tuple): shape of the grid.
    Outputs:
    - coordinates (tuple): column and row positions.
    """
    idx = np.argmin(np.linalg.norm(x - positions, axis=1))
    c = idx % target_shape[1]
    r = idx // target_shape[1]
    return (c, r)

def rastrigin_solution(x):
    """Compute the Rastrigin function for a given array of d-dimensional vectors.
    Inputs:
    - x (np.ndarray of shape (n, d)): n d-dimensional vectors.
    Outputs:
    - y (np.ndarray of shape (n,)): Rastrigin function value for each of the vectors.
    """
    return 10 * x.shape[1] + np.sum(x**2 - 10 * np.cos(2 * np.pi * x), axis=1)

def non_separable_solution(x):
    """Compute the non-separable function for a given array of 2-dimensional vectors.
    Inputs:
    - x (np.ndarray of shape (n, 2)): n 2-dimensional vectors.
    Outputs:
    - y (np.ndarray of shape (n,)): non-separable function value for each of the vectors.
    """
    return np.sin(np.multiply(x[:, 0], x[:, 1]))

x0_rastrigin = np.linspace(-5.12, 5.12, 100)
X_rastrigin, Y_rastrigin = np.meshgrid(x0_rastrigin, x0_rastrigin)
xs_rastrigin = np.vstack((X_rastrigin.flatten(), Y_rastrigin.flatten())).T
ys_rastrigin = rastrigin_solution(xs_rastrigin)

x0_non_separable = np.linspace(-4, 4, 100)
X_non_separable, Y_non_separable = np.meshgrid(x0_non_separable, x0_non_separable)
xs_non_separable = np.vstack((X_non_separable.flatten(), Y_non_separable.flatten())).T
ys_non_separable = non_separable_solution(xs_non_separable)

set_seed(42)
obj_names = ['circle', 'square', 'triangle']
discrete_space = sspspace.DiscreteSPSpace(obj_names, ssp_dim=1024)
objs = {n: discrete_space.encode(n) for n in obj_names}

ssp_space = sspspace.RandomSSPSpace(domain_dim=2, ssp_dim=1024)
positions = np.array([[0, -2],
                      [-2, 3],
                      [3, 2]])
ssps = {n: ssp_space.encode(x) for n, x in zip(obj_names, positions)}

dim0 = np.linspace(-5, 5, 101)
dim1 = np.linspace(-5, 5, 101)
X, Y = np.meshgrid(dim0, dim1)
query_xs = np.vstack((X.flatten(), Y.flatten())).T
query_ssps = ssp_space.encode(query_xs)

bound_objects = [objs[n] * ssps[n] for n in obj_names]
ssp_map = sspspace.SSP(np.sum(bound_objects, axis=0))
Section 1: Sample Efficient Learning#
In this section, we will take a look at how imposing an inductive bias on our feature space can result in more sample-efficient learning.
Video 1: Function Learning and Inductive Bias#
Submit your feedback#
# @title Submit your feedback
content_review(f"{feedback_prefix}_function_learning_and_inductive_bias")
Coding Exercise 1: Additive Function#
We will start with an additive function, the Rastrigin function, defined as

\[f(\mathbf{x}) = 10d + \sum_{i=1}^{d}\left[x_{i}^{2} - 10\cos(2\pi x_{i})\right],\]

where \(d\) is the dimensionality of the input vector. In the cell below, complete the missing parts of the function that computes values of the Rastrigin function given the input array.
###################################################################
## Fill out the following then remove
raise NotImplementedError("Student exercise: complete the Rastrigin function.")
###################################################################

def rastrigin(x):
    """Compute the Rastrigin function for a given array of d-dimensional vectors.
    Inputs:
    - x (np.ndarray of shape (n, d)): n d-dimensional vectors.
    Outputs:
    - y (np.ndarray of shape (n,)): Rastrigin function value for each of the vectors.
    """
    return 10 * x.shape[1] + np.sum(... - 10 * np.cos(2 * np.pi * ...), axis=1)
# this code creates 10,000 2-dimensional vectors that will serve as input to the function
# (so the output has shape (10000,))
x0_rastrigin = np.linspace(-5.12, 5.12, 100)
X_rastrigin, Y_rastrigin = np.meshgrid(x0_rastrigin, x0_rastrigin)
xs_rastrigin = np.vstack((X_rastrigin.flatten(), Y_rastrigin.flatten())).T
ys_rastrigin = rastrigin(xs_rastrigin)

plot_3d_function([X_rastrigin], [Y_rastrigin], [ys_rastrigin.reshape(X_rastrigin.shape)], ['Rastrigin Function'])
Example output:
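Before moving on, here is a quick sanity check (using `rastrigin_solution`, the reference implementation from the Helper functions cell): the Rastrigin function has its global minimum at the origin, where every term vanishes.

# f(0) = 10*d + sum(0 - 10*cos(0)) = 10*d - 10*d = 0
print(rastrigin_solution(np.zeros((1, 2))))  # [0.]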
Now, we are going to see which of the inductive biases (i.e., assumed mechanisms underlying the input data) will be more efficient for training a linear regression to predict values of the Rastrigin function. We will consider two representations:

- Bound: we encode the 2D input vectors xs as single bound vectors.
- Bundled: we encode each 1D input dimension separately and then bundle the two encodings together.
set_seed(42)
ssp_space = sspspace.RandomSSPSpace(domain_dim=2, ssp_dim=1024)
bound_phis = ssp_space.encode(xs_rastrigin)

ssp_space0 = sspspace.RandomSSPSpace(domain_dim=1, ssp_dim=1024)
ssp_space1 = sspspace.RandomSSPSpace(domain_dim=1, ssp_dim=1024)
# remember that the input to `encode` should be 2-dimensional, so we create an extra dimension with [:, None]
bundle_phis = ssp_space0.encode(xs_rastrigin[:, 0][:, None]) + ssp_space1.encode(xs_rastrigin[:, 1][:, None])
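To build intuition for the difference, we can compare the similarity structure of the two representations directly (a quick, optional check; the test points are our own choice, and the exact values depend on the random encoders). The bound representation's similarity depends jointly on both coordinates, while the bundled representation's similarity is approximately a sum of per-dimension similarities.

# compare the encodings of two points that differ only along the first dimension
p = np.array([[0.0, 0.0]])
q = np.array([[1.0, 0.0]])

bound_sim = (ssp_space.encode(p) @ ssp_space.encode(q).T).item()

bundle_p = ssp_space0.encode(p[:, 0][:, None]) + ssp_space1.encode(p[:, 1][:, None])
bundle_q = ssp_space0.encode(q[:, 0][:, None]) + ssp_space1.encode(q[:, 1][:, None])
bundle_sim = (bundle_p @ bundle_q.T).item()

# the bundled similarity stays high because the second coordinate is unchanged
print(f'bound: {bound_sim:.3f}, bundled: {bundle_sim:.3f}')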
Now, let us define the modeling setup: we will use a few different train_sizes, fitting a linear regression for each of them in a loop. Then, for each model, we will evaluate its fit using the RMSE loss on the test set.
def loss(y_true, y_pred):
    """Calculate the RMSE loss between true and predicted values (note that the loss is not normalized by the range of the target values).
    Inputs:
    - y_true (np.ndarray): true values.
    - y_pred (np.ndarray): predicted values.
    Outputs:
    - loss (float): loss value.
    """
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

def test_performance(xs, ys, train_sizes):
    """Fit linear regressions to the provided data and evaluate performance with the RMSE loss for different train sizes.
    Inputs:
    - xs (np.ndarray): input data.
    - ys (np.ndarray): output data.
    - train_sizes (list): list of the train sizes (fractions of the data).
    Outputs:
    - performance (list): RMSE on the test set for each train size.
    - models (list): fitted model for each train size.
    """
    performance = []
    models = []
    for train_size in tqdm(train_sizes):
        X_train, X_test, y_train, y_test = train_test_split(xs, ys, random_state=1, train_size=train_size)
        regr = LinearRegression().fit(X_train, y_train)
        performance.append(loss(y_test, regr.predict(X_test)))
        models.append(regr)
    return performance, models
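As a quick check that `loss` matches the RMSE formula, here it is on toy numbers of our own:

# sqrt(mean([3**2, 4**2])) = sqrt(12.5) ≈ 3.536
print(loss(np.array([0.0, 0.0]), np.array([3.0, 4.0])))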
Now, we are ready to train the models on two different inductive biases of the input data.
train_sizes = np.linspace(0.25, 0.9, 5)
bound_performance, bound_models = test_performance(bound_phis, ys_rastrigin, train_sizes)
bundle_performance, bundle_models = test_performance(bundle_phis, ys_rastrigin, train_sizes)
plot_performance(bound_performance, bundle_performance, train_sizes * bound_phis.shape[0], "Rastrigin function - RMSE")
plt.ylim((-1, 20))
What a drastic difference! Let us visually evaluate the performance of the models trained with the smallest train size, i.e., on 2,500 training points.
bound_model = bound_models[0]
bundled_model = bundle_models[0]
ys_hat_rastrigin_bound = bound_model.predict(bound_phis)
ys_hat_rastrigin_bundled = bundled_model.predict(bundle_phis)
plot_3d_function([X_rastrigin, X_rastrigin, X_rastrigin], [Y_rastrigin, Y_rastrigin, Y_rastrigin], [ys_rastrigin.reshape(X_rastrigin.shape), ys_hat_rastrigin_bound.reshape(X_rastrigin.shape), ys_hat_rastrigin_bundled.reshape(X_rastrigin.shape)], ['Rastrigin Function - True', 'Bound', 'Bundled'])
Coding Exercise 1 Discussion#
Why do you think the bundled representation is superior for the Rastrigin function?
Submit your feedback#
# @title Submit your feedback
content_review(f"{feedback_prefix}_additive_function")
Coding Exercise 2: Non-separable Function#
Now, let’s consider a non-separable function: a function \(f(x_1, x_2)\) that cannot be described as the sum of two one-dimensional functions \(g(x_1)\) and \(h(x_2)\). We will examine this function over the domain \([-4,4]^{2}\):

\[f(\mathbf{x}) = \sin(x_{1}x_{2}).\]
Fill in the missing parts of the code to get the correct calculation of the defined function.
###################################################################
## Fill out the following then remove
raise NotImplementedError("Student exercise: complete the non-separable function.")
###################################################################

def non_separable(x):
    """Compute the non-separable function for a given array of 2-dimensional vectors.
    Inputs:
    - x (np.ndarray of shape (n, 2)): n 2-dimensional vectors.
    Outputs:
    - y (np.ndarray of shape (n,)): non-separable function value for each of the vectors.
    """
    return np.sin(np.multiply(x[:, ...], x[:, ...]))
x0_non_separable = np.linspace(-4, 4, 100)
X_non_separable, Y_non_separable = np.meshgrid(x0_non_separable, x0_non_separable)
xs_non_separable = np.vstack((X_non_separable.flatten(), Y_non_separable.flatten())).T
ys_non_separable = non_separable(xs_non_separable)

plot_3d_function([X_non_separable], [Y_non_separable], [ys_non_separable.reshape(X_non_separable.shape)], [r'Nonseparable Function, $f(\mathbf{x}) = \sin(x_{1}x_{2})$'])
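If you want to convince yourself numerically that \(f\) is not additively separable, note that for any \(f(x_1, x_2) = g(x_1) + h(x_2)\) the mixed second difference \(f(a+\epsilon, b+\epsilon) - f(a+\epsilon, b) - f(a, b+\epsilon) + f(a, b)\) is exactly zero. Here is a quick check using the reference solution from the Helper functions cell (the test point is our own choice):

a, b, eps = 1.0, 2.0, 1e-3
pts = np.array([[a + eps, b + eps], [a + eps, b], [a, b + eps], [a, b]])
f_vals = non_separable_solution(pts)
mixed = f_vals[0] - f_vals[1] - f_vals[2] + f_vals[3]
print(f'mixed difference: {mixed:.2e}')  # nonzero (about eps**2 times the mixed partial), so f is not separable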
Coding Exercise 2 Discussion#
Can you guess by the nature of the function which of the representations will be more efficient?
We will reuse previously defined spaces for encoding bound and bundled representations.
bound_phis = ssp_space.encode(xs_non_separable)
bundle_phis = ssp_space0.encode(xs_non_separable[:, 0][:, None]) + ssp_space1.encode(xs_non_separable[:, 1][:, None])
train_sizes = np.linspace(0.25, 0.9, 5)
bound_performance, bound_models = test_performance(bound_phis, ys_non_separable, train_sizes)
bundle_performance, bundle_models = test_performance(bundle_phis, ys_non_separable, train_sizes)
plot_performance(bound_performance, bundle_performance, train_sizes * bound_phis.shape[0], title = "Non-separable function - RMSE")
The bundled representation can't achieve the same quality even as the number of samples increases. This is because the function is non-separable: the bundled representation cannot capture the interaction between the two dimensions.
bound_model = bound_models[0]
bundle_model = bundle_models[0]
ys_hat_bound = bound_model.predict(bound_phis)
ys_hat_bundle = bundle_model.predict(bundle_phis)
plot_3d_function([X_non_separable, X_non_separable, X_non_separable], [Y_non_separable, Y_non_separable, Y_non_separable], [ys_non_separable.reshape(X_non_separable.shape), ys_hat_bound.reshape(X_non_separable.shape), ys_hat_bundle.reshape(X_non_separable.shape)], ['Non-separable Function - True', 'Bound', 'Bundled'])
So, as we can see, when we pick the right inductive bias for the function we are learning, we can do a better job with fewer samples.
Submit your feedback#
# @title Submit your feedback
content_review(f"{feedback_prefix}_non_separable_function")
Section 2: Representing Continuous Values#
Estimated timing to here from start of tutorial: 20 minutes
In this section, we will use a technique called fractional binding to represent continuous values, and we will use it to construct a map of objects distributed over a 2D space.
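Binding by circular convolution is elementwise multiplication in the Fourier domain, so "binding a vector with itself \(x\) times" extends naturally to real-valued \(x\): raise the Fourier coefficients to the power \(x\). Below is a minimal sketch of this idea, for intuition only; the actual `sspspace` implementation may differ, and `make_unitary` and `fractional_bind` are our own illustrative helpers.

def make_unitary(d, rng):
    # real vector whose Fourier coefficients all have unit magnitude
    phases = rng.uniform(-np.pi, np.pi, size=d // 2 - 1)
    spectrum = np.concatenate(([1.0], np.exp(1j * phases),
                               [1.0], np.exp(-1j * phases[::-1])))
    return np.fft.ifft(spectrum).real

def fractional_bind(X, x):
    # bind X with itself x times, for real-valued x
    return np.fft.ifft(np.fft.fft(X) ** x).real

rng = np.random.default_rng(42)
X = make_unitary(1024, rng)
# similarity between encodings falls off smoothly with the distance |1 - b|
for b in [1.0, 1.5, 2.0, 4.0]:
    print(b, round(np.dot(fractional_bind(X, 1.0), fractional_bind(X, b)), 3))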
Video 2: Mapping Intro#
Submit your feedback#
# @title Submit your feedback
content_review(f"{feedback_prefix}_mapping_intro")
Coding Exercise 3: Mixing Discrete Objects With Continuous Space#
We will store three objects in a vector representing a map. First, we will create 3 objects (a circle, square, and triangle), as we did before.
set_seed(42)
obj_names = ['circle', 'square', 'triangle']
discrete_space = sspspace.DiscreteSPSpace(obj_names, ssp_dim=1024)
objs = {n: discrete_space.encode(n) for n in obj_names}
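The encoded objects should behave like nearly orthogonal symbols: each vector has a similarity of about 1 with itself and about 0 with the others (a quick check of our own; the exact off-diagonal values depend on the encoder and the random seed).

sim_matrix = np.array([[float(objs[a].flatten() @ objs[b].flatten())
                        for b in obj_names] for a in obj_names])
print(np.round(sim_matrix, 2))  # approximately the identity matrix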
Next, we are going to create three locations where the objects will reside, and an encoder will transform those coordinates into an SSP representation.
set_seed(42)
ssp_space = sspspace.RandomSSPSpace(domain_dim=2, ssp_dim=1024)
positions = np.array([[0, -2],
                      [-2, 3],
                      [3, 2]])
ssps = {n: ssp_space.encode(x) for n, x in zip(obj_names, positions)}
Next, in order to see where things are on the map, we are going to compute the similarity between encoded places and points in the space. Your task is to complete the calculation of the similarity values between the encodings of all grid points and the encoding associated with each object.
dim0 = np.linspace(-5, 5, 101)
dim1 = np.linspace(-5, 5, 101)
X, Y = np.meshgrid(dim0, dim1)
query_xs = np.vstack((X.flatten(), Y.flatten())).T
query_ssps = ssp_space.encode(query_xs)

###################################################################
## Fill out the following then remove
raise NotImplementedError("Student exercise: complete similarity calculation.")
###################################################################

sims = []
for obj_idx, obj in enumerate(obj_names):
    sims.append(... @ ssps[obj].flatten())
plt.figure(figsize=(8, 2.4))
plot_2d_similarity(sims, obj_names, (dim0.size, dim1.size))
Example output:
Now, let’s bind these positions with the objects and see how that changes the similarity with the map positions. Complete the binding operation in the cell below.
###################################################################
## Fill out the following then remove
raise NotImplementedError("Student exercise: complete binding operation for objects and corresponding positions.")
###################################################################

# objects are located in `objs` and positions in `ssps`
bound_objects = [... * ... for n in obj_names]

sims = []
for obj_idx, obj in enumerate(obj_names):
    sims.append(query_ssps @ bound_objects[obj_idx].flatten())

plt.figure(figsize=(8, 2.4))
plot_2d_similarity(sims, obj_names, (dim0.size, dim1.size))
Example output:
As you can see, binding each position with an object vector destroys its similarity to the grid of position encodings, which is what we should expect.
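A quick numerical way to see this (our own check): the bound vector for the circle is nearly orthogonal to every position encoding on the grid, so the largest similarity magnitude stays small.

# maximum |similarity| between the circle's bound vector and any grid encoding
print(np.abs(query_ssps @ bound_objects[0].flatten()).max())  # small compared to 1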
Next, we are going to create a map out of our bound objects:
set_seed(42)
ssp_map = sspspace.SSP(np.sum(bound_objects, axis=0))
Now, we can query the map by unbinding the objects we care about. Because unbinding approximately inverts binding, unbinding an object from the map leaves that object's position encoding plus small noise contributed by the other bound pairs. Your task is to complete the unbinding operation. Then, let's observe the resulting similarities.
###################################################################
## Fill out the following then remove
raise NotImplementedError("Student exercise: complete the unbinding operation.")
###################################################################

objects_sims = []
for obj_idx, obj_name in enumerate(obj_names):
    # query the object name by unbinding it from the map
    query_map = ssp_map * ~objs[...]
    objects_sims.append(query_ssps @ query_map.flatten())

plot_2d_similarity(objects_sims, obj_names, (dim0.size, dim1.size), title_argmax=True)
Example output:
Let’s look at what happens when we unbind all the symbols from the map at once. Complete the bundling and unbinding operations in the following code cell.
###################################################################
## Fill out the following then remove
raise NotImplementedError("Student exercise: complete the bundling and unbinding operations.")
###################################################################

# a single bundled representation of all objects
all_objs = (objs['circle'] + objs[...] + objs[...]).normalize()

# unbind this bundled representation from the map
query_map = ... * ~...

sims = query_ssps @ query_map.flatten()
size = (dim0.size, dim1.size)
plot_unbinding_objects_map(sims, positions, query_xs, size)
Example output:
We can also unbind positions and see what objects exist there. We will use the locations where the objects are stored as test positions, along with two additional locations for comparison. In this final exercise, complete the position-unbinding operation.
###################################################################
## Fill out the following then remove
raise NotImplementedError("Student exercise: complete the unbinding operations.")
###################################################################

query_objs = np.vstack([objs[n] for n in obj_names])
test_positions = np.vstack((positions, [0, 0], [0, -1.5]))

sims = []
for pos_idx, pos in enumerate(test_positions):
    position_ssp = ssp_space.encode(pos[None, :])  # remember, `encode()` expects a 2-dimensional array
    # unbind the position from the map
    query_map = ... * ~...
    sims.append(query_objs @ query_map.flatten())

plot_unbinding_positions_map(sims, test_positions, obj_names)
plot_unbinding_positions_map(sims, test_positions, obj_names)
Example output:
As you can see from the above plots, when we query each location, we can clearly identify the object stored at that location.
When we query at the origin (where no object is present), we see that there is no strong candidate element. However, as we move closer to one of the objects (rightmost plot), the similarity starts to increase.
Submit your feedback#
# @title Submit your feedback
content_review(f"{feedback_prefix}_mixing_discrete_objects_with_continuous_space")
Video 4: Mapping Outro#
Submit your feedback#
# @title Submit your feedback
content_review(f"{feedback_prefix}_mapping_outro")
Summary#
Estimated timing of tutorial: 40 minutes
Video 5: Conclusions#
Conclusion slides#
If you want to download the slides: 'https://osf.io/download/pxqny'
Submit your feedback#
# @title Submit your feedback
content_review(f"{feedback_prefix}_conclusions")