{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"execution": {},
"id": "view-in-github"
},
"source": [
"
"
]
},
{
"cell_type": "markdown",
"metadata": {
"execution": {}
},
"source": [
"# Tutorial 4: Biological meta reinforcement learning \n",
"\n",
"**Week 2, Day 4: Macro-Learning**\n",
"\n",
"**By Neuromatch Academy**\n",
"\n",
"__Content creators:__ Hlib Solodzhuk, Ximeng Mao, Grace Lindsay\n",
"\n",
"__Content reviewers:__ Aakash Agrawal, Alish Dipani, Hossein Rezaei, Yousef Ghanbari, Mostafa Abdollahi, Hlib Solodzhuk, Ximeng Mao, Samuele Bolotta, Grace Lindsay\n",
"\n",
"__Production editors:__ Konstantine Tsafatinos, Ella Batty, Spiros Chavlis, Samuele Bolotta, Hlib Solodzhuk\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"execution": {}
},
"source": [
"___\n",
"\n",
"\n",
"# Tutorial Objectives\n",
"\n",
"*Estimated timing of tutorial: 70 minutes*\n",
"\n",
"In this tutorial, you will observe how meta-learning may occur in the brain, specifically through reinforcement learning and the Baldwin effect."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"execution": {},
"tags": [
"remove-input"
]
},
"outputs": [],
"source": [
"# @markdown\n",
"from IPython.display import IFrame\n",
"from ipywidgets import widgets\n",
"out = widgets.Output()\n",
"with out:\n",
" print(f\"If you want to download the slides: https://osf.io/download/t36w8/\")\n",
" display(IFrame(src=f\"https://mfr.ca-1.osf.io/render?url=https://osf.io/t36w8/?direct%26mode=render%26action=download%26mode=render\", width=730, height=410))\n",
"display(out)"
]
},
{
"cell_type": "markdown",
"metadata": {
"execution": {}
},
"source": [
"---\n",
"# Setup\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Install and import feedback gadget\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"execution": {},
"tags": [
"hide-input"
]
},
"outputs": [],
"source": [
"# @title Install and import feedback gadget\n",
"\n",
"!pip install numpy matplotlib ipywidgets jupyter_ui_poll torch tqdm vibecheck datatops --quiet\n",
"\n",
"from vibecheck import DatatopsContentReviewContainer\n",
"def content_review(notebook_section: str):\n",
" return DatatopsContentReviewContainer(\n",
" \"\", # No text prompt\n",
" notebook_section,\n",
" {\n",
" \"url\": \"https://pmyvdlilci.execute-api.us-east-1.amazonaws.com/klab\",\n",
" \"name\": \"neuromatch_neuroai\",\n",
" \"user_key\": \"wb2cxze8\",\n",
" },\n",
" ).render()\n",
"\n",
"\n",
"feedback_prefix = \"W2D4_T4\""
]
},
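{
"cell_type": "markdown",
"metadata": {},
"source": [
"Feedback cells throughout the tutorial call `content_review` with a tag built from `feedback_prefix`. Below is a minimal sketch of that pattern; `Example_Section` is a placeholder name, not a real tutorial section.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal sketch of how feedback cells use the gadget.\n",
"# \"Example_Section\" is a placeholder tag, not a real section name.\n",
"content_review(f\"{feedback_prefix}_Example_Section\")"
]
},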
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imports\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"execution": {},
"tags": [
"hide-input"
]
},
"outputs": [],
"source": [
"# @title Imports\n",
"\n",
"#working with data\n",
"import numpy as np\n",
"import random\n",
"\n",
"#plotting\n",
"import matplotlib.pyplot as plt\n",
"import logging\n",
"\n",
"#interactive display\n",
"import ipywidgets as widgets\n",
"from IPython.display import display, clear_output\n",
"from jupyter_ui_poll import ui_events\n",
"import time\n",
"from tqdm import tqdm\n",
"\n",
"#modeling\n",
"import copy\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"from torch.autograd import Variable"
]
},
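{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reproducible runs, you may want to seed the random number generators imported above. A minimal sketch follows; the seed value is arbitrary.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: seed Python, NumPy, and PyTorch RNGs so reruns give the same results.\n",
"seed = 42  # arbitrary choice\n",
"random.seed(seed)\n",
"np.random.seed(seed)\n",
"torch.manual_seed(seed)"
]
},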
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Figure settings\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"execution": {},
"tags": [
"hide-input"
]
},
"outputs": [],
"source": [
"# @title Figure settings\n",
"\n",
"logging.getLogger('matplotlib.font_manager').disabled = True\n",
"\n",
"%matplotlib inline\n",
"%config InlineBackend.figure_format = 'retina' # perfrom high definition rendering for images and plots\n",
"plt.style.use(\"https://raw.githubusercontent.com/NeuromatchAcademy/course-content/main/nma.mplstyle\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plotting functions\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"execution": {},
"tags": [
"hide-input"
]
},
"outputs": [],
"source": [
"# @title Plotting functions\n",
"\n",
"def plot_cumulative_rewards(rewards):\n",
" \"\"\"\n",
" Plot the cumulative rewards over time.\n",
"\n",
" Inputs:\n",
" - rewards (list): list containing the cumulative rewards at each time step.\n",
" \"\"\"\n",
" with plt.xkcd():\n",
" plt.plot(range(len(rewards)), rewards)\n",
" plt.xlabel('Time Step')\n",
" plt.ylabel('Cumulative Reward')\n",
" plt.title('Cumulative Reward Over Time')\n",
" plt.show()\n",
"\n",
"\n",
"def plot_boxplot_scores(scores):\n",
" \"\"\"\n",
" Plots a boxplot of the given scores.\n",
"\n",
" Inputs:\n",
" scores (list): list of scores.\n",
" \"\"\"\n",
" with plt.xkcd():\n",
" plt.boxplot(scores, labels = [''])\n",
" plt.xlabel('')\n",
" plt.ylabel('Score')\n",
" plt.title('Distribution of Scores')\n",
" plt.show()\n",
"\n",
"def plot_two_boxplot_scores(newbie_scores, experienced_scores):\n",
" \"\"\"\n",
" Plots two boxplots of the given scores.\n",
"\n",
" Inputs:\n",
" scores (list): list of scores.\n",
" \"\"\"\n",
" with plt.xkcd():\n",
" plt.boxplot([newbie_scores, experienced_scores], labels=['Newbie', 'Experienced'])\n",
" plt.xlabel('Agent')\n",
" plt.ylabel('Score')\n",
" plt.title('Distribution of Scores')\n",
" plt.show()"
]
},
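{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick check that the plotting helpers work, you can call them on synthetic data. This is a minimal sketch: the random-walk rewards and the two score samples below are made up for illustration and carry no meaning for the experiment.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Exercise the plotting helpers on synthetic data (illustration only).\n",
"demo_rewards = np.cumsum(np.random.choice([-1, 1], size=50))\n",
"plot_cumulative_rewards(list(demo_rewards))\n",
"\n",
"# Two made-up score samples for the side-by-side boxplot.\n",
"plot_two_boxplot_scores(list(np.random.normal(2, 1, 20)),\n",
"                        list(np.random.normal(4, 1, 20)))"
]
},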
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper functions\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"execution": {},
"tags": [
"hide-input"
]
},
"outputs": [],
"source": [
"# @title Helper functions\n",
"\n",
"def generate_symbols():\n",
" \"\"\"\n",
" Generate random symbols for playing Harlow experiment.\n",
"\n",
" Outputs:\n",
" - symbols (list): list of symbols.\n",
" \"\"\"\n",
" symbols = []\n",
" symbol_types = ['circle', 'square', 'triangle', 'star', 'pentagon', 'hexagon', 'octagon', 'diamond', 'arrow', 'rectangle']\n",
" symbol_types = np.random.permutation(symbol_types)\n",
"\n",
" for symbol_type in symbol_types:\n",
" color = np.random.choice(['red', 'blue', 'green', 'yellow'])\n",
"\n",
" if symbol_type == 'circle':\n",
" symbol = plt.Circle((0.5, 0.5), 0.3, color=color)\n",
" elif symbol_type == 'square':\n",
" symbol = plt.Rectangle((0.2, 0.2), 0.6, 0.6, color=color)\n",
" elif symbol_type == 'triangle':\n",
" symbol = plt.Polygon([(0.2, 0.2), (0.5, 0.8), (0.8, 0.2)], closed=True, color=color)\n",
" elif symbol_type == 'star':\n",
" symbol = plt.Polygon([(0.5, 1), (0.6, 0.7), (0.8, 0.7), (0.65, 0.5), (0.75, 0.3),\n",
" (0.5, 0.45), (0.25, 0.3), (0.35, 0.5), (0.2, 0.7), (0.4, 0.7)], closed=True, color=color)\n",
" elif symbol_type == 'pentagon':\n",
" symbol = plt.Polygon([(0.5 + 0.2*np.cos(2*np.pi*i/5), 0.5 + 0.2*np.sin(2*np.pi*i/5)) for i in range(5)], closed=True, color=color)\n",
" elif symbol_type == 'hexagon':\n",
" symbol = plt.Polygon([(0.5 + 0.2*np.cos(2*np.pi*i/6), 0.5 + 0.2*np.sin(2*np.pi*i/6)) for i in range(6)], closed=True, color=color)\n",
" elif symbol_type == 'octagon':\n",
" symbol = plt.Polygon([(0.5 + 0.2*np.cos(2*np.pi*i/8), 0.5 + 0.2*np.sin(2*np.pi*i/8)) for i in range(8)], closed=True, color=color)\n",
" elif symbol_type == 'diamond':\n",
" symbol = plt.Polygon([(0.5, 0.7), (0.3, 0.5), (0.5, 0.3), (0.7, 0.5)], closed=True, color=color)\n",
" elif symbol_type == 'arrow':\n",
" symbol = plt.Polygon([(0.3, 0.3), (0.5, 0.7), (0.7, 0.3), (0.5, 0.5)], closed=True, color=color)\n",
" elif symbol_type == 'rectangle':\n",
" symbol = plt.Rectangle((0.4, 0.2), 0.2, 0.6, color=color)\n",
" symbols.append(symbol)\n",
"\n",
" return symbols\n",
"\n",
"def run_dummy_agent(env):\n",
" \"\"\"\n",
" Implement dummy agent strategy: chooses the last rewarded action.\n",
"\n",
" Inputs:\n",
" - env (HarlowExperimentEnv): An environment.\n",
" \"\"\"\n",
" action = 0\n",
" cumulative_reward = 0\n",
" rewards = [cumulative_reward]\n",
"\n",
" for _ in (range(num_trials)):\n",
" _, reward = env.step(action)\n",
" cumulative_reward += reward\n",
" rewards.append(cumulative_reward)\n",
"\n",
" #dummy agent\n",
" if reward == -1:\n",
" action = 1 - action\n",
" return rewards\n",
"\n",
"def game():\n",
" \"\"\"\n",
" Create interactive game which resembles one famous experiment!\n",
" \"\"\"\n",
" total_reward = 0\n",
" symbols = generate_symbols()\n",
" message = \"Start of the game!\"\n",
" total_attempts = 5 * 6 # Assuming 5 sets with 6 attempts each\n",
"\n",
" left_button = widgets.Button(description=\"Left\")\n",
" right_button = widgets.Button(description=\"Right\")\n",
" button_box = widgets.HBox([left_button, right_button])\n",
"\n",
" def define_choice(button):\n",
" \"\"\"\n",
" Change `choice` variable with respect to the pressed button.\n",
" \"\"\"\n",
" nonlocal choice\n",
" display(widgets.HTML(f\"