{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Scikit-Learn GAIuS™ Pipeline Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pprint\n",
    "import warnings\n",
    "\n",
    "from ia.gaius.experimental.sklearn import GAIuSClassifier, GDFTransformer\n",
    "from ia.gaius.manager import AgentManager\n",
    "\n",
    "from sklearn.datasets import fetch_openml\n",
    "from sklearn.feature_selection import VarianceThreshold\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.preprocessing import KBinsDiscretizer, StandardScaler"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fetch the MNIST data in OpenML format. Each row corresponds to a single MNIST image."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "mnist = fetch_openml('mnist_784', version=1, parser='auto')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = mnist.data\n",
    "y = mnist.target\n",
    "\n",
    "feature_names = mnist.feature_names"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use the first 60,000 rows for training and the remaining rows for testing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "N_TRAIN = 60000  # number of leading rows used for training\n",
    "\n",
    "X_train, X_test = X[:N_TRAIN], X[N_TRAIN:]\n",
    "y_train, y_test = y[:N_TRAIN], y[N_TRAIN:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Clear all agents on the system. Note that this affects every agent, not only agents created by this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "am = AgentManager()\n",
    "am.kill_all_agents()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define a pipeline that will:\n",
    "\n",
    " - center and scale the MNIST data,\n",
    " - eliminate features with low variance,\n",
    " - bin the data into integer bins,\n",
    " - convert the data to GDF sequences,\n",
    " - ingest them into the Cognitive Processor."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "gaius_pipeline = Pipeline([\n",
    "    ('scaler', StandardScaler()),\n",
    "    ('variance_threshold', VarianceThreshold(threshold=0.005)),\n",
    "    ('discretizer', KBinsDiscretizer(n_bins=32, encode='ordinal')),\n",
    "    ('gdfer', GDFTransformer(as_vector=True)),\n",
    "    ('cp_classifier', GAIuSClassifier(recall_threshold=0.1,\n",
    "                                      max_predictions=5,\n",
    "                                      near_vector_count=3,\n",
    "                                      pred_as_int=False)),\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Show the status of the agent behind the final pipeline step (the `GAIuSClassifier`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'P1': {'AUTOLEARN': False,\n",
       " 'HYPOTHESIZED': False,\n",
       " 'PREDICT': True,\n",
       " 'SLEEPING': False,\n",
       " 'SNAPSHOT': False,\n",
       " 'emotives': {},\n",
       " 'last_learned_model_name': '',\n",
       " 'models_kb': '{KB| objects: 0}',\n",
       " 'name': 'P1',\n",
       " 'num_observe_call': 0,\n",
       " 'size_WM': 0,\n",
       " 'target': '',\n",
       " 'time': 0,\n",
       " 'vector_dimensionality': -1,\n",
       " 'vectors_kb': '{KB| objects: 0}'}}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gaius_pipeline.steps[-1][-1].agent.show_status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ignore all UserWarning messages from this point on in the notebook\n",
    "warnings.filterwarnings(\"ignore\", category=UserWarning)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "982ac3efc68a495a80eb9f5b59c6ba39",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       " 0%| | 0/10000 [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "993f3abcc92240b790662619e057410e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       " 0%| | 0/10000 [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "
Pipeline(steps=[('scaler', StandardScaler()),\n", " ('variance_threshold', VarianceThreshold(threshold=0.005)),\n", " ('discretizer', KBinsDiscretizer(encode='ordinal', n_bins=32)),\n", " ('gdfer', GDFTransformer(as_vector=True)),\n", " ('cp_classifier',\n", " GAIuSClassifier(max_predictions=5, near_vector_count=3,\n", " pred_as_int=False))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('scaler', StandardScaler()),\n", " ('variance_threshold', VarianceThreshold(threshold=0.005)),\n", " ('discretizer', KBinsDiscretizer(encode='ordinal', n_bins=32)),\n", " ('gdfer', GDFTransformer(as_vector=True)),\n", " ('cp_classifier',\n", " GAIuSClassifier(max_predictions=5, near_vector_count=3,\n", " pred_as_int=False))])
StandardScaler()
VarianceThreshold(threshold=0.005)
KBinsDiscretizer(encode='ordinal', n_bins=32)
GDFTransformer(as_vector=True)
GAIuSClassifier(max_predictions=5, near_vector_count=3, pred_as_int=False)