Scikit-Learn GAIuS™ Pipeline Example

[1]:

import pprint

from ia.gaius.experimental.sklearn import GAIuSClassifier, GDFTransformer
from ia.gaius.manager import AgentManager

from sklearn.datasets import fetch_openml
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import KBinsDiscretizer, StandardScaler

Fetch the MNIST data in OpenML format. Each row corresponds to a single MNIST image.

[2]:

# Load version 1 of the 'mnist_784' dataset from OpenML.
mnist = fetch_openml(name='mnist_784', version=1, parser='auto')

[3]:

# Pull the feature table, the labels and the feature (column) names
# out of the fetched dataset object.
X, y = mnist.data, mnist.target
feature_names = mnist.feature_names

[4]:

# The first 60,000 rows form the training set; the remaining rows form
# the test set.
n_train = 60000
X_train, y_train = X[:n_train], y[:n_train]
X_test, y_test = X[n_train:], y[n_train:]

Clear all agents on the system.

[5]:

# Start from a clean slate before building the pipeline.
# NOTE(review): kill_all_agents() presumably stops every agent known to this
# manager, not only ones created by this notebook -- confirm before running
# on a shared system.
am = AgentManager()
am.kill_all_agents()

Define a pipeline to:

- center and scale the MNIST data
- eliminate features with low variance
- bin the data into integer bins
- convert the data to a GDF sequence
- ingest it into the Cognitive Processor

[6]:

# Preprocessing + classification pipeline; steps are listed in the order
# they are applied.
gaius_pipeline = Pipeline(
    steps=[
        # Center and scale the features.
        ('scaler', StandardScaler()),
        # Eliminate low-variance features (threshold 0.005).
        ('variance_threshold', VarianceThreshold(threshold=0.005)),
        # Bin the data into 32 integer (ordinal-encoded) bins.
        ('discretizer', KBinsDiscretizer(n_bins=32, encode='ordinal')),
        # Convert the binned rows to GDF sequences.
        ('gdfer', GDFTransformer(as_vector=True)),
        # Final step: ingest into the Cognitive Processor and classify.
        (
            'cp_classifier',
            GAIuSClassifier(
                recall_threshold=0.1,
                max_predictions=5,
                near_vector_count=3,
                pred_as_int=False,
            ),
        ),
    ],
)

[7]:

# Show the status of the agent behind the final ('cp_classifier') step.
gaius_pipeline.named_steps['cp_classifier'].agent.show_status()

[7]:

{'P1': {'AUTOLEARN': False,
  'HYPOTHESIZED': False,
  'PREDICT': True,
  'SLEEPING': False,
  'SNAPSHOT': False,
  'emotives': {},
  'last_learned_model_name': '',
  'models_kb': '{KB| objects: 0}',
  'name': 'P1',
  'num_observe_call': 0,
  'size_WM': 0,
  'target': '',
  'time': 0,
  'vector_dimensionality': -1,
  'vectors_kb': '{KB| objects: 0}'}}

[8]:

import warnings

# Silence every UserWarning for the rest of the session so the training and
# prediction cells below are not flooded with warning output.
warnings.filterwarnings(action="ignore", category=UserWarning)

[9]:

# Train on the first 10,000 training records only.
gaius_pipeline.fit(X=X_train[:10000], y=y_train[:10000])

[9]:

Pipeline(steps=[('scaler', StandardScaler()),
                ('variance_threshold', VarianceThreshold(threshold=0.005)),
                ('discretizer', KBinsDiscretizer(encode='ordinal', n_bins=32)),
                ('gdfer', GDFTransformer(as_vector=True)),
                ('cp_classifier',
                 GAIuSClassifier(max_predictions=5, near_vector_count=3,
                                 pred_as_int=False))])

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

[10]:

# Predict labels for the first 1,000 test records.
preds = gaius_pipeline.predict(X=X_test[:1000])

[11]:

# preds = [str(p) for p in preds]

Print the result metrics from the pipeline, which was trained on 10,000 records and tested on 1,000 records.

[12]:

# Per-class precision / recall / F1 for the first 1,000 test records.
# NOTE: the report is a single multi-line string, so pprint.pp shows its
# repr (quoted fragments with escaped newlines); print(report) would render
# it as a plain table instead.
report = classification_report(y_true=y_test[:1000], y_pred=preds[:1000])
pprint.pp(report)

('              precision    recall  f1-score   support\n'
 '\n'
 '           0       0.92      0.96      0.94        85\n'
 '           1       0.94      0.99      0.97       126\n'
 '           2       0.94      0.87      0.91       116\n'
 '           3       0.87      0.84      0.85       107\n'
 '           4       0.90      0.86      0.88       110\n'
 '           5       0.86      0.92      0.89        87\n'
 '           6       0.90      0.93      0.92        87\n'
 '           7       0.84      0.91      0.87        99\n'
 '           8       0.89      0.79      0.83        89\n'
 '           9       0.86      0.84      0.85        94\n'
 '\n'
 '    accuracy                           0.89      1000\n'
 '   macro avg       0.89      0.89      0.89      1000\n'
 'weighted avg       0.89      0.89      0.89      1000\n')

[13]:

# Show the agent status again, now that the pipeline has been fitted and
# used for prediction.
gaius_pipeline.named_steps['cp_classifier'].agent.show_status()

[13]:

{'P1': {'AUTOLEARN': False,
  'HYPOTHESIZED': False,
  'PREDICT': True,
  'SLEEPING': False,
  'SNAPSHOT': False,
  'emotives': {},
  'last_learned_model_name': '74e834addc3af2d88aa336db0f67f9a3c5da7009',
  'models_kb': '{KB| objects: 10000}',
  'name': 'P1',
  'num_observe_call': 1,
  'size_WM': 4,
  'target': '',
  'time': 21000,
  'vector_dimensionality': 673,
  'vectors_kb': '{KB| objects: 10000}'}}

[ ]: