Scikit-Learn GAIuS™ Pipeline Example

import pprint

from ia.gaius.experimental.sklearn import GAIuSClassifier, GDFTransformer
from ia.gaius.manager import AgentManager

from sklearn.datasets import fetch_openml
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import KBinsDiscretizer, StandardScaler

Fetch the MNIST data in OpenML format. Each row corresponds to a single MNIST image.

# Download the 'mnist_784' dataset (version 1) from OpenML.
mnist = fetch_openml('mnist_784', version=1, parser='auto')
# NOTE(review): the right-hand sides of these two assignments were missing in
# the source; reconstructed from the `data` / `target` attributes of the Bunch
# returned by fetch_openml -- confirm against the original notebook.
X = mnist.data
y = mnist.target

feature_names = mnist.feature_names
# Rows before index 60000 are used for training, the remainder for testing.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

Clear all agents on system

# Instantiate the AgentManager with its default arguments.
# NOTE(review): the heading above says existing agents are cleared, but no
# such call is visible here -- confirm whether a clean-up step is missing.
am = AgentManager()

Define a pipeline to:

- center and scale the MNIST data,
- eliminate features with low variance,
- bin the data into integer bins,
- convert the data to a GDF sequence,
- ingest it into the Cognitive Processor.
# Assemble the processing chain; steps are listed in the order they are given
# to Pipeline: scaling, variance filtering, ordinal binning into 32 bins,
# GDF conversion, then the GAIuS classifier as the final step.
gaius_pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('variance_threshold', VarianceThreshold(threshold=0.005)),
        ('discretizer', KBinsDiscretizer(n_bins=32, encode='ordinal')),
        ('gdfer', GDFTransformer(as_vector=True)),
        (
            'cp_classifier',
            GAIuSClassifier(
                recall_threshold=0.1,
                max_predictions=5,
                near_vector_count=3,
                pred_as_int=False,
            ),
        ),
    ]
)
import warnings

# Install an "ignore" filter for the UserWarning category (no message or
# module pattern), so warnings of that category are suppressed from here on.
warnings.simplefilter("ignore", category=UserWarning)

# Fit the pipeline on the first 10,000 training records.
# NOTE(review): the start of this call was garbled in the source (a notebook
# "[9]:" prompt was fused into the line); reconstructed as a fit() call on
# gaius_pipeline -- confirm against the original notebook.
gaius_pipeline.fit(X_train[:10000], y_train[:10000])
Pipeline(steps=[('scaler', StandardScaler()),
                ('variance_threshold', VarianceThreshold(threshold=0.005)),
                ('discretizer', KBinsDiscretizer(encode='ordinal', n_bins=32)),
                ('gdfer', GDFTransformer(as_vector=True)),
                ('cp_classifier',
                 GAIuSClassifier(max_predictions=5, near_vector_count=3,
                                 ...))])
# Run the pipeline's predict step on the first 1,000 test records.
preds = gaius_pipeline.predict(X=X_test[:1000])
# Optional conversion of every prediction to a string, left disabled:
# preds = [str(p) for p in preds]

Print result metrics from the pipeline, trained on 10,000 records and tested on 1,000 records.


# Build the classification report for the first 1,000 test labels versus the
# predictions and pretty-print it. Because the report is passed to pprint as
# a single string, it is shown as quoted, newline-terminated fragments rather
# than as a plain table.
pprint.pp(classification_report(y_true=y_test[:1000], y_pred=preds[:1000]))
('              precision    recall  f1-score   support\n'
 '           0       0.92      0.96      0.94        85\n'
 '           1       0.94      0.99      0.97       126\n'
 '           2       0.94      0.87      0.91       116\n'
 '           3       0.87      0.84      0.85       107\n'
 '           4       0.90      0.86      0.88       110\n'
 '           5       0.86      0.92      0.89        87\n'
 '           6       0.90      0.93      0.92        87\n'
 '           7       0.84      0.91      0.87        99\n'
 '           8       0.89      0.79      0.83        89\n'
 '           9       0.86      0.84      0.85        94\n'
 '    accuracy                           0.89      1000\n'
 '   macro avg       0.89      0.89      0.89      1000\n'
 'weighted avg       0.89      0.89      0.89      1000\n')
{'P1': {'AUTOLEARN': False,
  'PREDICT': True,
  'SLEEPING': False,
  'SNAPSHOT': False,
  'emotives': {},
  'last_learned_model_name': '74e834addc3af2d88aa336db0f67f9a3c5da7009',
  'models_kb': '{KB| objects: 10000}',
  'name': 'P1',
  'num_observe_call': 1,
  'size_WM': 4,
  'target': '',
  'time': 21000,
  'vector_dimensionality': 673,
  'vectors_kb': '{KB| objects: 10000}'}}
