Scikit-Learn GAIuS™ Pipeline Example
[1]:
import pprint
import warnings

from ia.gaius.experimental.sklearn import GAIuSClassifier, GDFTransformer
from ia.gaius.manager import AgentManager
from sklearn.datasets import fetch_openml
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import KBinsDiscretizer, StandardScaler
Fetch the MNIST data from OpenML. Each row corresponds to a single MNIST image.
[2]:
# Fetch version 1 of the 'mnist_784' dataset from OpenML.
mnist = fetch_openml(name='mnist_784', version=1, parser='auto')
[3]:
# Pull the feature matrix, the labels and the feature names off the fetched dataset.
X, y, feature_names = mnist.data, mnist.target, mnist.feature_names
[4]:
# The first 60,000 rows form the training split; everything after them is the test split.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]
Clear all agents on the system.
[5]:
# Create an AgentManager and ask it to kill all agents, so the pipeline below
# starts without leftover agents from earlier runs.
# NOTE(review): kill_all_agents() presumably also stops agents that were not created
# by this notebook — confirm before running on a shared machine.
am = AgentManager()
am.kill_all_agents()
Define a pipeline to:
- center and scale the MNIST data
- eliminate features with low variance
- bin the data into integer bins
- convert to a GDF sequence
- ingest into the Cognitive Processor
[6]:
# Preprocessing + classification pipeline, one stage per line:
#   scaler             -> standardise every pixel column
#   variance_threshold -> drop columns whose variance is at most 0.005
#   discretizer        -> bin each remaining column into 32 ordinal bins
#   gdfer              -> convert each row into a GDF carrying a vector
#   cp_classifier      -> GAIuS agent that learns from / predicts on the GDFs
# NOTE(review): after StandardScaler every non-constant column has unit variance, so the
# variance threshold presumably only removes constant pixels — confirm this is intended.
gaius_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('variance_threshold', VarianceThreshold(threshold=0.005)),
    ('discretizer', KBinsDiscretizer(n_bins=32, encode='ordinal')),
    ('gdfer', GDFTransformer(as_vector=True)),
    ('cp_classifier', GAIuSClassifier(recall_threshold=0.1,
                                      max_predictions=5,
                                      near_vector_count=3,
                                      pred_as_int=False)),
])
[7]:
# Show the status of the agent behind the final ('cp_classifier') step before any training.
gaius_pipeline.named_steps['cp_classifier'].agent.show_status()
[7]:
{'P1': {'AUTOLEARN': False,
'HYPOTHESIZED': False,
'PREDICT': True,
'SLEEPING': False,
'SNAPSHOT': False,
'emotives': {},
'last_learned_model_name': '',
'models_kb': '{KB| objects: 0}',
'name': 'P1',
'num_observe_call': 0,
'size_WM': 0,
'target': '',
'time': 0,
'vector_dimensionality': -1,
'vectors_kb': '{KB| objects: 0}'}}
[8]:
import warnings
# Ignore all user warnings
warnings.filterwarnings("ignore", category=UserWarning)
[9]:
# Train on the first 10,000 training rows only, to keep the example quick.
gaius_pipeline.fit(X=X_train[:10000], y=y_train[:10000])
[9]:
Pipeline(steps=[('scaler', StandardScaler()), ('variance_threshold', VarianceThreshold(threshold=0.005)), ('discretizer', KBinsDiscretizer(encode='ordinal', n_bins=32)), ('gdfer', GDFTransformer(as_vector=True)), ('cp_classifier', GAIuSClassifier(max_predictions=5, near_vector_count=3, pred_as_int=False))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('scaler', StandardScaler()), ('variance_threshold', VarianceThreshold(threshold=0.005)), ('discretizer', KBinsDiscretizer(encode='ordinal', n_bins=32)), ('gdfer', GDFTransformer(as_vector=True)), ('cp_classifier', GAIuSClassifier(max_predictions=5, near_vector_count=3, pred_as_int=False))])
StandardScaler()
VarianceThreshold(threshold=0.005)
KBinsDiscretizer(encode='ordinal', n_bins=32)
GDFTransformer(as_vector=True)
GAIuSClassifier(max_predictions=5, near_vector_count=3, pred_as_int=False)
[10]:
# Predict labels for the first 1,000 test rows.
preds = gaius_pipeline.predict(X_test[:1000])
[11]:
# NOTE(review): disabled string cast. Presumably only needed when the classifier returns
# integer predictions (pred_as_int=True), so they compare equal to the labels in y_test —
# confirm, or delete this cell.
# preds = [str(p) for p in preds]
Print result metrics from the pipeline, trained on 10,000 records and tested on 1,000 records.
[12]:
# classification_report returns one pre-formatted multi-line string, so print() it directly:
# pprint would show the quoted repr with literal '\n' instead of the aligned table.
# preds already holds exactly the 1,000 predictions for X_test[:1000], so it is not re-sliced.
print(classification_report(y_true=y_test[:1000], y_pred=preds))
(' precision recall f1-score support\n'
'\n'
' 0 0.92 0.96 0.94 85\n'
' 1 0.94 0.99 0.97 126\n'
' 2 0.94 0.87 0.91 116\n'
' 3 0.87 0.84 0.85 107\n'
' 4 0.90 0.86 0.88 110\n'
' 5 0.86 0.92 0.89 87\n'
' 6 0.90 0.93 0.92 87\n'
' 7 0.84 0.91 0.87 99\n'
' 8 0.89 0.79 0.83 89\n'
' 9 0.86 0.84 0.85 94\n'
'\n'
' accuracy 0.89 1000\n'
' macro avg 0.89 0.89 0.89 1000\n'
'weighted avg 0.89 0.89 0.89 1000\n')
[13]:
# Show the status of the agent behind the final ('cp_classifier') step again, now that
# the pipeline has been fitted and used for prediction.
gaius_pipeline.named_steps['cp_classifier'].agent.show_status()
[13]:
{'P1': {'AUTOLEARN': False,
'HYPOTHESIZED': False,
'PREDICT': True,
'SLEEPING': False,
'SNAPSHOT': False,
'emotives': {},
'last_learned_model_name': '74e834addc3af2d88aa336db0f67f9a3c5da7009',
'models_kb': '{KB| objects: 10000}',
'name': 'P1',
'num_observe_call': 1,
'size_WM': 4,
'target': '',
'time': 21000,
'vector_dimensionality': 673,
'vectors_kb': '{KB| objects: 10000}'}}
[ ]: