import json, base64, joblib, io, os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (
confusion_matrix, ConfusionMatrixDisplay,
classification_report, r2_score, mean_absolute_error, mean_squared_error
)
# [docs]  (Sphinx "view source" link artifact; not part of the program)
class Gam_Ai_Workflow:
    """
    Type-aware workflow manager for `.gam_ai` model packages.

    Wraps a `GAM_AI_MODEL` and offers a single place to summarise, predict
    with, refit and evaluate the packaged estimator. `evaluate()` dispatches
    on the lower-cased `model_type` metadata field, which must be one of
    ``"classifier"``, ``"regressor"`` or ``"unsupervised"``.

    Parameters
    ----------
    model_name : str
        Name (without extension) of the `.gam_ai` file to load.
    base_dir : str, optional
        Directory containing `.gam_ai` files. ``None`` (the default) is
        passed through to `GAM_AI_MODEL`, which then falls back to the
        ``gam_models`` directory located next to this module.

    Attributes
    ----------
    model_name : str
        The name passed to the constructor.
    gam : GAM_AI_MODEL
        The loaded package (metadata plus estimator).
    ml_model : object or None
        Shortcut to ``gam.ml_model``.
    model_type : str
        Lower-cased ``gam.model_type``; ``""`` when the metadata has no
        (or a null) ``model_type``.

    Raises
    ------
    FileNotFoundError
        Propagated from `GAM_AI_MODEL` when the package file is missing.

    Examples
    --------
    >>> workflow = Gam_Ai_Workflow("rf_classifier_v1", base_dir="models")  # doctest: +SKIP
    >>> workflow.summary()                                                  # doctest: +SKIP
    >>> workflow.evaluate(X_test, y_test)                                   # doctest: +SKIP
    """

    # model_type values whose evaluation needs ground-truth targets.
    _SUPERVISED_TYPES = ("classifier", "regressor")

    def __init__(self, model_name, base_dir=None):
        self.model_name = model_name
        self.gam = GAM_AI_MODEL(model_name, base_dir=base_dir)
        self.ml_model = self.gam.ml_model
        # Metadata is free-form: model_type can be absent or null. Normalise
        # to "" so predict()/refit() stay usable; evaluate() will then report
        # the unknown type explicitly.
        self.model_type = str(getattr(self.gam, "model_type", None) or "").lower()
        print(f"✅ Loaded model '{self.model_name}' ({self.model_type}) successfully.")

    # ---------- General ----------
    def summary(self):
        """
        Print every metadata attribute of the loaded package.

        All instance attributes of ``self.gam`` are printed as
        ``key: value`` lines, except the estimator object itself.

        Returns
        -------
        None
        """
        print("\n📊 MODEL SUMMARY")
        for key, value in vars(self.gam).items():
            # The estimator is stored as `ml_model`; "model" is kept in the
            # filter for packages that used the older attribute name.
            if key not in ("ml_model", "model"):
                print(f"{key}: {value}")

    def predict(self, X):
        """
        Run model inference on input data.

        Parameters
        ----------
        X : array-like
            Input features, forwarded unchanged to the estimator.

        Returns
        -------
        object
            Whatever ``ml_model.predict(X)`` returns.

        Raises
        ------
        ValueError
            If the package contained no model.
        """
        if self.ml_model is None:
            raise ValueError("Model not loaded.")
        return self.ml_model.predict(X)

    def refit(self, X, y=None):
        """
        Retrain (refit) the loaded model in place on new data.

        Parameters
        ----------
        X : array-like
            Training features.
        y : array-like, optional
            Training labels or targets. When omitted, ``fit(X)`` is called,
            which is what unsupervised estimators expect.

        Raises
        ------
        NotImplementedError
            If the model has no ``fit`` method (including when no model is
            loaded).

        Notes
        -----
        Only the in-memory estimator changes; the `.gam_ai` file on disk is
        not rewritten. To persist the refitted model, wrap it in a
        `GamAI_io` instance and call its ``save()`` method.
        """
        if not hasattr(self.ml_model, "fit"):
            raise NotImplementedError("This model type cannot be refitted.")
        if y is None:
            self.ml_model.fit(X)
        else:
            self.ml_model.fit(X, y)
        print("🔁 Model refitted successfully.")

    # ---------- Classifier ----------
    def evaluate_classifier(self, X, y_true):
        """
        Evaluate a classification model and display key metrics.

        Prints the accuracy and scikit-learn's classification report, then
        shows a confusion-matrix plot.

        Parameters
        ----------
        X : array-like
            Input test features.
        y_true : array-like
            Ground-truth class labels.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If the number of predictions differs from the number of labels.
        """
        y_pred = np.asarray(self.ml_model.predict(X))
        y_true = np.asarray(y_true)
        if y_true.shape != y_pred.shape:
            if y_true.size != y_pred.size:
                raise ValueError(
                    f"y_true has {y_true.size} values but the model produced "
                    f"{y_pred.size} predictions."
                )
            # e.g. (n, 1) labels vs (n,) predictions: without this, `==`
            # would broadcast to an (n, n) matrix and the mean would be wrong.
            y_true = y_true.reshape(y_pred.shape)

        acc = np.mean(y_pred == y_true)
        print(f"🎯 Accuracy: {acc:.4f}\n")
        print("📋 Classification Report:\n")
        print(classification_report(y_true, y_pred))

        cm = confusion_matrix(y_true, y_pred)
        disp = ConfusionMatrixDisplay(cm)
        disp.plot(cmap="Blues")
        plt.title(f"Confusion Matrix: {self.gam.model_name}")
        plt.show()

    # ---------- Regressor ----------
    def evaluate_regressor(self, X, y_true):
        """
        Evaluate a regression model and plot predicted vs. true values.

        Prints R², mean absolute error and mean squared error, then shows a
        scatter plot of predictions against targets together with the
        identity line spanning the range of ``y_true``.

        Parameters
        ----------
        X : array-like
            Input test features.
        y_true : array-like
            True continuous target values.

        Returns
        -------
        None
        """
        y_pred = self.ml_model.predict(X)
        r2 = r2_score(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)
        mse = mean_squared_error(y_true, y_pred)
        print(f"📈 R²: {r2:.4f}")
        print(f"📉 MAE: {mae:.4f}")
        print(f"📊 MSE: {mse:.4f}\n")

        # np.min/np.max instead of the builtins: vectorised, and they do not
        # choke on multi-dimensional targets.
        lo, hi = np.min(y_true), np.max(y_true)
        plt.figure(figsize=(6, 4))
        plt.scatter(y_true, y_pred, alpha=0.6)
        plt.plot([lo, hi], [lo, hi], 'r--')
        plt.xlabel("True Values")
        plt.ylabel("Predicted Values")
        plt.title(f"Prediction Trend — {self.gam.model_name}")
        plt.grid(True)
        plt.show()

    # ---------- Unsupervised ----------
    @staticmethod
    def _as_2d_points(data, label):
        """Return `data` as an ndarray with at least two columns, or raise ValueError."""
        points = np.asarray(data)
        if points.ndim != 2 or points.shape[1] < 2:
            raise ValueError(
                f"{label} must be 2-D with at least two columns to be plotted; "
                f"got shape {points.shape}."
            )
        return points

    def visualize_unsupervised(self, X):
        """
        Plot cluster assignments or the transformed feature space.

        If the model has ``predict``, the first two columns of ``X`` are
        scattered and coloured by the predicted labels. Otherwise, if it has
        ``transform``, the first two columns of the transformed data are
        scattered.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data to visualize. It is forwarded unchanged to the model;
            only the plotted copy is converted to an ndarray.

        Returns
        -------
        None

        Raises
        ------
        NotImplementedError
            If the model has neither ``predict`` nor ``transform``.
        ValueError
            If the data to plot is not 2-D with at least two columns.
        """
        if hasattr(self.ml_model, "predict"):
            # Validate before predicting so bad input fails fast and clearly.
            points = self._as_2d_points(X, "X")
            labels = self.ml_model.predict(X)
            plt.scatter(points[:, 0], points[:, 1], c=labels, cmap="viridis", s=30)
            plt.title(f"Cluster Visualization — {self.gam.model_name}")
            plt.show()
        elif hasattr(self.ml_model, "transform"):
            projected = self._as_2d_points(self.ml_model.transform(X), "Transformed X")
            plt.scatter(projected[:, 0], projected[:, 1], s=30)
            plt.title(f"Feature Space — {self.gam.model_name}")
            plt.show()
        else:
            raise NotImplementedError("This unsupervised model has no visualization method.")

    # ---------- Smart Evaluation ----------
    def evaluate(self, X, y_true=None):
        """
        Dispatch evaluation according to ``self.model_type``.

        - ``"classifier"``   -> `evaluate_classifier`
        - ``"regressor"``    -> `evaluate_regressor`
        - ``"unsupervised"`` -> `visualize_unsupervised`

        Parameters
        ----------
        X : array-like
            Input data.
        y_true : array-like, optional
            Ground-truth labels or targets. Required for classifiers and
            regressors; ignored for unsupervised models.

        Raises
        ------
        ValueError
            If ``model_type`` is not one of the three recognised values, or
            if ``y_true`` is missing for a supervised model.

        Examples
        --------
        >>> workflow.evaluate(X_test, y_test)   # doctest: +SKIP
        """
        if self.model_type in self._SUPERVISED_TYPES and y_true is None:
            raise ValueError(
                f"y_true is required to evaluate a {self.model_type} model."
            )

        if self.model_type == "classifier":
            self.evaluate_classifier(X, y_true)
        elif self.model_type == "regressor":
            self.evaluate_regressor(X, y_true)
        elif self.model_type == "unsupervised":
            self.visualize_unsupervised(X)
        else:
            raise ValueError(f"Unknown model_type: {self.model_type}")

    def get_GAM_AI_MODEL(self):
        """
        Return the `GAM_AI_MODEL` instance managed by this workflow.

        Returns
        -------
        GAM_AI_MODEL
            The object stored in ``self.gam`` during initialisation. It
            carries the package metadata as attributes and the deserialized
            estimator as ``ml_model``.

        Examples
        --------
        >>> gam_model = workflow.get_GAM_AI_MODEL()   # doctest: +SKIP
        >>> gam_model.summary()                       # doctest: +SKIP
        """
        return self.gam
import os, json, io, base64, joblib
class GAM_AI_MODEL:
    """
    Loader and in-memory representation of a `.gam_ai` model package.

    A `.gam_ai` file is a JSON document with two sections: ``"metadata"``
    (a mapping of descriptive fields) and ``"model_data"`` (a Base64 string
    holding a `joblib`-serialized estimator). On construction the file is
    read, every metadata key is attached to the instance as an attribute,
    and the estimator is exposed as ``ml_model``.

    Parameters
    ----------
    model_name : str
        Name of the model file, without the ``.gam_ai`` extension.
    base_dir : str, optional
        Directory containing `.gam_ai` files. Defaults to the
        ``gam_models`` directory located alongside this module file.

    Attributes
    ----------
    model_name : str
        The name passed to the constructor (overwritten if the metadata
        itself contains a ``model_name`` key).
    file_path : str
        Full path of the `.gam_ai` file that was read.
    ml_model : object or None
        The deserialized estimator, or ``None`` when the package has no
        (or empty) ``model_data``.
    <metadata keys> : Any
        Each key of the ``"metadata"`` section becomes an attribute.

    Raises
    ------
    FileNotFoundError
        If no file exists at the resolved ``file_path``.

    Warnings
    --------
    `joblib.load` unpickles arbitrary Python objects. Only load `.gam_ai`
    files that come from a trusted source.

    Examples
    --------
    >>> model = GAM_AI_MODEL("cu-nanocomposites-poisson-ratio-lr")  # doctest: +SKIP
    >>> model.summary()                                             # doctest: +SKIP

    See Also
    --------
    Gam_Ai_Workflow : Higher-level manager built on top of this class.
    """

    def __init__(self, model_name, base_dir=None):
        self.model_name = model_name

        # Default directory if none provided
        if base_dir is None:
            base_dir = os.path.join(os.path.dirname(__file__), "gam_models")
        self.file_path = os.path.join(base_dir, f"{model_name}.gam_ai")
        if not os.path.exists(self.file_path):
            raise FileNotFoundError(f"❌ File not found: {self.file_path}")

        # --- Load JSON file ---
        # Explicit UTF-8: metadata (author names, descriptions) may contain
        # non-ASCII text and the platform default encoding is not reliable.
        with open(self.file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # `or {}` also covers an explicit `"metadata": null` in the file.
        metadata = data.get("metadata") or {}
        model_data = data.get("model_data", None)

        # --- Assign metadata attributes dynamically ---
        for key, value in metadata.items():
            setattr(self, key, value)

        # --- Decode Base64 model ---
        if model_data:
            model_bytes = base64.b64decode(model_data)
            # SECURITY: joblib.load unpickles the payload and can execute
            # arbitrary code; only open packages from trusted sources.
            self.ml_model = joblib.load(io.BytesIO(model_bytes))
        else:
            self.ml_model = None

        print(f"✅ Loaded GAM_AI_MODEL: '{self.model_name}'")

    def summary(self):
        """
        Print an overview of the package metadata and the model's type.

        Every instance attribute except ``ml_model`` and ``file_path`` is
        printed as a ``key: value`` line, followed by the Python type of
        ``ml_model``.

        Returns
        -------
        None

        Examples
        --------
        >>> model.summary()   # doctest: +SKIP
        """
        print("\n📊 MODEL METADATA SUMMARY\n")
        for key, value in vars(self).items():
            if key not in ("ml_model", "file_path"):
                print(f"{key}: {value}")
        print("\n⚙️ ML Model:", type(self.ml_model))
# [docs]  (Sphinx "view source" link artifact; not part of the program)
class GamAI_io:
    """
    Input/output handler for saving and loading `.gam_ai` model packages.

    A package bundles a trained estimator with descriptive metadata in one
    JSON file made of two sections:

    - ``"metadata"``: every instance attribute except ``ml_model``.
    - ``"model_data"``: the estimator serialized with `joblib` and encoded
      as Base64 text so it can live inside JSON.

    Parameters
    ----------
    model_name : str, optional
        Short name of the model; used as the file name when saving.
    model_type : str, optional
        Free-form model category. Note that `Gam_Ai_Workflow.evaluate`
        only recognises (case-insensitively) ``"classifier"``,
        ``"regressor"`` and ``"unsupervised"``.
    description : str, optional, default=""
        Brief summary of the model.
    author_name : str, optional, default=""
        Name of the model creator.
    author_email : str, optional, default=""
        Contact email.
    trainer_name : str, optional, default=""
        Who (or what) trained the model.
    best_accuracy : float, optional
        Best score reached during training.
    doi : str, optional
        DOI associated with the model or dataset.
    hyperparam_range : dict, optional, default={}
        Hyperparameter search ranges.
    best_params : dict, optional, default={}
        Final hyperparameter values.
    ml_model : object, optional
        The trained estimator to package.

    Notes
    -----
    Keyword arguments other than those listed above are ignored.

    Examples
    --------
    >>> package = GamAI_io(
    ...     model_name="rf_classifier_v1",
    ...     model_type="classifier",
    ...     author_name="John Doe",
    ...     best_accuracy=0.94,
    ...     ml_model=model,
    ... )                                                          # doctest: +SKIP
    >>> package.save(save_dir="models")                            # doctest: +SKIP
    >>> loaded = GamAI_io.load("models/rf_classifier_v1.gam_ai")   # doctest: +SKIP
    >>> loaded.ml_model.predict(X_test[:5])                        # doctest: +SKIP
    """

    def __init__(self, **kwargs):
        self.model_name = kwargs.get("model_name")
        self.model_type = kwargs.get("model_type")
        self.description = kwargs.get("description", "")
        self.author_name = kwargs.get("author_name", "")
        self.author_email = kwargs.get("author_email", "")
        self.trainer_name = kwargs.get("trainer_name", "")
        self.best_accuracy = kwargs.get("best_accuracy", None)
        self.doi = kwargs.get("doi", None)
        self.hyperparam_range = kwargs.get("hyperparam_range", {})
        self.best_params = kwargs.get("best_params", {})
        self.ml_model = kwargs.get("ml_model", None)

    # ----------- Save ----------
    def save(self, save_dir="models"):
        """
        Serialize the model and metadata to ``<save_dir>/<model_name>.gam_ai``.

        Parameters
        ----------
        save_dir : str, optional, default="models"
            Target directory; created if it does not exist.

        Raises
        ------
        ValueError
            If no model (``ml_model``) is attached. Raised before anything
            is created on disk.
        TypeError
            From `json` if a metadata value is not JSON-serialisable. Raised
            before the output file is opened, so no partial file is left.
        """
        # Validate first so a failed call leaves no stray directory behind.
        if self.ml_model is None:
            raise ValueError("No ml_model attached to save inside .gam_ai.")

        buffer = io.BytesIO()
        joblib.dump(self.ml_model, buffer)
        encoded_model = base64.b64encode(buffer.getvalue()).decode("utf-8")

        data = {
            "metadata": {k: v for k, v in vars(self).items() if k != "ml_model"},
            "model_data": encoded_model,
        }
        # Serialize to a string before touching the file: if the metadata is
        # not JSON-serialisable we fail here instead of truncating the file.
        payload = json.dumps(data, indent=4)

        os.makedirs(save_dir, exist_ok=True)
        filepath = os.path.join(save_dir, f"{self.model_name}.gam_ai")
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(payload)
        print(f"💾 Saved ml_model package: {filepath}")

    # ----------- Load ----------
    @staticmethod
    def load(filepath):
        """
        Load a `.gam_ai` model package from disk.

        Parameters
        ----------
        filepath : str
            Full path to the `.gam_ai` file.

        Returns
        -------
        GamAI_io
            Instance populated from the file's metadata, with the
            deserialized estimator available as ``ml_model``.

        Raises
        ------
        KeyError
            If the file lacks the ``"metadata"`` or ``"model_data"`` section.

        Warnings
        --------
        `joblib.load` unpickles arbitrary Python objects. Only load files
        from a trusted source.
        """
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)

        fields = dict(data["metadata"])
        model_bytes = base64.b64decode(data["model_data"])
        # SECURITY: unpickling can execute arbitrary code; trusted files only.
        # The constructor reads the estimator from the `ml_model` keyword;
        # passing it under any other name would silently drop it.
        fields["ml_model"] = joblib.load(io.BytesIO(model_bytes))
        return GamAI_io(**fields)