Source code for PyGamLab.ai_core.gam_ai

import json, base64, joblib, io, os

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (
    confusion_matrix, ConfusionMatrixDisplay,
    classification_report, r2_score, mean_absolute_error, mean_squared_error
)


  

class Gam_Ai_Workflow:
    """
    Type-aware workflow manager for `.gam_ai` model packages.

    The workflow wraps a `GAM_AI_MODEL` instance and offers prediction,
    refitting, and an evaluation routine that is selected from the
    package's declared `model_type` ("classifier", "regressor" or
    "unsupervised", compared case-insensitively).

    Parameters
    ----------
    model_name : str
        The name (without extension) of the `.gam_ai` file to be loaded.
    base_dir : str, optional
        Directory containing saved `.gam_ai` model files. The value is
        forwarded unchanged to `GAM_AI_MODEL`, which chooses its own
        default location when this is None.

    Attributes
    ----------
    model_name : str
        The name passed to the constructor.
    gam : GAM_AI_MODEL
        The loaded package (metadata plus model).
    ml_model : object or None
        Shortcut to `gam.ml_model`.
    model_type : str
        Lower-cased `model_type` metadata value, or "" when the package
        does not declare one.

    Raises
    ------
    FileNotFoundError
        Propagated from `GAM_AI_MODEL` when the file cannot be located.

    Examples
    --------
    >>> workflow = Gam_Ai_Workflow("rf_classifier_v1", base_dir="models")
    ✅ Loaded model 'rf_classifier_v1' (classifier) successfully.
    >>> workflow.evaluate(X_test, y_test)
    🎯 Accuracy: 0.9470
    ...
    """

    def __init__(self, model_name, base_dir=None):
        self.model_name = model_name
        self.gam = GAM_AI_MODEL(model_name, base_dir=base_dir)
        self.ml_model = self.gam.ml_model

        # `model_type` is only present when the package metadata carries it,
        # so a missing or null field must not abort loading; it surfaces
        # later as an "Unknown model_type" error from `evaluate()`.
        declared_type = getattr(self.gam, "model_type", None)
        self.model_type = "" if declared_type is None else str(declared_type).lower()

        print(f"✅ Loaded model '{self.model_name}' ({self.model_type}) successfully.")

    # ---------- Internal helpers ----------

    @staticmethod
    def _squeeze_column(values):
        """Return *values* as an array, flattening an (n, 1) column to (n,)."""
        arr = np.asarray(values)
        if arr.ndim == 2 and arr.shape[1] == 1:
            arr = arr.ravel()
        return arr

    @staticmethod
    def _as_2d_points(data, label):
        """Return *data* as a dense 2-D array with at least two columns."""
        # Objects exposing `toarray()` (e.g. SciPy sparse matrices) must be
        # densified first; np.asarray would wrap them in a 0-d object array.
        if hasattr(data, "toarray"):
            data = data.toarray()
        points = np.asarray(data)
        if points.ndim != 2 or points.shape[1] < 2:
            raise ValueError(
                f"{label} must be 2-D with at least two columns to be plotted; "
                f"got shape {points.shape}."
            )
        return points

    # ---------- General ----------

    def summary(self):
        """
        Print every attribute of the loaded package except the model object.

        Returns
        -------
        None
        """
        print("\n📘 MODEL SUMMARY")
        for key, value in vars(self.gam).items():
            # The estimator is stored under `ml_model`; its repr is not
            # metadata and would drown out the summary.
            if key != "ml_model":
                print(f"{key}: {value}")

    def predict(self, X):
        """
        Run model inference on input data.

        Parameters
        ----------
        X : array-like
            Input features, passed unchanged to the model's `predict`.

        Returns
        -------
        object
            Whatever the underlying model's `predict` returns.

        Raises
        ------
        ValueError
            If the package contained no model.
        """
        if self.ml_model is None:
            raise ValueError("Model not loaded.")
        return self.ml_model.predict(X)

    def refit(self, X, y=None):
        """
        Retrain (refit) the loaded model in place on new data.

        Parameters
        ----------
        X : array-like
            Training features.
        y : array-like, optional
            Training labels or targets. May be omitted for models whose
            `fit` does not need them (e.g. unsupervised estimators).

        Raises
        ------
        NotImplementedError
            If the model does not provide a `fit` method.

        Notes
        -----
        Only the in-memory model is changed; nothing is written to disk.
        """
        if not hasattr(self.ml_model, "fit"):
            raise NotImplementedError("This model type cannot be refitted.")
        self.ml_model.fit(X, y)
        print("🔁 Model refitted successfully.")

    # ---------- Classifier ----------

    def evaluate_classifier(self, X, y_true):
        """
        Print accuracy and a classification report, then plot the
        confusion matrix.

        Parameters
        ----------
        X : array-like
            Input test features.
        y_true : array-like
            Ground-truth class labels.

        Returns
        -------
        None
        """
        # Column vectors are flattened so that `y_pred == y_true` compares
        # element-wise instead of broadcasting (n,) against (n, 1).
        y_true = self._squeeze_column(y_true)
        y_pred = self._squeeze_column(self.ml_model.predict(X))

        acc = np.mean(y_pred == y_true)
        print(f"🎯 Accuracy: {acc:.4f}\n")
        print("📊 Classification Report:\n")
        print(classification_report(y_true, y_pred))

        cm = confusion_matrix(y_true, y_pred)
        ConfusionMatrixDisplay(cm).plot(cmap="Blues")
        plt.title(f"Confusion Matrix: {self.gam.model_name}")
        plt.show()

    # ---------- Regressor ----------

    def evaluate_regressor(self, X, y_true):
        """
        Print R², MAE and MSE, then plot predicted against true values.

        Parameters
        ----------
        X : array-like
            Input test features.
        y_true : array-like
            True continuous target values.

        Returns
        -------
        None
        """
        y_true = self._squeeze_column(y_true)
        y_pred = self._squeeze_column(self.ml_model.predict(X))

        r2 = r2_score(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)
        mse = mean_squared_error(y_true, y_pred)
        print(f"📈 R²: {r2:.4f}")
        print(f"📉 MAE: {mae:.4f}")
        print(f"📉 MSE: {mse:.4f}\n")

        # np.min/np.max work for any array shape; the builtins fail on
        # multi-dimensional targets.
        lo, hi = np.min(y_true), np.max(y_true)

        plt.figure(figsize=(6, 4))
        plt.scatter(y_true, y_pred, alpha=0.6)
        plt.plot([lo, hi], [lo, hi], 'r--')  # identity line: perfect prediction
        plt.xlabel("True Values")
        plt.ylabel("Predicted Values")
        plt.title(f"Prediction Trend — {self.gam.model_name}")
        plt.grid(True)
        plt.show()

    # ---------- Unsupervised ----------

    def visualize_unsupervised(self, X):
        """
        Scatter-plot the first two columns of the data for an unsupervised
        model.

        If the model has `predict`, the input's first two columns are
        plotted and coloured by the predicted value. Otherwise, if it has
        `transform`, the first two columns of the transformed data are
        plotted.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data to visualize.

        Returns
        -------
        None

        Raises
        ------
        NotImplementedError
            If the model has neither `predict` nor `transform`.
        ValueError
            If the data to plot is not 2-D with at least two columns.
        """
        title_name = self.gam.model_name
        if hasattr(self.ml_model, "predict"):
            # The model receives X untouched; only the plotting copy is
            # converted so lists and DataFrames can be column-indexed.
            labels = self.ml_model.predict(X)
            points = self._as_2d_points(X, "X")
            plt.scatter(points[:, 0], points[:, 1], c=labels, cmap="viridis", s=30)
            plt.title(f"Cluster Visualization — {title_name}")
            plt.show()
        elif hasattr(self.ml_model, "transform"):
            points = self._as_2d_points(self.ml_model.transform(X), "Transformed X")
            plt.scatter(points[:, 0], points[:, 1], s=30)
            plt.title(f"Feature Space — {title_name}")
            plt.show()
        else:
            raise NotImplementedError("This unsupervised model has no visualization method.")

    # ---------- Smart Evaluation ----------

    def evaluate(self, X, y_true=None):
        """
        Dispatch to the evaluation routine matching `self.model_type`.

        Parameters
        ----------
        X : array-like
            Input data.
        y_true : array-like, optional
            Ground-truth labels or targets. Required when the model type
            is "classifier" or "regressor"; ignored for "unsupervised".

        Raises
        ------
        ValueError
            If the model type is not recognized, or if `y_true` is missing
            for a classifier or regressor.
        """
        supervised = {
            "classifier": self.evaluate_classifier,
            "regressor": self.evaluate_regressor,
        }
        if self.model_type in supervised:
            if y_true is None:
                raise ValueError(
                    f"y_true is required to evaluate a {self.model_type} model."
                )
            supervised[self.model_type](X, y_true)
        elif self.model_type == "unsupervised":
            self.visualize_unsupervised(X)
        else:
            raise ValueError(f"Unknown model_type: {self.model_type}")

    def get_GAM_AI_MODEL(self):
        """
        Return the `GAM_AI_MODEL` instance managed by this workflow.

        Returns
        -------
        GAM_AI_MODEL
            The object stored in `self.gam` at construction time; its
            deserialized model is available as `ml_model`.

        Examples
        --------
        >>> gam_model = workflow.get_GAM_AI_MODEL()
        >>> gam_model.summary()
        """
        return self.gam
import base64
import io
import json
import os


class GAM_AI_MODEL:
    """
    Loader and in-memory representation of a `.gam_ai` model package.

    A `.gam_ai` file is a JSON document with a `"metadata"` object and a
    `"model_data"` string holding a `joblib`-serialized, Base64-encoded
    model. On construction the file is read, each metadata entry is
    attached as an instance attribute, and the decoded model is exposed
    as `ml_model`.

    Parameters
    ----------
    model_name : str
        The name of the model file (without the `.gam_ai` extension).
    base_dir : str, optional
        Directory containing `.gam_ai` files. When None, the `gam_models`
        directory next to this module file is used.

    Attributes
    ----------
    model_name : str
        The name passed to the constructor, unless the file's metadata
        contains its own `model_name`, which then replaces it.
    file_path : str
        Full path to the `.gam_ai` file on disk.
    ml_model : object or None
        The deserialized model, or None when the file has no model data.
    <dynamic_metadata_fields> : Any
        Every other key of the `"metadata"` section (e.g. `author_name`,
        `model_type`, `best_accuracy`).

    Raises
    ------
    FileNotFoundError
        If no regular file exists at the resolved `file_path`.

    Examples
    --------
    >>> from PyGamLab.ai_core.gam_ai import GAM_AI_MODEL
    >>> model = GAM_AI_MODEL("cu-nanocomposites-poisson-ratio-lr")
    ✅ Loaded GAM_AI_MODEL: 'cu-nanocomposites-poisson-ratio-lr'
    >>> model.summary()

    Notes
    -----
    This class only loads a package; it does not train or evaluate models.

    See Also
    --------
    Gam_Ai_Workflow : Workflow manager built on top of this class.
    """

    # Attribute names owned by the loader itself; metadata must not
    # overwrite them.
    _RESERVED_ATTRS = frozenset({"file_path", "ml_model"})

    def __init__(self, model_name, base_dir=None):
        self.model_name = model_name

        # Default directory if none provided
        if base_dir is None:
            base_dir = os.path.join(os.path.dirname(__file__), "gam_models")

        self.file_path = os.path.join(base_dir, f"{model_name}.gam_ai")

        # isfile (not exists) so that a directory with a matching name is
        # reported as a missing file rather than failing later in open().
        if not os.path.isfile(self.file_path):
            raise FileNotFoundError(f"❌ File not found: {self.file_path}")

        # --- Load JSON file ---
        # Decode as UTF-8 explicitly instead of relying on the platform's
        # default text encoding.
        with open(self.file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        metadata = data.get("metadata") or {}  # tolerate `"metadata": null`
        model_data = data.get("model_data")

        # --- Assign metadata attributes dynamically ---
        for key, value in metadata.items():
            # Skip keys that would clobber loader state or shadow a method
            # (a metadata key named "summary" would otherwise replace
            # `summary()` with a plain value on this instance).
            if key in self._RESERVED_ATTRS or callable(getattr(type(self), key, None)):
                continue
            setattr(self, key, value)

        # --- Decode Base64 model ---
        if model_data:
            # Imported at the point of use so that metadata-only packages
            # can be read without touching joblib.
            import joblib

            # SECURITY: joblib.load unpickles the payload and can execute
            # arbitrary code. Only load `.gam_ai` files from trusted sources.
            model_bytes = base64.b64decode(model_data)
            self.ml_model = joblib.load(io.BytesIO(model_bytes))
        else:
            self.ml_model = None

        print(f"✅ Loaded GAM_AI_MODEL: '{self.model_name}'")

    def summary(self):
        """
        Print all instance attributes except `ml_model` and `file_path`,
        followed by the type of the loaded model.

        Examples
        --------
        >>> model.summary()
        📘 MODEL METADATA SUMMARY
        model_name: cu-nanocomposites-poisson-ratio-lr
        ...
        ⚙️ ML Model: <class 'sklearn.linear_model._base.LinearRegression'>
        """
        print("\n📘 MODEL METADATA SUMMARY\n")
        for key, value in vars(self).items():
            if key not in ("ml_model", "file_path"):
                print(f"{key}: {value}")
        print("\n⚙️ ML Model:", type(self.ml_model))
class GamAI_io:
    """
    Input/output handler for saving and loading `.gam_ai` model packages.

    A package bundles a trained model with descriptive metadata in one
    JSON file with two sections:

    - **metadata**: every instance attribute except `ml_model`.
    - **model_data**: the model serialized with `joblib` and encoded as
      Base64 text.

    Parameters
    ----------
    model_name : str, optional
        Short name for the model; used as the filename when saving.
    model_type : str, optional
        Kind of model. `Gam_Ai_Workflow.evaluate` dispatches on the values
        "classifier", "regressor" and "unsupervised".
    description : str, optional, default=""
        Brief summary of the model.
    author_name : str, optional, default=""
        Name of the model creator.
    author_email : str, optional, default=""
        Contact email.
    trainer_name : str, optional, default=""
        Individual or system that trained the model.
    best_accuracy : float, optional
        Best score achieved during training.
    doi : str, optional
        DOI associated with the model or dataset.
    hyperparam_range : dict, optional, default={}
        Hyperparameter search ranges.
    best_params : dict, optional, default={}
        Final hyperparameter values.
    ml_model : object, optional
        The trained model instance.

    Notes
    -----
    Unrecognized keyword arguments are ignored.

    Examples
    --------
    >>> from PyGamLab.ai_core.gam_ai import GamAI_io
    >>> package = GamAI_io(
    ...     model_name="rf_classifier_v1",
    ...     model_type="classifier",
    ...     best_accuracy=0.94,
    ...     ml_model=model,
    ... )
    >>> package.save(save_dir="models")
    💾 Saved ml_model package: models/rf_classifier_v1.gam_ai
    >>> loaded_package = GamAI_io.load("models/rf_classifier_v1.gam_ai")
    >>> loaded_package.ml_model.predict(X_test[:5])
    """

    def __init__(self, **kwargs):
        self.model_name = kwargs.get("model_name")
        self.model_type = kwargs.get("model_type")
        self.description = kwargs.get("description", "")
        self.author_name = kwargs.get("author_name", "")
        self.author_email = kwargs.get("author_email", "")
        self.trainer_name = kwargs.get("trainer_name", "")
        self.best_accuracy = kwargs.get("best_accuracy", None)
        self.doi = kwargs.get("doi", None)
        self.hyperparam_range = kwargs.get("hyperparam_range", {})
        self.best_params = kwargs.get("best_params", {})
        self.ml_model = kwargs.get("ml_model", None)

    # ----------- Save ----------

    def save(self, save_dir="models"):
        """
        Serialize the model and metadata to `<save_dir>/<model_name>.gam_ai`.

        Parameters
        ----------
        save_dir : str, optional, default="models"
            Target directory; created if it does not exist.

        Raises
        ------
        ValueError
            If no model (`ml_model`) is attached, or if `model_name` is
            empty. Both are checked before anything is written to disk.
        """
        # Validate first so that a failed call leaves no empty directory
        # or misnamed "None.gam_ai" file behind.
        if self.ml_model is None:
            raise ValueError("No ml_model attached to save inside .gam_ai.")
        if not self.model_name:
            raise ValueError("A model_name is required to build the .gam_ai filename.")

        os.makedirs(save_dir, exist_ok=True)
        filepath = os.path.join(save_dir, f"{self.model_name}.gam_ai")

        buffer = io.BytesIO()
        joblib.dump(self.ml_model, buffer)
        encoded_model = base64.b64encode(buffer.getvalue()).decode("utf-8")

        data = {
            "metadata": {k: v for k, v in vars(self).items() if k != "ml_model"},
            "model_data": encoded_model,
        }

        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4)

        print(f"💾 Saved ml_model package: {filepath}")

    # ----------- Load ----------

    @staticmethod
    def load(filepath):
        """
        Load a `.gam_ai` model package from disk.

        Parameters
        ----------
        filepath : str
            Full path to the `.gam_ai` file.

        Returns
        -------
        GamAI_io
            Instance populated from the file's metadata, with the
            deserialized model available as `ml_model`.

        Raises
        ------
        KeyError
            If the file lacks a `"metadata"` or `"model_data"` entry.
        """
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)

        # SECURITY: joblib.load unpickles the payload and can execute
        # arbitrary code. Only load `.gam_ai` files from trusted sources.
        model_bytes = base64.b64decode(data["model_data"])
        model = joblib.load(io.BytesIO(model_bytes))

        # The constructor reads the model from the `ml_model` keyword;
        # passing it under any other name silently drops it. Building the
        # dict first also avoids a duplicate-keyword error should the
        # metadata itself contain an `ml_model` key.
        fields = dict(data["metadata"])
        fields["ml_model"] = model
        return GamAI_io(**fields)