Source code for mlrun.model_monitoring.applications.context

# Copyright 2024 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import typing

import numpy as np
import pandas as pd

import mlrun.common.helpers
import mlrun.common.model_monitoring.helpers
import mlrun.common.schemas.model_monitoring.constants as mm_constants
import mlrun.feature_store as fstore
from mlrun.artifacts.model import ModelArtifact, get_model
from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
from mlrun.execution import MLClientCtx
from mlrun.model_monitoring.helpers import (
    calculate_inputs_statistics,
    get_endpoint_record,
)
from mlrun.model_monitoring.model_endpoint import ModelEndpoint


class MonitoringApplicationContext(MLClientCtx):
    """
    The monitoring context holds all the relevant information for the monitoring application,
    and also it can be used for logging artifacts and results.
    The monitoring context has the following attributes:

    :param application_name:        (str) the app name
    :param sample_df_stats:         (FeatureStats) The new sample distribution dictionary.
    :param feature_stats:           (FeatureStats) The train sample distribution dictionary.
    :param sample_df:               (pd.DataFrame) The new sample DataFrame.
    :param start_infer_time:        (pd.Timestamp) Start time of the monitoring schedule.
    :param end_infer_time:          (pd.Timestamp) End time of the monitoring schedule.
    :param latest_request:          (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
    :param endpoint_id:             (str) ID of the monitored model endpoint
    :param output_stream_uri:       (str) URI of the output stream for results
    :param model_endpoint:          (ModelEndpoint) The model endpoint object.
    :param feature_names:           (list[str]) List of models feature names.
    :param label_names:             (list[str]) List of models label names.
    :param model:                   (tuple[str, ModelArtifact, dict]) The model file, model spec object,
                                    and list of extra data items.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def __post_init__(self):
        """Reset every monitoring-specific attribute to ``None``."""
        self.application_name: typing.Optional[str] = None
        self.start_infer_time: typing.Optional[pd.Timestamp] = None
        self.end_infer_time: typing.Optional[pd.Timestamp] = None
        self.latest_request: typing.Optional[pd.Timestamp] = None
        self.endpoint_id: typing.Optional[str] = None
        self.output_stream_uri: typing.Optional[str] = None

        # Backing fields of the lazily evaluated properties defined below.
        self._sample_df: typing.Optional[pd.DataFrame] = None
        self._model_endpoint: typing.Optional[ModelEndpoint] = None
        self._feature_stats: typing.Optional[FeatureStats] = None
        self._sample_df_stats: typing.Optional[FeatureStats] = None

    @classmethod
    def from_dict(
        cls,
        attrs: dict,
        context=None,
        model_endpoint_dict=None,
        **kwargs,
    ) -> "MonitoringApplicationContext":
        """
        Create an instance of the MonitoringApplicationContext from a dictionary.

        When ``context`` is given it is reused: its monitoring attributes are reset by
        ``__post_init__`` and then repopulated from ``attrs``.

        :param attrs:               The instance data dictionary.
        :param context:             The current application context.
        :param model_endpoint_dict: Dictionary of model endpoints, looked up by endpoint ID.
                                    When omitted, the endpoint is loaded lazily by the
                                    ``model_endpoint`` property.

        :returns: The populated monitoring application context.
        """
        if not context:
            # NOTE: no trailing comma after the call - a trailing comma inside the
            # parentheses would turn ``self`` into a 1-tuple and break every
            # attribute access below.
            self = super().from_dict(
                attrs=attrs.get(mm_constants.ApplicationEvent.MLRUN_CONTEXT, {}),
                **kwargs,
            )
        else:
            self = context
        self.__post_init__()

        self.start_infer_time = pd.Timestamp(
            attrs.get(mm_constants.ApplicationEvent.START_INFER_TIME)
        )
        self.end_infer_time = pd.Timestamp(
            attrs.get(mm_constants.ApplicationEvent.END_INFER_TIME)
        )
        self.latest_request = pd.Timestamp(
            attrs.get(mm_constants.ApplicationEvent.LAST_REQUEST)
        )
        self.application_name = attrs.get(
            mm_constants.ApplicationEvent.APPLICATION_NAME
        )
        self._feature_stats = json.loads(
            attrs.get(mm_constants.ApplicationEvent.FEATURE_STATS, "{}")
        )
        self._sample_df_stats = json.loads(
            attrs.get(mm_constants.ApplicationEvent.CURRENT_STATS, "{}")
        )

        self.endpoint_id = attrs.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
        # ``model_endpoint_dict`` defaults to None; fall back to an empty mapping so the
        # lookup yields None instead of raising AttributeError.
        self._model_endpoint = (model_endpoint_dict or {}).get(self.endpoint_id)

        return self

    @property
    def sample_df(self) -> pd.DataFrame:
        """
        The sample DataFrame for the ``start_infer_time`` - ``end_infer_time`` window.

        Fetched on first access as the offline features of the endpoint's monitoring
        feature set, and cached afterwards.
        """
        # Within this class ``__post_init__`` is only called from ``from_dict``, so the
        # backing attribute may be missing altogether - hence the ``hasattr`` guard.
        if not hasattr(self, "_sample_df") or self._sample_df is None:
            feature_set = fstore.get_feature_set(
                self.model_endpoint.status.monitoring_feature_set_uri
            )
            features = [f"{feature_set.metadata.name}.*"]
            vector = fstore.FeatureVector(
                name=f"{self.endpoint_id}_vector",
                features=features,
                with_indexes=True,
            )
            vector.metadata.tag = self.application_name
            vector.feature_set_objects = {feature_set.metadata.name: feature_set}
            offline_response = vector.get_offline_features(
                start_time=self.start_infer_time,
                end_time=self.end_infer_time,
                timestamp_for_filtering=mm_constants.FeatureSetFeatures.time_stamp(),
            )
            self._sample_df = offline_response.to_dataframe().reset_index(drop=True)
        return self._sample_df

    @property
    def model_endpoint(self) -> ModelEndpoint:
        """
        The model endpoint object.

        Loaded on first access from the endpoint record of ``self.project`` and
        ``self.endpoint_id`` when it was not supplied through ``from_dict``.
        """
        if not hasattr(self, "_model_endpoint") or not self._model_endpoint:
            self._model_endpoint = ModelEndpoint.from_flat_dict(
                get_endpoint_record(self.project, self.endpoint_id)
            )
        return self._model_endpoint

    @property
    def feature_stats(self) -> FeatureStats:
        """
        The feature statistics of the model endpoint.

        When missing or empty, they are parsed from the JSON stored in the model
        endpoint status and passed through ``pad_features_hist``.
        """
        if not hasattr(self, "_feature_stats") or not self._feature_stats:
            self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
            pad_features_hist(self._feature_stats)
        return self._feature_stats

    @property
    def sample_df_stats(self) -> FeatureStats:
        """statistics of the sample dataframe"""
        if not hasattr(self, "_sample_df_stats") or not self._sample_df_stats:
            self._sample_df_stats = calculate_inputs_statistics(
                self.feature_stats, self.sample_df
            )
        return self._sample_df_stats

    @property
    def feature_names(self) -> list[str]:
        """The feature names of the model"""
        feature_names = self.model_endpoint.spec.feature_names
        # The spec may hold either a list or its JSON-encoded form.
        return (
            feature_names
            if isinstance(feature_names, list)
            else json.loads(feature_names)
        )

    @property
    def label_names(self) -> list[str]:
        """The label names of the model"""
        label_names = self.model_endpoint.spec.label_names
        # The spec may hold either a list or its JSON-encoded form.
        return label_names if isinstance(label_names, list) else json.loads(label_names)

    @property
    def model(self) -> tuple[str, ModelArtifact, dict]:
        """return model file, model spec object, and list of extra data items"""
        return get_model(self.model_endpoint.spec.model_uri)

    @staticmethod
    def dict_to_histogram(
        histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
    ) -> pd.DataFrame:
        """
        Convert histogram dictionary to pandas DataFrame with feature histograms as columns

        Features whose statistics have no ``"hist"`` entry are skipped.

        :param histogram_dict: Histogram dictionary

        :returns: Histogram dataframe
        """
        # Create a dictionary with feature histograms as values
        histograms = {}
        for feature, stats in histogram_dict.items():
            if "hist" in stats:
                # Normalize to probability distribution of each feature
                histograms[feature] = np.array(stats["hist"][0]) / stats["count"]

        # Convert the dictionary to pandas DataFrame
        return pd.DataFrame(histograms)