import { columnFlagIcons } from "@/domains/monitoring";
import {
  ChunkingInfo,
  ColumnType,
  IdentifierColumn,
  ModelPredictions,
  ModelPredictedProbability,
  ReferenceAndAnalysisInfo,
  SourceName,
  SourcesInfo,
  Target,
  Timestamp,
  ApiToken,
  KeyPerformanceMetric,
} from "@/formatters/informationModal/informationModalContents";

/**
 * Shape of a single help entry in the information modal catalog.
 *
 * Every element of `informationModalCatalog` is checked against this type
 * via `satisfies`.
 */
export type InformationModalData = {
  /** Identifier/title of the entry; the catalog's `name` literals form the `InformationModalNames` union. */
  name: string;
  /** Explanatory text for the entry (may span multiple paragraphs). */
  content: string;
  /** Optional URL to further documentation; omitted when no external page applies. */
  readMoreLink?: string;
};

/**
 * Static catalog of help texts that can be shown in an information modal.
 *
 * Each entry carries a unique `name`, the explanatory `content` and, where
 * available, a `readMoreLink` pointing at the relevant documentation page.
 *
 * The array is declared `as const` so that the `name` literals are preserved
 * (they are used to derive `InformationModalNames`), while `satisfies` still
 * validates every entry against `InformationModalData`.
 *
 * NOTE: `name` values are part of the public contract; renaming one changes
 * the `InformationModalNames` union and breaks callers referencing it.
 */
export const informationModalCatalog = [
  {
    name: "Chunking",
    content: ChunkingInfo,
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/tutorials/chunking.html",
  },
  {
    name: "Sources",
    content: SourcesInfo,
  },
  {
    name: "Reference and Analysis Data",
    content: ReferenceAndAnalysisInfo,
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/tutorials/data_requirements.html#data-periods",
  },
  {
    name: "Source Name",
    content: SourceName,
  },
  {
    name: "Model prediction",
    content: ModelPredictions,
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/tutorials/data_requirements.html#prediction-class-labels",
  },
  {
    name: "Model predicted probability",
    content: ModelPredictedProbability,
    readMoreLink:
      "https://nannyml.readthedocs.io/en/stable/tutorials/data_requirements.html#predicted-class-probabilities",
  },
  {
    name: "Timestamp",
    content: Timestamp,
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/tutorials/data_requirements.html#timestamp",
  },
  {
    name: "Identifier column",
    content: IdentifierColumn,
  },
  {
    name: "Target",
    content: Target,
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/tutorials/data_requirements.html#target",
  },
  {
    name: "Column type",
    content: ColumnType,
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/tutorials/data_requirements.html#columns",
  },
  {
    name: "Column flags",
    content: `Columns can have flags to indicate special properties. For example, the 'Segment by' flag is used to indicate that a column should be used for segmentation.`,
  },
  {
    name: "Prediction class name",
    content: `The value that represents a prediction related to this score.

    For example, if your score represents the probability of the target value being the class "prepaid", then use this field to specify the value "prepaid".

    You need to do this because the value that represents your class may be different from the name of the column that contains the score for that class.`,
    readMoreLink:
      "https://nannyml.readthedocs.io/en/stable/tutorials/performance_estimation/multiclass_performance_estimation.html",
  },
  {
    name: "Reference dataset",
    content: `
    Required.

    ${ReferenceAndAnalysisInfo}
    `,
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/tutorials/data_requirements.html#reference-period",
  },
  {
    name: "Analysis dataset",
    content: `
    Required.

    ${ReferenceAndAnalysisInfo}
    `,
    // Points at the analysis-period section (previously a copy of the reference-period link).
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/tutorials/data_requirements.html#analysis-period",
  },
  {
    name: "Target dataset",
    content: `
    Optional.

    NannyML Cloud allows you to have a separate dataset for your target values.

    This is useful for cases when the ground truth is delayed, so you can estimate the performance on your available data, and then update it when the ground truth becomes available.
    `,
    // Same anchor as the "Target" entry (previously a copy of the reference-period link).
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/tutorials/data_requirements.html#target",
  },
  {
    name: "API Token",
    content: ApiToken,
  },
  {
    name: "SMTP TLS",
    content: `
      When disabled, NannyML Cloud will establish an unencrypted connection and upgrade to an encrypted connection if
      the SMTP server advertises support for STARTTLS.

      When enabled, NannyML Cloud will encrypt all traffic with the SMTP server. This type of connection should
      generally be used when supported.
    `,
  },
  {
    name: "Webhook configuration",
    content: `
    To set up a custom webhook connection you'll need to provide at least the address of the webhook.

    In the address field provide the full URL of the endpoint that will receive the POST request issued by NannyML.

    Use the additional headers control to provide any key-value pairs you want to include in the POST headers, e.g. for authentication.

    The webhook test will perform a POST request with a dummy payload to the configured webhook endpoint. If the response from the webhook is
    a status code of 200, the webhook is deemed valid.
    `,
  },
  {
    name: "Performance analysis",
    content: `
      NannyML Cloud offers a performance calculator and multiple estimators. Performance calculation requires targets to provide actual performance metrics. The estimators on the other hand predict model performance using only input features and model outputs.

      For metrics that require thresholding model probability scores, we don't use thresholds but instead use the model predictions, i.e. predictions that are thresholded by the user.
    `,
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/how_it_works/performance_estimation.html",
  },
  {
    name: "Concept drift",
    content: `
      The concept drift algorithm detects a change in the relationship between the input features and the target variable.

      It can only be used if targets are available for the analysis period.
    `,
  },
  {
    name: "Thresholds",
    content: `
      Thresholds are used to evaluate whether metrics are within expected range. If a metric is outside the expected range, an alert is triggered.

      Two types of thresholds are supported:

      - Constant threshold: a predefined constant lower and upper limit.
      - Standard deviation threshold: the mean and standard deviation of the metric are calculated from the reference period. The lower and upper limits are calculated as the mean plus or minus a multiple of the standard deviation.
    `,
    readMoreLink: "https://nannyml.readthedocs.io/en/stable/how_it_works/thresholds.html",
  },
  {
    name: "Azure Blob authentication mode",
    content: `
      Azure Blob Storage provides multiple ways of authenticating:

      - Anonymous: Do not use any authentication. This will only work if the container is public.
      - Access key: Use an access key to authenticate.
      - Managed identity: Use the managed identity associated with the NannyML Cloud application to authenticate.
      - SAS token: Use a shared access signature token to authenticate.

      The managed identity is the most secure and recommended way of authenticating. This requires the NannyML Cloud application to have the necessary permissions to access the storage account.
    `,
    readMoreLink:
      "https://nannyml.gitbook.io/cloud/deployment/azure/azure-managed-application/enabling-access-to-storage",
  },
  {
    name: "S3 authentication mode",
    content: `
      Amazon S3 provides multiple ways of authenticating:

      - Access key: Use your personal access key to authenticate.
      - Anonymous: Do not use any authentication. This will only work if the bucket is public.
      - IAM role: Use the IAM role associated with the NannyML Cloud application to authenticate.

      The IAM role is the most secure and recommended way of authenticating. This requires the NannyML Cloud application to have the necessary permissions to access the bucket.
    `,
    readMoreLink: "https://nannyml.gitbook.io/cloud/deployment/aws#granting-access-to-s3",
  },
  {
    name: "Schema",
    content: `
      The schema determines how NannyML Cloud interprets columns for analysis. Depending on the problem type for the model some columns are required, while others are completely optional.
      ${ColumnType}
    `,
  },
  {
    name: "Run status",
    content: `
      NannyML algorithms are executed as an asynchronous process. The run status indicates the execution state of a group of algorithms, referred to as a 'run'.
    `,
  },
  {
    name: "Enable metric",
    content: `
      When a metric is enabled, it is included in the analysis. When disabled, the metric will not be calculated and will be excluded from analysis.
    `,
  },
  {
    name: "Key performance metric",
    content: KeyPerformanceMetric,
  },
  {
    name: "Key performance metric selection",
    content: `
      ${KeyPerformanceMetric}

      Most performance metrics require predictions to be provided, so you would need a column in your dataset that contains the model's predictions. However, NannyML Cloud supports calculating 'ROC AUC' and 'average precision' metrics for binary classification problems without providing predictions. These metrics may be a better choice if you don't have predictions available.
    `,
  },
  {
    name: "Region of practical equivalence (ROPE)",
    content: `
      The Region Of Practical Equivalence (ROPE) is the range where the evaluation hypothesis should be accepted, e.g. model performance is no worse than the reference performance.

      NannyML Cloud uses the Highest Density Interval (HDI) of the evaluated metric to compare against the ROPE:

      - If the HDI is within the ROPE, the hypothesis is accepted.
      - If the HDI falls completely outside the ROPE, the hypothesis is rejected.
      - If the two overlap, the hypothesis is inconclusive and more data is required to reach a conclusion.

      Note NannyML Cloud also enforces a minimum HDI width before making a conclusion.
    `,
  },
  {
    name: "Required 95% HDI width",
    content: `
      The Highest Density Interval (HDI) is a range of values that contains a certain percentage of the probability distribution of a metric.

      NannyML Cloud uses the 95% HDI of a metric to compare against the ROPE to evaluate the evaluation hypothesis. To avoid concluding too early, NannyML Cloud enforces a minimum HDI width before making a conclusion.

      In practical terms, the width of the 95% HDI should be lower than the specified width before making an evaluation.
    `,
  },
  {
    name: "Key experiment metric",
    content: `
      The Key Experiment Metric (KEM) is the primary metric used to evaluate the experiment.

      NannyML Cloud displays the KEM in a central location while other metrics are considered secondary and are displayed in a separate section.
    `,
  },
  {
    name: "Treatment - Control",
    content: `
      Experiment metrics are evaluated by comparing the treatment group to the control group.

      To perform numerical analysis, NannyML Cloud calculates the difference between the treatment and control groups.
      The difference is then compared to the Region Of Practical Equivalence (ROPE) to evaluate the experiment.
    `,
  },
  {
    name: "File format",
    content:
      "We strongly recommend using parquet files because CSV files do not contain data type information. Using CSV files may result in incorrectly inferred data types.",
  },
  {
    name: "Missing targets",
    content: `We didn't find a target column in your monitored data. You can provide a separate target dataset for use cases with delayed ground truth.

      Please note that an identifier column must be present to match targets with rows in the monitored data.`,
  },
  {
    name: "Normalization",
    content: `When normalization is enabled we'll express the result as a fraction of the total. This is useful when you want to compare the performance of different models or datasets.`,
  },
  {
    name: "Segmentation",
    content: `Segmentation allows you to split your data into groups and analyze them separately. Each segmentation column provides a separate group of segments that are not combined.

    For example having 'gender' and 'region' as segmentation columns might result in 'gender: male', 'gender: female' and 'region: US' segments but there won't be a 'female-US' segment. If you want to analyze combined segments, you should create a new column that combines the segments you want to analyze together.`,
  },
  {
    name: "Metric calculation function",
    content:
      "Function to calculate the metric value per chunk. It should be provided as a valid Python function called `calculate` which returns the aggregated result for the chunk.",
  },
  {
    name: "Metric estimation function",
    content:
      "Function to estimate the metric value per chunk. It should be provided as a valid Python function called `estimate` which returns the aggregated result for the chunk.",
  },
  {
    name: "Metric loss function",
    content:
      "Function to calculate the loss per observation. It should be provided as a valid Python function called `loss` which returns the loss per observation as a numpy array.",
  },
  {
    name: "Metric aggregation function",
    content:
      "Function to aggregate the loss per chunk. It should be provided as a valid Python function called `aggregate` which returns the aggregated result for the chunk.",
  },
  {
    name: "Metric limits",
    content:
      "Value range the metric cannot exceed, e.g. for ROC AUC the range is 0 - 1. NannyML Cloud uses this to constrain thresholds. When not set thresholds may grow too large and distort plot scale.",
  },
] as const satisfies readonly InformationModalData[];

/** Union of the literal `name` values of all entries in `informationModalCatalog`. */
export type InformationModalNames = (typeof informationModalCatalog)[number]["name"];
