forked from github-starred/komodo
alert refactor
This commit is contained in:
@@ -13,7 +13,7 @@ use aws_sdk_ec2::{
|
||||
Client,
|
||||
};
|
||||
use monitor_client::entities::{
|
||||
alert::{Alert, AlertData, AlertDataVariant},
|
||||
alert::{Alert, AlertData},
|
||||
monitor_timestamp,
|
||||
server::stats::SeverityLevel,
|
||||
server_template::aws::AwsServerTemplateConfig,
|
||||
@@ -172,7 +172,6 @@ pub async fn terminate_ec2_instance_with_retry(
|
||||
resolved: false,
|
||||
level: SeverityLevel::Critical,
|
||||
target: ResourceTarget::system(),
|
||||
variant: AlertDataVariant::AwsBuilderTerminationFailed,
|
||||
data: AlertData::AwsBuilderTerminationFailed {
|
||||
instance_id: instance_id.to_string(),
|
||||
message: format!("{e:#}"),
|
||||
|
||||
@@ -49,6 +49,7 @@ impl HetznerClient {
|
||||
self.post("/servers", body).await
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub async fn delete_server(
|
||||
&self,
|
||||
id: i64,
|
||||
@@ -120,6 +121,7 @@ impl HetznerClient {
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
async fn delete<Res: DeserializeOwned>(
|
||||
&self,
|
||||
path: &str,
|
||||
|
||||
@@ -5,15 +5,9 @@ use std::{
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use futures::future::join_all;
|
||||
use monitor_client::entities::{
|
||||
alert::{Alert, AlertData, AlertDataVariant},
|
||||
monitor_timestamp,
|
||||
server::stats::SeverityLevel,
|
||||
server_template::hetzner::{
|
||||
HetznerDatacenter, HetznerServerTemplateConfig,
|
||||
HetznerServerType, HetznerVolumeFormat,
|
||||
},
|
||||
update::ResourceTarget,
|
||||
use monitor_client::entities::server_template::hetzner::{
|
||||
HetznerDatacenter, HetznerServerTemplateConfig, HetznerServerType,
|
||||
HetznerVolumeFormat,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
@@ -22,15 +16,9 @@ use crate::{
|
||||
create_volume::CreateVolumeBody,
|
||||
},
|
||||
config::core_config,
|
||||
helpers::alert::send_alerts,
|
||||
};
|
||||
|
||||
use self::{
|
||||
client::HetznerClient,
|
||||
common::{
|
||||
HetznerAction, HetznerActionResponse, HetznerVolumeStatus,
|
||||
},
|
||||
};
|
||||
use self::{client::HetznerClient, common::HetznerVolumeStatus};
|
||||
|
||||
mod client;
|
||||
mod common;
|
||||
@@ -195,60 +183,6 @@ pub async fn launch_hetzner_server(
|
||||
))
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
const MAX_TERMINATION_TRIES: usize = 5;
|
||||
#[allow(unused)]
|
||||
const TERMINATION_WAIT_SECS: u64 = 15;
|
||||
|
||||
#[allow(unused)]
|
||||
pub async fn terminate_hetzner_server_with_retry(
|
||||
id: i64,
|
||||
) -> anyhow::Result<()> {
|
||||
let hetzner =
|
||||
*hetzner().as_ref().context("Hetzner token not configured")?;
|
||||
|
||||
for i in 0..MAX_TERMINATION_TRIES {
|
||||
let message = match hetzner.delete_server(id).await {
|
||||
Ok(HetznerActionResponse {
|
||||
action: HetznerAction { error: None, .. },
|
||||
}) => return Ok(()),
|
||||
Ok(HetznerActionResponse {
|
||||
action: HetznerAction { error: Some(e), .. },
|
||||
}) => (i == MAX_TERMINATION_TRIES - 1).then(|| {
|
||||
format!(
|
||||
"failed to terminate instance | code: {} | {}",
|
||||
e.code, e.message
|
||||
)
|
||||
}),
|
||||
Err(e) => {
|
||||
(i == MAX_TERMINATION_TRIES - 1).then(|| format!("{e:#}"))
|
||||
}
|
||||
};
|
||||
if let Some(message) = message {
|
||||
error!("failed to terminate hetzner server {id} | {message}");
|
||||
let alert = Alert {
|
||||
id: Default::default(),
|
||||
ts: monitor_timestamp(),
|
||||
resolved: false,
|
||||
level: SeverityLevel::Critical,
|
||||
target: ResourceTarget::system(),
|
||||
variant: AlertDataVariant::HetznerBuilderTerminationFailed,
|
||||
data: AlertData::HetznerBuilderTerminationFailed {
|
||||
server_id: id,
|
||||
message: message.clone(),
|
||||
},
|
||||
resolved_ts: None,
|
||||
};
|
||||
send_alerts(&[alert]).await;
|
||||
return Err(anyhow::Error::msg(message));
|
||||
}
|
||||
tokio::time::sleep(Duration::from_secs(TERMINATION_WAIT_SECS))
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn hetzner_format(
|
||||
format: HetznerVolumeFormat,
|
||||
) -> common::HetznerVolumeFormat {
|
||||
|
||||
@@ -246,21 +246,6 @@ async fn send_slack_alert(
|
||||
];
|
||||
(text, blocks.into())
|
||||
}
|
||||
AlertData::HetznerBuilderTerminationFailed {
|
||||
server_id,
|
||||
message,
|
||||
} => {
|
||||
let text = format!(
|
||||
"{level} | Failed to terminated Hetzner builder instance"
|
||||
);
|
||||
let blocks = vec![
|
||||
Block::header(text.clone()),
|
||||
Block::section(format!(
|
||||
"server id: **{server_id}**\n{message}"
|
||||
)),
|
||||
];
|
||||
(text, blocks.into())
|
||||
}
|
||||
AlertData::None {} => Default::default(),
|
||||
};
|
||||
if !text.is_empty() {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use monitor_client::entities::{
|
||||
alert::{Alert, AlertData, AlertDataVariant},
|
||||
alert::{Alert, AlertData},
|
||||
deployment::Deployment,
|
||||
server::stats::SeverityLevel,
|
||||
update::ResourceTarget,
|
||||
@@ -46,7 +46,6 @@ pub async fn alert_deployments(
|
||||
let alert = Alert {
|
||||
id: Default::default(),
|
||||
level: SeverityLevel::Warning,
|
||||
variant: AlertDataVariant::ContainerStateChange,
|
||||
resolved: true,
|
||||
resolved_ts: ts.into(),
|
||||
target,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::{collections::HashMap, path::PathBuf, str::FromStr};
|
||||
|
||||
use anyhow::Context;
|
||||
use derive_variants::ExtractVariant;
|
||||
use mongo_indexed::Indexed;
|
||||
use monitor_client::entities::{
|
||||
alert::{Alert, AlertData, AlertDataVariant},
|
||||
@@ -66,7 +67,6 @@ pub async fn alert_servers(
|
||||
resolved_ts: None,
|
||||
level: SeverityLevel::Critical,
|
||||
target: ResourceTarget::Server(server_status.id.clone()),
|
||||
variant: AlertDataVariant::ServerUnreachable,
|
||||
data: AlertData::ServerUnreachable {
|
||||
id: server_status.id.clone(),
|
||||
name: server.name.clone(),
|
||||
@@ -132,7 +132,6 @@ pub async fn alert_servers(
|
||||
resolved_ts: None,
|
||||
level: health.cpu,
|
||||
target: ResourceTarget::Server(server_status.id.clone()),
|
||||
variant: AlertDataVariant::ServerCpu,
|
||||
data: AlertData::ServerCpu {
|
||||
id: server_status.id.clone(),
|
||||
name: server.name.clone(),
|
||||
@@ -188,7 +187,6 @@ pub async fn alert_servers(
|
||||
resolved_ts: None,
|
||||
level: health.cpu,
|
||||
target: ResourceTarget::Server(server_status.id.clone()),
|
||||
variant: AlertDataVariant::ServerMem,
|
||||
data: AlertData::ServerMem {
|
||||
id: server_status.id.clone(),
|
||||
name: server.name.clone(),
|
||||
@@ -260,7 +258,6 @@ pub async fn alert_servers(
|
||||
resolved_ts: None,
|
||||
level: *health,
|
||||
target: ResourceTarget::Server(server_status.id.clone()),
|
||||
variant: AlertDataVariant::ServerDisk,
|
||||
data: AlertData::ServerDisk {
|
||||
id: server_status.id.clone(),
|
||||
name: server.name.clone(),
|
||||
@@ -492,7 +489,7 @@ async fn get_open_alerts(
|
||||
}
|
||||
_ => {
|
||||
let inner = map.entry(alert.target.clone()).or_default();
|
||||
inner.insert(alert.variant, alert);
|
||||
inner.insert(alert.data.extract_variant(), alert);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,9 +48,6 @@ pub struct Alert {
|
||||
/// The target of the alert
|
||||
pub target: ResourceTarget,
|
||||
|
||||
/// The type of alert, eg ServerUnreachable, ServerMem, ContainerStateChange
|
||||
pub variant: AlertDataVariant,
|
||||
|
||||
/// The data attached to the alert
|
||||
pub data: AlertData,
|
||||
|
||||
@@ -151,14 +148,6 @@ pub enum AlertData {
|
||||
message: String,
|
||||
},
|
||||
|
||||
/// A Hetzner builder failed to terminate.
|
||||
HetznerBuilderTerminationFailed {
|
||||
/// The id of the server which failed to terminate
|
||||
server_id: I64,
|
||||
/// A reason for the failure
|
||||
message: String,
|
||||
},
|
||||
|
||||
None {},
|
||||
}
|
||||
|
||||
|
||||
@@ -182,13 +182,6 @@ export type AlertData =
|
||||
instance_id: string;
|
||||
/** A reason for the failure */
|
||||
message: string;
|
||||
}}
|
||||
/** A Hetzner builder failed to terminate. */
|
||||
| { type: "HetznerBuilderTerminationFailed", data: {
|
||||
/** The id of the server which failed to terminate */
|
||||
server_id: I64;
|
||||
/** A reason for the failure */
|
||||
message: string;
|
||||
}}
|
||||
| { type: "None", data: {
|
||||
}};
|
||||
@@ -209,8 +202,6 @@ export interface Alert {
|
||||
level: SeverityLevel;
|
||||
/** The target of the alert */
|
||||
target: ResourceTarget;
|
||||
/** The type of alert, eg ServerUnreachable, ServerMem, ContainerStateChange */
|
||||
variant: AlertData["type"];
|
||||
/** The data attached to the alert */
|
||||
data: AlertData;
|
||||
/** The timestamp of alert resolution */
|
||||
|
||||
@@ -55,7 +55,7 @@ export const AlertsTable = ({
|
||||
},
|
||||
{
|
||||
header: "Alert Type",
|
||||
accessorKey: "variant",
|
||||
accessorKey: "data.type",
|
||||
},
|
||||
]}
|
||||
/>
|
||||
|
||||
@@ -1,93 +0,0 @@
|
||||
/**
 * Placeholder alerter config component.
 *
 * Takes no props and renders an empty fragment. The earlier commented-out
 * draft (an `{ id }`-driven `Config` form with a Slack / Custom type
 * selector, plus its commented-out imports) was dead code and has been
 * removed.
 */
export const AlerterConfig = () => {
  return <></>;
};
|
||||
@@ -0,0 +1,74 @@
|
||||
import { ConfigItem } from "@components/config/util";
|
||||
import { Types } from "@monitor/client";
|
||||
import { Badge } from "@ui/badge";
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectItem,
|
||||
SelectTrigger,
|
||||
} from "@ui/select";
|
||||
import { MinusCircle } from "lucide-react";
|
||||
|
||||
/** Alert types offered in the "Add Filter" dropdown. */
const ALERT_TYPES: Types.AlertData["type"][] = [
  "ServerUnreachable",
  "ServerCpu",
  "ServerMem",
  "ServerDisk",
  "ContainerStateChange",
  "AwsBuilderTerminationFailed",
];

/**
 * Config item for editing a list of alert types.
 *
 * Every selected type is rendered as a badge; clicking a badge removes that
 * type (the click is ignored while `disabled`). Entries of `ALERT_TYPES` that
 * are not selected yet are offered in an "Add Filter" dropdown, which is not
 * rendered once none remain.
 *
 * @param alert_types The currently selected alert types.
 * @param set Called with the complete updated list after an add or a remove.
 * @param disabled When true, badges cannot be removed and the dropdown is disabled.
 */
export const AlertTypeConfig = ({
  alert_types,
  set,
  disabled,
}: {
  alert_types: Types.AlertData["type"][];
  set: (alert_types: Types.AlertData["type"][]) => void;
  disabled: boolean;
}) => {
  // Types that can still be added, i.e. those not already selected.
  const at = ALERT_TYPES.filter(
    (alert_type) => !alert_types.includes(alert_type)
  );
  return (
    <ConfigItem label="Alert Types">
      <div className="flex items-center gap-4">
        <div className="flex items-center gap-2">
          {/* Each badge is keyed by its alert type so React can track it across adds / removes. */}
          {alert_types.map((type) => (
            <Badge
              key={type}
              variant="secondary"
              className="text-sm flex items-center gap-2 cursor-pointer"
              onClick={() => {
                if (disabled) return;
                set(alert_types.filter((t) => t !== type));
              }}
            >
              {type}
              {!disabled && <MinusCircle className="w-3 h-3" />}
            </Badge>
          ))}
        </div>
        {at.length ? (
          <Select
            value={undefined}
            onValueChange={(type: Types.AlertData["type"]) => {
              set([...alert_types, type]);
            }}
            disabled={disabled}
          >
            <SelectTrigger className="w-[150px]">
              <div className="pr-2">Add Filter</div>
            </SelectTrigger>
            <SelectContent align="end">
              {at.map((alert_type) => (
                <SelectItem key={alert_type} value={alert_type}>
                  {alert_type}
                </SelectItem>
              ))}
            </SelectContent>
          </Select>
        ) : undefined}
      </div>
    </ConfigItem>
  );
};
|
||||
@@ -0,0 +1,58 @@
|
||||
import { ConfigItem } from "@components/config/util";
|
||||
import { TextUpdateMenu } from "@components/util";
|
||||
import { Types } from "@monitor/client";
|
||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@ui/select";
|
||||
|
||||
/** Endpoint kinds selectable in the endpoint type dropdown. */
const ENDPOINT_TYPES: Types.AlerterEndpoint["type"][] = ["Custom", "Slack"];

/**
 * Config item for an alerter endpoint: a type dropdown next to a url editor.
 *
 * Choosing a type replaces the whole endpoint with that type and the url
 * returned by `default_url` for it. Editing the url keeps the current type
 * and any other params.
 *
 * @param endpoint The endpoint currently being edited.
 * @param set Called with the complete updated endpoint.
 * @param disabled Passed to the dropdown and its trigger to disable them.
 */
export const EndpointConfig = (props: {
  endpoint: Types.AlerterEndpoint;
  set: (endpoint: Types.AlerterEndpoint) => void;
  disabled: boolean;
}) => {
  const { endpoint, set, disabled } = props;

  // Switching type resets params to just the default url for the new type.
  const change_type = (type: Types.AlerterEndpoint["type"]) => {
    set({ type, params: { url: default_url(type) } });
  };

  const type_options = ENDPOINT_TYPES.map((endpoint_type) => (
    <SelectItem key={endpoint_type} value={endpoint_type}>
      {endpoint_type}
    </SelectItem>
  ));

  return (
    <ConfigItem label="Endpoint">
      <div className="flex items-center gap-4">
        <Select
          value={endpoint.type}
          onValueChange={change_type}
          disabled={disabled}
        >
          <SelectTrigger className="w-[150px]" disabled={disabled}>
            <SelectValue />
          </SelectTrigger>
          <SelectContent>{type_options}</SelectContent>
        </Select>
        <TextUpdateMenu
          title={`${endpoint.type} Alerter Url`}
          value={endpoint.params.url}
          onUpdate={(url) =>
            set({ ...endpoint, params: { ...endpoint.params, url } })
          }
          placeholder="Enter endpoint url"
          triggerClassName="w-[250px]"
        />
      </div>
    </ConfigItem>
  );
};
|
||||
|
||||
/**
 * Returns the url a freshly selected endpoint type starts out with.
 *
 * "Custom" maps to a localhost url, "Slack" to a placeholder Slack webhook
 * url, and any other value to the empty string.
 */
const default_url = (type: Types.AlerterEndpoint["type"]) => {
  switch (type) {
    case "Custom":
      return "http://localhost:7000";
    case "Slack":
      return "https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX";
    default:
      return "";
  }
};
|
||||
56
frontend/src/components/resources/alerter/config/index.tsx
Normal file
56
frontend/src/components/resources/alerter/config/index.tsx
Normal file
@@ -0,0 +1,56 @@
|
||||
import { Config } from "@components/config";
|
||||
import { useRead, useWrite } from "@lib/hooks";
|
||||
import { Types } from "@monitor/client";
|
||||
import { useState } from "react";
|
||||
import { EndpointConfig } from "./endpoint";
|
||||
import { AlertTypeConfig } from "./alert_types";
|
||||
|
||||
/**
 * Configuration form for the alerter with the given `id`.
 *
 * Reads the caller's permission level, the alerter, and the core info, and
 * renders a `Config` form with an enabled toggle, an endpoint editor and an
 * alert type editor. Returns `null` while no alerter config is available.
 * The form is disabled when the core info reports `ui_write_disabled` or the
 * permission level is anything other than `Write`. Saving sends the pending
 * partial update through the "UpdateAlerter" mutation.
 */
export const AlerterConfig = ({ id }: { id: string }) => {
  const { data: perms } = useRead("GetPermissionLevel", {
    target: { type: "Alerter", id },
  });
  const { data: alerter } = useRead("GetAlerter", { alerter: id });
  const { data: core_info } = useRead("GetCoreInfo", {});
  const { mutateAsync } = useWrite("UpdateAlerter");
  const [update, set] = useState<Partial<Types.AlerterConfig>>({});

  const config = alerter?.config;
  if (!config) return null;

  const writes_disabled = core_info?.ui_write_disabled ?? false;
  const disabled = writes_disabled || perms !== Types.PermissionLevel.Write;

  const save = async () => {
    await mutateAsync({ id, config: update });
  };

  return (
    <Config
      disabled={disabled}
      config={config}
      update={update}
      set={set}
      onSave={save}
      components={{
        general: [
          {
            label: "General",
            components: {
              enabled: true,
              endpoint: (endpoint, set_fields) => (
                <EndpointConfig
                  endpoint={endpoint!}
                  set={(next) => set_fields({ endpoint: next })}
                  disabled={disabled}
                />
              ),
              alert_types: (alert_types, set_fields) => (
                <AlertTypeConfig
                  alert_types={alert_types!}
                  set={(next) => set_fields({ alert_types: next })}
                  disabled={disabled}
                />
              ),
            },
          },
        ],
      }}
    />
  );
};
|
||||
Reference in New Issue
Block a user