alert refactor

This commit is contained in:
mbecker20
2024-06-02 19:15:13 -07:00
parent 40e1b1ff88
commit 3c5868d111
13 changed files with 199 additions and 208 deletions

View File

@@ -13,7 +13,7 @@ use aws_sdk_ec2::{
Client,
};
use monitor_client::entities::{
alert::{Alert, AlertData, AlertDataVariant},
alert::{Alert, AlertData},
monitor_timestamp,
server::stats::SeverityLevel,
server_template::aws::AwsServerTemplateConfig,
@@ -172,7 +172,6 @@ pub async fn terminate_ec2_instance_with_retry(
resolved: false,
level: SeverityLevel::Critical,
target: ResourceTarget::system(),
variant: AlertDataVariant::AwsBuilderTerminationFailed,
data: AlertData::AwsBuilderTerminationFailed {
instance_id: instance_id.to_string(),
message: format!("{e:#}"),

View File

@@ -49,6 +49,7 @@ impl HetznerClient {
self.post("/servers", body).await
}
#[allow(unused)]
pub async fn delete_server(
&self,
id: i64,
@@ -120,6 +121,7 @@ impl HetznerClient {
})
}
#[allow(unused)]
async fn delete<Res: DeserializeOwned>(
&self,
path: &str,

View File

@@ -5,15 +5,9 @@ use std::{
use anyhow::{anyhow, Context};
use futures::future::join_all;
use monitor_client::entities::{
alert::{Alert, AlertData, AlertDataVariant},
monitor_timestamp,
server::stats::SeverityLevel,
server_template::hetzner::{
HetznerDatacenter, HetznerServerTemplateConfig,
HetznerServerType, HetznerVolumeFormat,
},
update::ResourceTarget,
use monitor_client::entities::server_template::hetzner::{
HetznerDatacenter, HetznerServerTemplateConfig, HetznerServerType,
HetznerVolumeFormat,
};
use crate::{
@@ -22,15 +16,9 @@ use crate::{
create_volume::CreateVolumeBody,
},
config::core_config,
helpers::alert::send_alerts,
};
use self::{
client::HetznerClient,
common::{
HetznerAction, HetznerActionResponse, HetznerVolumeStatus,
},
};
use self::{client::HetznerClient, common::HetznerVolumeStatus};
mod client;
mod common;
@@ -195,60 +183,6 @@ pub async fn launch_hetzner_server(
))
}
/// Maximum number of delete attempts made before giving up on a server.
#[allow(unused)]
const MAX_TERMINATION_TRIES: usize = 5;
/// Seconds to pause between two consecutive delete attempts.
#[allow(unused)]
const TERMINATION_WAIT_SECS: u64 = 15;

/// Deletes the Hetzner server `id`, retrying when the delete fails.
///
/// Up to [`MAX_TERMINATION_TRIES`] attempts are made, pausing
/// [`TERMINATION_WAIT_SECS`] seconds after each failed attempt that is
/// not the last one. Returns `Ok(())` as soon as a delete call comes
/// back without an action error.
///
/// # Errors
///
/// - Fails with "Hetzner token not configured" when no client is
///   available from `hetzner()`.
/// - When the final attempt fails, the failure is logged, a critical
///   `HetznerBuilderTerminationFailed` alert targeting the system is
///   sent, and the failure message is returned as the error.
#[allow(unused)]
pub async fn terminate_hetzner_server_with_retry(
  id: i64,
) -> anyhow::Result<()> {
  let hetzner =
    *hetzner().as_ref().context("Hetzner token not configured")?;

  for attempt in 0..MAX_TERMINATION_TRIES {
    let is_final_attempt = attempt + 1 == MAX_TERMINATION_TRIES;

    // A failure message is only built on the final attempt; earlier
    // failures are discarded and retried after a pause.
    let failure = match hetzner.delete_server(id).await {
      Ok(response) => match response.action.error {
        None => return Ok(()),
        Some(e) if is_final_attempt => Some(format!(
          "failed to terminate instance | code: {} | {}",
          e.code, e.message
        )),
        Some(_) => None,
      },
      Err(e) if is_final_attempt => Some(format!("{e:#}")),
      Err(_) => None,
    };

    match failure {
      None => {
        tokio::time::sleep(Duration::from_secs(TERMINATION_WAIT_SECS))
          .await;
      }
      Some(message) => {
        error!("failed to terminate hetzner server {id} | {message}");
        let alert = Alert {
          id: Default::default(),
          ts: monitor_timestamp(),
          resolved: false,
          level: SeverityLevel::Critical,
          target: ResourceTarget::system(),
          variant: AlertDataVariant::HetznerBuilderTerminationFailed,
          data: AlertData::HetznerBuilderTerminationFailed {
            server_id: id,
            message: message.clone(),
          },
          resolved_ts: None,
        };
        send_alerts(&[alert]).await;
        return Err(anyhow::Error::msg(message));
      }
    }
  }

  Ok(())
}
fn hetzner_format(
format: HetznerVolumeFormat,
) -> common::HetznerVolumeFormat {

View File

@@ -246,21 +246,6 @@ async fn send_slack_alert(
];
(text, blocks.into())
}
AlertData::HetznerBuilderTerminationFailed {
server_id,
message,
} => {
let text = format!(
"{level} | Failed to terminated Hetzner builder instance"
);
let blocks = vec![
Block::header(text.clone()),
Block::section(format!(
"server id: **{server_id}**\n{message}"
)),
];
(text, blocks.into())
}
AlertData::None {} => Default::default(),
};
if !text.is_empty() {

View File

@@ -1,7 +1,7 @@
use std::collections::HashMap;
use monitor_client::entities::{
alert::{Alert, AlertData, AlertDataVariant},
alert::{Alert, AlertData},
deployment::Deployment,
server::stats::SeverityLevel,
update::ResourceTarget,
@@ -46,7 +46,6 @@ pub async fn alert_deployments(
let alert = Alert {
id: Default::default(),
level: SeverityLevel::Warning,
variant: AlertDataVariant::ContainerStateChange,
resolved: true,
resolved_ts: ts.into(),
target,

View File

@@ -1,6 +1,7 @@
use std::{collections::HashMap, path::PathBuf, str::FromStr};
use anyhow::Context;
use derive_variants::ExtractVariant;
use mongo_indexed::Indexed;
use monitor_client::entities::{
alert::{Alert, AlertData, AlertDataVariant},
@@ -66,7 +67,6 @@ pub async fn alert_servers(
resolved_ts: None,
level: SeverityLevel::Critical,
target: ResourceTarget::Server(server_status.id.clone()),
variant: AlertDataVariant::ServerUnreachable,
data: AlertData::ServerUnreachable {
id: server_status.id.clone(),
name: server.name.clone(),
@@ -132,7 +132,6 @@ pub async fn alert_servers(
resolved_ts: None,
level: health.cpu,
target: ResourceTarget::Server(server_status.id.clone()),
variant: AlertDataVariant::ServerCpu,
data: AlertData::ServerCpu {
id: server_status.id.clone(),
name: server.name.clone(),
@@ -188,7 +187,6 @@ pub async fn alert_servers(
resolved_ts: None,
level: health.cpu,
target: ResourceTarget::Server(server_status.id.clone()),
variant: AlertDataVariant::ServerMem,
data: AlertData::ServerMem {
id: server_status.id.clone(),
name: server.name.clone(),
@@ -260,7 +258,6 @@ pub async fn alert_servers(
resolved_ts: None,
level: *health,
target: ResourceTarget::Server(server_status.id.clone()),
variant: AlertDataVariant::ServerDisk,
data: AlertData::ServerDisk {
id: server_status.id.clone(),
name: server.name.clone(),
@@ -492,7 +489,7 @@ async fn get_open_alerts(
}
_ => {
let inner = map.entry(alert.target.clone()).or_default();
inner.insert(alert.variant, alert);
inner.insert(alert.data.extract_variant(), alert);
}
}
}

View File

@@ -48,9 +48,6 @@ pub struct Alert {
/// The target of the alert
pub target: ResourceTarget,
/// The type of alert, eg ServerUnreachable, ServerMem, ContainerStateChange
pub variant: AlertDataVariant,
/// The data attached to the alert
pub data: AlertData,
@@ -151,14 +148,6 @@ pub enum AlertData {
message: String,
},
/// A Hetzner builder failed to terminate.
HetznerBuilderTerminationFailed {
/// The id of the server which failed to terminate
server_id: I64,
/// A reason for the failure
message: String,
},
None {},
}

View File

@@ -182,13 +182,6 @@ export type AlertData =
instance_id: string;
/** A reason for the failure */
message: string;
}}
/** A Hetzner builder failed to terminate. */
| { type: "HetznerBuilderTerminationFailed", data: {
/** The id of the server which failed to terminate */
server_id: I64;
/** A reason for the failure */
message: string;
}}
| { type: "None", data: {
}};
@@ -209,8 +202,6 @@ export interface Alert {
level: SeverityLevel;
/** The target of the alert */
target: ResourceTarget;
/** The type of alert, eg ServerUnreachable, ServerMem, ContainerStateChange */
variant: AlertData["type"];
/** The data attached to the alert */
data: AlertData;
/** The timestamp of alert resolution */

View File

@@ -55,7 +55,7 @@ export const AlertsTable = ({
},
{
header: "Alert Type",
accessorKey: "variant",
accessorKey: "data.type",
},
]}
/>

View File

@@ -1,93 +0,0 @@
// import { Config } from "@components/config";
// import { useRead, useWrite } from "@lib/hooks";
// import { Types } from "@monitor/client";
// import {
// Select,
// SelectContent,
// SelectItem,
// SelectTrigger,
// SelectValue,
// } from "@ui/select";
// import { useEffect, useState } from "react";
// import { Config } from "@components/config";
// import { useRead, useWrite } from "@lib/hooks";
// import { Types } from "@monitor/client";
// import {
// Select,
// SelectContent,
// SelectItem,
// SelectTrigger,
// SelectValue,
// } from "@ui/select";
// import { useEffect, useState } from "react";
// export const AlerterConfig = ({ id }: { id: string }) => {
/**
 * Placeholder alerter config component: it takes no props and renders an
 * empty fragment.
 *
 * The commented-out body below is a disabled draft of the config form. Note
 * that the draft reads an `id` value which this signature does not accept.
 */
export const AlerterConfig = () => {
// const perms = useRead("GetPermissionLevel", {
// target: { type: "Alerter", id },
// }).data;
// const config = useRead("GetAlerter", { alerter: id }).data?.config;
// const global_disabled =
// useRead("GetCoreInfo", {}).data?.ui_write_disabled ?? false;
// const [type, setType] = useState<Types.AlerterConfig["type"]>();
// useEffect(() => config?.type && setType(config.type), [config?.type]);
// const [update, setConfig] = useState<
// Partial<Types.SlackAlerterConfig | Types.CustomAlerterConfig>
// >({});
// const { mutateAsync } = useWrite("UpdateAlerter");
// if (!config) return null;
// const disabled = global_disabled || perms !== Types.PermissionLevel.Write;
// return (
// <Config
// disabled={disabled}
// config={config.params}
// update={update}
// set={setConfig}
// onSave={async () => {
// if (!type) return;
// await mutateAsync({ id, config: { type, params: update } });
// }}
// components={{
// general: [
// {
// label: "General",
// components: {
// url: true,
// enabled: true,
// },
// },
// ],
// }}
// selector={
// <div className="flex gap-2 items-center text-sm">
// Alerter Type:
// <Select
// value={type}
// onValueChange={(type) => {
// setType(type as any);
// setConfig({
// url: update.url || "",
// enabled: update.enabled === undefined ? true : update.enabled,
// });
// }}
// disabled={disabled}
// >
// <SelectTrigger className="w-32 capitalize" disabled={disabled}>
// <SelectValue />
// </SelectTrigger>
// <SelectContent className="w-32">
// {["Slack", "Custom"].map((key) => (
// <SelectItem value={key} key={key} className="capitalize">
// {key}
// </SelectItem>
// ))}
// </SelectContent>
// </Select>
// </div>
// }
// />
// );
// Nothing is rendered while the form above stays disabled.
return <></>
};

View File

@@ -0,0 +1,74 @@
import { ConfigItem } from "@components/config/util";
import { Types } from "@monitor/client";
import { Badge } from "@ui/badge";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
} from "@ui/select";
import { MinusCircle } from "lucide-react";
/**
 * Alert types that can be offered as filters.
 *
 * Only the six types listed here are selectable; any other member of
 * `Types.AlertData["type"]` is not offered.
 */
const ALERT_TYPES: Types.AlertData["type"][] = [
"ServerUnreachable",
"ServerCpu",
"ServerMem",
"ServerDisk",
"ContainerStateChange",
"AwsBuilderTerminationFailed",
];
/**
 * Editor for the list of alert types attached to an alerter.
 *
 * Each selected type is shown as a badge; clicking a badge removes that type
 * (unless `disabled`). Types from `ALERT_TYPES` that are not yet selected are
 * offered in an "Add Filter" dropdown, which is hidden once none remain.
 *
 * @param alert_types - The currently selected alert types.
 * @param set - Called with the complete new list after an add or a remove.
 * @param disabled - When true, badges ignore clicks and the dropdown is disabled.
 */
export const AlertTypeConfig = ({
  alert_types,
  set,
  disabled,
}: {
  alert_types: Types.AlertData["type"][];
  set: (alert_types: Types.AlertData["type"][]) => void;
  disabled: boolean;
}) => {
  // Types that are still available to add.
  const at = ALERT_TYPES.filter(
    (alert_type) => !alert_types.includes(alert_type)
  );
  return (
    <ConfigItem label="Alert Types">
      <div className="flex items-center gap-4">
        <div className="flex items-center gap-2">
          {alert_types.map((type) => (
            // `key` lets React track each badge across adds and removes.
            <Badge
              key={type}
              variant="secondary"
              className="text-sm flex items-center gap-2 cursor-pointer"
              onClick={() => {
                if (disabled) return;
                set(alert_types.filter((t) => t !== type));
              }}
            >
              {type}
              {!disabled && <MinusCircle className="w-3 h-3" />}
            </Badge>
          ))}
        </div>
        {at.length ? (
          <Select
            value={undefined}
            onValueChange={(type: Types.AlertData["type"]) => {
              set([...alert_types, type]);
            }}
            disabled={disabled}
          >
            <SelectTrigger className="w-[150px]">
              <div className="pr-2">Add Filter</div>
            </SelectTrigger>
            <SelectContent align="end">
              {at.map((alert_type) => (
                <SelectItem key={alert_type} value={alert_type}>
                  {alert_type}
                </SelectItem>
              ))}
            </SelectContent>
          </Select>
        ) : undefined}
      </div>
    </ConfigItem>
  );
};

View File

@@ -0,0 +1,58 @@
import { ConfigItem } from "@components/config/util";
import { TextUpdateMenu } from "@components/util";
import { Types } from "@monitor/client";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@ui/select";
const ENDPOINT_TYPES: Types.AlerterEndpoint["type"][] = ["Custom", "Slack"];
/**
 * Editor for an alerter endpoint: a dropdown for the endpoint type next to a
 * text menu for the endpoint url.
 *
 * @param endpoint - The endpoint currently being edited.
 * @param set - Called with the full replacement endpoint on every change.
 * @param disabled - Disables the type dropdown.
 */
export const EndpointConfig = ({
  endpoint,
  set,
  disabled,
}: {
  endpoint: Types.AlerterEndpoint;
  set: (endpoint: Types.AlerterEndpoint) => void;
  disabled: boolean;
}) => {
  // Switching type replaces the params with that type's default url.
  const on_type_change = (type: Types.AlerterEndpoint["type"]) => {
    set({ type, params: { url: default_url(type) } });
  };

  // Editing the url keeps the type and any other params untouched.
  const on_url_update = (url: string) =>
    set({ ...endpoint, params: { ...endpoint.params, url } });

  const type_options = ENDPOINT_TYPES.map((endpoint_type) => (
    <SelectItem key={endpoint_type} value={endpoint_type}>
      {endpoint_type}
    </SelectItem>
  ));

  return (
    <ConfigItem label="Endpoint">
      <div className="flex items-center gap-4">
        <Select
          value={endpoint.type}
          onValueChange={on_type_change}
          disabled={disabled}
        >
          <SelectTrigger className="w-[150px]" disabled={disabled}>
            <SelectValue />
          </SelectTrigger>
          <SelectContent>{type_options}</SelectContent>
        </Select>
        <TextUpdateMenu
          title={`${endpoint.type} Alerter Url`}
          value={endpoint.params.url}
          onUpdate={on_url_update}
          placeholder="Enter endpoint url"
          triggerClassName="w-[250px]"
        />
      </div>
    </ConfigItem>
  );
};
/**
 * Returns the starter url used when an endpoint is switched to `type`.
 * Any type other than "Custom" or "Slack" yields an empty string.
 */
const default_url = (type: Types.AlerterEndpoint["type"]) => {
  switch (type) {
    case "Custom":
      return "http://localhost:7000";
    case "Slack":
      return "https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX";
    default:
      return "";
  }
};

View File

@@ -0,0 +1,56 @@
import { Config } from "@components/config";
import { useRead, useWrite } from "@lib/hooks";
import { Types } from "@monitor/client";
import { useState } from "react";
import { EndpointConfig } from "./endpoint";
import { AlertTypeConfig } from "./alert_types";
/**
 * Config form for the alerter with the given `id`.
 *
 * Renders nothing until the alerter's config has been read. Editing is
 * disabled when the core info reports `ui_write_disabled`, or when the
 * permission level read for this alerter is not `Write`. Saving sends the
 * accumulated partial update through `UpdateAlerter`.
 */
export const AlerterConfig = ({ id }: { id: string }) => {
  // Hooks are called unconditionally, in a fixed order, before any early return.
  const { data: perms } = useRead("GetPermissionLevel", {
    target: { type: "Alerter", id },
  });
  const alerter = useRead("GetAlerter", { alerter: id }).data;
  const core_info = useRead("GetCoreInfo", {}).data;
  const { mutateAsync } = useWrite("UpdateAlerter");
  const [update, set_update] = useState<Partial<Types.AlerterConfig>>({});

  const config = alerter?.config;
  if (!config) return null;

  const global_disabled = core_info?.ui_write_disabled ?? false;
  const can_write = perms === Types.PermissionLevel.Write;
  const disabled = global_disabled || !can_write;

  const save = async () => {
    await mutateAsync({ id, config: update });
  };

  return (
    <Config
      disabled={disabled}
      config={config}
      update={update}
      set={set_update}
      onSave={save}
      components={{
        general: [
          {
            label: "General",
            components: {
              enabled: true,
              endpoint: (endpoint, set_partial) => (
                <EndpointConfig
                  endpoint={endpoint!}
                  set={(next) => set_partial({ endpoint: next })}
                  disabled={disabled}
                />
              ),
              alert_types: (alert_types, set_partial) => (
                <AlertTypeConfig
                  alert_types={alert_types!}
                  set={(next) => set_partial({ alert_types: next })}
                  disabled={disabled}
                />
              ),
            },
          },
        ],
      }}
    />
  );
};