8 Commits
1.1.2 ... 1.1.4

Author SHA1 Message Date
cppla
0fe01064a4 Update README.md 2024-04-03 21:59:23 +08:00
cppla
510567eaec 修复驴头不对马嘴的bug 2024-04-03 21:31:35 +08:00
cppla
503037c7e2 add two logs 2024-04-03 19:53:04 +08:00
cppla
d75d5438a3 docker compose healthcheck 2024-04-03 17:44:43 +08:00
cppla
91f11dad76 屏蔽无用的nginx日志,增加健康检查wq 2024-04-03 17:16:32 +08:00
cppla
fdc5abacfc fix bug for offline 2024-04-03 16:42:40 +08:00
cppla
388938e02b 解决了网络闪断导致的错误报警 2024-04-03 16:39:08 +08:00
cppla
f912794068 add todo 2024-04-02 10:24:12 +08:00
6 changed files with 129 additions and 17 deletions

View File

@@ -15,7 +15,7 @@ RUN pwd && ls -a
# glibc env run
FROM nginx:latest
RUN mkdir -p /ServerStatus/server/
RUN mkdir -p /ServerStatus/server/ && ln -sf /dev/null /var/log/nginx/access.log && ln -sf /dev/null /var/log/nginx/error.log
COPY --from=builder server /ServerStatus/server/
COPY --from=builder web /usr/share/nginx/html/
@@ -25,5 +25,5 @@ ENV TZ=Asia/Shanghai
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
EXPOSE 80 35601
HEALTHCHECK --interval=5s --timeout=3s --retries=3 CMD curl --fail http://localhost:80 || bash -c 'kill -s 15 -1 && (sleep 10; kill -s 9 -1)'
CMD nohup sh -c '/etc/init.d/nginx start && /ServerStatus/server/sergate --config=/ServerStatus/server/config.json --web-dir=/usr/share/nginx/html'

View File

@@ -6,12 +6,12 @@
[![Python Support](https://img.shields.io/badge/python-3.6%2B%20-blue.svg)](https://github.com/cppla/ServerStatus)
[![C++ Compiler](http://img.shields.io/badge/C++-GNU-blue.svg?style=flat&logo=cplusplus)](https://github.com/cppla/ServerStatus)
[![License](https://img.shields.io/badge/license-MIT-4EB1BA.svg?style=flat-square)](https://github.com/cppla/ServerStatus)
[![Version](https://img.shields.io/badge/Version-Build%201.1.2-red)](https://github.com/cppla/ServerStatus)
[![Version](https://img.shields.io/badge/Version-Build%201.1.3-red)](https://github.com/cppla/ServerStatus)
![Latest Host Version](https://dl.cpp.la/Archive/serverstatus_1.1.2_host.png)
![Latest Server Version](https://dl.cpp.la/Archive/serverstatus_1.1.2_server.png)
`Watchdog触发式告警interval只是为了防止频繁收到报警信息造成的骚扰并不是探测间隔。 同时为了防止海外机器闪断报警也加入username、name、type等静态字符串参数的计算支持。值得注意的是Exprtk库默认使用窄字符类型中文等Unicode字符无法解析计算等待修复。 `
`Watchdog触发式告警，interval只是为了防止频繁收到报警信息造成的骚扰，并不是探测间隔。值得注意的是，Exprtk库默认使用窄字符类型，中文等Unicode字符无法解析计算，等待修复。 `
# 目录:
@@ -115,8 +115,8 @@ cd ServerStatus/server && make
"callback": "https://yourSMSurl"
},
{
"name": "服务器宕机告警排出node1排除s02",
"rule": "online4=0&online6=0&name!='node1'&username!='s02'",
"name": "服务器宕机告警",
"rule": "online4=0&online6=0",
"interval": 600,
"callback": "https://yourSMSurl"
},

View File

@@ -5,6 +5,11 @@ services:
context: .
dockerfile: Dockerfile
image: serverstatus_server
healthcheck:
test: curl --fail http://localhost:80 || bash -c 'kill -s 15 -1 && (sleep 10; kill -s 9 -1)'
interval: 30s
timeout: 10s
retries: 5
container_name: serverstatus
restart: unless-stopped
networks:
@@ -19,7 +24,6 @@ services:
networks:
serverstatus-network:
name: serverstatus-network
ipam:
config:
- subnet: 172.23.0.0/24

View File

@@ -72,8 +72,8 @@
"callback": "https://yourSMSurl"
},
{
"name": "offline warning,exclude name node1",
"rule": "online4=0&online6=0&name!='node1'",
"name": "offline warning",
"rule": "online4=0&online6=0",
"interval": 600,
"callback": "https://yourSMSurl"
},

View File

@@ -110,13 +110,6 @@ void CMain::OnDelClient(int ClientNetID)
{
int ClientID = ClientNetToClient(ClientNetID);
dbg_msg("main", "OnDelClient(ncid=%d, cid=%d)", ClientNetID, ClientID);
//copy offline message for watchdog
WatchdogMessage(ClientNetID,
0, 0, 0, 0, 0, 0,
0, 0, 0,0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0,0, 0, 0,
0, 0, 0, 0);
if(ClientID >= 0 && ClientID < NET_MAX_CLIENTS)
{
Client(ClientID)->m_Connected = false;
@@ -124,6 +117,10 @@ void CMain::OnDelClient(int ClientNetID)
Client(ClientID)->m_ClientNetType = NETTYPE_INVALID;
mem_zero(&Client(ClientID)->m_Stats, sizeof(CClient::CStats));
}
m_OfflineAlarmThreadData.pClients = m_aClients;
m_OfflineAlarmThreadData.pWatchDogs = m_aCWatchDogs;
m_OfflineAlarmThreadData.m_ReloadRequired = ClientID;
thread_create(offlineAlarmThread, &m_OfflineAlarmThreadData);
}
int CMain::HandleMessage(int ClientNetID, char *pMessage)
@@ -353,6 +350,11 @@ void CMain::WatchdogMessage(int ClientNetID, double load_1, double load_5, doubl
time_t currentStamp = (long long)time(/*ago*/0);
if ((currentStamp-Client(ClientID)->m_AlarmLastTime) > Watchdog(ID)->m_aInterval)
{
if (!Client(ClientID)->m_Stats.m_Online4 && !Client(ClientID)->m_Stats.m_Online6)
{
//休眠5分钟如果5分钟后状态发生了变更消息不发出。
printf("download\n");
}
Client(ClientID)->m_AlarmLastTime = currentStamp;
CURL *curl;
CURLcode res;
@@ -498,6 +500,109 @@ void CMain::JSONUpdateThread(void *pUser)
fs_rename(pConfig->m_aJSONFile, aJSONFileTmp);
}
void CMain::offlineAlarmThread(void *pUser)
{
CJSONUpdateThreadData *m_OfflineAlarmThreadData = (CJSONUpdateThreadData *)pUser;
CClient *pClients = m_OfflineAlarmThreadData->pClients;
CWatchDog *pWatchDogs = m_OfflineAlarmThreadData->pWatchDogs;
volatile short ClientID = m_OfflineAlarmThreadData->m_ReloadRequired;
thread_sleep(6000);
if(!pClients[ClientID].m_Connected)
{
int ID = 0;
while (strcmp(pWatchDogs[ID].m_aName, "NULL"))
{
typedef exprtk::symbol_table<double> symbol_table_t;
typedef exprtk::expression<double> expression_t;
typedef exprtk::parser<double> parser_t;
const std::string expression_string = pWatchDogs[ID].m_aRule;
std::string username = pClients[ClientID].m_aUsername;
std::string name = pClients[ClientID].m_aName;
std::string type = pClients[ClientID].m_aType;
std::string host = pClients[ClientID].m_aHost;
std::string location = pClients[ClientID].m_aLocation;
std::double_t online4 = pClients[ClientID].m_Stats.m_Online4;
std::double_t online6 = pClients[ClientID].m_Stats.m_Online6;
symbol_table_t symbol_table;
symbol_table.add_stringvar("username", username);
symbol_table.add_stringvar("name", name);
symbol_table.add_stringvar("type", type);
symbol_table.add_stringvar("host", host);
symbol_table.add_stringvar("location", location);
symbol_table.add_variable("online4",online4);
symbol_table.add_variable("online6",online6);
symbol_table.add_constants();
expression_t expression;
expression.register_symbol_table(symbol_table);
parser_t parser;
parser.compile(expression_string,expression);
if (expression.value() > 0)
{
time_t currentStamp = (long long)time(/*ago*/0);
if ((currentStamp-pClients[ClientID].m_AlarmLastTime) > pWatchDogs[ID].m_aInterval)
{
printf("客户端下线且超过阈值, Client disconnects and sends alert information\n");
pClients[ClientID].m_AlarmLastTime = currentStamp;
CURL *curl;
CURLcode res;
curl_global_init(CURL_GLOBAL_ALL);
curl = curl_easy_init();
if(curl) {
//standard time
char standardTime[32]= { 0 };
strftime(standardTime, sizeof(standardTime), "%Y-%m-%d %H:%M:%S",localtime(&currentStamp));
//url encode, Rules conflict with url special characterseg&, del rules, by https://cpp.la, 2023-10-09
char encodeBuffer[2048] = { 0 };
sprintf(encodeBuffer, "【告警名称】 %s \n\n【告警时间】 %s \n\n【用户名】 %s \n\n【节点名】 %s \n\n【虚拟化】 %s \n\n【主机名】 %s \n\n【位 置】 %s",
pWatchDogs[ID].m_aName,
standardTime,
pClients[ClientID].m_aUsername,
pClients[ClientID].m_aName,
pClients[ClientID].m_aType,
pClients[ClientID].m_aHost,
pClients[ClientID].m_aLocation);
char *encodeUrl = curl_easy_escape(curl, encodeBuffer, strlen(encodeBuffer));
//standard url
char urlBuffer[2048] = { 0 };
sprintf(urlBuffer, "%s%s",pWatchDogs[ID].m_aCallback, encodeUrl);
curl_easy_setopt(curl, CURLOPT_POST, 1L);
curl_easy_setopt(curl, CURLOPT_URL, urlBuffer);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS,"signature=ServerStatus");
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3L);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 6L);
res = curl_easy_perform(curl);
if(res != CURLE_OK)
fprintf(stderr, "watchdog failed: %s\n", curl_easy_strerror(res));
if(encodeUrl)
curl_free(encodeUrl);
curl_easy_cleanup(curl);
}
curl_global_cleanup();
}
else
printf("客户端下线但未超过阈值No alarm if the threshold is not exceeded\n");
}
ID++;
}
}
else
{
printf("网络波动No alarm information is sent due to network fluctuations\n");
}
fflush(stdout);
}
int CMain::ReadConfig()
{
// read and parse config
@@ -696,6 +801,7 @@ int CMain::Run()
m_JSONUpdateThreadData.m_ReloadRequired = 2;
m_JSONUpdateThreadData.pClients = m_aClients;
m_JSONUpdateThreadData.pConfig = &m_Config;
m_JSONUpdateThreadData.pWatchDogs = m_aCWatchDogs;
void *LoadThread = thread_create(JSONUpdateThread, &m_JSONUpdateThreadData);
//thread_detach(LoadThread);

View File

@@ -101,10 +101,12 @@ class CMain
{
CClient *pClients;
CConfig *pConfig;
CWatchDog *pWatchDogs;
volatile short m_ReloadRequired;
} m_JSONUpdateThreadData;
} m_JSONUpdateThreadData, m_OfflineAlarmThreadData;
static void JSONUpdateThread(void *pUser);
static void offlineAlarmThread(void *pUser);
public:
CMain(CConfig Config);