Added better error handling and daemon health check

This commit is contained in:
Marcel Baumgartner
2023-06-09 14:38:30 +02:00
parent bd8ba11410
commit fc319f0f73
4 changed files with 97 additions and 12 deletions

View File

@@ -0,0 +1,58 @@
using System.Diagnostics;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Moonlight.App.Database.Entities;
using Moonlight.App.Repositories;
using Moonlight.App.Services;
namespace Moonlight.App.Diagnostics.HealthChecks;
/// <summary>
/// Health check that probes every node returned by the node repository by calling
/// <c>NodeService.GetCpuMetrics</c> on it. A node is counted as offline when that
/// call throws.
/// </summary>
public class DaemonHealthCheck : IHealthCheck
{
    private readonly Repository<Node> NodeRepository;
    private readonly NodeService NodeService;

    /// <summary>
    /// Creates the health check.
    /// </summary>
    /// <param name="nodeRepository">Repository used to enumerate the nodes to probe.</param>
    /// <param name="nodeService">Service used to issue the probe request against each node.</param>
    public DaemonHealthCheck(Repository<Node> nodeRepository, NodeService nodeService)
    {
        NodeRepository = nodeRepository;
        NodeService = nodeService;
    }

    /// <summary>
    /// Probes all nodes one after another and summarises the outcome.
    /// </summary>
    /// <param name="context">Health check context supplied by the framework (not used).</param>
    /// <param name="cancellationToken">Token checked before each node is probed.</param>
    /// <returns>
    /// <c>Healthy</c> when no probe failed (including when there are no nodes at all),
    /// <c>Unhealthy</c> when every probe failed, otherwise <c>Degraded</c>. For the
    /// non-healthy results the data dictionary maps node names to the captured exception text.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="cancellationToken"/> is cancelled between probes.
    /// </exception>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        var nodes = NodeRepository.Get().ToArray();
        var offlineCount = 0;
        var healthCheckData = new Dictionary<string, object>();

        foreach (var node in nodes)
        {
            // Stop early instead of probing the remaining nodes once the caller gave up.
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                await NodeService.GetCpuMetrics(node);
            }
            catch (Exception e)
            {
                offlineCount++;

                // Node names are not guaranteed to be unique here; Dictionary.Add would throw
                // on a duplicate key and abort the whole check, so disambiguate with a suffix.
                var key = node.Name;

                for (var suffix = 2; healthCheckData.ContainsKey(key); suffix++)
                {
                    key = $"{node.Name} ({suffix})";
                }

                healthCheckData.Add(key, e.ToStringDemystified());
            }
        }

        // Checked first so that an empty node list is reported as healthy rather than
        // falling into the "all offline" branch (0 offline == 0 nodes).
        if (offlineCount == 0)
        {
            return HealthCheckResult.Healthy("All node daemons are online");
        }

        if (offlineCount == nodes.Length)
        {
            return HealthCheckResult.Unhealthy("All node daemons are offline", null, healthCheckData);
        }

        return HealthCheckResult.Degraded($"{offlineCount} node daemons are offline", null, healthCheckData);
    }
}

View File

@@ -70,7 +70,8 @@ namespace Moonlight
builder.Services.AddHttpContextAccessor(); builder.Services.AddHttpContextAccessor();
builder.Services.AddHealthChecks() builder.Services.AddHealthChecks()
.AddCheck<DatabaseHealthCheck>("Database") .AddCheck<DatabaseHealthCheck>("Database")
.AddCheck<NodeHealthCheck>("Nodes"); .AddCheck<NodeHealthCheck>("Nodes")
.AddCheck<DaemonHealthCheck>("Daemons");
// Databases // Databases
builder.Services.AddDbContext<DataContext>(); builder.Services.AddDbContext<DataContext>();

View File

@@ -33,7 +33,7 @@
@(entry.Key) @(entry.Key)
</button> </button>
</h2> </h2>
<div id="healthCheck_body_@(entry.Key.ToLower())" class="accordion-collapse collapse show" data-bs-parent="#healthCheck"> <div id="healthCheck_body_@(entry.Key.ToLower())" class="accordion-collapse collapse" data-bs-parent="#healthCheck">
<div class="accordion-body"> <div class="accordion-body">
<b><TL>Status</TL>:</b>&nbsp;<TL>@(entry.Value.Status)</TL><br/> <b><TL>Status</TL>:</b>&nbsp;<TL>@(entry.Value.Status)</TL><br/>
<b><TL>Description</TL>:</b>&nbsp;@(entry.Value.Description)<br/> <b><TL>Description</TL>:</b>&nbsp;@(entry.Value.Description)<br/>

View File

@@ -6,6 +6,7 @@
@using Moonlight.App.Models.Misc @using Moonlight.App.Models.Misc
@using Moonlight.App.Services @using Moonlight.App.Services
@using Newtonsoft.Json @using Newtonsoft.Json
@using Logging.Net
@inject ServerRepository ServerRepository @inject ServerRepository ServerRepository
@inject UserRepository UserRepository @inject UserRepository UserRepository
@@ -101,7 +102,28 @@
</a> </a>
</div> </div>
</div> </div>
<LazyLoader Load="LoadHealthCheckData">
@if (HealthCheckData == null)
{
<div class="card">
<div class="card-header">
<div class="card-title">
<TL>Moonlight health</TL>
</div>
</div>
<div class="card-body">
<div class="alert alert-warning">
<TL>Unable to fetch health check data</TL>
</div>
</div>
</div>
}
else
{
<HealthCheckView HealthCheck="@HealthCheckData"/> <HealthCheckView HealthCheck="@HealthCheckData"/>
}
</LazyLoader>
</LazyLoader> </LazyLoader>
</OnlyAdmin> </OnlyAdmin>
@@ -112,15 +134,22 @@
private int DomainCount = 0; private int DomainCount = 0;
private int WebSpaceCount = 0; private int WebSpaceCount = 0;
private HealthCheck HealthCheckData; private HealthCheck? HealthCheckData;
private async Task Load(LazyLoader lazyLoader) private Task Load(LazyLoader lazyLoader)
{ {
ServerCount = ServerRepository.Get().Count(); ServerCount = ServerRepository.Get().Count();
UserCount = UserRepository.Get().Count(); UserCount = UserRepository.Get().Count();
DomainCount = DomainRepository.Get().Count(); DomainCount = DomainRepository.Get().Count();
WebSpaceCount = WebSpaceRepository.Get().Count(); WebSpaceCount = WebSpaceRepository.Get().Count();
return Task.CompletedTask;
}
private async Task LoadHealthCheckData(LazyLoader lazyLoader)
{
await lazyLoader.SetText("Loading health check data");
var appUrl = ConfigService var appUrl = ConfigService
.GetSection("Moonlight") .GetSection("Moonlight")
.GetValue<string>("AppUrl"); .GetValue<string>("AppUrl");
@@ -131,14 +160,11 @@
var json = await client.GetStringAsync($"{appUrl}/_health"); var json = await client.GetStringAsync($"{appUrl}/_health");
HealthCheckData = JsonConvert.DeserializeObject<HealthCheck>(json) ?? new(); HealthCheckData = JsonConvert.DeserializeObject<HealthCheck>(json) ?? new();
} }
catch (Exception) catch (Exception e)
{ {
HealthCheckData = new() HealthCheckData = null;
{ Logger.Warn("Unable to fetch health check data");
Status = "Healthy", Logger.Warn(e);
Entries = new(),
TotalDuration = TimeSpan.MinValue
};
} }
} }
} }