Add an AWS SDK metric publisher
This commit is contained in:
parent
ab0892cc41
commit
b115e95da4
|
@ -0,0 +1,196 @@
|
|||
package org.whispersystems.textsecuregcm.metrics;
|
||||
|
||||
import io.dropwizard.lifecycle.Managed;
|
||||
import io.micrometer.core.instrument.DistributionSummary;
|
||||
import io.micrometer.core.instrument.Metrics;
|
||||
import io.micrometer.core.instrument.Tags;
|
||||
import io.micrometer.core.instrument.Timer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import software.amazon.awssdk.metrics.MetricCollection;
|
||||
import software.amazon.awssdk.metrics.MetricPublisher;
|
||||
import software.amazon.awssdk.metrics.MetricRecord;
|
||||
import java.time.Duration;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.RejectedExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* A Micrometer AWS SDK metric publisher consumes {@link MetricCollection} instances provided by the AWS SDK when it
|
||||
* makes calls to the AWS API and publishes a subset of metrics from each call via the Micrometer metrics facade. A
|
||||
* single {@code MicrometerAwsSdkMetricPublisher} should be bound to a single AWS service client instance; publishers
|
||||
* should not be assigned to multiple service clients.
|
||||
*
|
||||
* @see <a href="https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/metrics-list.html">Service client metrics</a>
|
||||
*/
|
||||
public class MicrometerAwsSdkMetricPublisher implements MetricPublisher, Managed {
|
||||
|
||||
private final ExecutorService recordMetricsExecutorService;
|
||||
|
||||
private final String awsClientName;
|
||||
private final AtomicInteger mostRecentMaxConcurrency;
|
||||
|
||||
private static final String API_CALL_COUNTER_NAME =
|
||||
MetricsUtil.name(MicrometerAwsSdkMetricPublisher.class, "apiCall");
|
||||
|
||||
private static final String API_CALL_RETRY_COUNT_DISTRIBUTION_NAME =
|
||||
MetricsUtil.name(MicrometerAwsSdkMetricPublisher.class, "apiCallRetries");
|
||||
|
||||
private static final String CHANNEL_ACQUISITION_TIMER_NAME =
|
||||
MetricsUtil.name(MicrometerAwsSdkMetricPublisher.class, "acquireChannelDuration");
|
||||
|
||||
private static final String PENDING_CHANNEL_ACQUISITIONS_DISTRIBUTION_NAME =
|
||||
MetricsUtil.name(MicrometerAwsSdkMetricPublisher.class, "pendingChannelAcquisitions");
|
||||
|
||||
private static final String CONCURRENT_REQUESTS_DISTRIBUTION_NAME =
|
||||
MetricsUtil.name(MicrometerAwsSdkMetricPublisher.class, "concurrentRequests");
|
||||
|
||||
private static final String CLIENT_NAME_TAG = "clientName";
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(MicrometerAwsSdkMetricPublisher.class);
|
||||
|
||||
/**
|
||||
* Constructs a new metric publisher that uses the given executor service to record metrics and tags metrics with the
|
||||
* given client name. Note that the given executor service will be shut down when this publisher is shut down via the
|
||||
* {@link #close()} or {@link #stop()} methods, and as such the executor service should be used only for this
|
||||
* publisher.
|
||||
*
|
||||
* @param recordMetricsExecutorService the executor service via which to record metrics
|
||||
* @param awsClientName the name of AWS service client to which this publisher is attached
|
||||
*/
|
||||
public MicrometerAwsSdkMetricPublisher(final ExecutorService recordMetricsExecutorService, final String awsClientName) {
|
||||
this.recordMetricsExecutorService = recordMetricsExecutorService;
|
||||
this.awsClientName = awsClientName;
|
||||
|
||||
mostRecentMaxConcurrency = Metrics.gauge("maxConcurrency",
|
||||
Tags.of(CLIENT_NAME_TAG, awsClientName),
|
||||
new AtomicInteger(0));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void publish(final MetricCollection metricCollection) {
|
||||
|
||||
if ("ApiCall".equals(metricCollection.name())) {
|
||||
try {
|
||||
recordMetricsExecutorService.submit(() -> recordApiCallMetrics(metricCollection));
|
||||
} catch (final RejectedExecutionException ignored) {
|
||||
// This can happen if clients make new calls to an upstream service while the server is shutting down
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void recordApiCallMetrics(final MetricCollection apiCallMetricCollection) {
|
||||
if (!apiCallMetricCollection.name().equals("ApiCall")) {
|
||||
throw new IllegalArgumentException("Unexpected API call metric collection name: " + apiCallMetricCollection.name());
|
||||
}
|
||||
|
||||
final Map<String, MetricRecord<?>> metricsByName = toMetricMap(apiCallMetricCollection);
|
||||
|
||||
final Optional<String> maybeAwsServiceId = Optional.ofNullable(metricsByName.get("ServiceId"))
|
||||
.map(metricRecord -> (String) metricRecord.value());
|
||||
|
||||
final Optional<String> maybeOperationName = Optional.ofNullable(metricsByName.get("OperationName"))
|
||||
.map(metricRecord -> (String) metricRecord.value());
|
||||
|
||||
if (maybeAwsServiceId.isPresent() && maybeOperationName.isPresent()) {
|
||||
final String awsServiceId = maybeAwsServiceId.get();
|
||||
final String operationName = maybeOperationName.get();
|
||||
|
||||
final boolean success = Optional.ofNullable(metricsByName.get("ApiCallSuccessful"))
|
||||
.map(metricRecord -> (boolean) metricRecord.value())
|
||||
.orElse(false);
|
||||
|
||||
final int retryCount = Optional.ofNullable(metricsByName.get("RetryCount"))
|
||||
.map(metricRecord -> (int) metricRecord.value())
|
||||
.orElse(0);
|
||||
|
||||
final Tags tags = Tags.of(
|
||||
CLIENT_NAME_TAG, awsClientName,
|
||||
"awsServiceId", awsServiceId,
|
||||
"operationName", operationName,
|
||||
"callSuccess", String.valueOf(success));
|
||||
|
||||
Metrics.counter(API_CALL_COUNTER_NAME, tags).increment();
|
||||
|
||||
DistributionSummary.builder(API_CALL_RETRY_COUNT_DISTRIBUTION_NAME)
|
||||
.tags(tags)
|
||||
.publishPercentileHistogram(true)
|
||||
.register(Metrics.globalRegistry)
|
||||
.record(retryCount);
|
||||
|
||||
apiCallMetricCollection.childrenWithName("ApiCallAttempt")
|
||||
.forEach(callAttemptMetricCollection -> recordAttemptMetrics(callAttemptMetricCollection, tags));
|
||||
}
|
||||
}
|
||||
|
||||
private void recordAttemptMetrics(final MetricCollection apiCallAttemptMetricCollection, final Tags callTags) {
|
||||
|
||||
if (!apiCallAttemptMetricCollection.name().equals("ApiCallAttempt")) {
|
||||
throw new IllegalArgumentException("Unexpected API call attempt metric collection name: " + apiCallAttemptMetricCollection.name());
|
||||
}
|
||||
|
||||
apiCallAttemptMetricCollection.childrenWithName("HttpClient").findFirst().ifPresent(httpMetricCollection -> {
|
||||
final Map<String, MetricRecord<?>> callAttemptMetricsByName = toMetricMap(apiCallAttemptMetricCollection);
|
||||
final Map<String, MetricRecord<?>> httpMetricsByName = toMetricMap(httpMetricCollection);
|
||||
|
||||
Optional.ofNullable(httpMetricsByName.get("MaxConcurrency"))
|
||||
.ifPresent(maxConcurrencyMetricRecord -> mostRecentMaxConcurrency.set((int) maxConcurrencyMetricRecord.value()));
|
||||
|
||||
final Tags attemptTags = Optional.ofNullable(callAttemptMetricsByName.get("ErrorType"))
|
||||
.map(errorTypeMetricRecord -> callTags.and("error", errorTypeMetricRecord.value().toString()))
|
||||
.orElse(callTags);
|
||||
|
||||
Optional.ofNullable(httpMetricsByName.get("ConcurrencyAcquireDuration"))
|
||||
.ifPresent(channelAcquisitionDurationMetricRecord -> Timer.builder(CHANNEL_ACQUISITION_TIMER_NAME)
|
||||
.tags(attemptTags)
|
||||
.publishPercentileHistogram(true)
|
||||
.register(Metrics.globalRegistry)
|
||||
.record((Duration) channelAcquisitionDurationMetricRecord.value()));
|
||||
|
||||
Optional.ofNullable(httpMetricsByName.get("LeasedConcurrency"))
|
||||
.ifPresent(concurrentRequestsMetricRecord -> DistributionSummary.builder(CONCURRENT_REQUESTS_DISTRIBUTION_NAME)
|
||||
.tags(attemptTags)
|
||||
.publishPercentileHistogram(true)
|
||||
.register(Metrics.globalRegistry)
|
||||
.record((int) concurrentRequestsMetricRecord.value()));
|
||||
|
||||
Optional.ofNullable(httpMetricsByName.get("PendingConcurrencyAcquires"))
|
||||
.ifPresent(pendingChannelAcquisitionsMetricRecord -> DistributionSummary.builder(PENDING_CHANNEL_ACQUISITIONS_DISTRIBUTION_NAME)
|
||||
.tags(attemptTags)
|
||||
.publishPercentileHistogram(true)
|
||||
.register(Metrics.globalRegistry)
|
||||
.record((int) pendingChannelAcquisitionsMetricRecord.value()));
|
||||
});
|
||||
}
|
||||
|
||||
private static Map<String, MetricRecord<?>> toMetricMap(final MetricCollection metricCollection) {
|
||||
return metricCollection.stream()
|
||||
.collect(Collectors.toMap(metricRecord -> metricRecord.metric().name(), metricRecord -> metricRecord));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
// As the upstream documentation for MetricPublisher#close() says:
|
||||
//
|
||||
// > Important: Implementations must block the calling thread until all pending metrics are published and any
|
||||
// > resources acquired have been freed.
|
||||
recordMetricsExecutorService.shutdown();
|
||||
|
||||
try {
|
||||
if (!recordMetricsExecutorService.awaitTermination(1, TimeUnit.MINUTES)) {
|
||||
logger.warn("Metric-recording executor service for {} did not shut down cleanly", awsClientName);
|
||||
}
|
||||
} catch (final InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() throws Exception {
|
||||
close();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue