nvme_disk: enable the autonomous power state transition (APST) feature
if available.
We follow the algorithm used in Linux, choosing at most two power states:
one with a low latency and one with a higher latency. This saves about 1 W
of power on a WD Blue SN570 device.
Change-Id: Ic403ec19765e9abff54aed4f6d7dcad8f22695ae
Reviewed-on: https://review.haiku-os.org/c/haiku/+/9137
Reviewed-by: Jérôme Duval <jerome.duval@gmail.com>
Reviewed-by: waddlesplash <waddlesplash@gmail.com>
Diff
src/add-ons/kernel/drivers/disk/nvme/nvme_disk.cpp | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme.h | 1 +
src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_admin.c | 3 ++-
src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_ctrlr.c | 7 ++++---
src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_internal.h | 1 +
5 files changed, 68 insertions(+), 13 deletions(-)
@@ -261,8 +261,8 @@
if (info->ctrlr->feature_supported[NVME_FEAT_INTERRUPT_COALESCING]) {
uint32 microseconds = 16, threshold = 32;
nvme_admin_set_feature(info->ctrlr, false, NVME_FEAT_INTERRUPT_COALESCING,
((microseconds / 100) << 8) | threshold, 0, NULL);
nvme_ctrlr_set_feature(info->ctrlr, false, NVME_FEAT_INTERRUPT_COALESCING,
((microseconds / 100) << 8) | threshold, 0, NULL, 0, NULL);
}
if (info->ctrlr->feature_supported[NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION]) {
@@ -273,15 +273,66 @@
&& cdata.npss > 0 && cdata.npss < 31) {
TRACE_ALWAYS("\tpower states: %u\n", cdata.npss);
for (uint8 i = 0; i <= cdata.npss; i++) {
struct nvme_power_state *psd = &cdata.psd[i];
struct nvme_power_state psd;
memcpy(&psd, &cdata.psd[i], sizeof(struct nvme_power_state));
TRACE_ALWAYS("\tps %u: mp:%fW %soperational enlat:%u exlat:%u rrt:%u rrl:%u\n",
i, psd->mp / (psd->mxps == 0 ? 100.0 : 10000.0),
psd->nops ? "non-" : "", psd->enlat, psd->exlat, psd->rrt, psd->rrl);
TRACE_ALWAYS("\trwt:%u rwl:%u idlp:%fW actp:%fW apw:%u\n", psd->rwt, psd->rwl,
psd->idlp / (psd->ips == 2 ? 100.0 : (psd->ips == 1 ? 10000.0 : 1.0)),
psd->actp / (psd->aps == 2 ? 100.0 : (psd->aps == 1 ? 10000.0 : 1.0)),
psd->apw);
i, psd.mp / (psd.mxps == 0 ? 100.0 : 10000.0),
psd.nops ? "non-" : "", psd.enlat, psd.exlat, psd.rrt, psd.rrl);
TRACE_ALWAYS("\trwt:%u rwl:%u idlp:%fW actp:%fW apw:%u\n", psd.rwt, psd.rwl,
psd.idlp / (psd.ips == 2 ? 100.0 : (psd.ips == 1 ? 10000.0 : 1.0)),
psd.actp / (psd.aps == 2 ? 100.0 : (psd.aps == 1 ? 10000.0 : 1.0)),
psd.apw);
}
size_t tableSize = 32 * sizeof(uint64);
uint64* table = (uint64*)malloc(tableSize);
memset(table, 0, tableSize);
uint64 target = 0;
bool firstStateSet = false;
bool secondStateSet = false;
for (uint8 i = cdata.npss; i > 0; i--) {
struct nvme_power_state psd;
memcpy(&psd, &cdata.psd[i], sizeof(struct nvme_power_state));
if (psd.nops && psd.exlat <= 100000) {
uint32 totalLatency = psd.enlat + psd.exlat;
uint32 transitionTime = 0;
if (totalLatency < 100000 && !secondStateSet) {
secondStateSet = true;
transitionTime = 2000;
}
if (totalLatency < 15000 && secondStateSet && !firstStateSet) {
transitionTime = 100;
firstStateSet = true;
}
if (transitionTime > 0)
target = (i << 3) | (transitionTime << 8);
}
table[i - 1] = target;
}
TRACE_ALWAYS("\tautonomous power state transition table:\n");
for (int i = 0; i < 8; i++) {
if (table[i * 4] == 0 && table[i * 4 + 1] == 0 && table[i * 4 + 2] == 0
&& table[i * 4 + 3] == 0) {
break;
}
TRACE_ALWAYS("\t%" B_PRIx64 " %" B_PRIx64" %" B_PRIx64" %" B_PRIx64 "\n",
table[i * 4], table[i * 4 + 1], table[i * 4 + 2], table[i * 4 + 3]);
}
int err = nvme_ctrlr_set_feature(info->ctrlr, false,
NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION,
(firstStateSet || secondStateSet) ? 1 : 0, 0, table, tableSize, NULL);
if (err != 0)
TRACE_ERROR("failed to set apst table!\n");
else
TRACE_ALWAYS("\t=> feature apst table set\n");
free(table);
}
}
@@ -623,6 +623,7 @@
extern int nvme_ctrlr_set_feature(struct nvme_ctrlr *ctrlr,
bool save, enum nvme_feat feature,
uint32_t cdw11, uint32_t cdw12,
void *buf, size_t len,
uint32_t *attributes);
/**
@@ -159,6 +159,7 @@
enum nvme_feat feature,
uint32_t cdw11,
uint32_t cdw12,
void *buf, uint32_t len,
uint32_t *attributes)
{
struct nvme_completion_poll_status status;
@@ -176,7 +177,7 @@
status.done = false;
ret = nvme_admin_submit_cmd(ctrlr, &cmd, NULL, 0,
ret = nvme_admin_submit_cmd(ctrlr, &cmd, buf, len,
nvme_request_completion_poll_cb,
&status);
if (ret == 0) {
@@ -502,7 +502,7 @@
* of queues requested (see specifications).
*/
ret = nvme_admin_set_feature(ctrlr, false, NVME_FEAT_NUMBER_OF_QUEUES,
num_queues, 0, &cdw0);
num_queues, 0, NULL, 0, &cdw0);
if (ret != 0) {
nvme_notice("Set feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
return ret;
@@ -656,7 +656,7 @@
ret = nvme_admin_set_feature(ctrlr, false,
NVME_FEAT_ASYNC_EVENT_CONFIGURATION,
state.raw, 0, NULL);
state.raw, 0, NULL, 0, NULL);
if (ret != 0) {
nvme_notice("Set feature ASYNC_EVENT_CONFIGURATION failed\n");
return ret;
@@ -1193,6 +1193,7 @@
int nvme_ctrlr_set_feature(struct nvme_ctrlr *ctrlr,
bool save, enum nvme_feat feature,
uint32_t cdw11, uint32_t cdw12,
void *buf, size_t len,
uint32_t *attributes)
{
int ret;
@@ -1200,7 +1201,7 @@
pthread_mutex_lock(&ctrlr->lock);
ret = nvme_admin_set_feature(ctrlr, save, feature,
cdw11, cdw12, attributes);
cdw11, cdw12, NULL, 0, attributes);
if (ret != 0)
nvme_notice("Set feature 0x%08x failed\n",
(unsigned int) feature);
@@ -630,6 +630,7 @@
bool save,
enum nvme_feat feature,
uint32_t cdw11, uint32_t cdw12,
void *buf, uint32_t len,
uint32_t *attributes);
extern int nvme_admin_format_nvm(struct nvme_ctrlr *ctrlr,