Discussion:
[ovs-dev] [PATCH ovn] lflow-cache: Auto trim when cache size is reduced significantly.
Dumitru Ceara
2021-06-04 10:00:47 UTC
Permalink
Due to a bug in glibc [0], M_TRIM_THRESHOLD (default 128KB) is not
honored. The lflow cache is one of the largest memory consumers in
ovn-controller and it used to trim memory whenever the cache was
flushed. However, that required periodic intervention from the CMS
side.

Instead, we now automatically trim memory every time the lflow cache
utilization decreases by 50%, with a threshold of at least
LFLOW_CACHE_TRIM_LIMIT (10000) elements in the cache.

[0] https://sourceware.org/bugzilla/show_bug.cgi?id=14827

Reported-at: https://bugzilla.redhat.com/1967882
Signed-off-by: Dumitru Ceara <***@redhat.com>
---
controller/lflow-cache.c | 68 ++++++++++++++++++++++-------------
tests/ovn-lflow-cache.at | 76 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 119 insertions(+), 25 deletions(-)

diff --git a/controller/lflow-cache.c b/controller/lflow-cache.c
index 56ddf1075..11935e7ae 100644
--- a/controller/lflow-cache.c
+++ b/controller/lflow-cache.c
@@ -40,6 +40,7 @@ COVERAGE_DEFINE(lflow_cache_delete);
COVERAGE_DEFINE(lflow_cache_full);
COVERAGE_DEFINE(lflow_cache_mem_full);
COVERAGE_DEFINE(lflow_cache_made_room);
+COVERAGE_DEFINE(lflow_cache_trim);

static const char *lflow_cache_type_names[LCACHE_T_MAX] = {
[LCACHE_T_CONJ_ID] = "cache-conj-id",
@@ -49,6 +50,8 @@ static const char *lflow_cache_type_names[LCACHE_T_MAX] = {

struct lflow_cache {
struct hmap entries[LCACHE_T_MAX];
+ uint32_t n_entries;
+ uint32_t high_watermark;
uint32_t capacity;
uint64_t mem_usage;
uint64_t max_mem_usage;
@@ -63,7 +66,8 @@ struct lflow_cache_entry {
struct lflow_cache_value value;
};

-static size_t lflow_cache_n_entries__(const struct lflow_cache *lc);
+#define LFLOW_CACHE_TRIM_LIMIT 10000
+
static bool lflow_cache_make_room__(struct lflow_cache *lc,
enum lflow_cache_type type);
static struct lflow_cache_value *lflow_cache_add__(
@@ -71,18 +75,18 @@ static struct lflow_cache_value *lflow_cache_add__(
enum lflow_cache_type type, uint64_t value_size);
static void lflow_cache_delete__(struct lflow_cache *lc,
struct lflow_cache_entry *lce);
+static void lflow_cache_trim__(struct lflow_cache *lc, bool force);

struct lflow_cache *
lflow_cache_create(void)
{
- struct lflow_cache *lc = xmalloc(sizeof *lc);
+ struct lflow_cache *lc = xzalloc(sizeof *lc);

for (size_t i = 0; i < LCACHE_T_MAX; i++) {
hmap_init(&lc->entries[i]);
}

lc->enabled = true;
- lc->mem_usage = 0;
return lc;
}

@@ -101,12 +105,8 @@ lflow_cache_flush(struct lflow_cache *lc)
HMAP_FOR_EACH_SAFE (lce, lce_next, node, &lc->entries[i]) {
lflow_cache_delete__(lc, lce);
}
- hmap_shrink(&lc->entries[i]);
}
-
-#if HAVE_DECL_MALLOC_TRIM
- malloc_trim(0);
-#endif
+ lflow_cache_trim__(lc, true);
}

void
@@ -134,7 +134,7 @@ lflow_cache_enable(struct lflow_cache *lc, bool enabled, uint32_t capacity,
uint64_t max_mem_usage = max_mem_usage_kb * 1024;

if ((lc->enabled && !enabled)
- || capacity < lflow_cache_n_entries__(lc)
+ || capacity < lc->n_entries
|| max_mem_usage < lc->mem_usage) {
lflow_cache_flush(lc);
}
@@ -164,6 +164,9 @@ lflow_cache_get_stats(const struct lflow_cache *lc, struct ds *output)

ds_put_format(output, "Enabled: %s\n",
lflow_cache_is_enabled(lc) ? "true" : "false");
+ ds_put_format(output, "%-16s: %"PRIu32"\n", "high-watermark",
+ lc->high_watermark);
+ ds_put_format(output, "%-16s: %"PRIu32"\n", "total", lc->n_entries);
for (size_t i = 0; i < LCACHE_T_MAX; i++) {
ds_put_format(output, "%-16s: %"PRIuSIZE"\n",
lflow_cache_type_names[i],
@@ -254,20 +257,10 @@ lflow_cache_delete(struct lflow_cache *lc, const struct uuid *lflow_uuid)
COVERAGE_INC(lflow_cache_delete);
lflow_cache_delete__(lc, CONTAINER_OF(lcv, struct lflow_cache_entry,
value));
+ lflow_cache_trim__(lc, false);
}
}

-static size_t
-lflow_cache_n_entries__(const struct lflow_cache *lc)
-{
- size_t n_entries = 0;
-
- for (size_t i = 0; i < LCACHE_T_MAX; i++) {
- n_entries += hmap_count(&lc->entries[i]);
- }
- return n_entries;
-}
-
static bool
lflow_cache_make_room__(struct lflow_cache *lc, enum lflow_cache_type type)
{
@@ -319,7 +312,7 @@ lflow_cache_add__(struct lflow_cache *lc, const struct uuid *lflow_uuid,
return NULL;
}

- if (lflow_cache_n_entries__(lc) == lc->capacity) {
+ if (lc->n_entries == lc->capacity) {
if (!lflow_cache_make_room__(lc, type)) {
COVERAGE_INC(lflow_cache_full);
return NULL;
@@ -336,17 +329,17 @@ lflow_cache_add__(struct lflow_cache *lc, const struct uuid *lflow_uuid,
lce->size = size;
lce->value.type = type;
hmap_insert(&lc->entries[type], &lce->node, uuid_hash(lflow_uuid));
+ lc->n_entries++;
+ lc->high_watermark = MAX(lc->high_watermark, lc->n_entries);
return &lce->value;
}

static void
lflow_cache_delete__(struct lflow_cache *lc, struct lflow_cache_entry *lce)
{
- if (!lce) {
- return;
- }
-
+ ovs_assert(lc->n_entries > 0);
hmap_remove(&lc->entries[lce->value.type], &lce->node);
+ lc->n_entries--;
switch (lce->value.type) {
case LCACHE_T_NONE:
OVS_NOT_REACHED();
@@ -369,3 +362,28 @@ lflow_cache_delete__(struct lflow_cache *lc, struct lflow_cache_entry *lce)
lc->mem_usage -= lce->size;
free(lce);
}
+
+static void
+lflow_cache_trim__(struct lflow_cache *lc, bool force)
+{
+ /* Trim if we had at least 'TRIM_LIMIT' elements at some point and if the
+ * current usage is less than half of 'high_watermark'.
+ */
+ ovs_assert(lc->high_watermark >= lc->n_entries);
+ if (!force
+ && (lc->high_watermark <= LFLOW_CACHE_TRIM_LIMIT
+ || lc->n_entries > lc->high_watermark / 2)) {
+ return;
+ }
+
+ COVERAGE_INC(lflow_cache_trim);
+ for (size_t i = 0; i < LCACHE_T_MAX; i++) {
+ hmap_shrink(&lc->entries[i]);
+ }
+
+#if HAVE_DECL_MALLOC_TRIM
+ malloc_trim(0);
+#endif
+
+ lc->high_watermark = lc->n_entries;
+}
diff --git a/tests/ovn-lflow-cache.at b/tests/ovn-lflow-cache.at
index e5e9ed1e8..df483c9d7 100644
--- a/tests/ovn-lflow-cache.at
+++ b/tests/ovn-lflow-cache.at
@@ -12,6 +12,8 @@ AT_CHECK(
add matches 3 | grep -v 'Mem usage (KB)'],
[0], [dnl
Enabled: true
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -21,6 +23,8 @@ LOOKUP:
conj_id_ofs: 1
type: conj-id
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 1
cache-expr : 0
cache-matches : 0
@@ -30,6 +34,8 @@ LOOKUP:
conj_id_ofs: 2
type: expr
Enabled: true
+high-watermark : 2
+total : 2
cache-conj-id : 1
cache-expr : 1
cache-matches : 0
@@ -39,6 +45,8 @@ LOOKUP:
conj_id_ofs: 0
type: matches
Enabled: true
+high-watermark : 3
+total : 3
cache-conj-id : 1
cache-expr : 1
cache-matches : 1
@@ -54,6 +62,8 @@ AT_CHECK(
add-del matches 3 | grep -v 'Mem usage (KB)'],
[0], [dnl
Enabled: true
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -66,6 +76,8 @@ DELETE
LOOKUP:
not found
Enabled: true
+high-watermark : 1
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -78,6 +90,8 @@ DELETE
LOOKUP:
not found
Enabled: true
+high-watermark : 1
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -90,6 +104,8 @@ DELETE
LOOKUP:
not found
Enabled: true
+high-watermark : 1
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -105,6 +121,8 @@ AT_CHECK(
add matches 3 | grep -v 'Mem usage (KB)'],
[0], [dnl
Enabled: false
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -113,6 +131,8 @@ ADD conj-id:
LOOKUP:
not found
Enabled: false
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -121,6 +141,8 @@ ADD expr:
LOOKUP:
not found
Enabled: false
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -129,6 +151,8 @@ ADD matches:
LOOKUP:
not found
Enabled: false
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -153,6 +177,8 @@ AT_CHECK(
flush | grep -v 'Mem usage (KB)'],
[0], [dnl
Enabled: true
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -162,6 +188,8 @@ LOOKUP:
conj_id_ofs: 1
type: conj-id
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 1
cache-expr : 0
cache-matches : 0
@@ -171,6 +199,8 @@ LOOKUP:
conj_id_ofs: 2
type: expr
Enabled: true
+high-watermark : 2
+total : 2
cache-conj-id : 1
cache-expr : 1
cache-matches : 0
@@ -180,11 +210,15 @@ LOOKUP:
conj_id_ofs: 0
type: matches
Enabled: true
+high-watermark : 3
+total : 3
cache-conj-id : 1
cache-expr : 1
cache-matches : 1
DISABLE
Enabled: false
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -193,6 +227,8 @@ ADD conj-id:
LOOKUP:
not found
Enabled: false
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -201,6 +237,8 @@ ADD expr:
LOOKUP:
not found
Enabled: false
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -209,11 +247,15 @@ ADD matches:
LOOKUP:
not found
Enabled: false
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
ENABLE
Enabled: true
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -223,6 +265,8 @@ LOOKUP:
conj_id_ofs: 7
type: conj-id
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 1
cache-expr : 0
cache-matches : 0
@@ -232,6 +276,8 @@ LOOKUP:
conj_id_ofs: 8
type: expr
Enabled: true
+high-watermark : 2
+total : 2
cache-conj-id : 1
cache-expr : 1
cache-matches : 0
@@ -241,11 +287,15 @@ LOOKUP:
conj_id_ofs: 0
type: matches
Enabled: true
+high-watermark : 3
+total : 3
cache-conj-id : 1
cache-expr : 1
cache-matches : 1
FLUSH
Enabled: true
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -270,6 +320,8 @@ AT_CHECK(
add matches 10 | grep -v 'Mem usage (KB)'],
[0], [dnl
Enabled: true
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -279,6 +331,8 @@ LOOKUP:
conj_id_ofs: 1
type: conj-id
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 1
cache-expr : 0
cache-matches : 0
@@ -288,6 +342,8 @@ LOOKUP:
conj_id_ofs: 2
type: expr
Enabled: true
+high-watermark : 2
+total : 2
cache-conj-id : 1
cache-expr : 1
cache-matches : 0
@@ -297,6 +353,8 @@ LOOKUP:
conj_id_ofs: 0
type: matches
Enabled: true
+high-watermark : 3
+total : 3
cache-conj-id : 1
cache-expr : 1
cache-matches : 1
@@ -305,6 +363,8 @@ dnl
dnl Max capacity smaller than current usage, cache should be flushed.
dnl
Enabled: true
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -314,6 +374,8 @@ LOOKUP:
conj_id_ofs: 4
type: conj-id
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 1
cache-expr : 0
cache-matches : 0
@@ -327,6 +389,8 @@ dnl Cache is full but we can evict the conj-id entry because we're adding
dnl an expr one.
dnl
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 0
cache-expr : 1
cache-matches : 0
@@ -340,6 +404,8 @@ dnl Cache is full but we can evict the expr entry because we're adding
dnl a matches one.
dnl
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 0
cache-expr : 0
cache-matches : 1
@@ -352,6 +418,8 @@ dnl Cache is full and we're adding a conj-id entry so we shouldn't evict
dnl anything else.
dnl
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 0
cache-expr : 0
cache-matches : 1
@@ -361,6 +429,8 @@ dnl Max memory usage smaller than current memory usage, cache should be
dnl flushed.
dnl
Enabled: true
+high-watermark : 0
+total : 0
cache-conj-id : 0
cache-expr : 0
cache-matches : 0
@@ -370,6 +440,8 @@ LOOKUP:
conj_id_ofs: 8
type: conj-id
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 1
cache-expr : 0
cache-matches : 0
@@ -382,6 +454,8 @@ dnl Cache is full and we're adding a cache entry that would go over the max
dnl memory limit so adding should fail.
dnl
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 1
cache-expr : 0
cache-matches : 0
@@ -394,6 +468,8 @@ dnl Cache is full and we're adding a cache entry that would go over the max
dnl memory limit so adding should fail.
dnl
Enabled: true
+high-watermark : 1
+total : 1
cache-conj-id : 1
cache-expr : 0
cache-matches : 0
--
2.27.0
Dan Williams
2021-06-04 14:34:28 UTC
Permalink
Post by Dumitru Ceara
Due to a bug in glibc [0], M_TRIM_THRESHOLD (default 128KB) is not
honored.  The lflow cache is one of the largest memory consumers in
ovn-controller and it used to trim memory whenever the cache was
flushed.  However, that required periodic intervention from the CMS
side.
Instead, we now automatically trim memory every time the lflow cache
utilization decreases by 50%, with a threshold of at least
LFLOW_CACHE_TRIM_LIMIT (10000) elements in the cache.
[0] https://sourceware.org/bugzilla/show_bug.cgi?id=14827
Reported-at: https://bugzilla.redhat.com/1967882
---
 controller/lflow-cache.c | 68 ++++++++++++++++++++++-------------
 tests/ovn-lflow-cache.at | 76
++++++++++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+), 25 deletions(-)
diff --git a/controller/lflow-cache.c b/controller/lflow-cache.c
index 56ddf1075..11935e7ae 100644
--- a/controller/lflow-cache.c
+++ b/controller/lflow-cache.c
@@ -40,6 +40,7 @@ COVERAGE_DEFINE(lflow_cache_delete);
 COVERAGE_DEFINE(lflow_cache_full);
 COVERAGE_DEFINE(lflow_cache_mem_full);
 COVERAGE_DEFINE(lflow_cache_made_room);
+COVERAGE_DEFINE(lflow_cache_trim);
 
 static const char *lflow_cache_type_names[LCACHE_T_MAX] = {
     [LCACHE_T_CONJ_ID] = "cache-conj-id",
@@ -49,6 +50,8 @@ static const char
*lflow_cache_type_names[LCACHE_T_MAX] = {
 
 struct lflow_cache {
     struct hmap entries[LCACHE_T_MAX];
+    uint32_t n_entries;
+    uint32_t high_watermark;
     uint32_t capacity;
     uint64_t mem_usage;
     uint64_t max_mem_usage;
@@ -63,7 +66,8 @@ struct lflow_cache_entry {
     struct lflow_cache_value value;
 };
 
-static size_t lflow_cache_n_entries__(const struct lflow_cache *lc);
+#define LFLOW_CACHE_TRIM_LIMIT 10000
+
 static bool lflow_cache_make_room__(struct lflow_cache *lc,
                                     enum lflow_cache_type type);
 static struct lflow_cache_value *lflow_cache_add__(
@@ -71,18 +75,18 @@ static struct lflow_cache_value
*lflow_cache_add__(
     enum lflow_cache_type type, uint64_t value_size);
 static void lflow_cache_delete__(struct lflow_cache *lc,
                                  struct lflow_cache_entry *lce);
+static void lflow_cache_trim__(struct lflow_cache *lc, bool force);
 
 struct lflow_cache *
 lflow_cache_create(void)
 {
-    struct lflow_cache *lc = xmalloc(sizeof *lc);
+    struct lflow_cache *lc = xzalloc(sizeof *lc);
 
     for (size_t i = 0; i < LCACHE_T_MAX; i++) {
         hmap_init(&lc->entries[i]);
     }
 
     lc->enabled = true;
-    lc->mem_usage = 0;
     return lc;
 }
 
@@ -101,12 +105,8 @@ lflow_cache_flush(struct lflow_cache *lc)
         HMAP_FOR_EACH_SAFE (lce, lce_next, node, &lc->entries[i]) {
             lflow_cache_delete__(lc, lce);
         }
-        hmap_shrink(&lc->entries[i]);
     }
-
-#if HAVE_DECL_MALLOC_TRIM
-    malloc_trim(0);
-#endif
+    lflow_cache_trim__(lc, true);
 }
 
 void
@@ -134,7 +134,7 @@ lflow_cache_enable(struct lflow_cache *lc, bool
enabled, uint32_t capacity,
     uint64_t max_mem_usage = max_mem_usage_kb * 1024;
 
     if ((lc->enabled && !enabled)
-            || capacity < lflow_cache_n_entries__(lc)
+            || capacity < lc->n_entries
             || max_mem_usage < lc->mem_usage) {
         lflow_cache_flush(lc);
     }
@@ -164,6 +164,9 @@ lflow_cache_get_stats(const struct lflow_cache *lc, struct ds *output)
 
     ds_put_format(output, "Enabled: %s\n",
                   lflow_cache_is_enabled(lc) ? "true" : "false");
+    ds_put_format(output, "%-16s: %"PRIu32"\n", "high-watermark",
+                  lc->high_watermark);
+    ds_put_format(output, "%-16s: %"PRIu32"\n", "total", lc-
Post by Dumitru Ceara
n_entries);
     for (size_t i = 0; i < LCACHE_T_MAX; i++) {
         ds_put_format(output, "%-16s: %"PRIuSIZE"\n",
                       lflow_cache_type_names[i],
@@ -254,20 +257,10 @@ lflow_cache_delete(struct lflow_cache *lc,
const struct uuid *lflow_uuid)
         COVERAGE_INC(lflow_cache_delete);
         lflow_cache_delete__(lc, CONTAINER_OF(lcv, struct
lflow_cache_entry,
                                               value));
+        lflow_cache_trim__(lc, false);
     }
 }
 
-static size_t
-lflow_cache_n_entries__(const struct lflow_cache *lc)
-{
-    size_t n_entries = 0;
-
-    for (size_t i = 0; i < LCACHE_T_MAX; i++) {
-        n_entries += hmap_count(&lc->entries[i]);
-    }
-    return n_entries;
-}
-
 static bool
 lflow_cache_make_room__(struct lflow_cache *lc, enum
lflow_cache_type type)
 {
@@ -319,7 +312,7 @@ lflow_cache_add__(struct lflow_cache *lc, const
struct uuid *lflow_uuid,
         return NULL;
     }
 
-    if (lflow_cache_n_entries__(lc) == lc->capacity) {
+    if (lc->n_entries == lc->capacity) {
         if (!lflow_cache_make_room__(lc, type)) {
             COVERAGE_INC(lflow_cache_full);
             return NULL;
@@ -336,17 +329,17 @@ lflow_cache_add__(struct lflow_cache *lc, const
struct uuid *lflow_uuid,
     lce->size = size;
     lce->value.type = type;
     hmap_insert(&lc->entries[type], &lce->node,
uuid_hash(lflow_uuid));
+    lc->n_entries++;
+    lc->high_watermark = MAX(lc->high_watermark, lc->n_entries);
     return &lce->value;
 }
 
 static void
 lflow_cache_delete__(struct lflow_cache *lc, struct
lflow_cache_entry *lce)
 {
-    if (!lce) {
-        return;
-    }
-
+    ovs_assert(lc->n_entries > 0);
     hmap_remove(&lc->entries[lce->value.type], &lce->node);
+    lc->n_entries--;
     switch (lce->value.type) {
         OVS_NOT_REACHED();
@@ -369,3 +362,28 @@ lflow_cache_delete__(struct lflow_cache *lc,
struct lflow_cache_entry *lce)
     lc->mem_usage -= lce->size;
     free(lce);
 }
+
+static void
+lflow_cache_trim__(struct lflow_cache *lc, bool force)
+{
+    /* Trim if we had at least 'TRIM_LIMIT' elements at some point
and if the
+     * current usage is less than half of 'high_watermark'.
+     */
+    ovs_assert(lc->high_watermark >= lc->n_entries);
+    if (!force
+            && (lc->high_watermark <= LFLOW_CACHE_TRIM_LIMIT
+                || lc->n_entries > lc->high_watermark / 2)) {
Did you mean "lc->n_entries < lc->high_watermark / 2" here? Maybe I
mis-understood though.

Dan
Post by Dumitru Ceara
+        return;
+    }
+
+    COVERAGE_INC(lflow_cache_trim);
+    for (size_t i = 0; i < LCACHE_T_MAX; i++) {
+        hmap_shrink(&lc->entries[i]);
+    }
+
+#if HAVE_DECL_MALLOC_TRIM
+    malloc_trim(0);
+#endif
+
+    lc->high_watermark = lc->n_entries;
+}
diff --git a/tests/ovn-lflow-cache.at b/tests/ovn-lflow-cache.at
index e5e9ed1e8..df483c9d7 100644
--- a/tests/ovn-lflow-cache.at
+++ b/tests/ovn-lflow-cache.at
@@ -12,6 +12,8 @@ AT_CHECK(
         add matches 3 | grep -v 'Mem usage (KB)'],
     [0], [dnl
 Enabled: true
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   conj_id_ofs: 1
   type: conj-id
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 1
 cache-expr      : 0
 cache-matches   : 0
   conj_id_ofs: 2
   type: expr
 Enabled: true
+high-watermark  : 2
+total           : 2
 cache-conj-id   : 1
 cache-expr      : 1
 cache-matches   : 0
   conj_id_ofs: 0
   type: matches
 Enabled: true
+high-watermark  : 3
+total           : 3
 cache-conj-id   : 1
 cache-expr      : 1
 cache-matches   : 1
@@ -54,6 +62,8 @@ AT_CHECK(
         add-del matches 3 | grep -v 'Mem usage (KB)'],
     [0], [dnl
 Enabled: true
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
@@ -66,6 +76,8 @@ DELETE
   not found
 Enabled: true
+high-watermark  : 1
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
@@ -78,6 +90,8 @@ DELETE
   not found
 Enabled: true
+high-watermark  : 1
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
@@ -90,6 +104,8 @@ DELETE
   not found
 Enabled: true
+high-watermark  : 1
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
@@ -105,6 +121,8 @@ AT_CHECK(
         add matches 3 | grep -v 'Mem usage (KB)'],
     [0], [dnl
 Enabled: false
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   not found
 Enabled: false
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   not found
 Enabled: false
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   not found
 Enabled: false
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
@@ -153,6 +177,8 @@ AT_CHECK(
         flush | grep -v 'Mem usage (KB)'],
     [0], [dnl
 Enabled: true
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   conj_id_ofs: 1
   type: conj-id
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 1
 cache-expr      : 0
 cache-matches   : 0
   conj_id_ofs: 2
   type: expr
 Enabled: true
+high-watermark  : 2
+total           : 2
 cache-conj-id   : 1
 cache-expr      : 1
 cache-matches   : 0
   conj_id_ofs: 0
   type: matches
 Enabled: true
+high-watermark  : 3
+total           : 3
 cache-conj-id   : 1
 cache-expr      : 1
 cache-matches   : 1
 DISABLE
 Enabled: false
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   not found
 Enabled: false
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   not found
 Enabled: false
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   not found
 Enabled: false
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
 ENABLE
 Enabled: true
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   conj_id_ofs: 7
   type: conj-id
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 1
 cache-expr      : 0
 cache-matches   : 0
   conj_id_ofs: 8
   type: expr
 Enabled: true
+high-watermark  : 2
+total           : 2
 cache-conj-id   : 1
 cache-expr      : 1
 cache-matches   : 0
   conj_id_ofs: 0
   type: matches
 Enabled: true
+high-watermark  : 3
+total           : 3
 cache-conj-id   : 1
 cache-expr      : 1
 cache-matches   : 1
 FLUSH
 Enabled: true
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
@@ -270,6 +320,8 @@ AT_CHECK(
         add matches 10 | grep -v 'Mem usage (KB)'],
     [0], [dnl
 Enabled: true
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   conj_id_ofs: 1
   type: conj-id
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 1
 cache-expr      : 0
 cache-matches   : 0
   conj_id_ofs: 2
   type: expr
 Enabled: true
+high-watermark  : 2
+total           : 2
 cache-conj-id   : 1
 cache-expr      : 1
 cache-matches   : 0
   conj_id_ofs: 0
   type: matches
 Enabled: true
+high-watermark  : 3
+total           : 3
 cache-conj-id   : 1
 cache-expr      : 1
 cache-matches   : 1
@@ -305,6 +363,8 @@ dnl
 dnl Max capacity smaller than current usage, cache should be
flushed.
 dnl
 Enabled: true
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   conj_id_ofs: 4
   type: conj-id
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 1
 cache-expr      : 0
 cache-matches   : 0
@@ -327,6 +389,8 @@ dnl Cache is full but we can evict the conj-id
entry because we're adding
 dnl an expr one.
 dnl
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 0
 cache-expr      : 1
 cache-matches   : 0
@@ -340,6 +404,8 @@ dnl Cache is full but we can evict the expr entry because we're adding
 dnl a matches one.
 dnl
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 1
@@ -352,6 +418,8 @@ dnl Cache is full and we're adding a conj-id
entry so we shouldn't evict
 dnl anything else.
 dnl
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 1
@@ -361,6 +429,8 @@ dnl Max memory usage smaller than current memory usage, cache should be
 dnl flushed.
 dnl
 Enabled: true
+high-watermark  : 0
+total           : 0
 cache-conj-id   : 0
 cache-expr      : 0
 cache-matches   : 0
   conj_id_ofs: 8
   type: conj-id
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 1
 cache-expr      : 0
 cache-matches   : 0
@@ -382,6 +454,8 @@ dnl Cache is full and we're adding a cache entry
that would go over the max
 dnl memory limit so adding should fail.
 dnl
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 1
 cache-expr      : 0
 cache-matches   : 0
@@ -394,6 +468,8 @@ dnl Cache is full and we're adding a cache entry
that would go over the max
 dnl memory limit so adding should fail.
 dnl
 Enabled: true
+high-watermark  : 1
+total           : 1
 cache-conj-id   : 1
 cache-expr      : 0
 cache-matches   : 0
Dumitru Ceara
2021-06-04 14:57:04 UTC
Permalink
Post by Dan Williams
Post by Dumitru Ceara
Due to a bug in glibc [0], M_TRIM_THRESHOLD (default 128KB) is not
honored.  The lflow cache is one of the largest memory consumers in
ovn-controller and it used to trim memory whenever the cache was
flushed.  However, that required periodic intervention from the CMS
side.
Instead, we now automatically trim memory every time the lflow cache
utilization decreases by 50%, with a threshold of at least
LFLOW_CACHE_TRIM_LIMIT (10000) elements in the cache.
[0] https://sourceware.org/bugzilla/show_bug.cgi?id=14827
Reported-at: https://bugzilla.redhat.com/1967882
---
 controller/lflow-cache.c | 68 ++++++++++++++++++++++-------------
 tests/ovn-lflow-cache.at | 76
++++++++++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+), 25 deletions(-)
diff --git a/controller/lflow-cache.c b/controller/lflow-cache.c
index 56ddf1075..11935e7ae 100644
--- a/controller/lflow-cache.c
+++ b/controller/lflow-cache.c
@@ -40,6 +40,7 @@ COVERAGE_DEFINE(lflow_cache_delete);
 COVERAGE_DEFINE(lflow_cache_full);
 COVERAGE_DEFINE(lflow_cache_mem_full);
 COVERAGE_DEFINE(lflow_cache_made_room);
+COVERAGE_DEFINE(lflow_cache_trim);
 
 static const char *lflow_cache_type_names[LCACHE_T_MAX] = {
     [LCACHE_T_CONJ_ID] = "cache-conj-id",
@@ -49,6 +50,8 @@ static const char
*lflow_cache_type_names[LCACHE_T_MAX] = {
 
 struct lflow_cache {
     struct hmap entries[LCACHE_T_MAX];
+    uint32_t n_entries;
+    uint32_t high_watermark;
     uint32_t capacity;
     uint64_t mem_usage;
     uint64_t max_mem_usage;
@@ -63,7 +66,8 @@ struct lflow_cache_entry {
     struct lflow_cache_value value;
 };
 
-static size_t lflow_cache_n_entries__(const struct lflow_cache *lc);
+#define LFLOW_CACHE_TRIM_LIMIT 10000
+
 static bool lflow_cache_make_room__(struct lflow_cache *lc,
                                     enum lflow_cache_type type);
 static struct lflow_cache_value *lflow_cache_add__(
@@ -71,18 +75,18 @@ static struct lflow_cache_value
*lflow_cache_add__(
     enum lflow_cache_type type, uint64_t value_size);
 static void lflow_cache_delete__(struct lflow_cache *lc,
                                  struct lflow_cache_entry *lce);
+static void lflow_cache_trim__(struct lflow_cache *lc, bool force);
 
 struct lflow_cache *
 lflow_cache_create(void)
 {
-    struct lflow_cache *lc = xmalloc(sizeof *lc);
+    struct lflow_cache *lc = xzalloc(sizeof *lc);
 
     for (size_t i = 0; i < LCACHE_T_MAX; i++) {
         hmap_init(&lc->entries[i]);
     }
 
     lc->enabled = true;
-    lc->mem_usage = 0;
     return lc;
 }
 
@@ -101,12 +105,8 @@ lflow_cache_flush(struct lflow_cache *lc)
         HMAP_FOR_EACH_SAFE (lce, lce_next, node, &lc->entries[i]) {
             lflow_cache_delete__(lc, lce);
         }
-        hmap_shrink(&lc->entries[i]);
     }
-
-#if HAVE_DECL_MALLOC_TRIM
-    malloc_trim(0);
-#endif
+    lflow_cache_trim__(lc, true);
 }
 
 void
@@ -134,7 +134,7 @@ lflow_cache_enable(struct lflow_cache *lc, bool
enabled, uint32_t capacity,
     uint64_t max_mem_usage = max_mem_usage_kb * 1024;
 
     if ((lc->enabled && !enabled)
-            || capacity < lflow_cache_n_entries__(lc)
+            || capacity < lc->n_entries
             || max_mem_usage < lc->mem_usage) {
         lflow_cache_flush(lc);
     }
@@ -164,6 +164,9 @@ lflow_cache_get_stats(const struct lflow_cache
*lc, struct ds *output)
 
     ds_put_format(output, "Enabled: %s\n",
                   lflow_cache_is_enabled(lc) ? "true" : "false");
+    ds_put_format(output, "%-16s: %"PRIu32"\n", "high-watermark",
+                  lc->high_watermark);
+    ds_put_format(output, "%-16s: %"PRIu32"\n", "total", lc-
Post by Dumitru Ceara
n_entries);
     for (size_t i = 0; i < LCACHE_T_MAX; i++) {
         ds_put_format(output, "%-16s: %"PRIuSIZE"\n",
                       lflow_cache_type_names[i],
@@ -254,20 +257,10 @@ lflow_cache_delete(struct lflow_cache *lc,
const struct uuid *lflow_uuid)
         COVERAGE_INC(lflow_cache_delete);
         lflow_cache_delete__(lc, CONTAINER_OF(lcv, struct
lflow_cache_entry,
                                               value));
+        lflow_cache_trim__(lc, false);
     }
 }
 
-static size_t
-lflow_cache_n_entries__(const struct lflow_cache *lc)
-{
-    size_t n_entries = 0;
-
-    for (size_t i = 0; i < LCACHE_T_MAX; i++) {
-        n_entries += hmap_count(&lc->entries[i]);
-    }
-    return n_entries;
-}
-
 static bool
 lflow_cache_make_room__(struct lflow_cache *lc, enum
lflow_cache_type type)
 {
@@ -319,7 +312,7 @@ lflow_cache_add__(struct lflow_cache *lc, const
struct uuid *lflow_uuid,
         return NULL;
     }
 
-    if (lflow_cache_n_entries__(lc) == lc->capacity) {
+    if (lc->n_entries == lc->capacity) {
         if (!lflow_cache_make_room__(lc, type)) {
             COVERAGE_INC(lflow_cache_full);
             return NULL;
@@ -336,17 +329,17 @@ lflow_cache_add__(struct lflow_cache *lc, const
struct uuid *lflow_uuid,
     lce->size = size;
     lce->value.type = type;
     hmap_insert(&lc->entries[type], &lce->node,
uuid_hash(lflow_uuid));
+    lc->n_entries++;
+    lc->high_watermark = MAX(lc->high_watermark, lc->n_entries);
     return &lce->value;
 }
 
 static void
 lflow_cache_delete__(struct lflow_cache *lc, struct
lflow_cache_entry *lce)
 {
-    if (!lce) {
-        return;
-    }
-
+    ovs_assert(lc->n_entries > 0);
     hmap_remove(&lc->entries[lce->value.type], &lce->node);
+    lc->n_entries--;
     switch (lce->value.type) {
         OVS_NOT_REACHED();
@@ -369,3 +362,28 @@ lflow_cache_delete__(struct lflow_cache *lc,
struct lflow_cache_entry *lce)
     lc->mem_usage -= lce->size;
     free(lce);
 }
+
+static void
+lflow_cache_trim__(struct lflow_cache *lc, bool force)
+{
+    /* Trim if we had at least 'TRIM_LIMIT' elements at some point
and if the
+     * current usage is less than half of 'high_watermark'.
+     */
+    ovs_assert(lc->high_watermark >= lc->n_entries);
+    if (!force
+            && (lc->high_watermark <= LFLOW_CACHE_TRIM_LIMIT
+                || lc->n_entries > lc->high_watermark / 2)) {
Did you mean "lc->n_entries < lc->high_watermark / 2" here? Maybe I
mis-understood though.
No, if 'force' is false we only want to trim if we are below 50% of the
last high watermark. Otherwise return early without trimming.
Post by Dan Williams
Dan
Thanks,
Dumitru

Loading...