Memory usage fix


#1

Heads up!

We’ve been digging around some memory usage issues for some time and we recently found and fixed some problems. There was a non-obvious memory leak in the DBChunk class, and there was an issue with the linux allocator not behaving in a manner we like - which caused memory fragmentation and much higher memory usage than needed. We’ll try to update the binaries soon. In the meantime, for those who build by hand, here’s the diff:

Index: include/array/MemArray.h
===================================================================
--- include/array/MemArray.h	(revision 6147)
+++ include/array/MemArray.h	(working copy)
@@ -415,7 +415,19 @@
 
         virtual bool isTemporary() const;
 
+        /**
+         * @see ConstChunk::isMaterialized
+         */
         bool isMaterialized() const;
+
+        /**
+         * @see ConstChunk::materialize
+         */
+        ConstChunk* materialize() const
+        {
+            return const_cast<MemChunk*> (this);
+        }
+
         bool isSparse() const;
         bool isRLE() const;
         void setSparse(bool sparse);
Index: include/array/Array.h
===================================================================
--- include/array/Array.h	(revision 6147)
+++ include/array/Array.h	(working copy)
@@ -484,17 +484,22 @@
    virtual const AttributeDesc& getAttributeDesc() const = 0;
 
    /**
-    * Count number of present (non-empty) elements in the chunk
+    * Count number of present (non-empty) elements in the chunk.
+    * Materialized subclasses that do not use the field materializedChunk might want to provide their own implementation.
+    * @return the number of non-empty elements in the chunk.
     */
    virtual size_t count() const;
 
    /**
-    * Check if count of non-empty elements in the chunk is known
+    * Check if count of non-empty elements in the chunk is known.
+    * Materialized subclasses that do not use the field materializedChunk might want to provide their own implementation.
+    * @return true if count() will run in constant time; false otherwise.
     */
    virtual bool isCountKnown() const;
 
    /**
-    * Get numer of element in the chunk
+    * Get numer of logical elements in the chunk.
+    * @return the product of the chunk sizes in all dimensions.
     */
    size_t getNumberOfElements(bool withOverlap) const;
 
@@ -542,7 +547,11 @@
     virtual boost::shared_ptr<ConstRLEEmptyBitmap> getEmptyBitmap() const;
     virtual ConstChunk const* getBitmapChunk() const;
 
-    ConstChunk* materialize() const;
+    /**
+     * Compute and place the chunk data in memory (if needed) and return a pointer to it.
+     * @return a pointer to a chunk object that is materialized; may be a pointer to this.
+     */
+    virtual ConstChunk* materialize() const;
 
     virtual void overrideTileMode(bool) {}
     
@@ -550,6 +559,10 @@
     ConstChunk();
     virtual ~ConstChunk();
     
+    /**
+     * A pointer to a materialized copy of this chunk. Deallocated on destruction. Used as part of materialize() and other routines like count().
+     * Note that not all subclasses use this field. Note also that DBChunk objects can exist indefinitely without being destroyed.
+     */
     MemChunk* materializedChunk;
     boost::shared_ptr<ConstArrayIterator> emptyIterator;
 };
Index: src/smgr/io/InternalStorage.h
===================================================================
--- src/smgr/io/InternalStorage.h	(revision 6147)
+++ src/smgr/io/InternalStorage.h	(working copy)
@@ -292,7 +292,20 @@
 
         virtual bool isSparse() const;
         virtual bool isRLE() const;
+
+        /**
+         * @see ConstChunk::isMaterialized
+         */
         virtual bool isMaterialized() const;
+
+        /**
+         * @see ConstChunk::materialize
+         */
+        ConstChunk* materialize() const
+        {
+            return const_cast<DBChunk*>(this);
+        }
+
         virtual void setSparse(bool sparse);
         virtual void setRLE(bool rle);

Index: src/system/SciDBConfigOptions.cpp
===================================================================
--- src/system/SciDBConfigOptions.cpp	(revision 6147)
+++ src/system/SciDBConfigOptions.cpp	(working copy)
@@ -155,9 +155,9 @@
                 false, false)
         (CONFIG_MAX_MEMORY_LIMIT, 0, "max-memory-limit", "MAX_MEMORY_LIMIT", "", Config::INTEGER, "Maximum amount of memory the scidb process can take up (megabytes)", -1, false)
 
-        (CONFIG_SMALL_MEMALLOC_SIZE, 0, "small-memalloc-size", "SMALL-MEMALLOC-SIZE", "", Config::INTEGER, "Maximum size of a memory allocation request which is considered small (in bytes). Larger memory allocation requests may be allocated according to a different policy.", -1, false)
+        (CONFIG_SMALL_MEMALLOC_SIZE, 0, "small-memalloc-size", "SMALL-MEMALLOC-SIZE", "", Config::INTEGER, "Maximum size of a memory allocation request which is considered small (in bytes). Larger memory allocation requests may be allocated according to a different policy.", 64 * 1024, false)
 
-        (CONFIG_LARGE_MEMALLOC_LIMIT, 0, "large-memalloc-limit", "LARGE-MEMALLOC-LIMIT", "", Config::INTEGER, "Maximum number of large  (vs. small) memory allocations. The policy for doing large memory allocations may be different from the (default) policy used for small memory allocations. This parameter limits the number of outstanding allocations performed using the (non-default) large-size allocation policy.", -1, false)
+        (CONFIG_LARGE_MEMALLOC_LIMIT, 0, "large-memalloc-limit", "LARGE-MEMALLOC-LIMIT", "", Config::INTEGER, "Maximum number of large  (vs. small) memory allocations. The policy for doing large memory allocations may be different from the (default) policy used for small memory allocations. This parameter limits the number of outstanding allocations performed using the (non-default) large-size allocation policy.", std::numeric_limits<int>::max(), false)
     
         (CONFIG_STRICT_CACHE_LIMIT, 0, "strict-cache-limit", "STRICT_CACHE_LIMIT", "", Config::BOOLEAN, "Block thread if cache is overflown", false, false)
         (CONFIG_REPART_SEQ_SCAN_THRESHOLD, 0, "repart-seq-scan-threshold", "REPART_SEQ_SCAN_THRESHOLD", "", Config::INTEGER, "Number of chunks in array cause repart to use sequential scan through source array", 1000000, false)

#2

The patch is malformed:

~/scidb-13.3.0.6147$ patch --dry-run -p0 < ../../patches/memfix.patch
patching file include/array/MemArray.h
patching file include/array/Array.h
patching file src/smgr/io/InternalStorage.h
patch: **** malformed patch at line 103: Index: src/system/SciDBConfigOptions.cpp

You need to add a newline before line 103, as such:

         virtual void setRLE(bool rle);
Index: src/system/SciDBConfigOptions.cpp

becomes

         virtual void setRLE(bool rle);

Index: src/system/SciDBConfigOptions.cpp

Then patch stops complaining:

~/scidb-13.3.0.6147$ patch --dry-run -p0 < ../../patches/memfix2.patch
patching file include/array/MemArray.h
patching file include/array/Array.h
patching file src/smgr/io/InternalStorage.h
patching file src/system/SciDBConfigOptions.cpp

N.B. don’t forget to remove the ‘–dry-run’


#3

Thanks a lot, Douglas!

I adjusted the post above as you said. This was actually done as a series of fixes on a moving target and I made a mistake when I put the diffs together into a single patch. Thanks for checking and catching it!


#4