Move private settings and configuration into the C source

Include the board config first to be able to configure the memory management in the documented way. If not used, the defaults are used. On the other hand, there is no need to pollute the other source files with these local management settings. So, move them from the header into the C source file. Signed-off-by: Juergen Beisert <jbe@pengutronix.de>
2009-12-10 13:09:02 +01:00 · 2009-12-10 13:09:02 +01:00 · 6f1327ad3f
parent a8abb313cd
commit 6f1327ad3f
2 changed files with 459 additions and 477 deletions
--- a/common/dlmalloc.c
+++ b/common/dlmalloc.c
@ -1,10 +1,467 @@

-#include <malloc.h>
-
 #include <config.h>
+#include <malloc.h>
+
 #include <stdio.h>
 #include <module.h>

+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+    M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+      to keep before releasing via malloc_trim in free().
+
+      Automatic trimming is mainly useful in long-lived programs.
+      Because trimming via sbrk can be slow on some systems, and can
+      sometimes be wasteful (in cases where programs immediately
+      afterward allocate more large chunks) the value should be high
+      enough so that your overall system performance would improve by
+      releasing.
+
+      The trim threshold and the mmap control parameters (see below)
+      can be traded off with one another. Trimming and mmapping are
+      two different ways of releasing unused memory back to the
+      system. Between these two, it is often possible to keep
+      system-level demands of a long-lived program down to a bare
+      minimum. For example, in one test suite of sessions measuring
+      the XF86 X server on Linux, using a trim threshold of 128K and a
+      mmap threshold of 192K led to near-minimal long term resource
+      consumption.
+
+      If you are using this malloc in a long-lived program, it should
+      pay to experiment with these values.  As a rough guide, you
+      might set to a value close to the average size of a process
+      (program) running on your system.  Releasing this much memory
+      would allow such a process to run in memory.  Generally, it's
+      worth it to tune for trimming rather than memory mapping when a
+      program undergoes phases where several large chunks are
+      allocated and released in ways that can reuse each other's
+      storage, perhaps mixed with phases where there are no such
+      chunks at all.  And in well-behaved long-lived programs,
+      controlling release of large blocks via trimming versus mapping
+      is usually faster.
+
+      However, in most programs, these parameters serve mainly as
+      protection against the system-level effects of carrying around
+      massive amounts of unneeded memory. Since frequent calls to
+      sbrk, mmap, and munmap otherwise degrade performance, the default
+      parameters are set to relatively high values that serve only as
+      safeguards.
+
+      The default trim value is high enough to cause trimming only in
+      fairly extreme (by current memory consumption standards) cases.
+      It must be greater than page size to have any useful effect.  To
+      disable trimming completely, you can set to (unsigned long)(-1);
+
+
+*/
+
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+    M_TOP_PAD is the amount of extra `padding' space to allocate or
+      retain whenever sbrk is called. It is used in two ways internally:
+
+      * When sbrk is called to extend the top of the arena to satisfy
+	a new malloc request, this much padding is added to the sbrk
+	request.
+
+      * When malloc_trim is called automatically from free(),
+	it is used as the `pad' argument.
+
+      In both cases, the actual amount of padding is rounded
+      so that the end of the arena is always a system page boundary.
+
+      The main reason for using padding is to avoid calling sbrk so
+      often. Having even a small pad greatly reduces the likelihood
+      that nearly every malloc request during program start-up (or
+      after trimming) will invoke sbrk, which needlessly wastes
+      time.
+
+      Automatic rounding-up to page-size units is normally sufficient
+      to avoid measurable overhead, so the default is 0.  However, in
+      systems where sbrk is relatively slow, it can pay to increase
+      this value, at the expense of carrying around more memory than
+      the program needs.
+
+*/
+
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD (128 * 1024)
+#endif
+
+/*
+
+    M_MMAP_THRESHOLD is the request size threshold for using mmap()
+      to service a request. Requests of at least this size that cannot
+      be allocated using already-existing space will be serviced via mmap.
+      (If enough normal freed space already exists it is used instead.)
+
+      Using mmap segregates relatively large chunks of memory so that
+      they can be individually obtained and released from the host
+      system. A request serviced through mmap is never reused by any
+      other request (at least not directly; the system may just so
+      happen to remap successive requests to the same locations).
+
+      Segregating space in this way has the benefit that mmapped space
+      can ALWAYS be individually released back to the system, which
+      helps keep the system level memory demands of a long-lived
+      program low. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+
+      However, it has the disadvantages that:
+
+	 1. The space cannot be reclaimed, consolidated, and then
+	    used to service later requests, as happens with normal chunks.
+	 2. It can lead to more wastage because of mmap page alignment
+	    requirements
+	 3. It causes malloc performance to be more dependent on host
+	    system memory management support routines which may vary in
+	    implementation quality and may impose arbitrary
+	    limitations. Generally, servicing a request via normal
+	    malloc steps is faster than going through a system's mmap.
+
+      All together, these considerations should lead you to use mmap
+      only for relatively large requests.
+
+
+*/
+
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (0)
+#endif
+
+/*
+    M_MMAP_MAX is the maximum number of requests to simultaneously
+      service using mmap. This parameter exists because:
+
+	 1. Some systems have a limited number of internal tables for
+	    use by mmap.
+	 2. In most systems, overreliance on mmap can degrade overall
+	    performance.
+	 3. If a program allocates many large regions, it is probably
+	    better off using normal sbrk-based allocation routines that
+	    can reclaim and reallocate normal heap memory. Using a
+	    small value allows transition into this mode after the
+	    first few allocations.
+
+      Setting to 0 disables all use of mmap.  If HAVE_MMAP is not set,
+      the default value is 0, and attempts to set it to non-zero values
+      in mallopt will fail.
+*/
+
+/*
+  INTERNAL_SIZE_T is the word-size used for internal bookkeeping
+  of chunk sizes. On a 64-bit machine, you can reduce malloc
+  overhead by defining INTERNAL_SIZE_T to be a 32 bit `unsigned int'
+  at the expense of not being able to handle requests greater than
+  2^31. This limitation is hardly ever a concern; you are encouraged
+  to set this. However, the default version is the same as size_t.
+*/
+
+#ifndef INTERNAL_SIZE_T
+#define INTERNAL_SIZE_T size_t
+#endif
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  Some people think it should. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+
+/*   #define REALLOC_ZERO_BYTES_FREES */
+
+/*
+    Debugging:
+
+    Because freed chunks may be overwritten with link fields, this
+    malloc will often die when freed memory is overwritten by user
+    programs.  This can be very effective (albeit in an annoying way)
+    in helping track down dangling pointers.
+
+    If you compile with -DDEBUG, a number of assertion checks are
+    enabled that will catch more memory errors. You probably won't be
+    able to make much sense of the actual assertion errors, but they
+    should help you locate incorrectly overwritten memory.  The
+    checking is fairly extensive, and will slow down execution
+    noticeably. Calling malloc_stats or mallinfo with DEBUG set will
+    attempt to check every non-mmapped allocated and free chunk in the
+    course of computing the summaries. (By nature, mmapped regions
+    cannot be checked very much automatically.)
+
+    Setting DEBUG may also be helpful if you are trying to modify
+    this code. The assertions in the check routines spell out in more
+    detail the assumptions and invariants underlying the algorithms.
+
+*/
+
+#ifdef DEBUG
+/* #include <assert.h> */
+#define assert(x) ((void)0)
+#else
+#define assert(x) ((void)0)
+#endif
+
+/*
+  HAVE_MEMCPY should be defined if you are not otherwise using
+  ANSI STD C, but still have memcpy and memset in your C library
+  and want to use them in calloc and realloc. Otherwise simple
+  macro versions are defined here.
+
+  USE_MEMCPY should be defined as 1 if you actually want to
+  have memset and memcpy called. People report that the macro
+  versions are often enough faster than libc versions on many
+  systems that it is better to use them.
+
+*/
+
+#define HAVE_MEMCPY
+#define USE_MEMCPY 1
+
+#if (__STD_C || defined(HAVE_MEMCPY))
+
+#if __STD_C
+void* memset(void*, int, size_t);
+void* memcpy(void*, const void*, size_t);
+#else
+Void_t* memset();
+Void_t* memcpy();
+#endif
+#endif
+
+#if USE_MEMCPY
+
+/* The following macros are only invoked with (2n+1)-multiples of
+   INTERNAL_SIZE_T units, with a positive integer n. This is exploited
+   for fast inline execution when n is small. */
+
+#define MALLOC_ZERO(charp, nbytes)                                            \
+do {                                                                          \
+  INTERNAL_SIZE_T mzsz = (nbytes);                                            \
+  if(mzsz <= 9*sizeof(mzsz)) {                                                \
+    INTERNAL_SIZE_T* mz = (INTERNAL_SIZE_T*) (charp);                         \
+    if(mzsz >= 5*sizeof(mzsz)) {     *mz++ = 0;                               \
+				     *mz++ = 0;                               \
+      if(mzsz >= 7*sizeof(mzsz)) {   *mz++ = 0;                               \
+				     *mz++ = 0;                               \
+	if(mzsz >= 9*sizeof(mzsz)) { *mz++ = 0;                               \
+				     *mz++ = 0; }}}                           \
+				     *mz++ = 0;                               \
+				     *mz++ = 0;                               \
+				     *mz   = 0;                               \
+  } else memset((charp), 0, mzsz);                                            \
+} while(0)
+
+#define MALLOC_COPY(dest,src,nbytes)                                          \
+do {                                                                          \
+  INTERNAL_SIZE_T mcsz = (nbytes);                                            \
+  if(mcsz <= 9*sizeof(mcsz)) {                                                \
+    INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) (src);                        \
+    INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) (dest);                       \
+    if(mcsz >= 5*sizeof(mcsz)) {     *mcdst++ = *mcsrc++;                     \
+				     *mcdst++ = *mcsrc++;                     \
+      if(mcsz >= 7*sizeof(mcsz)) {   *mcdst++ = *mcsrc++;                     \
+				     *mcdst++ = *mcsrc++;                     \
+	if(mcsz >= 9*sizeof(mcsz)) { *mcdst++ = *mcsrc++;                     \
+				     *mcdst++ = *mcsrc++; }}}                 \
+				     *mcdst++ = *mcsrc++;                     \
+				     *mcdst++ = *mcsrc++;                     \
+				     *mcdst   = *mcsrc  ;                     \
+  } else memcpy(dest, src, mcsz);                                             \
+} while(0)
+
+#else /* !USE_MEMCPY */
+
+/* Use Duff's device for good zeroing/copying performance. */
+
+#define MALLOC_ZERO(charp, nbytes)                                            \
+do {                                                                          \
+  INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp);                           \
+  long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T), mcn;                         \
+  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
+  switch (mctmp) {                                                            \
+    case 0: for(;;) { *mzp++ = 0;                                             \
+    case 7:           *mzp++ = 0;                                             \
+    case 6:           *mzp++ = 0;                                             \
+    case 5:           *mzp++ = 0;                                             \
+    case 4:           *mzp++ = 0;                                             \
+    case 3:           *mzp++ = 0;                                             \
+    case 2:           *mzp++ = 0;                                             \
+    case 1:           *mzp++ = 0; if(mcn <= 0) break; mcn--; }                \
+  }                                                                           \
+} while(0)
+
+#define MALLOC_COPY(dest,src,nbytes)                                          \
+do {                                                                          \
+  INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src;                            \
+  INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest;                           \
+  long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T), mcn;                         \
+  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
+  switch (mctmp) {                                                            \
+    case 0: for(;;) { *mcdst++ = *mcsrc++;                                    \
+    case 7:           *mcdst++ = *mcsrc++;                                    \
+    case 6:           *mcdst++ = *mcsrc++;                                    \
+    case 5:           *mcdst++ = *mcsrc++;                                    \
+    case 4:           *mcdst++ = *mcsrc++;                                    \
+    case 3:           *mcdst++ = *mcsrc++;                                    \
+    case 2:           *mcdst++ = *mcsrc++;                                    \
+    case 1:           *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; }       \
+  }                                                                           \
+} while(0)
+
+#endif
+
+/*
+
+  Special defines for linux libc
+
+  Except when compiled using these special defines for Linux libc
+  using weak aliases, this malloc is NOT designed to work in
+  multithreaded applications.  No semaphores or other concurrency
+  control are provided to ensure that multiple malloc or free calls
+  don't run at the same time, which could be disastrous. A single
+  semaphore could be used across malloc, realloc, and free (which is
+  essentially the effect of the linux weak alias approach). It would
+  be hard to obtain finer granularity.
+
+*/
+
+
+#ifdef INTERNAL_LINUX_C_LIB
+
+#if __STD_C
+
+Void_t * __default_morecore_init (ptrdiff_t);
+Void_t *(*__morecore)(ptrdiff_t) = __default_morecore_init;
+
+#else
+
+Void_t * __default_morecore_init ();
+Void_t *(*__morecore)() = __default_morecore_init;
+
+#endif
+
+#define MORECORE (*__morecore)
+#define MORECORE_FAILURE 0
+#define MORECORE_CLEARS 1
+
+#else /* INTERNAL_LINUX_C_LIB */
+
+#if __STD_C
+extern Void_t*     sbrk(ptrdiff_t);
+#else
+extern Void_t*     sbrk();
+#endif
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE -1
+#endif
+
+#ifndef MORECORE_CLEARS
+#define MORECORE_CLEARS 1
+#endif
+
+#endif /* INTERNAL_LINUX_C_LIB */
+
+/*
+  Define HAVE_MMAP to optionally make malloc() use mmap() to
+  allocate very large blocks.  These will be returned to the
+  operating system immediately after a free().
+*/
+
+#define HAVE_MMAP 0	/* Not available for U-Boot */
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.  This is currently only possible on Linux with
+  kernel versions newer than 1.3.77.
+*/
+
+#undef	HAVE_MREMAP	/* Not available for U-Boot */
+
+/*
+
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing the same kind of
+  information you can get from malloc_stats. It should work on
+  any SVID/XPG compliant system that has a /usr/include/malloc.h
+  defining struct mallinfo. (If you'd like to install such a thing
+  yourself, cut out the preliminary declarations as described above
+  and below and save them in a malloc.h file. But there's no
+  compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields, most of which are not even meaningful in this
+  version of malloc. Some of these fields are instead filled by
+  mallinfo() with other numbers that might possibly be of interest.
+
+  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+  /usr/include/malloc.h file that includes a declaration of struct
+  mallinfo.  If so, it is included; else an SVID2/XPG2 compliant
+  version is declared below.  These must be precisely the same for
+  mallinfo() to work.
+
+*/
+
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+
+/* SVID2/XPG mallinfo structure */
+
+struct mallinfo {
+  int arena;    /* total space allocated from system */
+  int ordblks;  /* number of non-inuse chunks */
+  int smblks;   /* unused -- always zero */
+  int hblks;    /* number of mmapped regions */
+  int hblkhd;   /* total space in mmapped regions */
+  int usmblks;  /* unused -- always zero */
+  int fsmblks;  /* unused -- always zero */
+  int uordblks; /* total allocated space */
+  int fordblks; /* total non-inuse space */
+  int keepcost; /* top-most, releasable (via malloc_trim) space */
+};
+
+/* SVID2/XPG mallopt options */
+
+#define M_MXFAST  1    /* UNUSED in this malloc */
+#define M_NLBLKS  2    /* UNUSED in this malloc */
+#define M_GRAIN   3    /* UNUSED in this malloc */
+#define M_KEEP    4    /* UNUSED in this malloc */
+
+
+/* mallopt options that actually do something */
+
+#define M_TRIM_THRESHOLD    -1
+#define M_TOP_PAD           -2
+#define M_MMAP_THRESHOLD    -3
+#define M_MMAP_MAX          -4
+
+/*
+  Access to system page size. To the extent possible, this malloc
+  manages memory from the system in page-size units.
+
+  The following mechanics for getpagesize were adapted from
+  bsd/gnu getpagesize.h
+*/
+
+#define	malloc_getpagesize	4096
+
+
 /*
  Emulation of sbrk for WIN32
  All code within the ifdef WIN32 is untested by me.
--- a/include/malloc.h
+++ b/include/malloc.h
@ -259,481 +259,6 @@ extern "C" {
 */


-/*
-    Debugging:
-
-    Because freed chunks may be overwritten with link fields, this
-    malloc will often die when freed memory is overwritten by user
-    programs.  This can be very effective (albeit in an annoying way)
-    in helping track down dangling pointers.
-
-    If you compile with -DDEBUG, a number of assertion checks are
-    enabled that will catch more memory errors. You probably won't be
-    able to make much sense of the actual assertion errors, but they
-    should help you locate incorrectly overwritten memory.  The
-    checking is fairly extensive, and will slow down execution
-    noticeably. Calling malloc_stats or mallinfo with DEBUG set will
-    attempt to check every non-mmapped allocated and free chunk in the
-    course of computing the summmaries. (By nature, mmapped regions
-    cannot be checked very much automatically.)
-
-    Setting DEBUG may also be helpful if you are trying to modify
-    this code. The assertions in the check routines spell out in more
-    detail the assumptions and invariants underlying the algorithms.
-
-*/
-
-#ifdef DEBUG
-/* #include <assert.h> */
-#define assert(x) ((void)0)
-#else
-#define assert(x) ((void)0)
-#endif
-
-
-/*
-  INTERNAL_SIZE_T is the word-size used for internal bookkeeping
-  of chunk sizes. On a 64-bit machine, you can reduce malloc
-  overhead by defining INTERNAL_SIZE_T to be a 32 bit `unsigned int'
-  at the expense of not being able to handle requests greater than
-  2^31. This limitation is hardly ever a concern; you are encouraged
-  to set this. However, the default version is the same as size_t.
-*/
-
-#ifndef INTERNAL_SIZE_T
-#define INTERNAL_SIZE_T size_t
-#endif
-
-/*
-  REALLOC_ZERO_BYTES_FREES should be set if a call to
-  realloc with zero bytes should be the same as a call to free.
-  Some people think it should. Otherwise, since this malloc
-  returns a unique pointer for malloc(0), so does realloc(p, 0).
-*/
-
-
-/*   #define REALLOC_ZERO_BYTES_FREES */
-
-
-/*
-  HAVE_MEMCPY should be defined if you are not otherwise using
-  ANSI STD C, but still have memcpy and memset in your C library
-  and want to use them in calloc and realloc. Otherwise simple
-  macro versions are defined here.
-
-  USE_MEMCPY should be defined as 1 if you actually want to
-  have memset and memcpy called. People report that the macro
-  versions are often enough faster than libc versions on many
-  systems that it is better to use them.
-
-*/
-
-#define HAVE_MEMCPY
-#define USE_MEMCPY 1
-
-#if (__STD_C || defined(HAVE_MEMCPY))
-
-#if __STD_C
-void* memset(void*, int, size_t);
-void* memcpy(void*, const void*, size_t);
-#else
-Void_t* memset();
-Void_t* memcpy();
-#endif
-#endif
-
-#if USE_MEMCPY
-
-/* The following macros are only invoked with (2n+1)-multiples of
-   INTERNAL_SIZE_T units, with a positive integer n. This is exploited
-   for fast inline execution when n is small. */
-
-#define MALLOC_ZERO(charp, nbytes)                                            \
-do {                                                                          \
-  INTERNAL_SIZE_T mzsz = (nbytes);                                            \
-  if(mzsz <= 9*sizeof(mzsz)) {                                                \
-    INTERNAL_SIZE_T* mz = (INTERNAL_SIZE_T*) (charp);                         \
-    if(mzsz >= 5*sizeof(mzsz)) {     *mz++ = 0;                               \
-				     *mz++ = 0;                               \
-      if(mzsz >= 7*sizeof(mzsz)) {   *mz++ = 0;                               \
-				     *mz++ = 0;                               \
-	if(mzsz >= 9*sizeof(mzsz)) { *mz++ = 0;                               \
-				     *mz++ = 0; }}}                           \
-				     *mz++ = 0;                               \
-				     *mz++ = 0;                               \
-				     *mz   = 0;                               \
-  } else memset((charp), 0, mzsz);                                            \
-} while(0)
-
-#define MALLOC_COPY(dest,src,nbytes)                                          \
-do {                                                                          \
-  INTERNAL_SIZE_T mcsz = (nbytes);                                            \
-  if(mcsz <= 9*sizeof(mcsz)) {                                                \
-    INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) (src);                        \
-    INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) (dest);                       \
-    if(mcsz >= 5*sizeof(mcsz)) {     *mcdst++ = *mcsrc++;                     \
-				     *mcdst++ = *mcsrc++;                     \
-      if(mcsz >= 7*sizeof(mcsz)) {   *mcdst++ = *mcsrc++;                     \
-				     *mcdst++ = *mcsrc++;                     \
-	if(mcsz >= 9*sizeof(mcsz)) { *mcdst++ = *mcsrc++;                     \
-				     *mcdst++ = *mcsrc++; }}}                 \
-				     *mcdst++ = *mcsrc++;                     \
-				     *mcdst++ = *mcsrc++;                     \
-				     *mcdst   = *mcsrc  ;                     \
-  } else memcpy(dest, src, mcsz);                                             \
-} while(0)
-
-#else /* !USE_MEMCPY */
-
-/* Use Duff's device for good zeroing/copying performance. */
-
-#define MALLOC_ZERO(charp, nbytes)                                            \
-do {                                                                          \
-  INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp);                           \
-  long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T), mcn;                         \
-  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
-  switch (mctmp) {                                                            \
-    case 0: for(;;) { *mzp++ = 0;                                             \
-    case 7:           *mzp++ = 0;                                             \
-    case 6:           *mzp++ = 0;                                             \
-    case 5:           *mzp++ = 0;                                             \
-    case 4:           *mzp++ = 0;                                             \
-    case 3:           *mzp++ = 0;                                             \
-    case 2:           *mzp++ = 0;                                             \
-    case 1:           *mzp++ = 0; if(mcn <= 0) break; mcn--; }                \
-  }                                                                           \
-} while(0)
-
-#define MALLOC_COPY(dest,src,nbytes)                                          \
-do {                                                                          \
-  INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src;                            \
-  INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest;                           \
-  long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T), mcn;                         \
-  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
-  switch (mctmp) {                                                            \
-    case 0: for(;;) { *mcdst++ = *mcsrc++;                                    \
-    case 7:           *mcdst++ = *mcsrc++;                                    \
-    case 6:           *mcdst++ = *mcsrc++;                                    \
-    case 5:           *mcdst++ = *mcsrc++;                                    \
-    case 4:           *mcdst++ = *mcsrc++;                                    \
-    case 3:           *mcdst++ = *mcsrc++;                                    \
-    case 2:           *mcdst++ = *mcsrc++;                                    \
-    case 1:           *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; }       \
-  }                                                                           \
-} while(0)
-
-#endif
-
-
-/*
-  Define HAVE_MMAP to optionally make malloc() use mmap() to
-  allocate very large blocks.  These will be returned to the
-  operating system immediately after a free().
-*/
-
-/***
-#ifndef HAVE_MMAP
-#define HAVE_MMAP 1
-#endif
-***/
-#define	HAVE_MMAP 0	/* Not available for U-Boot */
-
-/*
-  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
-  large blocks.  This is currently only possible on Linux with
-  kernel versions newer than 1.3.77.
-*/
-
-/***
-#ifndef HAVE_MREMAP
-#ifdef INTERNAL_LINUX_C_LIB
-#define HAVE_MREMAP 1
-#else
-#define HAVE_MREMAP 0
-#endif
-#endif
-***/
-#undef	HAVE_MREMAP	/* Not available for U-Boot */
-
-
-/*
-  Access to system page size. To the extent possible, this malloc
-  manages memory from the system in page-size units.
-
-  The following mechanics for getpagesize were adapted from
-  bsd/gnu getpagesize.h
-*/
-
-#define	malloc_getpagesize	4096
-
-/*
-
-  This version of malloc supports the standard SVID/XPG mallinfo
-  routine that returns a struct containing the same kind of
-  information you can get from malloc_stats. It should work on
-  any SVID/XPG compliant system that has a /usr/include/malloc.h
-  defining struct mallinfo. (If you'd like to install such a thing
-  yourself, cut out the preliminary declarations as described above
-  and below and save them in a malloc.h file. But there's no
-  compelling reason to bother to do this.)
-
-  The main declaration needed is the mallinfo struct that is returned
-  (by-copy) by mallinfo().  The SVID/XPG malloinfo struct contains a
-  bunch of fields, most of which are not even meaningful in this
-  version of malloc. Some of these fields are are instead filled by
-  mallinfo() with other numbers that might possibly be of interest.
-
-  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
-  /usr/include/malloc.h file that includes a declaration of struct
-  mallinfo.  If so, it is included; else an SVID2/XPG2 compliant
-  version is declared below.  These must be precisely the same for
-  mallinfo() to work.
-
-*/
-
-/* #define HAVE_USR_INCLUDE_MALLOC_H */
-
-
-/* SVID2/XPG mallinfo structure */
-
-struct mallinfo {
-  int arena;    /* total space allocated from system */
-  int ordblks;  /* number of non-inuse chunks */
-  int smblks;   /* unused -- always zero */
-  int hblks;    /* number of mmapped regions */
-  int hblkhd;   /* total space in mmapped regions */
-  int usmblks;  /* unused -- always zero */
-  int fsmblks;  /* unused -- always zero */
-  int uordblks; /* total allocated space */
-  int fordblks; /* total non-inuse space */
-  int keepcost; /* top-most, releasable (via malloc_trim) space */
-};
-
-/* SVID2/XPG mallopt options */
-
-#define M_MXFAST  1    /* UNUSED in this malloc */
-#define M_NLBLKS  2    /* UNUSED in this malloc */
-#define M_GRAIN   3    /* UNUSED in this malloc */
-#define M_KEEP    4    /* UNUSED in this malloc */
-
-
-/* mallopt options that actually do something */
-
-#define M_TRIM_THRESHOLD    -1
-#define M_TOP_PAD           -2
-#define M_MMAP_THRESHOLD    -3
-#define M_MMAP_MAX          -4
-
-
-#ifndef DEFAULT_TRIM_THRESHOLD
-#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
-#endif
-
-/*
-    M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
-      to keep before releasing via malloc_trim in free().
-
-      Automatic trimming is mainly useful in long-lived programs.
-      Because trimming via sbrk can be slow on some systems, and can
-      sometimes be wasteful (in cases where programs immediately
-      afterward allocate more large chunks) the value should be high
-      enough so that your overall system performance would improve by
-      releasing.
-
-      The trim threshold and the mmap control parameters (see below)
-      can be traded off with one another. Trimming and mmapping are
-      two different ways of releasing unused memory back to the
-      system. Between these two, it is often possible to keep
-      system-level demands of a long-lived program down to a bare
-      minimum. For example, in one test suite of sessions measuring
-      the XF86 X server on Linux, using a trim threshold of 128K and a
-      mmap threshold of 192K led to near-minimal long term resource
-      consumption.
-
-      If you are using this malloc in a long-lived program, it should
-      pay to experiment with these values.  As a rough guide, you
-      might set to a value close to the average size of a process
-      (program) running on your system.  Releasing this much memory
-      would allow such a process to run in memory.  Generally, it's
-      worth it to tune for trimming rather tham memory mapping when a
-      program undergoes phases where several large chunks are
-      allocated and released in ways that can reuse each other's
-      storage, perhaps mixed with phases where there are no such
-      chunks at all.  And in well-behaved long-lived programs,
-      controlling release of large blocks via trimming versus mapping
-      is usually faster.
-
-      However, in most programs, these parameters serve mainly as
-      protection against the system-level effects of carrying around
-      massive amounts of unneeded memory. Since frequent calls to
-      sbrk, mmap, and munmap otherwise degrade performance, the default
-      parameters are set to relatively high values that serve only as
-      safeguards.
-
-      The default trim value is high enough to cause trimming only in
-      fairly extreme (by current memory consumption standards) cases.
-      It must be greater than page size to have any useful effect.  To
-      disable trimming completely, you can set it to (unsigned long)(-1).
-
-
-*/
-
-
-#ifndef DEFAULT_TOP_PAD
-#define DEFAULT_TOP_PAD        (0)
-#endif
-
-/*
-    M_TOP_PAD is the amount of extra `padding' space to allocate or
-      retain whenever sbrk is called. It is used in two ways internally:
-
-      * When sbrk is called to extend the top of the arena to satisfy
-	a new malloc request, this much padding is added to the sbrk
-	request.
-
-      * When malloc_trim is called automatically from free(),
-	it is used as the `pad' argument.
-
-      In both cases, the actual amount of padding is rounded
-      so that the end of the arena is always a system page boundary.
-
-      The main reason for using padding is to avoid calling sbrk so
-      often. Having even a small pad greatly reduces the likelihood
-      that nearly every malloc request during program start-up (or
-      after trimming) will invoke sbrk, which needlessly wastes
-      time.
-
-      Automatic rounding-up to page-size units is normally sufficient
-      to avoid measurable overhead, so the default is 0.  However, in
-      systems where sbrk is relatively slow, it can pay to increase
-      this value, at the expense of carrying around more memory than
-      the program needs.
-
-*/
-
-
-#ifndef DEFAULT_MMAP_THRESHOLD
-#define DEFAULT_MMAP_THRESHOLD (128 * 1024)
-#endif
-
-/*
-
-    M_MMAP_THRESHOLD is the request size threshold for using mmap()
-      to service a request. Requests of at least this size that cannot
-      be allocated using already-existing space will be serviced via mmap.
-      (If enough normal freed space already exists it is used instead.)
-
-      Using mmap segregates relatively large chunks of memory so that
-      they can be individually obtained and released from the host
-      system. A request serviced through mmap is never reused by any
-      other request (at least not directly; the system may just so
-      happen to remap successive requests to the same locations).
-
-      Segregating space in this way has the benefit that mmapped space
-      can ALWAYS be individually released back to the system, which
-      helps keep the system level memory demands of a long-lived
-      program low. Mapped memory can never become `locked' between
-      other chunks, as can happen with normally allocated chunks, which
-      means that even trimming via malloc_trim would not release them.
-
-      However, it has the disadvantages that:
-
-	 1. The space cannot be reclaimed, consolidated, and then
-	    used to service later requests, as happens with normal chunks.
-	 2. It can lead to more wastage because of mmap page alignment
-	    requirements
-	 3. It causes malloc performance to be more dependent on host
-	    system memory management support routines which may vary in
-	    implementation quality and may impose arbitrary
-	    limitations. Generally, servicing a request via normal
-	    malloc steps is faster than going through a system's mmap.
-
-      All together, these considerations should lead you to use mmap
-      only for relatively large requests.
-
-
-*/
-
-
-#ifndef DEFAULT_MMAP_MAX
-#define DEFAULT_MMAP_MAX       (0)
-#endif
-
-/*
-    M_MMAP_MAX is the maximum number of requests to simultaneously
-      service using mmap. This parameter exists because:
-
-	 1. Some systems have a limited number of internal tables for
-	    use by mmap.
-	 2. In most systems, overreliance on mmap can degrade overall
-	    performance.
-	 3. If a program allocates many large regions, it is probably
-	    better off using normal sbrk-based allocation routines that
-	    can reclaim and reallocate normal heap memory. Using a
-	    small value allows transition into this mode after the
-	    first few allocations.
-
-      Setting to 0 disables all use of mmap.  If HAVE_MMAP is not set,
-      the default value is 0, and attempts to set it to non-zero values
-      in mallopt will fail.
-*/
-
-
-/*
-
-  Special defines for linux libc
-
-  Except when compiled using these special defines for Linux libc
-  using weak aliases, this malloc is NOT designed to work in
-  multithreaded applications.  No semaphores or other concurrency
-  control are provided to ensure that multiple malloc or free calls
-  don't run at the same time, which could be disastrous. A single
-  semaphore could be used across malloc, realloc, and free (which is
-  essentially the effect of the linux weak alias approach). It would
-  be hard to obtain finer granularity.
-
-*/
-
-
-#ifdef INTERNAL_LINUX_C_LIB
-
-#if __STD_C
-
-Void_t * __default_morecore_init (ptrdiff_t);
-Void_t *(*__morecore)(ptrdiff_t) = __default_morecore_init;
-
-#else
-
-Void_t * __default_morecore_init ();
-Void_t *(*__morecore)() = __default_morecore_init;
-
-#endif
-
-#define MORECORE (*__morecore)
-#define MORECORE_FAILURE 0
-#define MORECORE_CLEARS 1
-
-#else /* INTERNAL_LINUX_C_LIB */
-
-#if __STD_C
-extern Void_t*     sbrk(ptrdiff_t);
-#else
-extern Void_t*     sbrk();
-#endif
-
-#ifndef MORECORE
-#define MORECORE sbrk
-#endif
-
-#ifndef MORECORE_FAILURE
-#define MORECORE_FAILURE -1
-#endif
-
-#ifndef MORECORE_CLEARS
-#define MORECORE_CLEARS 1
-#endif
-
-#endif /* INTERNAL_LINUX_C_LIB */

 #if defined(INTERNAL_LINUX_C_LIB) && defined(__ELF__)