Contents
  1. __libc_malloc
    1.1. _int_malloc
      1.1.1. Within the fastbin range
      1.1.2. Within the smallbin range
      1.1.3. malloc_consolidate
      1.1.4. The loop: processing the unsorted bin
      1.1.5. If in the largebin range
      1.1.6. Searching the binmap for a large enough chunk
      1.1.7. Checking the top chunk
      1.1.8. Calling sysmalloc
  2. Functions involved
    2.1. alloc_perturb
    2.2. assert()
    2.3. bin_at
    2.4. catomic_compare_and_exchange_val_acq()
    2.5. check_malloced_chunk
    2.6. check_remalloced_chunk()
    2.7. chunk2mem
    2.8. chunksize
    2.9. last()
    2.10. largebin_index
    2.11. fastbin_index(sz)
    2.12. fastbin(ar_ptr, idx)
    2.13. first()
    2.14. set_inuse_bit_at_offset
    2.15. set_non_main_arena
    2.16. smallbin_index
    2.17. unlink
    2.18. __glibc_likely
  3. Variables involved
    3.1. SINGLE_THREAD_P
  4. Related structures
    4.1. malloc_chunk
    4.2. malloc_state
  5. Flow overview
  6. Diagram
malloc Flow

All of the source code below is from glibc 2.27.

__libc_malloc

As the code shows, the actual allocation work is handed off to the _int_malloc function.

void *
__libc_malloc (size_t bytes)
{
  mstate ar_ptr;
  void *victim; // pointer that will be returned to the caller

  // Check whether a custom allocator is installed, i.e. whether
  // __malloc_hook is non-NULL
  void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
  if (__builtin_expect (hook != NULL, 0)) // if set, call the hook and return
    return (*hook)(bytes, RETURN_ADDRESS (0));

  // With USE_TCACHE the code below runs first. tcache is a new mechanism
  // introduced in glibc 2.26, similar in spirit to fastbins; it is not
  // discussed further here.
#if USE_TCACHE
  /* int_free also calls request2size, be careful to not pad twice.  */
  size_t tbytes;
  checked_request2size (bytes, tbytes);
  size_t tc_idx = csize2tidx (tbytes);

  MAYBE_INIT_TCACHE ();

  DIAG_PUSH_NEEDS_COMMENT;
  if (tc_idx < mp_.tcache_bins
      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
      && tcache
      && tcache->entries[tc_idx] != NULL)
    {
      return tcache_get (tc_idx);
    }
  DIAG_POP_NEEDS_COMMENT;
#endif
  // Continue reading here

  if (SINGLE_THREAD_P) // single-threaded: allocate from main_arena
    {
      victim = _int_malloc (&main_arena, bytes);
      // assert() aborts the program if its condition is false.
      // Here one of the following must hold: 1. nothing was allocated,
      // 2. the memory is mmapped, or 3. the chunk belongs to the arena
      // it was allocated from.
      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
              &main_arena == arena_for_chunk (mem2chunk (victim)));
      return victim;
    }

  arena_get (ar_ptr, bytes); // get or create an arena, taking its mutex

  victim = _int_malloc (ar_ptr, bytes);
  /* Retry with another arena only if we were able to find a usable arena
     before.  */
  if (!victim && ar_ptr != NULL) // allocation failed but an arena exists
    {
      LIBC_PROBE (memory_malloc_retry, 1, bytes);
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }

  if (ar_ptr != NULL) // an arena was taken: unlock it before returning
    __libc_lock_unlock (ar_ptr->mutex);

  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
          ar_ptr == arena_for_chunk (mem2chunk (victim)));
  return victim;
}
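
A minimal sketch (not from the original post) of the hook mechanism checked at the top of __libc_malloc. __malloc_hook still exists in glibc 2.27 but is deprecated and was removed in glibc 2.34; the hook name my_hook is hypothetical and the sketch is not thread-safe:

#include <stdio.h>
#include <malloc.h>

/* my_hook is a hypothetical name used only for illustration. */
static void *
my_hook (size_t size, const void *caller)
{
  __malloc_hook = NULL;          /* disarm so the inner malloc doesn't recurse */
  void *p = malloc (size);
  fprintf (stderr, "malloc(%zu) from %p -> %p\n", size, caller, p);
  __malloc_hook = my_hook;       /* re-arm the hook */
  return p;
}

int
main (void)
{
  __malloc_hook = my_hook;
  void *p = malloc (32);         /* routed through my_hook */
  free (p);
  return 0;
}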

_int_malloc

The full source is given below.

The following sections then break it apart and analyze it piece by piece.

static void *
_int_malloc (mstate av, size_t bytes)
{
INTERNAL_SIZE_T nb; /* normalized request size */
unsigned int idx; /* associated bin index */
mbinptr bin; /* associated bin */

mchunkptr victim; /* inspected/selected chunk */
INTERNAL_SIZE_T size; /* its size */
int victim_index; /* its bin index */

mchunkptr remainder; /* remainder from a split */
unsigned long remainder_size; /* its size */

unsigned int block; /* bit map traverser */
unsigned int bit; /* bit map traverser */
unsigned int map; /* current word of binmap */

mchunkptr fwd; /* misc temp for linking */
mchunkptr bck; /* misc temp for linking */

#if USE_TCACHE
size_t tcache_unsorted_count; /* count of unsorted chunks processed */
#endif

/*
Convert request size to internal form by adding SIZE_SZ bytes
overhead plus possibly more to obtain necessary alignment and/or
to obtain a size of at least MINSIZE, the smallest allocatable
size. Also, checked_request2size traps (returning 0) request sizes
that are so large that they wrap around zero when padded and
aligned.
*/

checked_request2size (bytes, nb); //align the request and convert it into the
                                  //internal chunk size; nb is that chunk size

/* There are no usable arenas. Fall back to sysmalloc to get a chunk from
mmap. */
if (__glibc_unlikely (av == NULL))
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}

/*
If the size qualifies as a fastbin, first check corresponding bin.
This code is safe to execute even if av is not yet initialized, so we
can try it without checking, which saves some time on this fast path.
*/
//---------------------------------------------------
//---------------------------------------------------
//Define a helper macro, REMOVE_FB
#define REMOVE_FB(fb, victim, pp) \
do \
{ \
victim = pp; \
if (victim == NULL) \
break; \
} \
while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
!= victim); \
//If *fb == victim, store victim->fd into *fb and return victim;
//the loop retries while the value read back != victim (another thread
//modified the bin concurrently)
//---------------------------------------------------
//---------------------------------------------------
//If the chunk size nb falls within the fastbin range
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
{
idx = fastbin_index (nb);
mfastbinptr *fb = &fastbin (av, idx);
mchunkptr pp;
victim = *fb;

if (victim != NULL)
{
if (SINGLE_THREAD_P)
*fb = victim->fd;
else
REMOVE_FB (fb, pp, victim);
if (__glibc_likely (victim != NULL))
{
size_t victim_idx = fastbin_index (chunksize (victim));
if (__builtin_expect (victim_idx != idx, 0))
malloc_printerr ("malloc(): memory corruption (fast)");
check_remalloced_chunk (av, victim, nb);
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = *fb) != NULL)
{
if (SINGLE_THREAD_P)
*fb = tc_victim->fd;
else
{
REMOVE_FB (fb, pp, tc_victim);
if (__glibc_unlikely (tc_victim == NULL))
break;
}
tcache_put (tc_victim, tc_idx);
}
}
#endif
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
}

/*
If a small request, check regular bin. Since these "smallbins"
hold one size each, no searching within bins is necessary.
(For a large request, we need to wait until unsorted chunks are
processed to find best fit. But for small ones, fits are exact
anyway, so we can check now, which is faster.)
*/

if (in_smallbin_range (nb))
{
idx = smallbin_index (nb);
bin = bin_at (av, idx);

if ((victim = last (bin)) != bin)
{
bck = victim->bk;
if (__glibc_unlikely (bck->fd != victim))
malloc_printerr ("malloc(): smallbin double linked list corrupted");
set_inuse_bit_at_offset (victim, nb);
bin->bk = bck;
bck->fd = bin;

if (av != &main_arena)
set_non_main_arena (victim);
check_malloced_chunk (av, victim, nb);
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks over. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = last (bin)) != bin)
{
if (tc_victim != 0)
{
bck = tc_victim->bk;
set_inuse_bit_at_offset (tc_victim, nb);
if (av != &main_arena)
set_non_main_arena (tc_victim);
bin->bk = bck;
bck->fd = bin;

tcache_put (tc_victim, tc_idx);
}
}
}
#endif
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}

/*
If this is a large request, consolidate fastbins before continuing.
While it might look excessive to kill all fastbins before
even seeing if there is space available, this avoids
fragmentation problems normally associated with fastbins.
Also, in practice, programs tend to have runs of either small or
large requests, but less often mixtures, so consolidation is not
invoked all that often in most programs. And the programs that
it is called frequently in otherwise tend to fragment.
*/

else
{
idx = largebin_index (nb);
if (atomic_load_relaxed (&av->have_fastchunks))
malloc_consolidate (av);
}

/*
Process recently freed or remaindered chunks, taking one only if
it is exact fit, or, if this a small request, the chunk is remainder from
the most recent non-exact fit. Place other traversed chunks in
bins. Note that this step is the only place in any routine where
chunks are placed in bins.

The outer loop here is needed because we might not realize until
near the end of malloc that we should have consolidated, so must
do so and retry. This happens at most once, and only when we would
otherwise need to expand memory to service a "small" request.
*/

#if USE_TCACHE
INTERNAL_SIZE_T tcache_nb = 0;
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
tcache_nb = nb;
int return_cached = 0;

tcache_unsorted_count = 0;
#endif

for (;; )
{
int iters = 0;
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
{
bck = victim->bk;
if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
|| __builtin_expect (chunksize_nomask (victim)
> av->system_mem, 0))
malloc_printerr ("malloc(): memory corruption");
size = chunksize (victim);

/*
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
*/

if (in_smallbin_range (nb) &&
bck == unsorted_chunks (av) &&
victim == av->last_remainder &&
(unsigned long) (size) > (unsigned long) (nb + MINSIZE))
{
/* split and reattach remainder */
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
av->last_remainder = remainder;
remainder->bk = remainder->fd = unsorted_chunks (av);
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}

set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);

check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}

/* remove from unsorted list */
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);

/* Take now instead of binning if exact fit */

if (size == nb)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
#if USE_TCACHE
/* Fill cache first, return to user only if cache fills.
We may return one of these chunks later. */
if (tcache_nb
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (victim, tc_idx);
return_cached = 1;
continue;
}
else
{
#endif
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
#if USE_TCACHE
}
#endif
}

/* place chunk in bin */

if (in_smallbin_range (size))
{
victim_index = smallbin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;
}
else
{
victim_index = largebin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;

/* maintain large bins in sorted order */
if (fwd != bck)
{
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
assert (chunk_main_arena (bck->bk));
if ((unsigned long) (size)
< (unsigned long) chunksize_nomask (bck->bk))
{
fwd = bck;
bck = bck->bk;

victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = fwd->fd->bk_nextsize;
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
}
else
{
assert (chunk_main_arena (fwd));
while ((unsigned long) size < chunksize_nomask (fwd))
{
fwd = fwd->fd_nextsize;
assert (chunk_main_arena (fwd));
}

if ((unsigned long) size
== (unsigned long) chunksize_nomask (fwd))
/* Always insert in the second position. */
fwd = fwd->fd;
else
{
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
}
}
else
victim->fd_nextsize = victim->bk_nextsize = victim;
}

mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;

#if USE_TCACHE
/* If we've processed as many chunks as we're allowed while
filling the cache, return one of the cached ones. */
++tcache_unsorted_count;
if (return_cached
&& mp_.tcache_unsorted_limit > 0
&& tcache_unsorted_count > mp_.tcache_unsorted_limit)
{
return tcache_get (tc_idx);
}
#endif

#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
break;
}

#if USE_TCACHE
/* If all the small chunks we found ended up cached, return one now. */
if (return_cached)
{
return tcache_get (tc_idx);
}
#endif

/*
If a large request, scan through the chunks of current bin in
sorted order to find smallest that fits. Use the skip list for this.
*/

if (!in_smallbin_range (nb))
{
bin = bin_at (av, idx);

/* skip scan if empty or largest chunk is too small */
if ((victim = first (bin)) != bin
&& (unsigned long) chunksize_nomask (victim)
>= (unsigned long) (nb))
{
victim = victim->bk_nextsize;
while (((unsigned long) (size = chunksize (victim)) <
(unsigned long) (nb)))
victim = victim->bk_nextsize;

/* Avoid removing the first entry for a size so that the skip
list does not have to be rerouted. */
if (victim != last (bin)
&& chunksize_nomask (victim)
== chunksize_nomask (victim->fd))
victim = victim->fd;

remainder_size = size - nb;
unlink (av, victim, bck, fwd);

/* Exhaust */
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
}
/* Split */
else
{
remainder = chunk_at_offset (victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
malloc_printerr ("malloc(): corrupted unsorted chunks");
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}

/*
Search for a chunk by scanning bins, starting with next largest
bin. This search is strictly by best-fit; i.e., the smallest
(with ties going to approximately the least recently used) chunk
that fits is selected.

The bitmap avoids needing to check that most blocks are nonempty.
The particular case of skipping all bins during warm-up phases
when no chunks have been returned yet is faster than it might look.
*/

++idx;
bin = bin_at (av, idx);
block = idx2block (idx);
map = av->binmap[block];
bit = idx2bit (idx);

for (;; )
{
/* Skip rest of block if there are no more set bits in this block. */
if (bit > map || bit == 0)
{
do
{
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;
}
while ((map = av->binmap[block]) == 0);

bin = bin_at (av, (block << BINMAPSHIFT));
bit = 1;
}

/* Advance to bin with set bit. There must be one. */
while ((bit & map) == 0)
{
bin = next_bin (bin);
bit <<= 1;
assert (bit != 0);
}

/* Inspect the bin. It is likely to be non-empty */
victim = last (bin);

/* If a false alarm (empty bin), clear the bit. */
if (victim == bin)
{
av->binmap[block] = map &= ~bit; /* Write through */
bin = next_bin (bin);
bit <<= 1;
}

else
{
size = chunksize (victim);

/* We know the first chunk in this bin is big enough to use. */
assert ((unsigned long) (size) >= (unsigned long) (nb));

remainder_size = size - nb;

/* unlink */
unlink (av, victim, bck, fwd);

/* Exhaust */
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
}

/* Split */
else
{
remainder = chunk_at_offset (victim, nb);

/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
malloc_printerr ("malloc(): corrupted unsorted chunks 2");
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;

/* advertise as last remainder */
if (in_smallbin_range (nb))
av->last_remainder = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}

use_top:
/*
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).

We require that av->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
*/

victim = av->top;
size = chunksize (victim);

if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
{
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
av->top = remainder;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);

check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}

/* When we are using atomic ops to free fast chunks we can get
here for all block sizes. */
else if (atomic_load_relaxed (&av->have_fastchunks))
{
malloc_consolidate (av);
/* restore original bin index */
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
else
idx = largebin_index (nb);
}

/*
Otherwise, relay to handle system-dependent cases
*/
else
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}
}
}

Within the fastbin range

The rough fastbin flow:

1. The chunk size falls in the fastbin range.

2. Compute the fastbin index from the requested size.

3. Take the address of the corresponding bin and read bin->fd, the head of the list.

4. If the bin is empty, fall through to the smallbin check.

5. If it is not empty, verify that the chunksize of the removed chunk matches the size class of the bin.

6. Return the address of the chunk's fd field (which is the chunk's mem address).

  //If the chunk size nb falls within the fastbin range
  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {
      idx = fastbin_index (nb); //idx is the bin index
      mfastbinptr *fb = &fastbin (av, idx);
      //fb holds the address of fastbinsY[idx]; *fb is the first node of the bin
      mchunkptr pp;
      victim = *fb; //victim points at the head chunk of fastbinsY[idx]

      if (victim != NULL)
        {
          if (SINGLE_THREAD_P) //single-threaded: fd points at the next free
                               //chunk, which becomes the new list head
            *fb = victim->fd;
          else
            REMOVE_FB (fb, pp, victim);
          if (__glibc_likely (victim != NULL)) //proceed if victim != NULL
            {
              size_t victim_idx = fastbin_index (chunksize (victim));
              //victim_idx must equal idx

              //this is the well-known fastbin size check

              if (__builtin_expect (victim_idx != idx, 0))
                malloc_printerr ("malloc(): memory corruption (fast)");
              check_remalloced_chunk (av, victim, nb);
#if USE_TCACHE
              /* While we're here, if we see other chunks of the same size,
                 stash them in the tcache.  */
              size_t tc_idx = csize2tidx (nb);
              if (tcache && tc_idx < mp_.tcache_bins)
                {
                  mchunkptr tc_victim;

                  /* While bin not empty and tcache not full, copy chunks. */
                  while (tcache->counts[tc_idx] < mp_.tcache_count
                         && (tc_victim = *fb) != NULL)
                    {
                      if (SINGLE_THREAD_P)
                        *fb = tc_victim->fd;
                      else
                        {
                          REMOVE_FB (fb, pp, tc_victim);
                          if (__glibc_unlikely (tc_victim == NULL))
                            break;
                        }
                      tcache_put (tc_victim, tc_idx);
                    }
                }
#endif
              void *p = chunk2mem (victim); //convert the chunk address to the
                                            //user pointer (the fd field address)
              alloc_perturb (p, bytes);     //optionally initialize the memory
              return p;
            }
        }
    }

Within the smallbin range

  1. The chunk size falls in the smallbin range.
  2. Compute the bin index.
  3. Get the address of the chunk to be removed.
  4. If the bin is empty, fall through to the large-chunk path.
  5. Set the PREV_INUSE bit of the next chunk, take the chunk off the list, and return its address.
  if (in_smallbin_range (nb))
    {
      idx = smallbin_index (nb); //smallbin index
      bin = bin_at (av, idx);

      if ((victim = last (bin)) != bin) //bin non-empty: victim = bin->bk is
                                        //the chunk to remove
        {
          bck = victim->bk;
          if (__glibc_unlikely (bck->fd != victim))
            malloc_printerr ("malloc(): smallbin double linked list corrupted");
          set_inuse_bit_at_offset (victim, nb); //set PREV_INUSE in the next chunk
          bin->bk = bck;
          bck->fd = bin;

          if (av != &main_arena)
            set_non_main_arena (victim);
          check_malloced_chunk (av, victim, nb); //verify the flag bits
#if USE_TCACHE
          /* While we're here, if we see other chunks of the same size,
             stash them in the tcache.  */
          size_t tc_idx = csize2tidx (nb);
          if (tcache && tc_idx < mp_.tcache_bins)
            {
              mchunkptr tc_victim;

              /* While bin not empty and tcache not full, copy chunks over.  */
              while (tcache->counts[tc_idx] < mp_.tcache_count
                     && (tc_victim = last (bin)) != bin)
                {
                  if (tc_victim != 0)
                    {
                      bck = tc_victim->bk;
                      set_inuse_bit_at_offset (tc_victim, nb);
                      if (av != &main_arena)
                        set_non_main_arena (tc_victim);
                      bin->bk = bck;
                      bck->fd = bin;

                      tcache_put (tc_victim, tc_idx);
                    }
                }
            }
#endif
          void *p = chunk2mem (victim); //chunk header -> user pointer
          alloc_perturb (p, bytes);     //initialize the mem region
          return p;
        }
    }

malloc_consolidate

For a large request, malloc_consolidate is called first to merge the fastbins:

  else
    {
      idx = largebin_index (nb);
      if (atomic_load_relaxed (&av->have_fastchunks))
        malloc_consolidate (av);
    }

Flow of the function:

  1. If the fastbins are not yet initialized, initialize them and return.
  2. Merge chunks fastbin by fastbin, from the smallest size class to the largest; within one fastbin, chunks are processed starting at fastbin->fd, then p->fd, and so on.
  3. First try to merge backward with the previous chunk.
  4. Then try to merge forward with the next chunk: if the next chunk is the top chunk, merge directly into the top chunk and go to step 6; otherwise attempt the merge.
  5. Insert the consolidated chunk at the head of the unsorted bin.
  6. Move to the next non-empty fastbin and repeat from step 2 until every fastbin has been emptied, then return.
/*
------------------------- malloc_consolidate -------------------------

malloc_consolidate is a specialized version of free() that tears
down chunks held in fastbins. Free itself cannot be used for this
purpose since, among other things, it might place chunks back onto
fastbins. So, instead, we need to use a minor variant of the same
code.
*/
static void malloc_consolidate(mstate av)
{
mfastbinptr* fb; /* current fastbin being consolidated */
mfastbinptr* maxfb; /* last fastbin (for loop control) */
mchunkptr p; /* current chunk being consolidated */
mchunkptr nextp; /* next chunk to consolidate */
mchunkptr unsorted_bin; /* bin header */
mchunkptr first_unsorted; /* chunk to link to */

/* These have same use as in free() */
mchunkptr nextchunk;
INTERNAL_SIZE_T size;
INTERNAL_SIZE_T nextsize;
INTERNAL_SIZE_T prevsize;
int nextinuse;
mchunkptr bck;
mchunkptr fwd;

atomic_store_relaxed (&av->have_fastchunks, false);

unsorted_bin = unsorted_chunks(av);

/*
Remove each chunk from fast bin and consolidate it, placing it
then in unsorted bin. Among other reasons for doing this,
placing in unsorted bin avoids needing to calculate actual bins
until malloc is sure that chunks aren't immediately going to be
reused anyway.
*/

maxfb = &fastbin (av, NFASTBINS - 1);
fb = &fastbin (av, 0);
do { //for each fastbin from smallest to largest (fb++ up to maxfb)
p = atomic_exchange_acq (fb, NULL);
if (p != 0) { //consolidate every chunk in this bin, following p = p->fd until NULL
do {
{
unsigned int idx = fastbin_index (chunksize (p));
if ((&fastbin (av, idx)) != fb)
malloc_printerr ("malloc_consolidate(): invalid chunk size");
}

check_inuse_chunk(av, p);
nextp = p->fd; //traverse the fastbin in fd order

/* Slightly streamlined version of consolidation code in free() */
size = chunksize (p);
nextchunk = chunk_at_offset(p, size);
nextsize = chunksize(nextchunk);
//the sizes of this chunk and of the next chunk are now known
//if PREV_INUSE is 0, merge backward with the previous chunk
if (!prev_inuse(p)) {
prevsize = prev_size (p);
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(av, p, bck, fwd);
}
//the next chunk is not the top chunk
if (nextchunk != av->top) {
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

if (!nextinuse) {
size += nextsize;
unlink(av, nextchunk, bck, fwd);
} else
clear_inuse_bit_at_offset(nextchunk, 0);

first_unsorted = unsorted_bin->fd;
unsorted_bin->fd = p;
first_unsorted->bk = p;

if (!in_smallbin_range (size)) {
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}

set_head(p, size | PREV_INUSE);
p->bk = unsorted_bin; // put this chunk into the unsorted bin
p->fd = first_unsorted;
set_foot(p, size);
}

else { //the next chunk is the top chunk: merge into it
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
}

} while ( (p = nextp) != 0);

}
} while (fb++ != maxfb);
}

The loop: processing the unsorted bin

While the unsorted bin is not empty, the following loop runs; iters bounds the number of iterations.

1. If the request is for a small chunk, the unsorted bin contains exactly one chunk, that chunk was produced by the previous split (i.e. it is last_remainder), and its size is greater than (requested size + MINSIZE), then the chunk is split directly: last_remainder is updated and the mem address of the carved-off chunk is returned.

2. If victim is exactly the requested chunk size, it is returned and the search ends.

3. Otherwise victim is filed into the smallbin or largebin matching its size.

       int iters = 0;	//loop bound: at most 10000 iterations
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
{
//victim is the tail node of the unsorted bin, the chunk being processed
bck = victim->bk; //bck eases the doubly-linked-list removal below
if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
|| __builtin_expect (chunksize_nomask (victim)
> av->system_mem, 0))
malloc_printerr ("malloc(): memory corruption");
size = chunksize (victim); //size of the tail chunk

/*
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
*/
/******************************************************************
The code below splits the requested chunk off of last_remainder
******************************************************************/
if (in_smallbin_range (nb) &&
bck == unsorted_chunks (av) &&
victim == av->last_remainder &&
(unsigned long) (size) > (unsigned long) (nb + MINSIZE))
{
/* split and reattach remainder */
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
av->last_remainder = remainder;
remainder->bk = remainder->fd = unsorted_chunks (av);
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}

set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);

check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
/******************************************************************
End of the last_remainder split path
******************************************************************/
/******************************************************************
Exact fit for the requested chunk size: return it and finish
******************************************************************/
/* remove from unsorted list */
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);

/* Take now instead of binning if exact fit */

if (size == nb)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
#if USE_TCACHE
/* Fill cache first, return to user only if cache fills.
We may return one of these chunks later. */
if (tcache_nb
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (victim, tc_idx);
return_cached = 1;
continue;
}
else
{
#endif
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
#if USE_TCACHE
}
#endif
}
/******************************************************************
End of the exact-fit path
******************************************************************/

/* place chunk in bin */
/******************************************************************
Sort victim into the smallbin or largebin matching its size
******************************************************************/
if (in_smallbin_range (size))
{
victim_index = smallbin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;
}
else
{
victim_index = largebin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;

/* maintain large bins in sorted order */
if (fwd != bck)
{
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
assert (chunk_main_arena (bck->bk));
if ((unsigned long) (size)
< (unsigned long) chunksize_nomask (bck->bk))
{
fwd = bck;
bck = bck->bk;

victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = fwd->fd->bk_nextsize;
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
}
else
{
assert (chunk_main_arena (fwd));
while ((unsigned long) size < chunksize_nomask (fwd))
{
fwd = fwd->fd_nextsize;
assert (chunk_main_arena (fwd));
}

if ((unsigned long) size
== (unsigned long) chunksize_nomask (fwd))
/* Always insert in the second position. */
fwd = fwd->fd;
else
{
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
}
}
else
victim->fd_nextsize = victim->bk_nextsize = victim;
}

mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;

#if USE_TCACHE
/* If we've processed as many chunks as we're allowed while
filling the cache, return one of the cached ones. */
++tcache_unsorted_count;
if (return_cached
&& mp_.tcache_unsorted_limit > 0
&& tcache_unsorted_count > mp_.tcache_unsorted_limit)
{
return tcache_get (tc_idx);
}
#endif
/******************************************************************
End of the binning path
******************************************************************/
#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
break;
}

If in the largebin range

(Compared with the earlier smallbin check, the smallbins may now also contain chunks migrated from the fastbins and the unsorted bin.)

By this point every free chunk in the fastbins and the unsorted bin has been processed.

/*
If a large request, scan through the chunks of current bin in
sorted order to find smallest that fits. Use the skip list for this.
*/

if (!in_smallbin_range (nb))
{
bin = bin_at (av, idx);

/* skip scan if empty or largest chunk is too small */
if ((victim = first (bin)) != bin
&& (unsigned long) chunksize_nomask (victim)
>= (unsigned long) (nb))
{
victim = victim->bk_nextsize;
while (((unsigned long) (size = chunksize (victim)) <
(unsigned long) (nb)))
victim = victim->bk_nextsize;

/* Avoid removing the first entry for a size so that the skip
list does not have to be rerouted. */
if (victim != last (bin)
&& chunksize_nomask (victim)
== chunksize_nomask (victim->fd))
victim = victim->fd;

remainder_size = size - nb;
unlink (av, victim, bck, fwd);

/* Exhaust */
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
}
/* Split */
else
{
remainder = chunk_at_offset (victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
malloc_printerr ("malloc(): corrupted unsorted chunks");
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}

Searching the binmap for a large enough chunk

  • Using the binmap, locate the small bins and large bins that contain free chunks, and search them for a chunk of sufficient size.
#define NBINS             128
#define BINMAPSHIFT 5
#define BITSPERMAP (1U << BINMAPSHIFT)
#define BINMAPSIZE (NBINS / BITSPERMAP)
unsigned int binmap[BINMAPSIZE];
//ptmalloc uses one bit per bin to record whether that bin contains free chunks
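
For reference, the surrounding binmap macros in malloc.c (glibc 2.27) are:

#define idx2block(i)     ((i) >> BINMAPSHIFT)
#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))

#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))

With BINMAPSHIFT == 5, each of the four 32-bit binmap words covers 32 bins; bin index 70, for example, lives in block 70 >> 5 == 2 as bit 1 << (70 & 31) == 1 << 6.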
  /*
Search for a chunk by scanning bins, starting with next largest
bin. This search is strictly by best-fit; i.e., the smallest
(with ties going to approximately the least recently used) chunk
that fits is selected.

The bitmap avoids needing to check that most blocks are nonempty.
The particular case of skipping all bins during warm-up phases
when no chunks have been returned yet is faster than it might look.
*/

++idx;
bin = bin_at (av, idx);
block = idx2block (idx);
map = av->binmap[block];
bit = idx2bit (idx);

for (;; )
{
/* Skip rest of block if there are no more set bits in this block. */
if (bit > map || bit == 0)
{
do
{
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;
}
while ((map = av->binmap[block]) == 0);

bin = bin_at (av, (block << BINMAPSHIFT));
bit = 1;
}

/* Advance to bin with set bit. There must be one. */
while ((bit & map) == 0)
{
bin = next_bin (bin);
bit <<= 1;
assert (bit != 0);
}

/* Inspect the bin. It is likely to be non-empty */
victim = last (bin);

/* If a false alarm (empty bin), clear the bit. */
if (victim == bin)
{
av->binmap[block] = map &= ~bit; /* Write through */
bin = next_bin (bin);
bit <<= 1;
}

else
{
size = chunksize (victim);

/* We know the first chunk in this bin is big enough to use. */
assert ((unsigned long) (size) >= (unsigned long) (nb));

remainder_size = size - nb;

/* unlink */
unlink (av, victim, bck, fwd);

/* Exhaust */
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
}

/* Split */
else
{
remainder = chunk_at_offset (victim, nb);

/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
malloc_printerr ("malloc(): corrupted unsorted chunks 2");
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;

/* advertise as last remainder */
if (in_smallbin_range (nb))
av->last_remainder = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}

Checking the top chunk

use_top:
/*
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).

We require that av->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
*/

victim = av->top;
size = chunksize (victim);

if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
{
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
av->top = remainder;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);

check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}

/* When we are using atomic ops to free fast chunks we can get
here for all block sizes. */
else if (atomic_load_relaxed (&av->have_fastchunks))
{
malloc_consolidate (av);
/* restore original bin index */
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
else
idx = largebin_index (nb);
}

Calling sysmalloc

else
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}

Functions involved

alloc_perturb

static int perturb_byte;

static void
alloc_perturb (char *p, size_t n)
{
if (__glibc_unlikely (perturb_byte))
memset (p, perturb_byte ^ 0xff, n);
}

/* void *memset(void *s, int ch, size_t n);
   fills the n bytes starting at s with the byte ch and returns s.  */
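
perturb_byte is nonzero only when requested: it is set from the decimal value of the MALLOC_PERTURB_ environment variable, or via mallopt (M_PERTURB, value). A small demo under that assumption:

/* Run as: MALLOC_PERTURB_=170 ./demo
   170 is 0xAA, so alloc_perturb fills fresh memory with
   0xAA ^ 0xFF == 0x55, and this prints sixteen "55" bytes. */
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  unsigned char *p = malloc (16);
  if (p == NULL)
    return 1;
  for (int i = 0; i < 16; i++)
    printf ("%02x ", p[i]);
  putchar ('\n');
  free (p);
  return 0;
}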

assert()

#include <assert.h>
void assert( int expression );
/* assert evaluates the expression; if it is false (i.e. 0), it prints an
   error message to stderr and then terminates the program via abort().  */

bin_at

/* addressing -- note that bin_at(0) does not exist */
#define bin_at(m, i) \
(mbinptr) (((char *) &((m)->bins[((i) - 1) * 2])) \
- offsetof (struct malloc_chunk, fd))
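
Note that bin_at(m, i) does not point at a real chunk: it backs the address of bins[(i - 1) * 2] up by offsetof (struct malloc_chunk, fd), so that bins[(i - 1) * 2] and bins[(i - 1) * 2 + 1] overlay the fd and bk fields of a pseudo chunk header. That is why first(bin) and last(bin) below can be written simply as (b)->fd and (b)->bk.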

catomic_compare_and_exchange_val_acq()

catomic_compare_and_exchange_val_acq:
if *MEM equals OLDVAL, store NEWVAL into *MEM; the old value of *MEM is returned in either case.

/* Atomically store NEWVAL in *MEM if *MEM is equal to OLDVAL.
Return the old *MEM value. */
#ifndef catomic_compare_and_exchange_val_acq
# ifdef __arch_c_compare_and_exchange_val_32_acq
# define catomic_compare_and_exchange_val_acq(mem, newval, oldval) \
__atomic_val_bysize (__arch_c_compare_and_exchange_val,acq,    \
     mem, newval, oldval)
# else
# define catomic_compare_and_exchange_val_acq(mem, newval, oldval) \
atomic_compare_and_exchange_val_acq (mem, newval, oldval)
# endif
#endif

Related blog post:
https://blog.csdn.net/luozhaotian/article/details/80267895
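
Ignoring atomicity, the semantics can be sketched in plain C (an illustration only, not the real implementation):

/* Non-atomic sketch of compare-and-exchange-val semantics. */
static void *
cas_val_sketch (void **mem, void *newval, void *oldval)
{
  void *old = *mem;
  if (old == oldval)
    *mem = newval;
  return old;   /* old == oldval means the store happened */
}

This is why REMOVE_FB keeps retrying: the fastbin head is popped only if no other thread changed *fb between the read and the exchange.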

check_malloced_chunk

# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)

static void
do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
{
/* same as recycled case ... */
do_check_remalloced_chunk (av, p, s);

/*
... plus, must obey implementation invariant that prev_inuse is
always true of any allocated chunk; i.e., that each allocated
chunk borders either a previously allocated and still in-use
chunk, or the base of its memory arena. This is ensured
by making all allocations from the `lowest' part of any found
chunk. This does not necessarily hold however for chunks
recycled via fastbins.
*/

assert (prev_inuse (p));
}

check_remalloced_chunk ()

# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)

static void
do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
{
INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);

if (!chunk_is_mmapped (p))
{
assert (av == arena_for_chunk (p));
if (chunk_main_arena (p))
assert (av == &main_arena);
else
assert (av != &main_arena);
}

do_check_inuse_chunk (av, p);

/* Legal size ... */
assert ((sz & MALLOC_ALIGN_MASK) == 0);
assert ((unsigned long) (sz) >= MINSIZE);
/* ... and alignment */
assert (aligned_OK (chunk2mem (p)));
/* chunk is less than MINSIZE more than request */
assert ((long) (sz) - (long) (s) >= 0);
assert ((long) (sz) - (long) (s + MINSIZE) < 0);
}
/* chunk_is_mmapped tests whether the IS_MMAPPED bit is set; when it is,
   the arena checks above are skipped.
   do_check_inuse_chunk verifies the chunk is in use (the PREV_INUSE bit
   of the following chunk is set).  */

chunk2mem

#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
//converts the chunk header address to the user pointer
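
Its inverse, used by the assertions in __libc_malloc, is:

#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
//converts a user pointer back to the chunk header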

chunksize

#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
/* Get size, ignoring use bits */
#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
/* Like chunksize, but do not mask SIZE_BITS. */
#define chunksize_nomask(p) ((p)->mchunk_size)

//the flag bits are masked off

last()

#define last(b)   ((b)->bk)

largebin_index

#define largebin_index(sz) \
(SIZE_SZ == 8 ? largebin_index_64 (sz) \
: MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz) \
: largebin_index_32 (sz))
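
Roughly (per the bin-layout comment in malloc.c; the exact 64-bit cutoffs differ slightly), the large bins are spaced in geometrically growing steps -- 32 bins 64 bytes apart, 16 bins 512 bytes apart, 8 bins 4096 apart, 4 bins 32768 apart, 2 bins 262144 apart, and one final bin for everything larger -- which is why largebin_index shifts sz by different amounts depending on its magnitude.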

fastbin_index(sz)

#define fastbin_index(sz) \
  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
//computes the fastbin index
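
For example, on 64-bit (SIZE_SZ == 8) a 0x20-byte chunk maps to index (0x20 >> 4) - 2 == 0, a 0x30-byte chunk to index 1, and so on up to 0x80 at index 6.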

fastbin(ar_ptr, idx)

typedef struct malloc_chunk *mfastbinptr;
/* Fastbins */
mfastbinptr fastbinsY[NFASTBINS];
#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])

first()

#define first(b)   ((b)->fd)

set_inuse_bit_at_offset

#define set_inuse_bit_at_offset(p, s)					      \
(((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
//sets the PREV_INUSE bit of the physically adjacent next chunk

set_non_main_arena

#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)

smallbin_index

#define smallbin_index(sz) \
((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+ SMALLBIN_CORRECTION)
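
For example, on 64-bit (SMALLBIN_WIDTH == 16, SMALLBIN_CORRECTION == 0) a 0x20-byte chunk maps to bin index 0x20 >> 4 == 2, and the largest small chunk, 0x3f0 bytes, to index 63.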

unlink

/* Take a chunk off a bin list */
#define unlink(AV, P, BK, FD) { \
if (__builtin_expect (chunksize(P) != prev_size (next_chunk(P)), 0)) \
malloc_printerr ("corrupted size vs. prev_size"); \
FD = P->fd; \
BK = P->bk; \
if (__builtin_expect (FD->bk != P || BK->fd != P, 0)) \
malloc_printerr ("corrupted double-linked list"); \
else { \
FD->bk = BK; \
BK->fd = FD; \
if (!in_smallbin_range (chunksize_nomask (P)) \
&& __builtin_expect (P->fd_nextsize != NULL, 0)) { \
if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0) \
|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0)) \
malloc_printerr ("corrupted double-linked list (not small)"); \
if (FD->fd_nextsize == NULL) { \
if (P->fd_nextsize == P) \
FD->fd_nextsize = FD->bk_nextsize = FD; \
else { \
FD->fd_nextsize = P->fd_nextsize; \
FD->bk_nextsize = P->bk_nextsize; \
P->fd_nextsize->bk_nextsize = FD; \
P->bk_nextsize->fd_nextsize = FD; \
} \
} else { \
P->fd_nextsize->bk_nextsize = P->bk_nextsize; \
P->bk_nextsize->fd_nextsize = P->fd_nextsize; \
} \
} \
} \
}
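
The FD->bk != P || BK->fd != P test is the classic "safe unlinking" check: a forged chunk whose fd and bk do not point back at itself aborts the program instead of producing the arbitrary write that old unlink exploits relied on, so an attacker must arrange for fake->fd->bk == fake and fake->bk->fd == fake.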

__glibc_likely

#if __GNUC__ >= 3
# define __glibc_unlikely(cond) __builtin_expect ((cond), 0)
# define __glibc_likely(cond) __builtin_expect ((cond), 1)
#else
# define __glibc_unlikely(cond) (cond)
# define __glibc_likely(cond) (cond)
#endif

(For a first reading, simply treat the return value as the value of cond.)
With the likely/unlikely macros the programmer predicts the outcome of cond, letting the compiler lay out the branch more favorably in the generated code.
Prototype: long __builtin_expect (long EXP, long C). The return value is the value of EXP; C is a constant (0 or 1), and the call semantically expresses the expectation that EXP == C.

Related blog post:
https://turingsec.github.io/builtin_expect/

Variables involved

SINGLE_THREAD_P

/* For SINGLE_THREAD_P.  */
#include <sysdep-cancel.h>
//defined in <sysdep-cancel.h>
It evaluates to true (1) while the process is single-threaded, and to false (0) once additional threads have been created.
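
For reference -- an assumption based on the NPTL headers, since the exact form varies by architecture and glibc version -- on Linux it boils down to a check of the thread descriptor's multiple_threads flag:

/* Sketch of the usual Linux/NPTL definition (varies by port). */
#define SINGLE_THREAD_P \
  (THREAD_GETMEM (THREAD_SELF, header.multiple_threads) == 0)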

Related structures

malloc_chunk

struct malloc_chunk {

INTERNAL_SIZE_T mchunk_prev_size; /* Size of previous chunk (if free). */
INTERNAL_SIZE_T mchunk_size; /* Size in bytes, including overhead. */

struct malloc_chunk* fd; /* double links -- used only if free. */
struct malloc_chunk* bk;

/* Only used for large blocks: pointer to next larger size. */
struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
struct malloc_chunk* bk_nextsize;
};
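
The resulting memory layout (the standard ptmalloc picture; the fd/bk words hold user data while the chunk is allocated):

chunk-> +-----------------------------------+
        | mchunk_prev_size (valid only if   |
        |   the previous chunk is free)     |
        +-----------------------------------+
        | mchunk_size             | A|M|P   |
mem->   +-----------------------------------+
        | fd          (user data if in use) |
        +-----------------------------------+
        | bk          (user data if in use) |
        +-----------------------------------+
        | fd_nextsize (free large chunks)   |
        +-----------------------------------+
        | bk_nextsize (free large chunks)   |
        +-----------------------------------+

A = NON_MAIN_ARENA, M = IS_MMAPPED, P = PREV_INUSE.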

malloc_state

  • This structure manages a heap: it records the current state of the memory an arena has obtained, such as whether there are free chunks and of what sizes. Every arena, whether a thread arena or the main arena, has exactly one malloc_state structure. Since a thread may own multiple arenas, the malloc_state structure resides in the most recently requested arena.
  • The malloc_state of the main arena is not part of any heap segment; it is a global variable stored in the data segment of libc.so.
  • __libc_lock_define(, mutex);

    This field serializes access to the arena: once one thread has acquired the arena, any other thread that wants to use it must wait until that thread finishes its allocation.

  • flags

    flags records properties of the arena, for example bit0 records whether the arena has fast bin chunks, and bit1 whether the arena can return contiguous virtual address space.

  • fastbinsY[NFASTBINS]
    Holds the head pointer of each fast-chunk list.

  • top
    Points to the arena's top chunk.

  • last_remainder
    The leftover piece from the most recent chunk split.

  • bins
    Stores the chunk lists of the unsorted bin, small bins and large bins.

  • binmap
    ptmalloc uses one bit per bin to record whether that bin contains free chunks.

struct malloc_state
{
/* Serialize access. */
__libc_lock_define (, mutex);

/* Flags (formerly in max_fast). */
int flags;

/* Set if the fastbin chunks contain recently inserted free blocks. */
/* Note this is a bool but not all targets support atomics on booleans. */
int have_fastchunks;

/* Fastbins */
mfastbinptr fastbinsY[NFASTBINS];

/* Base of the topmost chunk -- not otherwise kept in a bin */
mchunkptr top;

/* The remainder from the most recent split of a small request */
mchunkptr last_remainder;

/* Normal bins packed as described above */
mchunkptr bins[NBINS * 2 - 2];

/* Bitmap of bins */
unsigned int binmap[BINMAPSIZE];

/* Linked list */
struct malloc_state *next;

/* Linked list for free arenas. Access to this field is serialized
by free_list_lock in arena.c. */
struct malloc_state *next_free;

/* Number of threads attached to this arena. 0 if the arena is on
the free list. Access to this field is serialized by
free_list_lock in arena.c. */
INTERNAL_SIZE_T attached_threads;

/* Memory allocated from the system in this arena. */
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;
};
#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
#define NBINS 128
#define NSMALLBINS 64

Flow overview

The concrete steps ptmalloc takes to satisfy an allocation request:

1) Acquire the arena lock. To prevent several threads from working on the same arena at once, a lock must be taken before allocating. The thread first checks whether its thread-local data already holds an arena; if so it tries to lock it and, on success, allocates from it. Otherwise it walks the circular list of arenas looking for an unlocked one. If every arena is locked, ptmalloc creates a new arena, adds it to the global arena list and the thread-local data, locks it, and allocates from it. A newly created arena is always a non-main arena, because the main arena is inherited from the parent process; creating a non-main arena calls mmap() to build a sub-heap and sets up its top chunk.

2) Convert the user's request size into the actual chunk size to allocate; see the request2size macro. (malloc.c:3332)

3) Check whether the required chunk satisfies chunk_size <= max_fast (max_fast defaults to 64B). If so, continue with the next step; otherwise jump to step 5. (malloc.c:3340)

4) Try to find a chunk of the required size in the fastbins. If one is found, allocation is done; otherwise continue. (malloc.c:3340)

5) Check whether the size falls in the small bins, i.e. whether chunk_size < 512B. If so, continue with the next step; otherwise go to step 7. (malloc.c:3377)

6) From the chunk size, locate the corresponding small bin and take a chunk of exactly that size from the tail of that bin. On success allocation is done; otherwise go to step 8. (malloc.c:3377)

7) Reaching this step means a large block is being allocated, so ptmalloc first walks the fastbins, merges adjacent free chunks, and links the results into the unsorted bin. The fastbin merging is handled by malloc_consolidate. (malloc.c:3421)

8) Walk the chunks of the unsorted bin. If the request is for a small chunk, the unsorted bin holds only one chunk, that chunk was used in the previous allocation (i.e. it is last_remainder), and its size is greater than (requested size + MINSIZE), then split that chunk directly and finish. Otherwise keep walking: if some chunk's size exactly equals the required size, the cache hits and allocation finishes; otherwise each traversed chunk is filed into the small bin or large bin matching its size. When the walk is complete, continue with the next step. (malloc.c:3442)

9) Reaching this step means a large block is being allocated and the fastbins and unsorted bin have been completely emptied. Search the large bins by the "smallest-first, best-fit" rule (the smallest chunk whose size is greater than or equal to the required size), split a chunk of suitable size out of it, and put the remainder into the unsorted bin. On success allocation is done; otherwise continue with the next step. (malloc.c:3576)

10) Reaching this step means no suitable chunk was found in the corresponding bin, whether small bin or large bin. For a small bin, if no bin of the required size is populated, the only option is idx+1. For a large bin, the previous step may fail to find a chunk to split because large bins are widely spaced: if the large bin at the current idx holds a single chunk smaller than the required size, nothing fits, so again idx+1. In both cases the bitmap is then used to find the first non-empty bin after idx; a chunk found there can always be split, or handed out whole when the remainder would be smaller than MINSIZE. (malloc.c:3649)

11) If still no suitable chunk has been found, fall back to the top chunk. If the top chunk is large enough for the required chunk, split a piece off of it; otherwise continue with the next step. (malloc.c:3749)

12) Reaching this step means even the top chunk cannot satisfy the request, which leaves two options: if this is the main arena, call sbrk() to grow the top chunk; if it is a non-main arena, call mmap to allocate a new sub-heap and grow the top chunk; or allocate directly with mmap(). The chunk size decides which method is used: if the required chunk size is greater than or equal to the mmap allocation threshold, continue with the next step and allocate via mmap; otherwise jump to step 14 and grow the top chunk. (malloc.c:3800)

13) Use the mmap system call to map a region of chunk_size rounded up to 4KB into the program's address space, then return the memory pointer to the user.

14) Check whether this is the first call to malloc. For the main arena an initialization pass is required, allocating a region of (chunk_size + 128KB) rounded up to 4KB as the initial heap. If initialization has already happened, the main arena grows the heap with sbrk(), while a non-main arena splits a chunk out of its top chunk to satisfy the request, and the memory pointer is returned to the user.
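
For reference, the request2size conversion mentioned in step 2 (glibc 2.27):

/* pad request bytes into a usable size -- internal version */
#define request2size(req)                                         \
  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
   MINSIZE :                                                      \
   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

On x86-64 (SIZE_SZ == 8, MALLOC_ALIGN_MASK == 15) this turns malloc(24) into a 0x20-byte chunk and malloc(25) into a 0x30-byte chunk.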

Diagram

[Figure: malloc流程.png -- flow chart of the full malloc path]

Author: zzl
Link: https://www.zzl14.xyz/2020/04/13/malloc流程/
License: Unless otherwise noted, all posts on this blog are licensed under CC BY-NC-SA 4.0. Please credit "try" when reposting.