本文主题是通过dlv调试工具单步调试GoLang源码map数据结构的实现原理,加深对map的理解和运用。 Golang中map是一种kv存储结构,底层基于hash的实现;
Delve Debugger
Version: 1.8.2
Build: $Id: dbb493ec14d1e7753504d016b1e1ef1665b75b16 $
go version go1.17.10 darwin/amd64
package main
import "fmt"
func main() {
m := write()
fmt.Println(m["a"])
delete(m, "a")
}
func write() map[string]int {
m := make(map[string]int)
m["a"] = 1
m["b"] = 2
m["c"] = 2
return m
}
$ dlv debug main.go
(dlv) b main.main
Breakpoint 1 set at 0x10ac8aa for main.main() ./main.go:5
(dlv) c
> main.main() ./main.go:5 (hits goroutine(1):1 total:1) (PC: 0x10ac8aa)
1: package main
2:
3: import "fmt"
4:
=> 5: func main() {
6: m := write()
7: fmt.Println(m["a"])
8: delete(m, "a")
9: }
10:
(dlv) s
> main.write() ./main.go:11 (PC: 0x10ac9aa)
6: m := write()
7: fmt.Println(m["a"])
8: delete(m, "a")
9: }
10:
=> 11: func write() map[string]int {
12: m := make(map[string]int)
13: m["a"] = 1
14: m["b"] = 2
15: m["c"] = 2
16: return m
(dlv) disass
TEXT main.write(SB) /Users/sun/work/workhelp/map/main.go
main.go:11 0x10ac9a0 493b6610 cmp rsp, qword ptr [r14+0x10]
main.go:11 0x10ac9a4 0f86b4000000 jbe 0x10aca5e
main.go:11 0x10ac9aa 4883ec50 sub rsp, 0x50
main.go:11 0x10ac9ae 48896c2448 mov qword ptr [rsp+0x48], rbp
main.go:11 0x10ac9b3 488d6c2448 lea rbp, ptr [rsp+0x48]
main.go:11 0x10ac9b8 48c744242000000000 mov qword ptr [rsp+0x20], 0x0
=> main.go:12 0x10ac9c1 e81a07f6ff call $runtime.makemap_small
main.go:12 0x10ac9c6 4889442428 mov qword ptr [rsp+0x28], rax
main.go:13 0x10ac9cb 4889c3 mov rbx, rax
main.go:13 0x10ac9ce 488d0d237d0100 lea rcx, ptr [rip+0x17d23]
main.go:13 0x10ac9d5 bf01000000 mov edi, 0x1
main.go:13 0x10ac9da 488d057fa60000 lea rax, ptr [rip+0xa67f]
main.go:13 0x10ac9e1 e8fa4ff6ff call $runtime.mapassign_faststr
main.go:13 0x10ac9e6 4889442440 mov qword ptr [rsp+0x40], rax
main.go:13 0x10ac9eb 8400 test byte ptr [rax], al
main.go:13 0x10ac9ed 48c70001000000 mov qword ptr [rax], 0x1
main.go:14 0x10ac9f4 488b5c2428 mov rbx, qword ptr [rsp+0x28]
main.go:14 0x10ac9f9 488d0560a60000 lea rax, ptr [rip+0xa660]
main.go:14 0x10aca00 488d0df27c0100 lea rcx, ptr [rip+0x17cf2]
main.go:14 0x10aca07 bf01000000 mov edi, 0x1
main.go:14 0x10aca0c e8cf4ff6ff call $runtime.mapassign_faststr
main.go:14 0x10aca11 4889442438 mov qword ptr [rsp+0x38], rax
main.go:14 0x10aca16 8400 test byte ptr [rax], al
main.go:14 0x10aca18 48c70002000000 mov qword ptr [rax], 0x2
main.go:15 0x10aca1f 488b5c2428 mov rbx, qword ptr [rsp+0x28]
main.go:15 0x10aca24 488d0535a60000 lea rax, ptr [rip+0xa635]
main.go:15 0x10aca2b 488d0dc87c0100 lea rcx, ptr [rip+0x17cc8]
main.go:15 0x10aca32 bf01000000 mov edi, 0x1
main.go:15 0x10aca37 e8a44ff6ff call $runtime.mapassign_faststr
main.go:15 0x10aca3c 4889442430 mov qword ptr [rsp+0x30], rax
main.go:15 0x10aca41 8400 test byte ptr [rax], al
main.go:15 0x10aca43 48c70002000000 mov qword ptr [rax], 0x2
main.go:16 0x10aca4a 488b442428 mov rax, qword ptr [rsp+0x28]
main.go:16 0x10aca4f 4889442420 mov qword ptr [rsp+0x20], rax
main.go:16 0x10aca54 488b6c2448 mov rbp, qword ptr [rsp+0x48]
main.go:16 0x10aca59 4883c450 add rsp, 0x50
main.go:16 0x10aca5d c3 ret
main.go:11 0x10aca5e 6690 data16 nop
main.go:11 0x10aca60 e8bb1cfbff call $runtime.morestack_noctxt
.:0 0x10aca65 e936ffffff jmp $main.write
(dlv) si
> runtime.makemap_small() /usr/local/Cellar/go/1.17/src/runtime/map.go:292 (PC: 0x100d0e0)
Warning: debugging optimized function
287: }
288:
289: // makemap_small implements Go map creation for make(map[k]v) and
290: // make(map[k]v, hint) when hint is known to be at most bucketCnt
291: // at compile time and the map needs to be allocated on the heap.
=> 292: func makemap_small() *hmap {
293: h := new(hmap)
294: h.hash0 = fastrand()
295: return h
296: }
跟踪到这里我们可以发现,map 经编译后底层结构是hmap; makemap_small和makemap函数都是创建hmap 的入口,区别是初始化的属性数量差异,makemap_small 只设置了 hash0属性,makemap 初始化大部分属性。 hmap 的结构定义如下:
(dlv) p h
*runtime.hmap {
count: 0, // 元素数量
flags: 0,
B: 0, // 桶的个数 2^B
noverflow: 0, // 溢出桶个数 hash 冲突产品
hash0: 3724950317, // hash seed
buckets: unsafe.Pointer(0x0), // buckets 数组指针
oldbuckets: unsafe.Pointer(0x0), // 扩容时赋值的buckets数组
nevacuate: 0, // 搬迁进度
extra: *runtime.mapextra nil, // 扩容的指针
}
(dlv) si
=> 202: func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {
203: if h == nil {
204: panic(plainError("assignment to entry in nil map"))
205: }
mapassign_faststr 首先检验 h 是否是nil, 由上文可知,h = makemap_small, h != nil
=> 210: if h.flags&hashWriting != 0 {
211: throw("concurrent map writes")
212: }
map 检查 h.flags 是否已经存在goroutine写入操作,如果存在就panic; 由此可见 第一点 map 是并发不安全的, 第二点 map 是有状态的,由h.flags 管理。
213: key := stringStructOf(&s)
214: hash := t.hasher(noescape(unsafe.Pointer(&s)), uintptr(h.hash0))
215:
216: // Set hashWriting after calling t.hasher for consistency with mapassign.
=> 217: h.flags ^= hashWriting
218:
219: if h.buckets == nil {
220: h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
221: }
222:
(dlv) p key
*runtime.stringStruct {str: unsafe.Pointer(0x10c46f8), len: 1}
(dlv) p hash
11401214700741223085
(dlv)
通过这段逻辑 可知 hmap 对 key 进行处理 并生成 hash 值; 同时处理 h.buckets; 对此 我们可能疑惑 hmap 是如何完成 kv 存储,hash 冲突处理,存储扩容,垃圾回收等功能的; 我们先往下看,稍后作出解答
224: bucket := hash & bucketMask(h.B)
225: if h.growing() {
226: growWork_faststr(t, h, bucket)
227: }
228: b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
=> 229: top := tophash(hash)
这里 可以看到一个新的数据结构 bmap; bmap 存储的具体结构;结合源代码仔细理解一下,
func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {
// h map 指针,s key
if h == nil {
panic(plainError("assignment to entry in nil map"))
}
// 竞态检查
if raceenabled {
callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr))
}
//检查是否是并发写入
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
}
//runtime.stringStruct 包装一下s, 应该对齐key; *runtime.stringStruct {str: unsafe.Pointer(0x10c46f9), len: 1}
key := stringStructOf(&s)
//根据hash seead 和key 生成hash值; 15254809650390487842
hash := t.hasher(noescape(unsafe.Pointer(&s)), uintptr(h.hash0))
// Set hashWriting after calling t.hasher for consistency with mapassign.
//设置状态值,表示写入
h.flags ^= hashWriting
//没有存储数据空间则申请, makemap_small 创建出来的hmap中 buckets 为nil
if h.buckets == nil {
h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
}
again:
bucket := hash & bucketMask(h.B)
if h.growing() {
growWork_faststr(t, h, bucket)
}
b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
// 211 = 15254809650390487842 的高8位
top := tophash(hash)
var insertb *bmap
var inserti uintptr
var insertk unsafe.Pointer
bucketloop:
for {
// bucketCnt 常量8; 判断bmap中top等于211的值
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
if isEmpty(b.tophash[i]) && insertb == nil {
insertb = b
inserti = i
}
if b.tophash[i] == emptyRest {
break bucketloop
}
continue
}
// 计算k 的地址
k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
if k.len != key.len {
continue
}
// 判断key 是否存在且一致
if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) {
continue
}
// already have a mapping for key. Update it.
inserti = i
insertb = b
// Overwrite existing key, so it can be garbage collected.
// The size is already guaranteed to be set correctly.
k.str = key.str
goto done
}
//如果不在当前bmap中,那么从overflow中继续查找
ovf := b.overflow(t)
if ovf == nil {
break
}
b = ovf
}
// Did not find mapping for key. Allocate new cell & add entry.
// If we hit the max load factor or we have too many overflow buckets,
// and we're not already in the middle of growing, start growing.
// 扩容的2个条件,满足任何一个都将进行扩容操作; 负载因子 和 overflow 桶数量
if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
hashGrow(t, h)
goto again // Growing the table invalidates everything, so try again
}
// 没有容纳空间,申请overflow 桶
if insertb == nil {
// The current bucket and all the overflow buckets connected to it are full, allocate a new one.
insertb = h.newoverflow(t, b)
inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks
//插入key
insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize)
// store new key at insert position
*((*stringStruct)(insertk)) = *key
h.count++
done:
// 插入value
elem := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.elemsize))
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
// 去掉写标记
h.flags &^= hashWriting
return elem
}
总结一下map 的赋值操作, map 根据key的类型选择不同处理函数,编译阶段确定的; 第二步 对key进行hash 操作,将hash值的高8位 与bmap中tophash 比对,查询是否存在且类型和值是一致的; 第三步 就是查询可设置内容的位置,如果当前bmap 没有可用的,那么去overflow中查询;
5: func main() {
6: m := write()
7: fmt.Println(m["a"])
=> 8: delete(m, "a")
9: }
赋值了解之后,看下删除是如何设置的; 在delete(m, “a”) si 命令进入底层函数 mapdelete_faststr
func mapdelete_faststr(t *maptype, h *hmap, ky string) {
//竞态检查
if raceenabled && h != nil {
callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr))
}
// h 等于nil 或者 没有元素时就不需要 再删除了
if h == nil || h.count == 0 {
return
}
// 删除时不允许 并发写
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
}
// key hash
key := stringStructOf(&ky)
hash := t.hasher(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
// Set hashWriting after calling t.hasher for consistency with mapdelete
h.flags ^= hashWriting
bucket := hash & bucketMask(h.B)
if h.growing() {
growWork_faststr(t, h, bucket)
}
b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
bOrig := b
// 高8位
top := tophash(hash)
search:
// 从当前bmap 寻找,否则从overflow 寻找
for ; b != nil; b = b.overflow(t) {
for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) {
continue
}
// Clear key's pointer.
k.str = nil
e := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize))
if t.elem.ptrdata != 0 {
memclrHasPointers(e, t.elem.size)
} else {
memclrNoHeapPointers(e, t.elem.size)
}
// 标记空
b.tophash[i] = emptyOne
// If the bucket now ends in a bunch of emptyOne states,
// change those to emptyRest states.
if i == bucketCnt-1 {
if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest {
goto notLast
}
} else {
if b.tophash[i+1] != emptyRest {
goto notLast
}
}
for {
b.tophash[i] = emptyRest
if i == 0 {
if b == bOrig {
break // beginning of initial bucket, we're done.
}
// Find previous bucket, continue at its last entry.
c := b
for b = bOrig; b.overflow(t) != c; b = b.overflow(t) {
}
i = bucketCnt - 1
} else {
i--
}
if b.tophash[i] != emptyOne {
break
}
}
notLast:
h.count--
// Reset the hash seed to make it more difficult for attackers to
// repeatedly trigger hash collisions. See issue 25237.
if h.count == 0 {
h.hash0 = fastrand()
}
break search
}
}
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
h.flags &^= hashWriting
}
总结一下, map 删除也是并发不安全的; 删除时 进行相同的查询逻辑,当找到key时 会置空标志;
[1] https://yangxikun.com/golang/2019/10/07/golang-map.html
[2] https://github.com/cch123/golang-notes/blob/master/map.md