• golang的垃圾回收算法之十一Stack的处理


    一、说明

    在前面分析完成了堆内存的处理,其实栈内存的处理在Golang中也是很重要的。学过编程的都知道,在软件开发中,基本上控制的就两种内存,一个是堆,一个是栈。对于上层应用来说,堆的处理比较麻烦,但栈基本上不用费啥心思。只要注意着不要超出栈的大小就可以了。
    但在实际的内存管理中,栈的管理却并不轻松,因为栈是需要系统自己控制的,这就需要一系列的控制方式、算法和流程。

    二、栈的分配

    栈在哪里?问这个问题看似简单,但是估计很多人不知道。大多的软件开发人员,估计只知道哪些是分配在栈上,比如函数内的临时变量,函数参数等。但栈在哪里,可能就晕了。栈在哪里呢?一般来说,进程和线程都拥有自己的栈,换句话说,栈基本都在进程或者线程上。明白了这个,就意味着明白了栈空间的创建一定是随着线程的创建而产生的。在golang语言中,重点是协程,所以协程在创建之初就要把相关的栈创建出来(Golang是有栈协程,无栈协程是公用系统的)。看一下相关的代码:

    // newproc1 creates a new goroutine running fn, with narg bytes of
    // arguments starting at argp and nret bytes of results, and returns
    // the new g. callerpc is the PC of the go statement that created it.
    // (Excerpt: the "......" lines mark code elided from the original
    // runtime source.)
    func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr) *g {
    	_g_ := getg()
    
                    ......
    
    	// We could allocate a larger initial stack if necessary.
    	// Not worth it: this is almost always an error.
    	// 4*sizeof(uintreg): extra space added below
    	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
    	if siz >= _StackMin-4*sys.RegSize-sys.RegSize {
    		throw("newproc: function arguments too large for new goroutine")
    	}
    
    	_p_ := _g_.m.p.ptr()
    	newg := gfget(_p_)
    	if newg == nil {
                                    // No free g in the P's cache: this is where the
                                    // stack gets allocated (inside malg).
    		newg = malg(_StackMin)
    		casgstatus(newg, _Gidle, _Gdead)
    		newg.gcRescan = -1
    		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
    	}
    	if newg.stack.hi == 0 {
    		throw("newproc1: newg missing stack")
    	}
    
                    ......
    	_g_.m.locks--
    	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
    		_g_.stackguard0 = stackPreempt
    	}
    	return newg
    }
    // Allocate a new g, with a stack big enough for stacksize bytes.
    // A negative stacksize yields a g with no stack (used for g0/gsignal
    // placeholders whose stacks come from elsewhere).
    func malg(stacksize int32) *g {
    	newg := new(g)
    	if stacksize >= 0 {
    		// Add the OS-specific guard area, then round up to a power of 2.
    		stacksize = round2(_StackSystem + stacksize)
    		// stackalloc must run on the system stack (see its //go:systemstack note).
    		systemstack(func() {
    			newg.stack, newg.stkbar = stackalloc(uint32(stacksize))
    		})
    		// stackguard0 is the limit that triggers the stack-grow check.
    		newg.stackguard0 = newg.stack.lo + _StackGuard
    		// NOTE(review): stackguard1 set to ^0 here — presumably makes the
    		// C stack check always fail for a user g; confirm against runtime docs.
    		newg.stackguard1 = ^uintptr(0)
    		newg.stackAlloc = uintptr(stacksize)
    	}
    	return newg
    }
    

    _StackMin=2048个字节,它用来创建默认的栈空间。在malg这个函数中也可以看到,新的G的栈以 stack.lo + _StackGuard 作为触发扩容检查的基点。下面看一下相关的栈的应用的代码,先看一下相关的数据结构和原始数据的定义:

    const (
    	// StackSystem is a number of additional bytes to add
    	// to each stack below the usual guard area for OS-specific
    	// purposes like signal handling. Used on Windows, Plan 9,
    	// and Darwin/ARM because they do not use a separate stack.
    	_StackSystem = sys.GoosWindows*512*sys.PtrSize + sys.GoosPlan9*512 + sys.GoosDarwin*sys.GoarchArm*1024
    
    	// The minimum size of stack used by Go code
    	_StackMin = 2048
    
    	// The minimum stack size to allocate.
    	// The hackery here rounds FixedStack0 up to a power of 2.
    	// The shift-and-or cascade smears the highest set bit downward
    	// so that _FixedStack6 is all-ones below that bit; +1 then gives
    	// the next power of 2 (a compile-time round2).
    	_FixedStack0 = _StackMin + _StackSystem
    	_FixedStack1 = _FixedStack0 - 1
    	_FixedStack2 = _FixedStack1 | (_FixedStack1 >> 1)
    	_FixedStack3 = _FixedStack2 | (_FixedStack2 >> 2)
    	_FixedStack4 = _FixedStack3 | (_FixedStack3 >> 4)
    	_FixedStack5 = _FixedStack4 | (_FixedStack4 >> 8)
    	_FixedStack6 = _FixedStack5 | (_FixedStack5 >> 16)
    	_FixedStack  = _FixedStack6 + 1
    
    	// Functions that need frames bigger than this use an extra
    	// instruction to do the stack split check, to avoid overflow
    	// in case SP - framesize wraps below zero.
    	// This value can be no bigger than the size of the unmapped
    	// space at zero.
    	_StackBig = 4096
    
    	// The stack guard is a pointer this many bytes above the
    	// bottom of the stack.
    	_StackGuard = 880*sys.StackGuardMultiplier + _StackSystem
    
    	// After a stack split check the SP is allowed to be this
    	// many bytes below the stack guard. This saves an instruction
    	// in the checking sequence for tiny frames.
    	_StackSmall = 128
    
    	// The maximum number of bytes that a chain of NOSPLIT
    	// functions can use.
    	_StackLimit = _StackGuard - _StackSystem - _StackSmall
    )
    
    // Goroutine preemption request.
    // Stored into g->stackguard0 to cause split stack check failure.
    // Must be greater than any real sp.
    // 0xfffffade in hex.
    // (These duplicate the unexported stackPreempt/stackFork values
    // declared in the const block further below.)
    const (
    	_StackPreempt = uintptrMask & -1314
    	_StackFork    = uintptrMask & -1234
    )
    
    const (
    	// stackDebug == 0: no logging
    	//            == 1: logging of per-stack operations
    	//            == 2: logging of per-frame operations
    	//            == 3: logging of per-word updates
    	//            == 4: logging of per-word reads
    	stackDebug       = 0
    	stackFromSystem  = 0 // allocate stacks from system memory instead of the heap
    	stackFaultOnFree = 0 // old stacks are mapped noaccess to detect use after free
    	stackPoisonCopy  = 0 // fill stack that should not be accessed with garbage, to detect bad dereferences during copy
    
    	// stackCache != 0 enables the per-P small-stack cache used by
    	// stackalloc/stackfree; 0 forces the global pool path.
    	stackCache = 1
    
    	// check the BP links during traceback.
    	debugCheckBP = false
    )
    
    const (
    	// uintptrMask is an all-ones value of pointer width, used to
    	// truncate the negative sentinels below to a uintptr.
    	uintptrMask = 1<<(8*sys.PtrSize) - 1
    
    	// Goroutine preemption request.
    	// Stored into g->stackguard0 to cause split stack check failure.
    	// Must be greater than any real sp.
    	// 0xfffffade in hex.
    	stackPreempt = uintptrMask & -1314
    
    	// Thread is forking.
    	// Stored into g->stackguard0 to cause split stack check failure.
    	// Must be greater than any real sp.
    	stackFork = uintptrMask & -1234
    )
    
    // Global pool of spans that have free stacks.
    // Stacks are assigned an order according to size.
    //     order = log_2(size/FixedStack)
    // There is a free list for each order.
    // TODO: one lock per order?
    var stackpool [_NumStackOrders]mSpanList
    var stackpoolmu mutex // guards stackpool
    
    // Global pool of large stack spans.
    var stackLarge struct {
    	lock mutex
    	free [_MHeapMap_Bits]mSpanList // free lists by log_2(s.npages)
    }
    

    再看一下栈的初始化:

    // stackinit initializes the small-stack pool (stackpool) and the
    // large-stack pool (stackLarge) free lists at runtime startup.
    func stackinit() {
    	if _StackCacheSize&_PageMask != 0 {
    		throw("cache size must be a multiple of page size")
    	}
    	for i := range stackpool {
    		stackpool[i].init()
    	}
    	for i := range stackLarge.free {
    		stackLarge.free[i].init()
    	}
    }
    

    比较简单,除了判断大小的异常,就是对栈的两种情况(小栈池和大栈池)分别进行初始化。进入正题,栈的分配的代码:

    // stackalloc allocates an n byte stack.
    //
    // stackalloc must run on the system stack because it uses per-P
    // resources and must not split the stack.
    //
    //go:systemstack
    func stackalloc(n uint32) (stack, []stkbar) {
    	// Stackalloc must be called on scheduler stack, so that we
    	// never try to grow the stack during the code that stackalloc runs.
    	// Doing so would cause a deadlock (issue 1547).
    	thisg := getg()
    	if thisg != thisg.m.g0 {
    		throw("stackalloc not on scheduler stack")
    	}
    	if n&(n-1) != 0 {
    		throw("stack size not a power of 2")
    	}
    	if stackDebug >= 1 {
    		print("stackalloc ", n, "\n")
    	}
    
    	// Compute the size of stack barrier array.
    	// The barrier array is carved out of the top (high addresses)
    	// of the allocation; the usable stack ends at n - nstkbar.
    	maxstkbar := gcMaxStackBarriers(int(n))
    	nstkbar := unsafe.Sizeof(stkbar{}) * uintptr(maxstkbar)
    	var stkbarSlice slice
    
    	// Debug modes bypass the pools and allocate straight from the OS.
    	if debug.efence != 0 || stackFromSystem != 0 {
    		v := sysAlloc(round(uintptr(n), _PageSize), &memstats.stacks_sys)
    		if v == nil {
    			throw("out of memory (stackalloc)")
    		}
    		top := uintptr(n) - nstkbar
    		if maxstkbar != 0 {
    			stkbarSlice = slice{add(v, top), 0, maxstkbar}
    		}
    		return stack{uintptr(v), uintptr(v) + top}, *(*[]stkbar)(unsafe.Pointer(&stkbarSlice))
    	}
    
    	// Small stacks are allocated with a fixed-size free-list allocator.
    	// If we need a stack of a bigger size, we fall back on allocating
    	// a dedicated span.
    	var v unsafe.Pointer
    	if stackCache != 0 && n < _FixedStack<<_NumStackOrders && n < _StackCacheSize {
    		// order = log2(n / _FixedStack), the index into the size classes.
    		order := uint8(0)
    		n2 := n
    		for n2 > _FixedStack {
    			order++
    			n2 >>= 1
    		}
    		var x gclinkptr
    		c := thisg.m.mcache
    		if c == nil || thisg.m.preemptoff != "" || thisg.m.helpgc != 0 {
    			// c == nil can happen in the guts of exitsyscall or
    			// procresize. Just get a stack from the global pool.
    			// Also don't touch stackcache during gc
    			// as it's flushed concurrently.
    			lock(&stackpoolmu)
    			x = stackpoolalloc(order)
    			unlock(&stackpoolmu)
    		} else {
    			// Fast path: pop from the per-P cache, refilling it
    			// from the global pool when empty.
    			x = c.stackcache[order].list
    			if x.ptr() == nil {
    				stackcacherefill(c, order)
    				x = c.stackcache[order].list
    			}
    			c.stackcache[order].list = x.ptr().next
    			c.stackcache[order].size -= uintptr(n)
    		}
    		v = unsafe.Pointer(x)
    	} else {
    		var s *mspan
    		npage := uintptr(n) >> _PageShift
    		log2npage := stacklog2(npage)
    
    		// Try to get a stack from the large stack cache.
    		lock(&stackLarge.lock)
    		if !stackLarge.free[log2npage].isEmpty() {
    			s = stackLarge.free[log2npage].first
    			stackLarge.free[log2npage].remove(s)
    		}
    		unlock(&stackLarge.lock)
    
    		if s == nil {
    			// Allocate a new stack from the heap.
    			s = mheap_.allocStack(npage)
    			if s == nil {
    				throw("out of memory")
    			}
    		}
    		v = unsafe.Pointer(s.base())
    	}
    
    	if raceenabled {
    		racemalloc(v, uintptr(n))
    	}
    	if msanenabled {
    		msanmalloc(v, uintptr(n))
    	}
    	if stackDebug >= 1 {
    		print("  allocated ", v, "\n")
    	}
    	top := uintptr(n) - nstkbar
    	if maxstkbar != 0 {
    		stkbarSlice = slice{add(v, top), 0, maxstkbar}
    	}
    	return stack{uintptr(v), uintptr(v) + top}, *(*[]stkbar)(unsafe.Pointer(&stkbarSlice))
    }
    

    上面的注释比较清晰,先拿到当前G,并计算分配的大小,如果是小于32K的直接从Freelist中分配,否则看一下大内存栈是否还有空闲,没有重新使用Span分配(mheap_.allocStack(npage))。此函数里面调用了两个函数,第一个是:

    // Allocates a stack from the free pool. Must be called with
    // stackpoolmu held.
    func stackpoolalloc(order uint8) gclinkptr {
    	list := &stackpool[order]
    	s := list.first
    	if s == nil {
    		// no free stacks. Allocate another span worth.
    		s = mheap_.allocStack(_StackCacheSize >> _PageShift)
    		if s == nil {
    			throw("out of memory")
    		}
    		if s.allocCount != 0 {
    			throw("bad allocCount")
    		}
    		if s.stackfreelist.ptr() != nil {
    			throw("bad stackfreelist")
    		}
    		// Carve the new span into fixed-size stacks of this order
    		// and thread them onto the span's free list.
    		for i := uintptr(0); i < _StackCacheSize; i += _FixedStack << order {
    			x := gclinkptr(s.base() + i)
    			x.ptr().next = s.stackfreelist
    			s.stackfreelist = x
    		}
    		list.insert(s)
    	}
    	x := s.stackfreelist
    	if x.ptr() == nil {
    		throw("span has no free stacks")
    	}
    	s.stackfreelist = x.ptr().next
    	s.allocCount++
    	if s.stackfreelist.ptr() == nil {
    		// all stacks in s are allocated.
    		list.remove(s)
    	}
    	return x
    }
    

    代码同样很简单,首先判断空闲链表是否还有对象,如果没有则回到Heap中创建并插入到链表中去。然后从中取第一个中的空闲返回同时统计分配的内存块数。
    第二个调用的函数是:

    // stackcacherefill/stackcacherelease implement a global pool of stack segments.
    // The pool is required to prevent unlimited growth of per-thread caches.
    //
    //go:systemstack
    func stackcacherefill(c *mcache, order uint8) {
    	if stackDebug >= 1 {
    		print("stackcacherefill order=", order, "\n")
    	}
    
    	// Grab some stacks from the global cache.
    	// Grab half of the allowed capacity (to prevent thrashing).
    	var list gclinkptr
    	var size uintptr
    	lock(&stackpoolmu)
                    // Note: only half of the cache capacity is filled here.
    	for size < _StackCacheSize/2 {
    		x := stackpoolalloc(order)
    		x.ptr().next = list
    		list = x
    		size += _FixedStack << order
    	}
    	unlock(&stackpoolmu)
    	c.stackcache[order].list = list
    	c.stackcache[order].size = size
    }
    

    这个真没啥好说的,就是一个链表形成一个缓冲池。然后用各种方式控制住他的大小。

    三、栈的回收

    直接调用free函数

    // stackfree frees an n byte stack allocation at stk.
    //
    // stackfree must run on the system stack because it uses per-P
    // resources and must not split the stack.
    //
    //go:systemstack
    func stackfree(stk stack, n uintptr) {
    	gp := getg()
    	v := unsafe.Pointer(stk.lo)
    	if n&(n-1) != 0 {
    		throw("stack not a power of 2")
    	}
    	if stk.lo+n < stk.hi {
    		throw("bad stack size")
    	}
    	if stackDebug >= 1 {
    		println("stackfree", v, n)
    		memclrNoHeapPointers(v, n) // for testing, clobber stack data
    	}
    	// Mirror of the debug path in stackalloc: return memory to the OS.
    	if debug.efence != 0 || stackFromSystem != 0 {
    		if debug.efence != 0 || stackFaultOnFree != 0 {
    			sysFault(v, n)
    		} else {
    			sysFree(v, n, &memstats.stacks_sys)
    		}
    		return
    	}
    	if msanenabled {
    		msanfree(v, n)
    	}
    	// Small stacks go back to the per-P cache or the global pool;
    	// this mirrors the size classification in stackalloc.
    	if stackCache != 0 && n < _FixedStack<<_NumStackOrders && n < _StackCacheSize {
    		order := uint8(0)
    		n2 := n
    		for n2 > _FixedStack {
    			order++
    			n2 >>= 1
    		}
    		x := gclinkptr(v)
    		c := gp.m.mcache
    		if c == nil || gp.m.preemptoff != "" || gp.m.helpgc != 0 {
    			lock(&stackpoolmu)
    			stackpoolfree(x, order)
    			unlock(&stackpoolmu)
    		} else {
    			// Release half of an over-full cache before pushing.
    			if c.stackcache[order].size >= _StackCacheSize {
    				stackcacherelease(c, order)
    			}
    			x.ptr().next = c.stackcache[order].list
    			c.stackcache[order].list = x
    			c.stackcache[order].size += n
    		}
    	} else {
    		s := mheap_.lookup(v)
    		if s.state != _MSpanStack {
    			println(hex(s.base()), v)
    			throw("bad span state")
    		}
    		if gcphase == _GCoff {
    			// Free the stack immediately if we're
    			// sweeping.
    			mheap_.freeStack(s)
    		} else {
    			// If the GC is running, we can't return a
    			// stack span to the heap because it could be
    			// reused as a heap span, and this state
    			// change would race with GC. Add it to the
    			// large stack cache instead.
    			log2npage := stacklog2(s.npages)
    			lock(&stackLarge.lock)
    			stackLarge.free[log2npage].insert(s)
    			unlock(&stackLarge.lock)
    		}
    	}
    }
    
    var maxstacksize uintptr = 1 << 20 // enough until runtime.main sets it for real
    
    // ptrnames maps a pointer bit (0/1) to its name, for stack-scan debug prints.
    var ptrnames = []string{
    	0: "scalar",
    	1: "ptr",
    }
    

    主要进行条件判断后是否启动GC,来进行不同的回收操作。或者直接释放回堆或者直接进入大栈空闲链表。
    首先是回收到缓冲池中:

    // stackcacherelease returns stacks from c's cache of the given order to
    // the global pool until the cache is back down to half capacity.
    //
    //go:systemstack
    func stackcacherelease(c *mcache, order uint8) {
    	if stackDebug >= 1 {
    		print("stackcacherelease order=", order, "\n")
    	}
    	x := c.stackcache[order].list
    	size := c.stackcache[order].size
    	lock(&stackpoolmu)
    	for size > _StackCacheSize/2 {
    		y := x.ptr().next
    		stackpoolfree(x, order)
    		x = y
    		size -= _FixedStack << order
    	}
    	unlock(&stackpoolmu)
    	c.stackcache[order].list = x
    	c.stackcache[order].size = size
    }
    
    // stackcache_clear flushes every order of c's stack cache back to the
    // global pool (e.g. when the mcache is being torn down or flushed for GC).
    //
    //go:systemstack
    func stackcache_clear(c *mcache) {
    	if stackDebug >= 1 {
    		print("stackcache clear\n")
    	}
    	lock(&stackpoolmu)
    	for order := uint8(0); order < _NumStackOrders; order++ {
    		x := c.stackcache[order].list
    		for x.ptr() != nil {
    			y := x.ptr().next
    			stackpoolfree(x, order)
    			x = y
    		}
    		c.stackcache[order].list = 0
    		c.stackcache[order].size = 0
    	}
    	unlock(&stackpoolmu)
    }
    // Adds stack x to the free pool. Must be called with stackpoolmu held.
    func stackpoolfree(x gclinkptr, order uint8) {
    	s := mheap_.lookup(unsafe.Pointer(x))
    	if s.state != _MSpanStack {
    		throw("freeing stack not in a stack span")
    	}
    	if s.stackfreelist.ptr() == nil {
    		// s will now have a free stack
    		stackpool[order].insert(s)
    	}
    	x.ptr().next = s.stackfreelist
    	s.stackfreelist = x
    	s.allocCount--
    	if gcphase == _GCoff && s.allocCount == 0 {
    		// Span is completely free. Return it to the heap
    		// immediately if we're sweeping.
    		//
    		// If GC is active, we delay the free until the end of
    		// GC to avoid the following type of situation:
    		//
    		// 1) GC starts, scans a SudoG but does not yet mark the SudoG.elem pointer
    		// 2) The stack that pointer points to is copied
    		// 3) The old stack is freed
    		// 4) The containing span is marked free
    		// 5) GC attempts to mark the SudoG.elem pointer. The
    		//    marking fails because the pointer looks like a
    		//    pointer into a free span.
    		//
    		// By not freeing, we prevent step #4 until GC is done.
    		stackpool[order].remove(s)
    		s.stackfreelist = 0
    		mheap_.freeStack(s)
    	}
    }
    

    先放回Freelist;如果当前未在GC(gcphase == _GCoff)且该span已完全空闲,则将其从缓冲池中移除并释放回堆。
    还有一个freeStackSpans:

    // freeStackSpans frees unused stack spans at the end of GC.
    // This is the deferred cleanup promised by stackpoolfree/stackfree:
    // spans that could not be returned to the heap while GC was running
    // are released here.
    func freeStackSpans() {
    	lock(&stackpoolmu)
    
    	// Scan stack pools for empty stack spans.
    	for order := range stackpool {
    		list := &stackpool[order]
    		for s := list.first; s != nil; {
    			next := s.next
    			if s.allocCount == 0 {
    				list.remove(s)
    				s.stackfreelist = 0
    				mheap_.freeStack(s)
    			}
    			s = next
    		}
    	}
    
    	unlock(&stackpoolmu)
    
    	// Free large stack spans.
    	lock(&stackLarge.lock)
    	for i := range stackLarge.free {
    		for s := stackLarge.free[i].first; s != nil; {
    			next := s.next
    			stackLarge.free[i].remove(s)
    			mheap_.freeStack(s)
    			s = next
    		}
    	}
    	unlock(&stackLarge.lock)
    }
    

    这个的目的是在GC完成后释放不使用的栈空间。

    四、栈的扩容

    栈扩容的判断函数在src/cmd/internal/obj/x86/obj6.go此文件中的stacksplit函数中,判断是否扩容的条件为:
    1、当栈大小128(含)内时,SP 小于 stackguard0 即栈扩容。
    2、当栈大小大于128时,则由公式 SP - FrameSize + StackSmall 和 stackguard0 相比较,小于 stackguard0 则扩容。
    3、当栈大小大于4096时,先判断stackguard0 是否为 StackPreempt 状态,再由 SP-stackguard0+StackGuard <= framesize + (StackGuard-StackSmall)判断是否为true,是则扩容。
    这个栈操作比较复杂,调用了一系列的汇编代码(包括runtime·morestack函数在asm_amd64.s),这里不分析相关的代码,有兴趣可以看一下,只分析一下最终调用的newstack:

    // Called from runtime·morestack when more stack is needed.
    // Allocate larger stack and relocate to new stack.
    // Stack growth is multiplicative, for constant amortized cost.
    //
    // g->atomicstatus will be Grunning or Gscanrunning upon entry.
    // If the GC is trying to stop this g then it will set preemptscan to true.
    //
    // ctxt is the value of the context register on morestack. newstack
    // will write it to g.sched.ctxt.
    func newstack(ctxt unsafe.Pointer) {
    	thisg := getg()
    	// TODO: double check all gp. shouldn't be getg().
    	if thisg.m.morebuf.g.ptr().stackguard0 == stackFork {
    		throw("stack growth after fork")
    	}
    	if thisg.m.morebuf.g.ptr() != thisg.m.curg {
    		print("runtime: newstack called from g=", hex(thisg.m.morebuf.g), "\n"+"\tm=", thisg.m, " m->curg=", thisg.m.curg, " m->g0=", thisg.m.g0, " m->gsignal=", thisg.m.gsignal, "\n")
    		morebuf := thisg.m.morebuf
    		traceback(morebuf.pc, morebuf.sp, morebuf.lr, morebuf.g.ptr())
    		throw("runtime: wrong goroutine in newstack")
    	}
    
    	gp := thisg.m.curg
    	// Write ctxt to gp.sched. We do this here instead of in
    	// morestack so it has the necessary write barrier.
    	gp.sched.ctxt = ctxt
    
    	if thisg.m.curg.throwsplit {
    		// Update syscallsp, syscallpc in case traceback uses them.
    		morebuf := thisg.m.morebuf
    		gp.syscallsp = morebuf.sp
    		gp.syscallpc = morebuf.pc
    		print("runtime: newstack sp=", hex(gp.sched.sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n",
    			"\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n",
    			"\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n")
    
    		traceback(morebuf.pc, morebuf.sp, morebuf.lr, gp)
    		throw("runtime: stack split at bad time")
    	}
    
    	morebuf := thisg.m.morebuf
    	thisg.m.morebuf.pc = 0
    	thisg.m.morebuf.lr = 0
    	thisg.m.morebuf.sp = 0
    	thisg.m.morebuf.g = 0
    
    	// NOTE: stackguard0 may change underfoot, if another thread
    	// is about to try to preempt gp. Read it just once and use that same
    	// value now and below. First determine whether this "grow" is
    	// actually a preemption request.
    	preempt := atomic.Loaduintptr(&gp.stackguard0) == stackPreempt
    
    	// Be conservative about where we preempt.
    	// We are interested in preempting user Go code, not runtime code.
    	// If we're holding locks, mallocing, or preemption is disabled, don't
    	// preempt.
    	// This check is very early in newstack so that even the status change
    	// from Grunning to Gwaiting and back doesn't happen in this case.
    	// That status change by itself can be viewed as a small preemption,
    	// because the GC might change Gwaiting to Gscanwaiting, and then
    	// this goroutine has to wait for the GC to finish before continuing.
    	// If the GC is in some way dependent on this goroutine (for example,
    	// it needs a lock held by the goroutine), that small preemption turns
    	// into a real deadlock.
    	if preempt {
    		if thisg.m.locks != 0 || thisg.m.mallocing != 0 || thisg.m.preemptoff != "" || thisg.m.p.ptr().status != _Prunning {
    			// Let the goroutine keep running for now.
    			// gp->preempt is set, so it will be preempted next time.
    			gp.stackguard0 = gp.stack.lo + _StackGuard
    			gogo(&gp.sched) // never return
    		}
    	}
    
    	if gp.stack.lo == 0 {
    		throw("missing stack in newstack")
    	}
    	sp := gp.sched.sp
    	if sys.ArchFamily == sys.AMD64 || sys.ArchFamily == sys.I386 {
    		// The call to morestack cost a word.
    		sp -= sys.PtrSize
    	}
    	if stackDebug >= 1 || sp < gp.stack.lo {
    		print("runtime: newstack sp=", hex(sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n",
    			"\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n",
    			"\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n")
    	}
    	if sp < gp.stack.lo {
    		print("runtime: gp=", gp, ", gp->status=", hex(readgstatus(gp)), "\n ")
    		print("runtime: split stack overflow: ", hex(sp), " < ", hex(gp.stack.lo), "\n")
    		throw("runtime: split stack overflow")
    	}
    
    	// Handle the preemption request: yield the processor and/or let
    	// the GC scan this stack, instead of growing.
    	if preempt {
    		if gp == thisg.m.g0 {
    			throw("runtime: preempt g0")
    		}
    		if thisg.m.p == 0 && thisg.m.locks == 0 {
    			throw("runtime: g is running but p is not")
    		}
    		// Synchronize with scang.
    		casgstatus(gp, _Grunning, _Gwaiting)
    		if gp.preemptscan {
    			for !castogscanstatus(gp, _Gwaiting, _Gscanwaiting) {
    				// Likely to be racing with the GC as
    				// it sees a _Gwaiting and does the
    				// stack scan. If so, gcworkdone will
    				// be set and gcphasework will simply
    				// return.
    			}
    			if !gp.gcscandone {
    				// gcw is safe because we're on the
    				// system stack.
    				gcw := &gp.m.p.ptr().gcw
    				scanstack(gp, gcw)
    				if gcBlackenPromptly {
    					gcw.dispose()
    				}
    				gp.gcscandone = true
    			}
    			gp.preemptscan = false
    			gp.preempt = false
    			casfrom_Gscanstatus(gp, _Gscanwaiting, _Gwaiting)
    			// This clears gcscanvalid.
    			casgstatus(gp, _Gwaiting, _Grunning)
    			gp.stackguard0 = gp.stack.lo + _StackGuard
    			gogo(&gp.sched) // never return
    		}
    
    		// Act like goroutine called runtime.Gosched.
    		casgstatus(gp, _Gwaiting, _Grunning)
    		gopreempt_m(gp) // never return
    	}
    
    	// Allocate a bigger segment and move the stack.
    	// Growth is always 2x the current allocation.
    	oldsize := int(gp.stackAlloc)
    	newsize := oldsize * 2
    	if uintptr(newsize) > maxstacksize {
    		print("runtime: goroutine stack exceeds ", maxstacksize, "-byte limit\n")
    		throw("stack overflow")
    	}
    
    	// The goroutine must be executing in order to call newstack,
    	// so it must be Grunning (or Gscanrunning).
    	casgstatus(gp, _Grunning, _Gcopystack)
    
    	// The concurrent GC will not scan the stack while we are doing the copy since
    	// the gp is in a Gcopystack status.
    	// copystack is shared with stack shrinking; it is analyzed in
    	// the shrink section of this article.
    	copystack(gp, uintptr(newsize), true)
    	if stackDebug >= 1 {
    		print("stack grow done\n")
    	}
    	casgstatus(gp, _Gcopystack, _Grunning)
    	// Resume gp on its new stack (gogo never returns here).
    	gogo(&gp.sched)
    }
    

    这个扩容还是比较麻烦的。如果觉得麻烦可以先理清楚流程,有时间再认真分析。

    五、栈的收缩

    栈太大没有啥使用场景时需要控制成本,收缩一下。
    先看一下收缩的函数:

    // Maybe shrink the stack being used by gp.
    // Called at garbage collection time.
    // gp must be stopped, but the world need not be.
    func shrinkstack(gp *g) {
    	gstatus := readgstatus(gp)
    	if gstatus&^_Gscan == _Gdead {
    		if gp.stack.lo != 0 {
    			// Free whole stack - it will get reallocated
    			// if G is used again.
    			stackfree(gp.stack, gp.stackAlloc)
    			gp.stack.lo = 0
    			gp.stack.hi = 0
    			gp.stkbar = nil
    			gp.stkbarPos = 0
    		}
    		return
    	}
    	if gp.stack.lo == 0 {
    		throw("missing stack in shrinkstack")
    	}
    	if gstatus&_Gscan == 0 {
    		throw("bad status in shrinkstack")
    	}
    
    	if debug.gcshrinkstackoff > 0 {
    		return
    	}
    	if gp.startpc == gcBgMarkWorkerPC {
    		// We're not allowed to shrink the gcBgMarkWorker
    		// stack (see gcBgMarkWorker for explanation).
    		return
    	}
    
    	// Candidate new size: half the current allocation.
    	oldsize := gp.stackAlloc
    	newsize := oldsize / 2
    	// Don't shrink the allocation below the minimum-sized stack
    	// allocation.
    	if newsize < _FixedStack {
    		return
    	}
    	// Compute how much of the stack is currently in use and only
    	// shrink the stack if gp is using less than a quarter of its
    	// current stack. The currently used stack includes everything
    	// down to the SP plus the stack guard space that ensures
    	// there's room for nosplit functions.
    	avail := gp.stack.hi - gp.stack.lo
    	if used := gp.stack.hi - gp.sched.sp + _StackLimit; used >= avail/4 {
    		return
    	}
    
    	// We can't copy the stack if we're in a syscall.
    	// The syscall might have pointers into the stack.
    	if gp.syscallsp != 0 {
    		return
    	}
    	if sys.GoosWindows != 0 && gp.m != nil && gp.m.libcallsp != 0 {
    		return
    	}
    
    	if stackDebug > 0 {
    		print("shrinking stack ", oldsize, "->", newsize, "\n")
    	}
    
    	copystack(gp, newsize, false)
    }
    

    在上面的代码中,设定新栈为老栈的一半后,首先要判断收缩后的栈空间是否已经到达最小值,到达则不再收缩。其次,需要计算当前g使用的栈是否达到四分之一的限值,最后把老栈的数据拷贝到新栈即可。
    这里面调用了一个栈拷贝函数:

    // Copies gp's stack to a new stack of a different size.
    // Caller must have changed gp status to Gcopystack.
    //
    // If sync is true, this is a self-triggered stack growth and, in
    // particular, no other G may be writing to gp's stack (e.g., via a
    // channel operation). If sync is false, copystack protects against
    // concurrent channel operations.
    func copystack(gp *g, newsize uintptr, sync bool) {
    	if gp.syscallsp != 0 {
    		throw("stack growth not allowed in system call")
    	}
    	old := gp.stack
    	if old.lo == 0 {
    		throw("nil stackbase")
    	}
    	// Bytes of the old stack actually in use (stacks grow down from hi).
    	used := old.hi - gp.sched.sp
    
    	// allocate new stack
    	new, newstkbar := stackalloc(uint32(newsize))
    	if stackPoisonCopy != 0 {
    		fillstack(new, 0xfd)
    	}
    	if stackDebug >= 1 {
    		print("copystack gp=", gp, " [", hex(old.lo), " ", hex(old.hi-used), " ", hex(old.hi), "]/", gp.stackAlloc, " -> [", hex(new.lo), " ", hex(new.hi-used), " ", hex(new.hi), "]/", newsize, "\n")
    	}
    
    	// Compute adjustment.
    	// delta is added to every pointer that pointed into the old stack
    	// to make it point at the corresponding slot in the new stack.
    	var adjinfo adjustinfo
    	adjinfo.old = old
    	adjinfo.delta = new.hi - old.hi
    
    	// Adjust sudogs, synchronizing with channel ops if necessary.
    	ncopy := used
    	if sync {
    		adjustsudogs(gp, &adjinfo)
    	} else {
    		// sudogs can point in to the stack. During concurrent
    		// shrinking, these areas may be written to. Find the
    		// highest such pointer so we can handle everything
    		// there and below carefully. (This shouldn't be far
    		// from the bottom of the stack, so there's little
    		// cost in handling everything below it carefully.)
    		adjinfo.sghi = findsghi(gp, old)
    
    		// Synchronize with channel ops and copy the part of
    		// the stack they may interact with.
    		ncopy -= syncadjustsudogs(gp, used, &adjinfo)
    	}
    
    	// Copy the stack (or the rest of it) to the new location
    	memmove(unsafe.Pointer(new.hi-ncopy), unsafe.Pointer(old.hi-ncopy), ncopy)
    
    	// Disallow sigprof scans of this stack and block if there's
    	// one in progress.
    	gcLockStackBarriers(gp)
    
    	// Adjust remaining structures that have pointers into stacks.
    	// We have to do most of these before we traceback the new
    	// stack because gentraceback uses them.
    	adjustctxt(gp, &adjinfo)
    	adjustdefers(gp, &adjinfo)
    	adjustpanics(gp, &adjinfo)
    	adjuststkbar(gp, &adjinfo)
    	if adjinfo.sghi != 0 {
    		adjinfo.sghi += adjinfo.delta
    	}
    
    	// copy old stack barriers to new stack barrier array
    	newstkbar = newstkbar[:len(gp.stkbar)]
    	copy(newstkbar, gp.stkbar)
    
    	// Swap out old stack for new one
    	gp.stack = new
    	gp.stackguard0 = new.lo + _StackGuard // NOTE: might clobber a preempt request
    	gp.sched.sp = new.hi - used
    	oldsize := gp.stackAlloc
    	gp.stackAlloc = newsize
    	gp.stkbar = newstkbar
    	gp.stktopsp += adjinfo.delta
    
    	// Adjust pointers in the new stack.
    	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, adjustframe, noescape(unsafe.Pointer(&adjinfo)), 0)
    
    	gcUnlockStackBarriers(gp)
    
    	// free old stack
    	if stackPoisonCopy != 0 {
    		fillstack(old, 0xfc)
    	}
    	stackfree(old, oldsize)
    }
    

    学过C/c++的看到memmove函数估计就明白咋回事儿了。在这个拷贝函数中:
    首先会计算用到的栈空间大小,供后面的复制内存的需要。然后使用stackalloc分配合适大小,对比新老栈的hi值的差得到delta,然后在下面的调整函数中会用到,这特别是指针的位置的移动用这个差值可以移动到指定的位置,从而取得新栈的指针。
    再下来,使用memmove拷贝数据到新栈,调整指针txt、defer、panic 位置,将G切换到新栈,最后释放原栈空间。
    在源码中有一个栈的调整图,看看对着源码就明白了。

    六、总结

    越靠近底层发现其实原来学得好好多基础的,如数据结构,算法,OS原理等,越是有用。虽然有些东西可能差得相当大,但基本上原理上面的套路,仍然是没有脱离。即使是在多核系统中,具体到某一核心内的操作,仍然是这些原理控制的内容。基础是什么,这就是基础。没有华丽的外表,体现出来甚至很枯燥,但用到的时候儿,才发现,真得是这样啊。
    打基础就和盖房子一样,没人不愿意把基础打好,但就是好多人打不好。

  • 相关阅读:
    uboot移植-野火imx6ull
    HTML的段落中怎么样显示出标签要使用的尖括号<>?
    Prophet模型的简介以及案例分析
    Android系统的特性
    Vue 3.0 使用的 diff 算法相比 Vue 2.0 中的双端比对有什么优势?
    Leetcode刷题Day5休息 & Day6----------哈希表
    刷题记录:牛客NC14701取数游戏2
    6-26漏洞利用-tomcat管理密码破解
    ResNet 代码实现
    一次网络请求的流程
  • 原文地址:https://blog.csdn.net/fpcc/article/details/126951949