This article is a source-code study based on linux-3.10.
The common entry point for interrupts is common_interrupt, which was introduced in the earlier interrupt overview article.
common_interrupt:
XCPT_FRAME
ASM_CLAC
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ # call into the interrupt handling routine
/* 0(%rsp): old_rsp-ARGOFFSET */
ret_from_intr: # return path after interrupt handling
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
decl PER_CPU_VAR(irq_count)
/* Restore saved previous stack */
popq %rsi
CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */
leaq ARGOFFSET-RBP(%rsi), %rsp
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel # returning to kernel mode?
/* Interrupt came from user space */
/*
* Has a correct top of stack, but a partial stack frame
* %rcx: thread info. Interrupts off.
*/
retint_with_reschedule: # check whether a reschedule is needed
movl $_TIF_WORK_MASK,%edi
retint_check:
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
jnz retint_careful
retint_swapgs: /* return to user-space */
/*
* The iretq could re-enable interrupts:
*/
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_IRETQ
SWAPGS
jmp restore_args
retint_restore_args: /* return to kernel space */
DISABLE_INTERRUPTS(CLBR_ANY)
/*
* The iretq could re-enable interrupts:
*/
TRACE_IRQS_IRETQ
restore_args:
RESTORE_ARGS 1,8,1
irq_return:
INTERRUPT_RETURN
_ASM_EXTABLE(irq_return, bad_iret)
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iretq
_ASM_EXTABLE(native_iret, bad_iret)
#endif
.section .fixup,"ax"
bad_iret:
/*
* The iret traps when the %cs or %ss being restored is bogus.
* We've lost the original trap vector and error code.
* #GPF is the most likely one to get for an invalid selector.
* So pretend we completed the iret and took the #GPF in user mode.
*
* We are now running with the kernel GS after exception recovery.
* But error_entry expects us to have user GS to match the user %cs,
* so swap back.
*/
pushq $0
SWAPGS
jmp general_protection
.previous
/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx # test the need-resched flag
jnc retint_signal # no reschedule needed, handle signals
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
SCHEDULE_USER # voluntarily call into the scheduler
popq_cfi %rdi
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp retint_check
retint_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz retint_swapgs
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
jmp retint_with_reschedule # re-check whether a reschedule is needed
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
cmpl $0,TI_preempt_count(%rcx) # preemption disabled (preempt_count != 0)?
jnz retint_restore_args
bt $TIF_NEED_RESCHED,TI_flags(%rcx) # is the need-resched flag set?
jnc retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jnc retint_restore_args
call preempt_schedule_irq # preemptively reschedule
jmp exit_intr
#endif
The preempt_schedule_irq function is the one that actually calls into the scheduler, as shown below.
asmlinkage void __sched preempt_schedule_irq(void)
{
struct thread_info *ti = current_thread_info();
enum ctx_state prev_state;
/* Catch callers which need to be fixed */
BUG_ON(ti->preempt_count || !irqs_disabled());
prev_state = exception_enter();
do {
add_preempt_count(PREEMPT_ACTIVE);
local_irq_enable();
__schedule(); // voluntarily reschedule
local_irq_disable();
sub_preempt_count(PREEMPT_ACTIVE);
/*
* Check again in case we missed a preemption opportunity
* between schedule and now.
*/
barrier();
} while (need_resched());
exception_exit(prev_state);
}
On Linux there are three common ways to issue a system call.
Method 1 is the traditional one: trap into the kernel through a software interrupt (int 0x80) and return to user space once the call completes. Its main drawback is the overhead of the software interrupt, so methods 2 and 3 exist to avoid frequent interrupts and speed system calls up: the kernel maps a vdso page into the running executable, and its __kernel_vsyscall function enters the kernel through the CPU's fast system-call mechanism instead of a software interrupt.
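As a rough illustration (my own example, not from the kernel sources; it assumes an x86-64 Linux host with 32-bit syscall emulation enabled), the snippet below issues getpid both through the legacy int 0x80 gate and through the syscall instruction. Note that int 0x80 selects from the 32-bit syscall table even on a 64-bit kernel, so the syscall numbers differ.
/* getpid via int 0x80 (32-bit table, nr 20) and via syscall (64-bit table, nr 39). */
#include <stdio.h>

int main(void)
{
    long pid_int80, pid_syscall;

    /* Legacy path: software interrupt 0x80, syscall number in eax. */
    asm volatile("int $0x80" : "=a"(pid_int80) : "a"(20L));

    /* Fast path: syscall enters the kernel at the MSR_LSTAR target (system_call).
     * The instruction clobbers rcx (return rip) and r11 (saved rflags). */
    asm volatile("syscall" : "=a"(pid_syscall) : "a"(39L) : "rcx", "r11", "memory");

    printf("int 0x80 getpid = %ld, syscall getpid = %ld\n", pid_int80, pid_syscall);
    return 0;
}
Both paths return the same pid; the difference is only in how the kernel is entered, which is exactly the overhead methods 2 and 3 try to cut.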
The handler for the fast path is initialized along the trap_init->cpu_init->syscall_init path.
void syscall_init(void)
{
/*
* LSTAR and STAR live in a bit strange symbiosis.
* They both write to the same internal register. STAR allows to
* set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
*/
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
wrmsrl(MSR_LSTAR, system_call); // set the 64-bit syscall entry point
wrmsrl(MSR_CSTAR, ignore_sysret);
#ifdef CONFIG_IA32_EMULATION
syscall32_cpu_init();
#endif
/* Flags to clear on syscall */
wrmsrl(MSR_SYSCALL_MASK,
X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
X86_EFLAGS_IOPL|X86_EFLAGS_AC);
}
MSR_LSTAR holds the entry point used by the syscall instruction, namely the system_call function; see the syscall documentation for more detail.
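As an optional cross-check (my own sketch, not from the article; it requires root and the msr kernel module), the MSR_LSTAR value can be read from user space through /dev/cpu/0/msr and compared with the address of system_call in /proc/kallsyms:
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

#define MSR_LSTAR 0xC0000082U   /* the MSR programmed by syscall_init() */

int main(void)
{
    uint64_t lstar = 0;
    int fd = open("/dev/cpu/0/msr", O_RDONLY);
    if (fd < 0) {
        perror("open /dev/cpu/0/msr (modprobe msr, run as root)");
        return 1;
    }
    /* The msr driver interprets the file offset as the MSR index. */
    if (pread(fd, &lstar, sizeof(lstar), MSR_LSTAR) != (ssize_t)sizeof(lstar)) {
        perror("pread MSR_LSTAR");
        close(fd);
        return 1;
    }
    printf("MSR_LSTAR = 0x%llx\n", (unsigned long long)lstar);
    close(fd);
    return 0;
}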
trap_init also calls set_system_trap_gate(SYSCALL_VECTOR, &system_call), which installs vector 0x80 as the system-call trap gate.
# define SYSCALL_VECTOR 0x80
ENTRY(system_call)
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
/*
* A hypervisor implementation might want to use a label
* after the swapgs, so that it can do the swapgs
* for the guest and jump here on syscall.
*/
GLOBAL(system_call_after_swapgs)
movq %rsp,PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs off/on section - it's straight
* and short:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,0
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz tracesys
system_call_fastpath:
#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max,%rax
#else
andl $__SYSCALL_MASK,%eax
cmpl $__NR_syscall_max,%eax
#endif
ja badsys
movq %r10,%rcx
call *sys_call_table(,%rax,8) # XXX: rip relative; dispatch into the syscall handler
movq %rax,RAX-ARGOFFSET(%rsp)
/*
* Syscall return path ending with SYSRET (fast path)
* Has incomplete stack frame and undefined top of stack.
*/
ret_from_sys_call: # syscall return-path checks
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
sysret_check:
LOCKDEP_SYS_EXIT
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
andl %edi,%edx
jnz sysret_careful
CFI_REMEMBER_STATE
/*
* sysretq will re-enable interrupts:
*/
TRACE_IRQS_ON
movq RIP-ARGOFFSET(%rsp),%rcx
CFI_REGISTER rip,rcx
RESTORE_ARGS 1,-ARG_SKIP,0
/*CFI_REGISTER rflags,r11*/
movq PER_CPU_VAR(old_rsp), %rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
bt $TIF_NEED_RESCHED,%edx # reschedule needed?
jnc sysret_signal # no reschedule needed, check for signals
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
SCHEDULE_USER # reschedule
popq_cfi %rdi
jmp sysret_check
/* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
bt $TIF_SYSCALL_AUDIT,%edx
jc sysret_audit
#endif
/*
* We have a signal, or exit tracing or single-step.
* These all wind up with the iret return path anyway,
* so just join that path right now.
*/
FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
jmp int_check_syscall_exit_work
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
#ifdef CONFIG_AUDITSYSCALL
/*
* Fast path for syscall audit without full syscall trace.
* We just call __audit_syscall_entry() directly, and then
* jump back to the normal fast path.
*/
auditsys:
movq %r10,%r9 /* 6th arg: 4th syscall arg */
movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
movq %rax,%rsi /* 2nd arg: syscall number */
movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
call __audit_syscall_entry
LOAD_ARGS 0 /* reload call-clobbered registers */
jmp system_call_fastpath
/*
* Return fast path for syscall audit. Call __audit_syscall_exit()
* directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
* masked off.
*/
sysret_audit:
movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */
setbe %al /* 1 if so, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
call __audit_syscall_exit
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */
/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz auditsys
#endif
SAVE_REST
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
FIXUP_TOP_OF_STACK %rdi
movq %rsp,%rdi
call syscall_trace_enter
/*
* Reload arg registers from stack in case ptrace changed them.
* We don't reload %rax because syscall_trace_enter() returned
* the value it wants us to use in the table lookup.
*/
LOAD_ARGS ARGOFFSET, 1
RESTORE_REST
#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max,%rax
#else
andl $__SYSCALL_MASK,%eax
cmpl $__NR_syscall_max,%eax
#endif
ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
GLOBAL(int_ret_from_sys_call)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
GLOBAL(int_with_check)
LOCKDEP_SYS_EXIT_IRQ
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz int_careful
andl $~TS_COMPAT,TI_status(%rcx)
jmp retint_swapgs
/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
/* edx: work, edi: workmask */
int_careful:
bt $TIF_NEED_RESCHED,%edx # reschedule needed?
jnc int_very_careful
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
SCHEDULE_USER # voluntarily reschedule
popq_cfi %rdi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
int_check_syscall_exit_work:
SAVE_REST
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
pushq_cfi %rdi
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq_cfi %rdi
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest
int_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz 1f
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
call do_notify_resume
1: movl $_TIF_WORK_MASK,%edi
int_restore_rest:
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
CFI_ENDPROC
END(system_call)
SCHEDULE_USER is defined as follows:
#ifdef CONFIG_CONTEXT_TRACKING
# define SCHEDULE_USER call schedule_user
#else
# define SCHEDULE_USER call schedule
#endif
so it ends up calling schedule (or schedule_user). That covers the rescheduling checkpoints on the interrupt-return and system-call-return paths.
When schedule runs, it selects the next process that should execute.
static void __sched __schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu;
need_resched:
preempt_disable();
cpu = smp_processor_id(); // current CPU
rq = cpu_rq(cpu); // this CPU's run queue
rcu_note_context_switch(cpu);
prev = rq->curr; // the currently running task
...
pre_schedule(rq, prev);
if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);
put_prev_task(rq, prev); // put the current task back into its class's queue
next = pick_next_task(rq); // pick the next task to run
clear_tsk_need_resched(prev);
rq->skip_clock_update = 0;
...
}
The core of scheduling is pick_next_task, which selects the next runnable process.
static inline struct task_struct *
pick_next_task(struct rq *rq)
{
const struct sched_class *class;
struct task_struct *p;
/*
* Optimization: we know that if all tasks are in
* the fair class we can call that function directly:
*/
if (likely(rq->nr_running == rq->cfs.h_nr_running)) { // if every runnable task is a CFS task, ask the fair class directly
p = fair_sched_class.pick_next_task(rq);
if (likely(p))
return p;
}
for_each_class(class) {
p = class->pick_next_task(rq); // walk each class in priority order, each with its own picking policy
if (p)
return p;
}
BUG(); /* the idle class will always have a runnable task */
}
...
#define sched_class_highest (&stop_sched_class)
#define for_each_class(class) \
for (class = sched_class_highest; class; class = class->next)
So the walk starts at stop_sched_class and proceeds all the way down to the lowest-priority class:
stop_sched_class->rt_sched_class->fair_sched_class->idle_sched_class
The priority order is as listed: stop_sched_class first, then the real-time class, then the CFS fair scheduler, and finally the idle class. Each class supplies its own pick_next_task callback to choose the next task to run.
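A minimal user-space sketch (my simplification, not kernel code) of how that chain is walked: the classes form a singly linked list ordered by priority, and the first class whose pick_next_task returns a task wins.
#include <stdio.h>
#include <stddef.h>

struct rq;                                    /* run queue kept opaque in this sketch */
struct task { const char *name; };

struct sched_class {
    const char *name;
    const struct sched_class *next;           /* next lower-priority class */
    struct task *(*pick_next_task)(struct rq *rq);
};

static struct task *pick_none(struct rq *rq) { (void)rq; return NULL; }
static struct task idle_task = { "swapper" };
static struct task *pick_idle(struct rq *rq) { (void)rq; return &idle_task; }

/* idle sits at the tail and always has a runnable task, so the walk terminates. */
static const struct sched_class idle_class = { "idle", NULL,        pick_idle };
static const struct sched_class fair_class = { "fair", &idle_class, pick_none };
static const struct sched_class rt_class   = { "rt",   &fair_class, pick_none };
static const struct sched_class stop_class = { "stop", &rt_class,   pick_none };

#define sched_class_highest (&stop_class)
#define for_each_class(class) \
    for (class = sched_class_highest; class; class = class->next)

int main(void)
{
    const struct sched_class *class;
    struct task *p = NULL;

    for_each_class(class) {                   /* highest-priority class first */
        p = class->pick_next_task(NULL);
        if (p) {
            printf("picked '%s' from class '%s'\n", p->name, class->name);
            break;
        }
    }
    return 0;
}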
static struct task_struct *pick_next_task_stop(struct rq *rq)
{
struct task_struct *stop = rq->stop;
if (stop && stop->on_rq) {
stop->se.exec_start = rq->clock_task; // a stop task is queued: stamp its start time and pick it
return stop;
}
return NULL; // otherwise there is no stop task
}
This class serves special kernel tasks, e.g. the per-CPU stop/migration threads used when moving tasks between CPUs.
static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
struct rt_rq *rt_rq)
{
struct rt_prio_array *array = &rt_rq->active;
struct sched_rt_entity *next = NULL;
struct list_head *queue;
int idx;
idx = sched_find_first_bit(array->bitmap); // find the highest-priority non-empty queue
BUG_ON(idx >= MAX_RT_PRIO);
queue = array->queue + idx;
next = list_entry(queue->next, struct sched_rt_entity, run_list);
return next;
}
static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
struct sched_rt_entity *rt_se;
struct task_struct *p;
struct rt_rq *rt_rq;
rt_rq = &rq->rt;
if (!rt_rq->rt_nr_running)
return NULL;
if (rt_rq_throttled(rt_rq))
return NULL;
do {
rt_se = pick_next_rt_entity(rq, rt_rq); // pick the highest-priority RT entity
BUG_ON(!rt_se);
rt_rq = group_rt_rq(rt_se); // descend into the group's own RT run queue, if any
} while (rt_rq);
p = rt_task_of(rt_se);
p->se.exec_start = rq->clock_task; // record the start timestamp
return p;
}
static struct task_struct *pick_next_task_rt(struct rq *rq)
{
struct task_struct *p = _pick_next_task_rt(rq); // pick the next RT task
/* The running task is never eligible for pushing */
if (p)
dequeue_pushable_task(rq, p);
#ifdef CONFIG_SMP
/*
* We detect this state here so that we can avoid taking the RQ
* lock again later if there is no need to push
*/
rq->post_schedule = has_pushable_tasks(rq);
#endif
return p;
}
The RT class picks the next task purely by priority. This is the real-time scheduling policy, which shines where response latency matters.
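A small user-space sketch (mine, not kernel code) of the priority-bitmap lookup that pick_next_rt_entity relies on: one bit per RT priority level, scan for the first set bit, and the head of that level's FIFO list runs next.
#include <stdio.h>

#define MAX_RT_PRIO 100

int main(void)
{
    unsigned long long bitmap[(MAX_RT_PRIO + 63) / 64] = { 0 };

    /* Pretend tasks are queued at RT priorities 10 and 40. */
    bitmap[10 / 64] |= 1ULL << (10 % 64);
    bitmap[40 / 64] |= 1ULL << (40 % 64);

    /* Stand-in for sched_find_first_bit(): lowest set index = highest RT priority. */
    int idx = -1;
    for (int bit = 0; bit < MAX_RT_PRIO; bit++) {
        if (bitmap[bit / 64] & (1ULL << (bit % 64))) {
            idx = bit;
            break;
        }
    }
    printf("next RT queue index: %d\n", idx);  /* prints 10 */
    return 0;
}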
The fair class implements CFS, the most widely used policy today. Tasks can be organized into scheduling groups, and the next entity is chosen according to the accumulated (virtual) runtime of each task or group.
static struct task_struct *pick_next_task_fair(struct rq *rq)
{
struct task_struct *p;
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;
if (!cfs_rq->nr_running)
return NULL;
do {
se = pick_next_entity(cfs_rq); // pick the next scheduling entity
set_next_entity(cfs_rq, se);
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);
p = task_of(se);
if (hrtick_enabled(rq))
hrtick_start_fair(rq, p); // arm the high-resolution preemption tick for the picked task
return p;
}
/*
* Pick the next process, keeping these things in mind, in this order:
* 1) keep things fair between processes/task groups
* 2) pick the "next" process, since someone really wants that to run
* 3) pick the "last" process, for cache locality
* 4) do not run the "skip" process, if something else is available
*/
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
struct sched_entity *se = __pick_first_entity(cfs_rq); // leftmost node, i.e. smallest vruntime
struct sched_entity *left = se;
/*
* Avoid running the skip buddy, if running something else can
* be done without getting too unfair.
*/
if (cfs_rq->skip == se) { // if it is the skip buddy, consider the next entity instead
struct sched_entity *second = __pick_next_entity(se);
if (second && wakeup_preempt_entity(second, left) < 1)
se = second;
}
/*
* Prefer last buddy, try to return the CPU to a preempted task.
*/
if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
se = cfs_rq->last;
/*
* Someone really wants this to run. If it's not unfair, run it.
*/
if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
se = cfs_rq->next;
clear_buddies(cfs_rq, se);
return se;
}
static struct sched_entity *__pick_next_entity(struct sched_entity *se)
{
struct rb_node *next = rb_next(&se->run_node); // the node following se in the red-black tree
if (!next)
return NULL;
return rb_entry(next, struct sched_entity, run_node); // convert the rb_node back to its entity
}
Because CFS keeps runnable entities in a red-black tree keyed by vruntime, picking the next task simply means taking the leftmost (least-run) node. The runtime accounting that updates each task's vruntime is driven by scheduler_tick (to be analyzed another time).
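A tiny sketch (mine, not kernel code) of how vruntime advances: each entity's vruntime grows by delta_exec * NICE_0_LOAD / weight, so heavier (lower-nice) entities accumulate vruntime more slowly and stay nearer the left of the tree. The weights below correspond to nice 0 and nice -5 in the kernel's prio_to_weight[] table.
#include <stdio.h>

#define NICE_0_LOAD 1024ULL

int main(void)
{
    unsigned long long delta_exec = 6000000ULL; /* 6 ms of actual runtime, in ns */
    unsigned long long weight_n0  = 1024ULL;    /* nice  0 */
    unsigned long long weight_nm5 = 3121ULL;    /* nice -5 */

    unsigned long long vr_n0  = delta_exec * NICE_0_LOAD / weight_n0;
    unsigned long long vr_nm5 = delta_exec * NICE_0_LOAD / weight_nm5;

    /* After the same wall-clock time the nice -5 task has roughly 1/3 of the
     * vruntime, so __pick_first_entity() keeps finding it as the leftmost node. */
    printf("vruntime delta: nice 0 = %llu ns, nice -5 = %llu ns\n", vr_n0, vr_nm5);
    return 0;
}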
static struct task_struct *pick_next_task_idle(struct rq *rq)
{
schedstat_inc(rq, sched_goidle);
#ifdef CONFIG_SMP
/* Trigger the post schedule to do an idle_enter for CFS */
rq->post_schedule = 1;
#endif
return rq->idle;
}
The idle task is created after start_kernel: each CPU gets its own copy, along the path
kernel_init->kernel_init_freeable->smp_init->idle_threads_init->idle_init->fork_idle
struct task_struct * __cpuinit fork_idle(int cpu)
{
struct task_struct *task;
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0); // duplicate task 0 to serve as the idle task
if (!IS_ERR(task)) {
init_idle_pids(task->pids);
init_idle(task, cpu); // set the task up as this CPU's idle task
}
return task;
}
Here init_struct_pid belongs to the initial (pid 0) task, and the copied idle task ends up running the logic below,
following the start_kernel->rest_init->cpu_startup_entry path:
// idle.c
void cpu_startup_entry(enum cpuhp_state state)
{
/*
* This #ifdef needs to die, but it's too late in the cycle to
* make this generic (arm and sh have never invoked the canary
* init for the non boot cpus!). Will be fixed in 3.11
*/
#ifdef CONFIG_X86
/*
* If we're the non-boot CPU, nothing set the stack canary up
* for us. The boot CPU already has it initialized but no harm
* in doing it again. This is a good place for updating it, as
* we wont ever return from this function (so the invalid
* canaries already on the stack wont ever trigger).
*/
boot_init_stack_canary();
#endif
current_set_polling();
arch_cpu_idle_prepare();
cpu_idle_loop();
}
/*
* Generic idle loop implementation
*/
static void cpu_idle_loop(void)
{
while (1) {
tick_nohz_idle_enter();
while (!need_resched()) { // spin here until a reschedule is needed
check_pgt_cache();
rmb();
if (cpu_is_offline(smp_processor_id()))
arch_cpu_idle_dead();
local_irq_disable();
arch_cpu_idle_enter();
/*
* In poll mode we reenable interrupts and spin.
*
* Also if we detected in the wakeup from idle
* path that the tick broadcast device expired
* for us, we don't want to go deep idle as we
* know that the IPI is going to arrive right
* away
*/
if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
cpu_idle_poll();
} else {
current_clr_polling();
if (!need_resched()) {
stop_critical_timings();
rcu_idle_enter();
arch_cpu_idle();
WARN_ON_ONCE(irqs_disabled());
rcu_idle_exit();
start_critical_timings();
} else {
local_irq_enable();
}
current_set_polling();
}
arch_cpu_idle_exit();
}
tick_nohz_idle_exit();
schedule_preempt_disabled(); // hand the CPU over to whichever task needs to run
}
}
As shown above, each CPU runs its own idle task, which basically keeps checking whether any task needs to run and reschedules when one does.
This article has mainly looked at the scheduling points reached from interrupt handling and from system calls; the internal logic of interrupts and system calls themselves was not studied in depth here (see other references). After the scheduling points, we looked at the scheduling classes: Linux 3.10 has four of them. This article did not analyze each class's logic in detail either, only briefly covering how the RT class picks by priority and how CFS picks the next process from its red-black tree; a deeper study may follow later. Given my limited knowledge, corrections are welcome.