当前位置:Linux教程 - Linux资讯 - Linux内核抢占补丁的基本原理

Linux内核抢占补丁的基本原理

  作者:jkl

CPU在内核中运行时并不是处处不可抢占的,内核中存在一些空隙,在这时进行抢占是安全的,内核抢占补丁的基本原理就是将SMP可并行的代码段看成是可以进行内核抢占的区域。2.4内核正好细化了多CPU下的内核线程同步机制,对不可并行的指令块用spinlock和rwlock作了细致的表示,该补丁的实现可谓水到渠成。具体的方法就是在进程的任务结构上增加一个preempt_count变量作为内核抢占锁,它随着spinlock和rwlock一起加锁和解锁。当preempt_count为0时表示可以进行内核调度。内核调度器的入口为preempt_schedule(),它将当前进程标记为TASK_PREEMPTED状态再调用schedule(),在TASK_PREEMPTED状态,schedule()不会将进程从运行队列中删除。

下面是内核抢占补丁的主要代码示意:

arch/i386/kernel/entry.S:

preempt_count = 4 # 将task_struct中的flags用作preempt_count,flags被移到了别的位置

ret_from_exception: # 从异常返回
#ifdef CONFIG_SMP
    GET_CURRENT(%ebx)
    movl processor(%ebx),%eax
    shll $CONFIG_X86_L1_CACHE_SHIFT,%eax
    movl SYMBOL_NAME(irq_stat)(,%eax),%ecx # softirq_active
    testl SYMBOL_NAME(irq_stat)+4(,%eax),%ecx # softirq_mask
#else
    movl SYMBOL_NAME(irq_stat),%ecx # softirq_active
    testl SYMBOL_NAME(irq_stat)+4,%ecx # softirq_mask
#endif
    jne handle_softirq
#ifdef CONFIG_PREEMPT
    cli
    incl preempt_count(%ebx) # 异常的入口没有禁止内核调度的指令,与ret_from_intr匹配一下
#endif
ENTRY(ret_from_intr) # 硬件中断的返回
    GET_CURRENT(%ebx)
#ifdef CONFIG_PREEMPT
    cli
    decl preempt_count(%ebx) # 恢复内核抢占标志
#endif
    movl EFLAGS(%esp),%eax # mix EFLAGS and CS
    movb CS(%esp),%al
    testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor?
    jne ret_with_reschedule
#ifdef CONFIG_PREEMPT
    cmpl $0,preempt_count(%ebx)
    jnz restore_all # 如果preempt_count非零则表示禁止内核抢占
    cmpl $0,need_resched(%ebx)
    jz restore_all #
    movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx
    addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx
    jnz restore_all
    incl preempt_count(%ebx)
    sti
    call SYMBOL_NAME(preempt_schedule)
    jmp ret_from_intr # 新进程返回,返回ret_from_intr恢复抢占标志后再返回
#else
    jmp restore_all
#endif
    ALIGN
handle_softirq:
#ifdef CONFIG_PREEMPT
    cli
    GET_CURRENT(%ebx)
    incl preempt_count(%ebx)
    sti
#endif
    call SYMBOL_NAME(do_softirq)
    jmp ret_from_intr
    ALIGN
reschedule:
    call SYMBOL_NAME(schedule) # test
    jmp ret_from_sys_call

include/asm/hw_irq.h:

...
#ifdef CONFIG_PREEMPT
#define BUMP_CONTEX_SWITCH_LOCK \
    GET_CURRENT \
    "incl 4(%ebx)\n\t"
#else
#define BUMP_CONTEX_SWITCH_LOCK
#endif
#define SAVE_ALL \
    /* 硬件中断保护入口现场 */ \
    "cld\n\t" \
    "pushl %es\n\t" \
    "pushl %ds\n\t" \
    "pushl %eax\n\t" \
    "pushl %ebp\n\t" \
    "pushl %edi\n\t" \
    "pushl %esi\n\t" \
    "pushl %edx\n\t" \
    "pushl %ecx\n\t" \
    "pushl %ebx\n\t" \
    "movl $" STR(__KERNEL_DS) ",%edx\n\t" \
    "movl %edx,%ds\n\t" \
    "movl %edx,%es\n\t" \
    BUMP_CONTEX_SWITCH_LOCK /* 硬件中断的入口禁止内核抢占 */

include/linux/spinlock.h:

#ifdef CONFIG_PREEMPT
#define switch_lock_count() current->preempt_count
#define in_ctx_sw_off() (switch_lock_count().counter) /* 判断当前进程的抢占计数是否非零 */
#define atomic_ptr_in_ctx_sw_off() (&switch_lock_count())

#define ctx_sw_off() /* 禁止内核抢占 */ \
do { \
    atomic_inc(atomic_ptr_in_ctx_sw_off()); /* 当前进程的内核抢占计数增1 */ \
} while (0)

#define ctx_sw_on_no_preempt() /* 允许内核抢占 */ \
do { \
    atomic_dec(atomic_ptr_in_ctx_sw_off()); /* 当前进程的内核抢占计数减1 */ \
} while (0)

#define ctx_sw_on() /* 允许并完成内核抢占 */ \
do { \
    if (atomic_dec_and_test(atomic_ptr_in_ctx_sw_off()) && \
        current->need_resched) \
        preempt_schedule(); \
} while (0)

#define spin_lock(lock) \
do { \
    ctx_sw_off(); /* 进入自旋锁时禁止抢占 */ \
    _raw_spin_lock(lock); \
} while(0)

#define spin_trylock(lock) ({ctx_sw_off(); _raw_spin_trylock(lock) ? /* 锁定并测试原来是否上锁 */ \
    1 : ({ctx_sw_on(); 0;});})

#define spin_unlock(lock) \
do { \
    _raw_spin_unlock(lock); \
    ctx_sw_on(); /* 离开自旋锁时允许并完成内核抢占 */ \
} while (0)

#define read_lock(lock) ({ctx_sw_off(); _raw_read_lock(lock);})
#define read_unlock(lock) ({_raw_read_unlock(lock); ctx_sw_on();})
#define write_lock(lock) ({ctx_sw_off(); _raw_write_lock(lock);})
#define write_unlock(lock) ({_raw_write_unlock(lock); ctx_sw_on();})
#define write_trylock(lock) ({ctx_sw_off(); _raw_write_trylock(lock) ? \
    1 : ({ctx_sw_on(); 0;});})
...

include/asm/softirq.h:

#define cpu_bh_disable(cpu) do { ctx_sw_off(); local_bh_count(cpu)++; barrier(); } while (0)
#define cpu_bh_enable(cpu) do { barrier(); local_bh_count(cpu)--;ctx_sw_on(); } while (0)

kernel/sched.c:

#ifdef CONFIG_PREEMPT
asmlinkage void preempt_schedule(void)
{
    while (current->need_resched) {
        ctx_sw_off();
        current->state = TASK_PREEMPTED;
        schedule();
        current->state &= ~TASK_PREEMPTED;
        ctx_sw_on_no_preempt();
    }
}
#endif

asmlinkage void schedule(void)
{
    struct schedule_data * sched_data;
    struct task_struct *prev, *next, *p;
    struct list_head *tmp;
    int this_cpu, c;

#ifdef CONFIG_PREEMPT
    ctx_sw_off();
#endif
    if (!
[1] [2] [3] 下一页 

current->active_mm) BUG();
need_resched_back:
    prev = current;
    this_cpu = prev->processor;

    if (in_interrupt())
        goto scheduling_in_interrupt;

    release_kernel_lock(prev, this_cpu);

    /* Do "administrative" work here while we don't hold any locks */
    if (softirq_active(this_cpu) & softirq_mask(this_cpu))
        goto handle_softirq;
handle_softirq_back:

    /*
     * 'sched_data' is protected by the fact that we can run
     * only one process per CPU.
     */
    sched_data = & aligned_data[this_cpu].schedule_data;

    spin_lock_irq(&runqueue_lock);

    /* move an exhausted RR process to be last.. */
    if (prev->policy == SCHED_RR)
        goto move_rr_last;
move_rr_back:

    switch (prev->state) {
        case TASK_INTERRUPTIBLE:
            if (signal_pending(prev)) {
                prev->state = TASK_RUNNING;
                break;
            }
        default:
#ifdef CONFIG_PREEMPT
            if (prev->state & TASK_PREEMPTED)
                break; /* 如果是内核抢占调度,则保留运行队列 */
#endif
            del_from_runqueue(prev);
#ifdef CONFIG_PREEMPT
        case TASK_PREEMPTED:
#endif
        case TASK_RUNNING:
    }
    prev->need_resched = 0;

    /*
     * this is the scheduler proper:
     */

repeat_schedule:
    /*
     * Default process to select..
     */
    next = idle_task(this_cpu);
    c = -1000;
    if (task_on_runqueue(prev))
        goto still_running;

still_running_back:
    list_for_each(tmp, &runqueue_head) {
        p = list_entry(tmp, struct task_struct, run_list);
        if (can_schedule(p, this_cpu)) {
            int weight = goodness(p, this_cpu, prev->active_mm);
            if (weight > c)
                c = weight, next = p;
        }
    }

    /* Do we need to re-calculate counters? */
    if (!c)
        goto recalculate;
    /*
     * from this point on nothing can prevent us from
     * switching to the next task, save this fact in
     * sched_data.
     */
    sched_data->curr = next;
#ifdef CONFIG_SMP
    next->has_cpu = 1;
    next->processor = this_cpu;
#endif
    spin_unlock_irq(&runqueue_lock);

    if (prev == next)
        goto same_process;

#ifdef CONFIG_SMP
    /*
     * maintain the per-process 'last schedule' value.
     * (this has to be recalculated even if we reschedule to
     * the same process) Currently this is only used on SMP,
     * and it's approximate, so we do not have to maintain
     * it while holding the runqueue spinlock.
     */
    sched_data->last_schedule = get_cycles();

    /*
     * We drop the scheduler lock early (it's a global spinlock),
     * thus we have to lock the previous process from getting
     * rescheduled during switch_to().
     */

#endif /* CONFIG_SMP */

    kstat.context_swtch++;
    /*
     * there are 3 processes which are affected by a context switch:
     *
     * prev == .... ==> (last => next)
     *
     * It's the 'much more previous' 'prev' that is on next's stack,
     * but prev is set to (the just run) 'last' process by switch_to().
     * This might sound slightly confusing but makes tons of sense.
     */
    prepare_to_switch();
    {
        struct mm_struct *mm = next->mm;
        struct mm_struct *oldmm = prev->active_mm;
        if (!
上一页 [1] [2] [3] 下一页 

mm) {
            if (next->active_mm) BUG();
            next->active_mm = oldmm;
            atomic_inc(&oldmm->mm_count);
            enter_lazy_tlb(oldmm, next, this_cpu);
        } else {
            if (next->active_mm != mm) BUG();
            switch_mm(oldmm, mm, next, this_cpu);
        }

        if (!prev->mm) {
            prev->active_mm = NULL;
            mmdrop(oldmm);
        }
    }

    /*
     * This just switches the register state and the
     * stack.
     */
    switch_to(prev, next, prev);
    __schedule_tail(prev);

same_process:
    reacquire_kernel_lock(current);
    if (current->need_resched)
        goto need_resched_back;
#ifdef CONFIG_PREEMPT
    ctx_sw_on_no_preempt();
#endif
    return;

recalculate:
    {
        struct task_struct *p;
        spin_unlock_irq(&runqueue_lock);
        read_lock(&tasklist_lock);
        for_each_task(p)
            p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
        read_unlock(&tasklist_lock);
        spin_lock_irq(&runqueue_lock);
    }
    goto repeat_schedule;

still_running:
    c = goodness(prev, this_cpu, prev->active_mm);
    next = prev;
    goto still_running_back;

handle_softirq:
    do_softirq();
    goto handle_softirq_back;

move_rr_last:
    if (!prev->counter) {
        prev->counter = NICE_TO_TICKS(prev->nice);
        move_last_runqueue(prev);
    }
    goto move_rr_back;

scheduling_in_interrupt:
    printk("Scheduling in interrupt\n");
    BUG();
    return;
}

void schedule_tail(struct task_struct *prev)
{
    __schedule_tail(prev);
#ifdef CONFIG_PREEMPT
    ctx_sw_on();
#endif
}

(出处:http://www.sheup.com)


上一页 [1] [2] [3]