Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

调整pcb的sched_info和rwlock,以避免调度器死锁问题 #341

31 changes: 29 additions & 2 deletions kernel/src/libs/rwlock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@ use core::{
sync::atomic::{AtomicU32, Ordering},
};

use crate::{process::ProcessManager, syscall::SystemError};
use crate::{
arch::CurrentIrqArch,
exception::{InterruptArch, IrqFlagsGuard},
process::ProcessManager,
syscall::SystemError,
};

///RwLock读写锁

Expand Down Expand Up @@ -56,6 +61,7 @@ pub struct RwLockUpgradableGuard<'a, T: 'a> {
/// Guard handed out while the `RwLock` is held for writing.
pub struct RwLockWriteGuard<'a, T: 'a> {
/// Raw pointer to the value stored inside the lock.
data: *mut T,
/// The lock this guard was obtained from; its lock word is updated when the guard is dropped.
inner: &'a RwLock<T>,
/// Saved interrupt state, present only when the guard was obtained through
/// `write_irqsave`; `None` for every other acquisition path visible in this file.
/// Dropping the contained value restores interrupts.
irq_guard: Option<IrqFlagsGuard>,
}

// Marks `RwLock<T>` as `Send` whenever the protected `T` is `Send`.
// NOTE(review): soundness depends on the locking protocol implemented elsewhere
// in this file, which is not visible here — confirm when touching the lock word.
unsafe impl<T: Send> Send for RwLock<T> {}
Expand Down Expand Up @@ -195,6 +201,7 @@ impl<T> RwLock<T> {
return Some(RwLockWriteGuard {
data: unsafe { &mut *self.data.get() },
inner: self,
irq_guard: None,
});
} else {
return None;
Expand All @@ -213,6 +220,22 @@ impl<T> RwLock<T> {
}
}

#[allow(dead_code)]
#[inline]
/// Acquires the write guard with interrupts disabled.
///
/// Interrupts are saved and disabled first, then the lock is spun on via
/// `try_write` until it is obtained. The saved interrupt state travels inside
/// the returned guard, so it is released together with the guard.
pub fn write_irqsave(&self) -> RwLockWriteGuard<T> {
    // NOTE(review): `save_and_disable_irq` is an unsafe arch hook; this assumes the
    // returned guard restores the previous interrupt state when dropped — confirm.
    let saved_flags = unsafe { CurrentIrqArch::save_and_disable_irq() };

    // Busy-wait until the exclusive lock has been taken.
    let mut writer = loop {
        if let Some(acquired) = self.try_write() {
            break acquired;
        }
        spin_loop();
    };

    // Tie the interrupt state to the lifetime of the write guard.
    writer.irq_guard = Some(saved_flags);
    writer
}

#[allow(dead_code)]
#[inline]
/// @brief 尝试获得UPGRADER守卫
Expand Down Expand Up @@ -326,6 +349,7 @@ impl<'rwlock, T> RwLockUpgradableGuard<'rwlock, T> {
Ok(RwLockWriteGuard {
data: unsafe { &mut *inner.data.get() },
inner,
irq_guard: None,
})
} else {
Err(self)
Expand Down Expand Up @@ -496,7 +520,10 @@ impl<'rwlock, T> Drop for RwLockWriteGuard<'rwlock, T> {
self.inner
.lock
.fetch_and(!(WRITER | UPGRADED), Ordering::Release);

if let Some(_) = self.irq_guard {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

使用 `Option` 的 `take` 方法即可(例如 `self.irq_guard.take();`),不需要先用 `if let` 判断再赋值为 `None`。

// 自动 drop 包含的 irq_guard,恢复中断
self.irq_guard = None;
}
ProcessManager::current_pcb().preempt_enable();
}
}
3 changes: 1 addition & 2 deletions kernel/src/process/idle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ impl ProcessManager {
};

assert!(idle_pcb.sched_info().on_cpu().is_none());
idle_pcb.sched_info_mut().set_on_cpu(Some(i as u32));

idle_pcb.sched_info().set_on_cpu(Some(i as u32));
v.push(idle_pcb);
}

Expand Down
78 changes: 63 additions & 15 deletions kernel/src/process/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ use core::{
hash::{Hash, Hasher},
intrinsics::unlikely,
mem::ManuallyDrop,
ptr::null_mut,
sync::atomic::{compiler_fence, AtomicBool, AtomicIsize, AtomicUsize, Ordering},
sync::atomic::{compiler_fence, AtomicBool, AtomicI32, AtomicIsize, AtomicUsize, Ordering},
};

use alloc::{
Expand All @@ -14,7 +13,7 @@ use alloc::{
use hashbrown::HashMap;

use crate::{
arch::{asm::current::current_pcb, process::ArchPCBInfo},
arch::{cpu, process::ArchPCBInfo},
filesystem::vfs::{file::FileDescriptorVec, FileType},
include::bindings::bindings::CLONE_SIGNAL,
kdebug,
Expand All @@ -34,7 +33,10 @@ use crate::{
init::initial_kernel_thread,
kthread::{KernelThreadClosure, KernelThreadCreateInfo, KernelThreadMechanism},
},
sched::{core::CPU_EXECUTING, SchedPolicy, SchedPriority},
sched::{
core::{sched_enqueue, CPU_EXECUTING},
SchedPolicy, SchedPriority,
},
smp::kick_cpu,
syscall::SystemError,
};
Expand Down Expand Up @@ -399,6 +401,10 @@ impl ProcessControlBlock {
return self.sched_info.write();
}

/// Takes the write lock on this PCB's scheduler info with interrupts disabled.
///
/// Thin wrapper around `RwLock::write_irqsave` on the `sched_info` field.
pub fn sched_info_mut_irqsave(&self) -> RwLockWriteGuard<ProcessSchedulerInfo> {
    let sched_lock = &self.sched_info;
    sched_lock.write_irqsave()
}

pub fn worker_private(&self) -> SpinLockGuard<Option<WorkerPrivate>> {
return self.worker_private.lock();
}
Expand Down Expand Up @@ -527,10 +533,10 @@ impl ProcessBasicInfo {
#[derive(Debug)]
pub struct ProcessSchedulerInfo {
/// 当前进程所在的cpu
on_cpu: Option<u32>,
on_cpu: AtomicI32,
/// 如果当前进程等待被迁移到另一个cpu核心上(也就是flags中的PF_NEED_MIGRATE被置位),
/// 该字段存储要被迁移到的目标处理器核心号
migrate_to: Option<u32>,
migrate_to: AtomicI32,

/// 当前进程的状态
state: ProcessState,
Expand All @@ -546,9 +552,13 @@ pub struct ProcessSchedulerInfo {

impl ProcessSchedulerInfo {
pub fn new(on_cpu: Option<u32>) -> RwLock<Self> {
let cpu_id = match on_cpu {
Some(cpu_id) => cpu_id as i32,
None => -1,
};
return RwLock::new(Self {
on_cpu,
migrate_to: None,
on_cpu: AtomicI32::new(cpu_id),
migrate_to: AtomicI32::new(-1),
state: ProcessState::Blocked(false),
sched_policy: SchedPolicy::CFS,
virtual_runtime: AtomicIsize::new(0),
Expand All @@ -558,26 +568,44 @@ impl ProcessSchedulerInfo {
}

pub fn on_cpu(&self) -> Option<u32> {
return self.on_cpu;
let on_cpu = self.on_cpu.load(Ordering::SeqCst);
if on_cpu == -1 {
return None;
} else {
return Some(on_cpu as u32);
}
}

pub fn set_on_cpu(&mut self, on_cpu: Option<u32>) {
self.on_cpu = on_cpu;
pub fn set_on_cpu(&self, on_cpu: Option<u32>) {
if let Some(cpu_id) = on_cpu {
self.on_cpu.store(cpu_id as i32, Ordering::SeqCst);
} else {
self.on_cpu.store(-1, Ordering::SeqCst);
}
}

pub fn migrate_to(&self) -> Option<u32> {
return self.migrate_to;
let migrate_to = self.migrate_to.load(Ordering::SeqCst);
if migrate_to == -1 {
return None;
} else {
return Some(migrate_to as u32);
}
}

pub fn set_migrate_to(&mut self, migrate_to: Option<u32>) {
self.migrate_to = migrate_to;
pub fn set_migrate_to(&self, migrate_to: Option<u32>) {
if let Some(data) = migrate_to {
self.migrate_to.store(data as i32, Ordering::SeqCst);
} else {
self.migrate_to.store(-1, Ordering::SeqCst)
}
}

/// Returns a copy of the process state recorded in this scheduler info.
pub fn state(&self) -> ProcessState {
    self.state
}

fn set_state(&mut self, state: ProcessState) {
pub fn set_state(&mut self, state: ProcessState) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里还没改哈哈

self.state = state;
}

Expand Down Expand Up @@ -711,3 +739,23 @@ impl Drop for KernelStack {
/// Entry point for process-subsystem initialization; delegates to `ProcessManager::init`.
pub fn process_init() {
ProcessManager::init();
}

pub fn process_wakeup(pcb: Arc<ProcessControlBlock>) {
// c版本代码
// BUG_ON(pcb == NULL);
// if (pcb == NULL)
// return -EINVAL;
// // 如果pcb正在调度队列中,则不重复加入调度队列
// if (pcb->state & PROC_RUNNING)
// return 0;

// pcb->state |= PROC_RUNNING;
// sched_enqueue_old(pcb, true);
// return 0;

if pcb.sched_info().state() != ProcessState::Runnable {
pcb.sched_info_mut_irqsave()
.set_state(ProcessState::Runnable);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. 这里的代码移动到Processmanager里面的wakeup
  2. 这里有问题,更改state的工作,不应该由wakeup函数负责,而是在sched_enqueue里面改才会更合理

然后的话,获得写锁之后一定要再次确认它的state是否真的不为Runnable。
因为内核是处处存在并发的,很有可能,756、757两行之间,另外一个cpu加上了锁,然后改了state。只有写锁才是排它的。因此需要二次确认

sched_enqueue(pcb, true);
}
}
22 changes: 8 additions & 14 deletions kernel/src/sched/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,12 @@ use core::sync::atomic::compiler_fence;
use alloc::{sync::Arc, vec::Vec};

use crate::{
arch::{asm::current::current_pcb, cpu::current_cpu_id},
arch::cpu::current_cpu_id,
include::bindings::bindings::process_control_block,
include::bindings::bindings::smp_get_total_cpu,
include::bindings::bindings::{
process_control_block, MAX_CPU_NUM, PF_NEED_MIGRATE, SCHED_FIFO, SCHED_NORMAL, SCHED_RR,
},
kinfo,
mm::percpu::PerCpu,
process::{AtomicPid, Pid, ProcessControlBlock, ProcessFlags, ProcessManager},
syscall::SystemError,
process::{AtomicPid, Pid, ProcessControlBlock, ProcessFlags, ProcessManager, ProcessState},
};

use super::rt::{sched_rt_init, SchedulerRT, __get_rt_scheduler};
Expand Down Expand Up @@ -67,7 +64,7 @@ pub fn loads_balance(pcb: Arc<ProcessControlBlock>) {
&& !pcb.flags().contains(ProcessFlags::NEED_MIGRATE))
{
pcb.flags().insert(ProcessFlags::NEED_MIGRATE);
pcb.sched_info_mut().set_migrate_to(Some(min_loads_cpu_id));
pcb.sched_info().set_migrate_to(Some(min_loads_cpu_id));
// kdebug!("set migrating, pcb:{:?}", pcb);
}
}
Expand Down Expand Up @@ -119,11 +116,9 @@ pub extern "C" fn sched_enqueue_old(pcb: &'static mut process_control_block, mut
/// @param reset_time 是否重置虚拟运行时间
pub fn sched_enqueue(pcb: Arc<ProcessControlBlock>, mut reset_time: bool) {
compiler_fence(core::sync::atomic::Ordering::SeqCst);

// 调度器不处理running位为0的进程,pcb重构后处理?
// if pcb.state & (PROC_RUNNING as u64) == 0 {
// return;
// }
if pcb.sched_info().state() != ProcessState::Runnable {
return;
}
let cfs_scheduler = __get_cfs_scheduler();
let rt_scheduler = __get_rt_scheduler();

Expand All @@ -135,8 +130,7 @@ pub fn sched_enqueue(pcb: Arc<ProcessControlBlock>, mut reset_time: bool) {
if pcb.flags().contains(ProcessFlags::NEED_MIGRATE) {
// kdebug!("migrating pcb:{:?}", pcb);
pcb.flags().remove(ProcessFlags::NEED_MIGRATE);
pcb.sched_info_mut()
.set_on_cpu(pcb.sched_info().migrate_to());
pcb.sched_info().set_on_cpu(pcb.sched_info().migrate_to());
reset_time = true;
}

Expand Down
2 changes: 1 addition & 1 deletion kernel/src/sched/syscall.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{
arch::{asm::current::current_pcb, context::switch_process, CurrentIrqArch},
arch::CurrentIrqArch,
exception::InterruptArch,
process::ProcessManager,
syscall::{Syscall, SystemError},
Expand Down
Loading