RTAI API: base/wd/wd.c Source File

00001 /* 00002 * Development of this (watchdog) module was sponsored by Alcatel, Strasbourg 00003 * as part of general debugging enhancements to RTAI. 00004 * 00005 * Copyright (©) 2001 Ian Soanes <ians@lineo.com>, All rights reserved 00006 * 00007 * This program is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU General Public License as 00009 * published by the Free Software Foundation; either version 2 of the 00010 * License, or (at your option) any later version. 00011 * 00012 * This program is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00020 */ 00021 00022 /******************************************************************************* 00023 * 00024 * RTAI Watchdog Module 00025 * -------------------- 00026 * 00027 * Module to provide various watchdog protection services to RTAI thereby 00028 * protecting it (and the host Linux OS) against programming errors in RTAI 00029 * applications. 00030 * 00031 * Services provided... 00032 * 00033 * 1. Detection of RT tasks that exceed their alloted time period. This will 00034 * detect tasks that have gone into infinite loops or are regularly 00035 * overunning. Normally such tasks would prevent other tasks (and Linux) 00036 * from being scheduled and possibly lock and/or crash the system. 00037 * 00038 * 2. The ability to adjust important parameters when inserting the watchdog 00039 * module and from other RT modules via a simple API. 00040 * 00041 * 3. Configurable policy to use on bad tasks. Currently available policies 00042 * are... 00043 * 00044 * o Do nothing, other than log some messages and keep a record of the bad 00045 * task. In reality you will probably never get the chance to see these 00046 * messages if the task is locking out the Linux task. This policy is not 00047 * usually recommended. 00048 * 00049 * o Resynchronise the task's frame time and nothing more. This is good for 00050 * tasks that occasionally overrun. Doing this should prevent the system 00051 * from locking up and crashing as the scheduler tries to catch up with the 00052 * missed deadlines. The maximum (if any) number of times to resynchronise 00053 * a task before permanently suspending it is configurable. 00054 * 00055 * o Debug policy, this is a special case of the above resync policy. It is 00056 * recommended when step and trace debugging RT tasks that use oneshot RT 00057 * timer mode. (See README.WATCHDOG for full details) 00058 * 00059 * o Stretch (increase) the period of the offending task until it no longer 00060 * overruns. The percentage increment (of the original period) is 00061 * configurable, as is the maximum (if any) number of times to increase 00062 * the period before permanently suspending the task. When a task's period 00063 * is increased in this way the scheduler is asked to resynchronise the 00064 * task's frame time in order to prevent the system locking up and crashing 00065 * as it tries to catch up with the missed deadlines. This policy could be 00066 * a useful development aid if you are not sure what period to use for a 00067 * task. 00068 * 00069 * o Slip the offending task by forcibly suspending it for a percentage of 00070 * its period. The percentage slip is configurable, as is the maximum (if 00071 * any) number of times to slip the task before it is permanently 00072 * suspended. By slipping the task, other tasks (including Linux) are 00073 * given the oppurtunity to run and the system doesn't lock up. 00074 * 00075 * o Suspend the offending task so that it no longer poses any threat to 00076 * the system. The task will still be known to the scheduler so it could 00077 * possibly be resumed sometime later. 00078 * 00079 * o Kill the offending task and remove all trace of it. 00080 * 00081 * 4. A safety limit that will suspend any task that overruns excessively. The 00082 * definition of 'excessive' is configurable and can also be disabled. This 00083 * is designed to deal with infinite loops no matter what the current policy. 00084 * The safety limit needs to be set sufficiently high so that it doesn't 00085 * interfere with the prevailing watchdog policy. This limit is automatically 00086 * disabled when the policy is set to 'Debug' in order not to suspend RT 00087 * tasks being step and trace debugged. 00088 * 00089 * 5. Keeps a record of bad tasks (apart from those that have been killed) that 00090 * can be examined via a /proc interface. (/proc/rtai/watchdog) 00091 * 00092 * ID: @(#)$Id: wd.c,v 1.5 2005/12/06 19:38:55 mante Exp $ 00093 * 00094 *******************************************************************************/ 00095 00096 #include <linux/module.h> 00097 #include <linux/init.h> 00098 #include <linux/version.h> 00099 #include <asm/io.h> 00100 00101 #ifdef CONFIG_PROC_FS 00102 #include <linux/stat.h> 00103 #include <linux/proc_fs.h> 00104 #include <rtai_proc_fs.h> 00105 static struct proc_dir_entry *wd_proc; 00106 static int wdog_read_proc(char *page, char **start, off_t off, int count, 00107 int *eof, void *data); 00108 #endif 00109 00110 #include <asm/rtai.h> 00111 #include <rtai_sched.h> 00112 #include <rtai_wd.h> 00113 00114 // Switches on LED heartbeat and extra logging 00115 //#define WDBUG 00116 #ifdef WDBUG 00117 #define LPT_PORT 0x378 00118 #define DBUG WDLOG 00119 #else 00120 #define DBUG(x...) 00121 #endif 00122 00123 // Leave this defined if you don't want to use RTAI dynamic memory management 00124 #define MY_ALLOC 00125 #ifndef MY_ALLOC 00126 #ifdef CONFIG_RTAI_MALLOC 00127 #include <rtai_malloc.h> 00128 #else 00129 #define MY_ALLOC // Not configured so we must use our own 00130 #endif 00131 #endif 00132 #ifdef MY_ALLOC 00133 #define BAD_TASK_MAX 100 // Feel free to change this 00134 00135 static spinlock_t alloc_lock = SPIN_LOCK_UNLOCKED; 00136 static BAD_RT_TASK bad_task_pool[BAD_TASK_MAX]; 00137 #endif 00138 00139 // The current version number 00140 static char version[] = "$Revision: 1.5 $"; 00141 static char ver[10]; 00142 00143 // User friendly policy names 00144 static char *policy_name[] = 00145 {"Nothing", "Resync", "Debug", "Stretch", "Slip", "Suspend", "Kill"}; 00146 00147 // Private data 00148 static int num_wdogs; // Number of watchdogs (and task lists) 00149 static RT_TASK wdog[NR_RT_CPUS]; // Watchdog tasks (1 per RT task list) 00150 static RT_TASK *tlists[NR_RT_CPUS]; // Scheduler's RT task lists 00151 static RT_TASK **smp_current; // SMP scheduler's rt_smp_current array 00152 static BAD_RT_TASK *bad_tl[NR_RT_CPUS]; // Bad task lists (1 per watchdog) 00153 static int sched; // Scheduler type (UP, SMP or MUP) 00154 00155 // -------------------------- CONFIGURABLE PARAMETERS -------------------------- 00156 // Module parameters 00157 static int TickPeriod = 10000000; // Task period in nano seconds 00158 MODULE_PARM(TickPeriod, "i"); // (should be shorter than all others) 00159 00160 static int wd_OneShot = 1; // One shot timer mode or not (periodic) 00161 MODULE_PARM(wd_OneShot, "i"); // (should be the same as other tasks) 00162 00163 static int Grace = 3; // How much a task can be overdue 00164 MODULE_PARM(Grace, "i"); // (in periods, always 1 in some modes) 00165 00166 static int GraceDiv = 1; // Divisor to allow Gracevalues < 1 00167 MODULE_PARM(GraceDiv, "i"); // overrun = period * Grace / Gracediv 00168 00169 static int Safety = 100; // Safety net to suspend infinite loops 00170 MODULE_PARM(Safety, "i"); // (overrides policy, -ve disables) 00171 00172 static wd_policy Policy = WD_SUSPEND; // How to punish misbehavers 00173 MODULE_PARM(Policy, "i"); // (see above and header for details) 00174 00175 static int Stretch = 10; // %ge to increase period by 00176 MODULE_PARM(Stretch, "i"); // (can be over 100%, 100% is doubling) 00177 00178 static int Slip = 10; // %ge of period to slip a task 00179 MODULE_PARM(Slip, "i"); // (can be over 100%) 00180 00181 static int Limit = 100; // Maximum number of offences 00182 MODULE_PARM(Limit, "i"); // (-ve means disabled ie. no limit) 00183 00184 // Parameter configuring API 00185 int rt_wdset_grace(int new) // How much a task can be overdue 00186 { 00187 int old = Grace; 00188 00189 if (Policy <= WD_STRETCH && new != 1) return -EINVAL; 00190 if (new < 1) return -EINVAL; 00191 Grace = new; 00192 return old; 00193 } 00194 00195 int rt_wdset_gracediv(int new) // Divisor for Gracevalues < 1 00196 { 00197 int old = GraceDiv; 00198 00199 if (Policy <= WD_STRETCH && new != 1) return -EINVAL; 00200 if (new < 1) return -EINVAL; 00201 GraceDiv = new; 00202 return old; 00203 } 00204 00205 int rt_wdset_safety(int new) // Safety net to suspend infinite loops 00206 { 00207 int old = Safety; 00208 00209 if (new >= 0 && new < Grace) return -EINVAL; 00210 Safety = new; 00211 return old; 00212 } 00213 00214 wd_policy rt_wdset_policy(wd_policy new) // How to punish misbehavers 00215 { 00216 wd_policy old = Policy; 00217 00218 if (new < WD_NOTHING || new > WD_KILL) return -EINVAL; 00219 if (new <= WD_STRETCH) Grace = GraceDiv = 1; 00220 if (new == WD_DEBUG) Safety = Limit = -1; 00221 Policy = new; 00222 return old; 00223 } 00224 00225 int rt_wdset_slip(int new) // %ge of period to slip a task 00226 { 00227 int old = Slip; 00228 00229 if (new < 0) return -EINVAL; 00230 Slip = new; 00231 return old; 00232 } 00233 00234 int rt_wdset_stretch(int new) // %ge to increase period by 00235 { 00236 int old = Stretch; 00237 00238 if (new < 0) return -EINVAL; 00239 Stretch = new; 00240 return old; 00241 } 00242 00243 int rt_wdset_limit(int new) // Maximum number of offences 00244 { 00245 int old = Limit; 00246 00247 Limit = new; 00248 return old; 00249 } 00250 00251 // ----------------------------- MEMORY MANAGEMENT ----------------------------- 00252 static BAD_RT_TASK *new_bad_task(void) 00253 { 00254 #ifdef MY_ALLOC 00255 int bt; 00256 00257 spin_lock(&alloc_lock); 00258 for (bt = 0; bt < BAD_TASK_MAX; bt++) { 00259 if (!(bad_task_pool[bt].in_use)) { 00260 bad_task_pool[bt].in_use = 1; 00261 spin_unlock(&alloc_lock); 00262 return &bad_task_pool[bt]; 00263 } 00264 } 00265 spin_unlock(&alloc_lock); 00266 return NULL; 00267 #else 00268 return rt_malloc(sizeof(BAD_RT_TASK)); 00269 #endif 00270 } 00271 00272 static void free_bad_task(BAD_RT_TASK *bt) 00273 { 00274 #ifdef MY_ALLOC 00275 bt->in_use = 0; 00276 #else 00277 rt_free(bt); 00278 #endif 00279 } 00280 00281 // -------------------------- LINKED LIST FUNCTIONS ---------------------------- 00282 static void append_bad_task(BAD_RT_TASK **list, BAD_RT_TASK *new) 00283 { 00284 BAD_RT_TASK *end = *list; 00285 00286 if (!end) { 00287 *list = new; 00288 } else { 00289 while (end->next) end = end->next; 00290 end->next = new; 00291 } 00292 } 00293 00294 static BAD_RT_TASK *delete_bad_task(BAD_RT_TASK **list, BAD_RT_TASK *del) 00295 { 00296 BAD_RT_TASK *rtn, *bt = *list; 00297 00298 if (bt == del) { 00299 rtn = *list = NULL; 00300 } else { 00301 while (bt->next != del) bt = bt->next; 00302 rtn = bt->next = del->next; 00303 } 00304 free_bad_task(del); 00305 return rtn; // Next in list 00306 } 00307 00308 static BAD_RT_TASK *find_bad_task(BAD_RT_TASK *list, RT_TASK *t) 00309 { 00310 BAD_RT_TASK *bt = list; 00311 00312 while (bt) { 00313 if (bt->task == t) break; 00314 bt = bt->next; 00315 } 00316 return bt; 00317 } 00318 00319 // ------------------------- WHICH CPU IS A TASK ON? --------------------------- 00320 static int which_cpu(RT_TASK *t) // Only reliable if task suspended 00321 { 00322 int cpuid; 00323 00324 switch (sched) { 00325 case RT_SCHED_UP: // There is only one possibility 00326 return 0; 00327 case RT_SCHED_MUP: // Same as calling watchdog task 00328 return hard_cpu_id(); 00329 case RT_SCHED_SMP: // Deduce from position in list 00330 for (cpuid = 0; cpuid < NR_RT_CPUS; cpuid++) { 00331 if (t == smp_current[cpuid]) { 00332 return cpuid; 00333 } 00334 } 00335 return hard_cpu_id(); // Assume same as calling watchdog 00336 } 00337 return -1; 00338 } 00339 00340 // ----------------------- SMP PROOF SUSPEND AND DELETE ------------------------ 00341 static void smpproof_task_suspend(RT_TASK *t) 00342 { 00343 int cpuid; 00344 00345 rt_task_suspend(t); 00346 if ((cpuid = which_cpu(t)) >= num_wdogs) { // Not really suspended 00347 DBUG("Resuming dummy watchdog %d\n", cpuid); 00348 rt_task_resume(&wdog[cpuid]); // ...until we do this!! 00349 } 00350 } 00351 00352 static void smpproof_task_delete(RT_TASK *t) 00353 { 00354 int cpuid; 00355 00356 rt_task_delete(t); 00357 if ((cpuid = which_cpu(t)) >= num_wdogs) { // Not really stopped 00358 DBUG("Resuming dummy watchdog %d\n", cpuid); 00359 rt_task_resume(&wdog[cpuid]); // ...until we do this!! 00360 } 00361 } 00362 00363 // ----------------------------- POLICY FUNCTIONS ------------------------------ 00364 static void stretch_badtask(RT_TASK *t, BAD_RT_TASK *bt, int cpuid) 00365 { 00366 // Stretch the task's period and ask scheduler to resync frame time 00367 t->period += llimd(bt->orig_period, Stretch, 100); 00368 t->resync_frame = 1; 00369 DBUG( "...by %d%% to %uns\n", 00370 Stretch, (int)count2nano_cpuid(t->period, cpuid)); 00371 } 00372 00373 static void start_slipping_badtask(RT_TASK *t, BAD_RT_TASK *bt, int cpuid) 00374 { 00375 // Mark task as slipping and work out how many watchdog ticks to suspend it 00376 bt->slipping = 1; 00377 bt->countdown = llimd( llimd(count2nano_cpuid(t->period, cpuid), Slip, 100), 00378 1, 00379 TickPeriod); 00380 DBUG( "Suspending task 0x%X for %d ticks (slip %d)\n", 00381 t, bt->countdown, bt->count); 00382 00383 // Suspend task - it will get resumed later 00384 smpproof_task_suspend(t); 00385 } 00386 00387 static void check_slipping_badtask(BAD_RT_TASK *bt) 00388 { 00389 // Resume task if it's been suspended long enough 00390 if (--(bt->countdown) <= 0) { 00391 bt->slipping = 0; 00392 rt_task_resume(bt->task); 00393 DBUG("Finished slip %d of task 0x%X, resuming\n", bt->count, bt->task); 00394 } 00395 } 00396 00397 // ------------------------- FUNCTION TO DECIDE POLICY ------------------------- 00398 static void handle_badtask(int wd, RT_TASK *t, BAD_RT_TASK *bt, RTIME overrun) 00399 { 00400 // Start 'criminal record' for first time offenders 00401 if (!bt) { 00402 bt = new_bad_task(); 00403 if (!bt) return; 00404 bt->task = t; 00405 bt->next = NULL; 00406 bt->slipping = 0; 00407 bt->count = 0; 00408 bt->countdown = 0; 00409 bt->valid = 1; 00410 bt->forced = 0; 00411 bt->orig_period = t->period; 00412 append_bad_task(&bad_tl[wd], bt); 00413 } 00414 00415 // Increment offence count and note current policy 00416 (bt->count)++; 00417 bt->policy = Policy; 00418 00419 // In severe cases we must suspend regardless of current policy 00420 if ((overrun >= (Safety * bt->orig_period)) && (Safety >= 0)) { 00421 WDLOG("Forcing suspension of severely overrun task %p\n", t); 00422 bt->forced = 1; 00423 smpproof_task_suspend(t); 00424 return; 00425 } 00426 00427 // Has it pushed its luck too far? 00428 if ((bt->count >= Limit) && (Limit >= 0)) { 00429 WDLOG("Task %p reached offence limit, suspending\n", t); 00430 bt->forced = 1; 00431 smpproof_task_suspend(t); 00432 return; 00433 } 00434 00435 // What to do depends on current policy 00436 switch (Policy) { 00437 00438 case WD_NOTHING: // Hope for the best 00439 break; 00440 00441 case WD_RESYNC: // Resynchronise frame time 00442 case WD_DEBUG: // Same thing for debugging 00443 WDLOG("Resynchronising task %p\n", t); 00444 t->resync_frame = 1; 00445 break; 00446 00447 case WD_STRETCH: // Stretch the task's period 00448 WDLOG("Stretching period of task %p\n", t); 00449 stretch_badtask(t, bt, wd); 00450 break; 00451 00452 case WD_SLIP: // Suspend but arrange to resume later 00453 WDLOG("Slipping task %p\n", t); 00454 start_slipping_badtask(t, bt, wd); 00455 break; 00456 00457 case WD_SUSPEND: // Suspend the task 00458 WDLOG("Suspending task %p\n", t); 00459 smpproof_task_suspend(t); 00460 break; 00461 00462 case WD_KILL: // Delete the task 00463 WDLOG("Killing task %p\n", t); 00464 smpproof_task_delete(t); 00465 break; 00466 00467 default: // Invalid 00468 WDLOG("Invalid policy (%d)\n", Policy); 00469 break; 00470 } 00471 } 00472 00473 // -------------------------- THE MAIN WATCHDOG TASK --------------------------- 00474 static void watchdog(long wd) 00475 { 00476 #ifdef WDBUG 00477 int led = 0; 00478 static int output = 0x0000; 00479 #endif 00480 RT_TASK *task, *self = rt_whoami(); 00481 BAD_RT_TASK *bt; 00482 RTIME now, overrun; 00483 int another, dog; 00484 00485 while (1) { 00486 #ifdef WDBUG 00487 // LED heartbeat visible on parallel port 00488 led = !led; 00489 if (led) output |= (1 << wd); 00490 else output &= ~(1 << wd); 00491 outb(output, LPT_PORT); 00492 #endif 00493 // Fix any overrun of our own (step and trace debug in oneshot mode) 00494 now = rt_get_time_cpuid(wd); 00495 if (now - self->resume_time >= self->period) { 00496 self->resync_frame = 1; 00497 rt_task_wait_period(); 00498 DBUG("Resynchronised watchdog %d\n", wd); 00499 continue; 00500 } 00501 00502 // Mark all this watchdog's bad tasks as invalid ready for check 00503 for (bt = bad_tl[wd]; bt; bt->valid = 0, bt = bt->next); 00504 00505 // Loop through all the RT tasks in this watchdog's list 00506 task = tlists[wd]; 00507 while ((task = task->next)) { 00508 00509 // Ignore ourself but note another watchdog 00510 if (task == self) continue; 00511 for (another = dog = 0; dog < num_wdogs; dog++) { 00512 if (task == &wdog[dog]) { 00513 another = 1 + dog; 00514 break; 00515 } 00516 } 00517 00518 // Search for any criminal record and check slipping tasks 00519 if ((bt = find_bad_task(bad_tl[wd], task))) { 00520 bt->valid = 1; 00521 if (bt->slipping) { 00522 check_slipping_badtask(bt); 00523 continue; 00524 } 00525 } 00526 00527 // Ignore non-periodic, resyncing, suspended or blocked tasks 00528 if (!task->period || task->resync_frame || task->state & 00529 (RT_SCHED_SUSPENDED|RT_SCHED_DELAYED|RT_SCHED_SEMAPHORE|RT_SCHED_SEND|RT_SCHED_RECEIVE|RT_SCHED_RPC|RT_SCHED_RETURN)) { 00530 continue; 00531 } 00532 00533 // Check for overrun and decide what to do (ignore other watchdogs) 00534 overrun = now - task->resume_time; 00535 if (overrun >= llimd(task->period, Grace, GraceDiv)) { 00536 if (another--) { 00537 WDLOG("WARNING: Watchdog %d is overrunning\n", another); 00538 } else { 00539 WDLOG("Found overrunning task %p (list %d)\n", task, wd); 00540 handle_badtask(wd, task, bt, overrun); 00541 } 00542 } 00543 } 00544 00545 // Clean up any bad tasks still marked invalid (their RT task has gone) 00546 for (bt = bad_tl[wd]; bt;) { 00547 if (!(bt->valid)) { 00548 bt = delete_bad_task(&bad_tl[wd], bt); 00549 } else { 00550 bt = bt->next; 00551 } 00552 } 00553 00554 // Wait for next watchdog 'tick' 00555 rt_task_wait_period(); 00556 } 00557 } 00558 00559 // -------------------------- DUMMY WATCHDOG TASK ------------------------------ 00560 static void dummy(long wd) 00561 { 00562 // Go straight back to sleep - see SMP proof suspend and delete 00563 while (1) { 00564 rt_task_suspend(&wdog[wd]); 00565 } 00566 } 00567 00568 // ----------------------------- PROC INTERFACE -------------------------------- 00569 #ifdef CONFIG_PROC_FS 00570 static int wdog_read_proc(char *page, char **start, off_t off, int count, 00571 int *eof, void *data) 00572 { 00573 PROC_PRINT_VARS; 00574 RT_TASK *task; 00575 BAD_RT_TASK *bt; 00576 long onsec, osec; 00577 long ansec, asec; 00578 int cpuid, tl, id = 1; 00579 char action[10]; 00580 00581 // Heading and parameters 00582 PROC_PRINT("\nRTAI Watchdog Status\n"); 00583 PROC_PRINT( "--------------------\n"); 00584 PROC_PRINT("%d Watchdog task%s running @ %dHz in %s mode\n", 00585 num_wdogs, num_wdogs > 1 ? "s" : "", 00586 (int)imuldiv(NSECS_PER_SEC, 1, TickPeriod), 00587 wd_OneShot ? "oneshot" : "periodic"); 00588 #ifdef MY_ALLOC 00589 PROC_PRINT("Using static memory management (%d entries)\n", BAD_TASK_MAX); 00590 #else 00591 PROC_PRINT("Using dynamic memory management\n"); 00592 #endif 00593 PROC_PRINT("Policy : '%s'\n", policy_name[Policy]); 00594 PROC_PRINT("Grace periods : %d%s\n", Grace, 00595 (Policy <= WD_STRETCH) ? " (forced)" : ""); 00596 PROC_PRINT("Grace divisor : %d%s\n", GraceDiv, 00597 (Policy <= WD_STRETCH) ? " (forced)" : ""); 00598 PROC_PRINT("Safety limit : "); 00599 if (Safety < 0) { 00600 PROC_PRINT("(disabled)\n"); 00601 } else { 00602 PROC_PRINT("%d period%s\n", Safety, Safety > 1 ? "s" : ""); 00603 } 00604 PROC_PRINT("Slip factor : %d%%\n", Slip); 00605 PROC_PRINT("Stretch factor : %d%%\n", Stretch); 00606 PROC_PRINT("Offense limit : "); 00607 if (Limit < 0) { 00608 PROC_PRINT("(disabled)\n"); 00609 } else { 00610 PROC_PRINT("%d\n", Limit); 00611 } 00612 00613 // List criminal records 00614 PROC_PRINT("\nBad tasks...\n\n"); 00615 PROC_PRINT("RT Task ID " 00616 "CPU%s " 00617 "Priority State Count " 00618 "Original period Adjusted period " 00619 "Action\n", 00620 (sched == RT_SCHED_SMP) ? "s" : ""); 00621 PROC_PRINT("---------- -- " 00622 "---%s " 00623 "-------- ----- ----- " 00624 "--------------- --------------- " 00625 "---------\n", 00626 (sched == RT_SCHED_SMP) ? "-" : ""); 00627 for (tl = 0; tl < num_wdogs; tl++) { 00628 task = tlists[tl]; 00629 while ((task = task->next)) { 00630 if ((bt = find_bad_task(bad_tl[tl], task))) { 00631 if (bt->forced) { 00632 sprintf(action, "%s *", policy_name[WD_SUSPEND]); 00633 } else { 00634 strcpy(action, policy_name[bt->policy]); 00635 } 00636 cpuid = (sched == RT_SCHED_MUP) ? task->runnable_on_cpus : 0; 00637 osec = ulldiv( count2nano_cpuid(bt->orig_period, cpuid), 00638 NSECS_PER_SEC, 00639 &onsec); 00640 asec = ulldiv( count2nano_cpuid(task->period, cpuid), 00641 NSECS_PER_SEC, 00642 &ansec); 00643 PROC_PRINT( "0x%08lx %-2d " 00644 "%s%-2d%s " 00645 "%-8d 0x%-3x %-5d " 00646 "%02ds %09dns %02ds %09dns " 00647 "%s\n", 00648 (long)task, id, 00649 (sched == RT_SCHED_SMP) ? "0x" : "", 00650 (sched == RT_SCHED_UP) ? 00651 0 : (int)task->runnable_on_cpus, 00652 (sched == RT_SCHED_SMP) ? "" : " ", 00653 task->priority, task->state, bt->count, 00654 (int)osec, (int)onsec, (int)asec, (int)ansec, 00655 action); 00656 } 00657 id++; 00658 } 00659 } 00660 PROC_PRINT_DONE; 00661 } 00662 #endif 00663 00664 // ----------------------------- MODULE CONTROL -------------------------------- 00665 int __rtai_wd_init(void) 00666 { 00667 RTIME period; 00668 int dog; 00669 RT_TASK *lnx0; 00670 struct apic_timer_setup_data apic_data[NR_RT_CPUS]; 00671 char *c; 00672 00673 // Some parameters have to be forced 00674 if (Policy <= WD_STRETCH) Grace = GraceDiv = 1; 00675 if (Policy == WD_DEBUG) Safety = Limit = -1; 00676 00677 // Deduce number of watchdogs needed from scheduler type 00678 switch (sched = rt_sched_type()) { 00679 case RT_SCHED_UP : // Fall through 00680 case RT_SCHED_SMP : num_wdogs = 1; break; 00681 case RT_SCHED_MUP : num_wdogs = NR_RT_CPUS; break; 00682 } 00683 00684 // Fill array of pointers to scheduler's task lists 00685 lnx0 = rt_get_base_linux_task(tlists); 00686 00687 // Register watchdogs with scheduler (SMP returns pointer to rt_smp_current) 00688 for (dog = 0; dog < NR_RT_CPUS; dog++) { 00689 if ((smp_current = rt_register_watchdog(&wdog[dog], dog)) < 0) { 00690 WDLOG("Failed to register watchdog %d with RTAI scheduler\n", dog); 00691 for (dog--; dog >= 0; dog--) rt_deregister_watchdog(&wdog[dog], dog); 00692 return -EBUSY; 00693 } 00694 } 00695 00696 // Set up chosen timer mode - MUP lets you have different modes per CPU, 00697 // but you'll have to edit the code below a bit if that's what you want. 00698 if (sched == RT_SCHED_MUP) { 00699 for (dog = 0; dog < num_wdogs; dog++) { 00700 apic_data[dog].mode = !wd_OneShot; // <--- This bit... 00701 apic_data[dog].count = TickPeriod; 00702 if (wd_OneShot) { 00703 // rt_preempt_always_cpuid(1, dog); // <--- ...and this! 00704 } 00705 } 00706 start_rt_apic_timers(apic_data, 0); 00707 } else { 00708 if (wd_OneShot) { 00709 rt_set_oneshot_mode(); 00710 // rt_preempt_always(1); 00711 } else { 00712 rt_set_periodic_mode(); 00713 } 00714 start_rt_timer((int)nano2count(TickPeriod)); 00715 } 00716 00717 // Set up and start watchdog tasks (on separate CPUs if MP). We run as 00718 // many real watchdogs as there are task lists. However we must protect 00719 // the remaining CPUs with dummy watchdogs to prevent them being hogged 00720 // by overrunning tasks (only relevant on SMP not MUP). 00721 for (dog = 0; dog < NR_RT_CPUS; dog++) { 00722 rt_task_init_cpuid( &wdog[dog], 00723 (dog < num_wdogs) ? watchdog : dummy, 00724 dog, 2000, RT_SCHED_HIGHEST_PRIORITY, 0, 0, dog); 00725 } 00726 for (dog = 0; dog < num_wdogs; dog++) { 00727 period = nano2count_cpuid(TickPeriod, dog); 00728 rt_task_make_periodic( &wdog[dog], 00729 rt_get_time_cpuid(dog) + period, 00730 period); 00731 } 00732 00733 // Tidy up version number 00734 if ((c = strchr(version, ' '))) { 00735 *(strchr(c, '$')) = '\0'; 00736 strcpy(ver, c + 1); 00737 } else { 00738 strcpy(ver, "? "); 00739 } 00740 00741 // Log initial parameters 00742 WDLOG( "loaded.\n"); 00743 WDLOG( "%d Watchdog task%s running @ %dHz in %s mode\n", 00744 num_wdogs, num_wdogs > 1 ? "s" : "", 00745 imuldiv(NSECS_PER_SEC, 1, TickPeriod), 00746 wd_OneShot ? "oneshot" : "periodic"); 00747 #ifdef MY_ALLOC 00748 WDLOG( "Using static memory management (%d entries)\n", BAD_TASK_MAX); 00749 #else 00750 WDLOG( "Using dynamic memory management\n"); 00751 #endif 00752 WDLOG( "Policy : '%s'\n", policy_name[Policy]); 00753 WDLOG( "Grace periods : %d%s\n", Grace, 00754 (Policy <= WD_STRETCH) ? " (forced)" : ""); 00755 WDLOG( "Grace divisor : %d%s\n", GraceDiv, 00756 (Policy <= WD_STRETCH) ? " (forced)" : ""); 00757 WDLOG( "Safety limit : "); 00758 if (Safety < 0) { 00759 rt_printk("(disabled)\n"); 00760 } else { 00761 rt_printk("%d period%s\n", Safety, Safety > 1 ? "s" : " "); 00762 } 00763 WDLOG( "Slip factor : %d%%\n", Slip); 00764 WDLOG( "Stretch factor : %d%%\n", Stretch); 00765 WDLOG( "Offense limit : "); 00766 if (Limit < 0) { 00767 rt_printk("(disabled)\n"); 00768 } else { 00769 rt_printk("%d\n", Limit); 00770 } 00771 00772 #ifdef CONFIG_PROC_FS 00773 // Register /proc interface 00774 wd_proc = create_proc_entry("watchdog", 0, rtai_proc_root); 00775 wd_proc->read_proc = wdog_read_proc; 00776 #endif 00777 return 0; 00778 } 00779 00780 void __rtai_wd_exit(void) 00781 { 00782 BAD_RT_TASK *bt; 00783 int dog; 00784 00785 #ifdef CONFIG_PROC_FS 00786 // Remove /proc interface 00787 remove_proc_entry("watchdog", rtai_proc_root); 00788 #endif 00789 // Deregister all watchdogs and shutdown the timer 00790 for (dog = 0; dog < NR_RT_CPUS; dog++) { 00791 rt_deregister_watchdog(&wdog[dog], dog); 00792 } 00793 stop_rt_timer(); 00794 rt_busy_sleep(TickPeriod); 00795 00796 // Cleanup and remove all watchdogs and bad task lists 00797 for (dog = 0; dog < NR_RT_CPUS; dog++) { 00798 rt_task_delete(&wdog[dog]); 00799 if (dog < num_wdogs) { 00800 for (bt = bad_tl[dog]; bt;) { 00801 bt = delete_bad_task(&bad_tl[dog], bt); 00802 } 00803 } 00804 } 00805 00806 // It's all over :( 00807 WDLOG("unloaded.\n"); 00808 } 00809 00810 module_init(__rtai_wd_init); 00811 module_exit(__rtai_wd_exit); 00812 00813 EXPORT_SYMBOL(rt_wdset_grace); 00814 EXPORT_SYMBOL(rt_wdset_gracediv); 00815 EXPORT_SYMBOL(rt_wdset_safety); 00816 EXPORT_SYMBOL(rt_wdset_policy); 00817 EXPORT_SYMBOL(rt_wdset_slip); 00818 EXPORT_SYMBOL(rt_wdset_stretch); 00819 EXPORT_SYMBOL(rt_wdset_limit);