base/wd/wd.c

Go to the documentation of this file.
00001 /*
00002  * Development of this (watchdog) module was sponsored by Alcatel, Strasbourg
00003  * as part of general debugging enhancements to RTAI.
00004  *
00005  * Copyright (©) 2001 Ian Soanes <ians@lineo.com>, All rights reserved
00006  *
00007  * Rechecked and updated 2009: Kenneth Jacker   <khj@cs.appstate.edu>
00008  *                             Paolo Mantegazza <mantegazza@aero.polimi.it>
00009  *
00010  * This program is free software; you can redistribute it and/or
00011  * modify it under the terms of the GNU General Public License as
00012  * published by the Free Software Foundation; either version 2 of the
00013  * License, or (at your option) any later version.
00014  *
00015  * This program is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  * GNU General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU General Public License
00021  * along with this program; if not, write to the Free Software
00022  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00023  */
00024 
00025 /*******************************************************************************
00026  *
00027  *                           RTAI Watchdog Module
00028  *                           --------------------
00029  *
00030  * Module to provide various watchdog protection services to RTAI thereby 
00031  * protecting it (and the host Linux OS) against programming errors in RTAI
00032  * applications.
00033  *
00034  * Services provided...
00035  *
00036  * 1. Detection of RT tasks that exceed their allotted time period. This will
00037  *    detect tasks that have gone into infinite loops or are regularly
00038  *    overrunning. Normally such tasks would prevent other tasks (and Linux)
00039  *    from being scheduled and possibly lock and/or crash the system.
00040  *
00041  * 2. The ability to adjust important parameters when inserting the watchdog 
00042  *    module and from other RT modules via a simple API.
00043  *
00044  * 3. Configurable policy to use on bad tasks. Currently available policies 
00045  *    are...
00046  *
00047  *    o Do nothing, other than log some messages and keep a record of the bad 
00048  *      task. In reality you will probably never get the chance to see these 
00049  *      messages if the task is locking out the Linux task. This policy is not 
00050  *      usually recommended.
00051  *
00052  *    o Resynchronise the task's frame time and nothing more. This is good for
00053  *      tasks that occasionally overrun. Doing this should prevent the system 
00054  *      from locking up and crashing as the scheduler tries to catch up with the
00055  *      missed deadlines. The maximum (if any) number of times to resynchronise 
00056  *      a task before permanently suspending it is configurable. 
00057  *
00058  *    o Debug policy, this is a special case of the above resync policy. It is 
00059  *      recommended when step and trace debugging RT tasks that use oneshot RT 
00060  *      timer mode. (See README.WATCHDOG for full details)
00061  *
00062  *    o Stretch (increase) the period of the offending task until it no longer
00063  *      overruns. The percentage increment (of the original period) is 
00064  *      configurable, as is the maximum (if any) number of times to increase 
00065  *      the period before permanently suspending the task. When a task's period 
00066  *      is increased in this way the scheduler is asked to resynchronise the 
00067  *      task's frame time in order to prevent the system locking up and crashing
00068  *      as it tries to catch up with the missed deadlines. This policy could be 
00069  *      a useful development aid if you are not sure what period to use for a 
00070  *      task.
00071  *
00072  *    o Slip the offending task by forcibly suspending it for a percentage of
00073  *      its period. The percentage slip is configurable, as is the maximum (if
00074  *      any) number of times to slip the task before it is permanently 
00075  *      suspended. By slipping the task, other tasks (including Linux) are 
00076  *      given the opportunity to run and the system doesn't lock up.
00077  *
00078  *    o Suspend the offending task so that it no longer poses any threat to 
00079  *      the system. The task will still be known to the scheduler so it could 
00080  *      possibly be resumed sometime later.
00081  *
00082  *    o Kill the offending task and remove all trace of it.
00083  *
00084  * 4. A safety limit that will suspend any task that overruns excessively. The
00085  *    definition of 'excessive' is configurable and can also be disabled. This 
00086  *    is designed to deal with infinite loops no matter what the current policy.
00087  *    The safety limit needs to be set sufficiently high so that it doesn't 
00088  *    interfere with the prevailing watchdog policy. This limit is automatically
00089  *    disabled when the policy is set to 'Debug' in order not to suspend RT 
00090  *    tasks being step and trace debugged.
00091  *
00092  * 5. Keeps a record of bad tasks (apart from those that have been killed) that 
00093  *    can be examined via a /proc interface. (/proc/rtai/watchdog)
00094  * 
00095  * ID: @(#)$Id: wd.c,v 1.14 2009/03/18 23:15:48 mante Exp $
00096  *
00097  *******************************************************************************/
00098 
00099 #include <linux/module.h>
00100 #include <linux/init.h>
00101 #include <linux/version.h>
00102 #include <asm/io.h>
00103 
00104 MODULE_LICENSE("GPL");
00105 
00106 #ifdef CONFIG_PROC_FS
00107 #include <linux/stat.h>
00108 #include <linux/proc_fs.h>
00109 #include <rtai_proc_fs.h>
00110 static struct proc_dir_entry *wd_proc;
00111 static int    wdog_read_proc(char *page, char **start, off_t off, int count,
00112                              int *eof, void *data);
00113 #endif
00114 
00115 #include <rtai_sched.h>
00116 #include <rtai_wd.h>
00117 
00118 // Switches on LED heartbeat and extra logging
00119 //#define WDBUG
00120 #ifdef WDBUG
00121 #define LPT_PORT 0x378
00122 #define DBUG WDLOG
00123 #else
00124 #define DBUG(x...)
00125 #endif
00126 
00127 // Leave this defined if you don't want to use RTAI dynamic memory management
00128 #define MY_ALLOC
00129 #ifndef MY_ALLOC
00130 #ifdef CONFIG_RTAI_MALLOC
00131 #include <rtai_malloc.h>
00132 #else
00133 #define MY_ALLOC        // Not configured so we must use our own
00134 #endif
00135 #endif
00136 #ifdef MY_ALLOC
00137 #define BAD_TASK_MAX 100    // Feel free to change this
00138 
00139 static spinlock_t alloc_lock = SPIN_LOCK_UNLOCKED;
00140 static BAD_RT_TASK bad_task_pool[BAD_TASK_MAX];
00141 #endif
00142 
00143 // The current version number
00144 static char version[] = "$Revision: 1.14 $";
00145 static char ver[10];
00146 
00147 // User friendly policy names
00148 static char *policy_name[] = 
00149     {"Nothing", "Resync", "Debug", "Stretch", "Slip", "Suspend", "Kill"};
00150 
00151 // Private data
00152 static int      num_wdogs;      // Number of watchdogs (and task lists)
00153 static RT_TASK      wdog[NR_RT_CPUS];   // Watchdog tasks (1 per CPU)
00154 static RT_TASK     *tlists[NR_RT_CPUS]; // Scheduler's RT task lists
00155 static BAD_RT_TASK *bad_tl[NR_RT_CPUS]; // Bad task lists (1 per watchdog)
00156 
00157 // -------------------------- CONFIGURABLE PARAMETERS --------------------------
00158 // Module parameters
00159 static int TickPeriod = 10000000;   // Task period in nano seconds
00160 RTAI_MODULE_PARM(TickPeriod, int);  // (should be shorter than all others)
00161 
00162 static int wd_OneShot = 1;      // One shot timer mode or not (periodic)
00163 RTAI_MODULE_PARM(wd_OneShot, int);  // (should be the same as other tasks)
00164 
00165 static int Grace = 3;           // How much a task can be overdue
00166 RTAI_MODULE_PARM(Grace, int);       // (in periods, always 1 in some modes)
00167 
00168 static int GraceDiv = 1;        // Divisor to allow Gracevalues < 1
00169 RTAI_MODULE_PARM(GraceDiv, int);    // overrun = period * Grace / Gracediv
00170 
00171 static int Safety = 100;        // Safety net to suspend infinite loops
00172 RTAI_MODULE_PARM(Safety, int);      // (overrides policy, -ve disables)
00173 
00174 static int Policy = WD_SUSPEND;     // How to punish misbehavers
00175 RTAI_MODULE_PARM(Policy, int);      // (see above and header for details)
00176 
00177 static int Stretch = 10;        // %ge to increase period by
00178 RTAI_MODULE_PARM(Stretch, int);     // (can be over 100%, 100% is doubling)
00179 
00180 static int Slip = 10;           // %ge of period to slip a task
00181 RTAI_MODULE_PARM(Slip, int);        // (can be over 100%)
00182 
00183 static int Limit = 100;         // Maximum number of offences
00184 RTAI_MODULE_PARM(Limit, int);       // (-ve means disabled ie. no limit)
00185 
00186 static int LooperTimeLimit = 100;   // Maximum looper time ms
00187 RTAI_MODULE_PARM(LooperTimeLimit, int); // (care it combines with wd period)
00188 static int LooperLimit;
00189 
00190 // Parameter configuring API
00191 RTAI_SYSCALL_MODE int rt_wdset_grace(int new) // How much a task can be overdue
00192 {
00193     int old = Grace;
00194 
00195     if (Policy <= WD_STRETCH && new != 1)   return -EINVAL;
00196     if (new < 1)                return -EINVAL;
00197     Grace = new;
00198     return old;
00199 }
00200 
00201 RTAI_SYSCALL_MODE int rt_wdset_gracediv(int new) // Divisor for Gracevalues < 1
00202 {
00203     int old = GraceDiv;
00204 
00205     if (Policy <= WD_STRETCH && new != 1)   return -EINVAL;
00206     if (new < 1)                return -EINVAL;
00207     GraceDiv = new;
00208     return old;
00209 }
00210 
00211 RTAI_SYSCALL_MODE int rt_wdset_safety(int new) // Safety net to suspend infinite loops
00212 {
00213     int old = Safety;
00214 
00215     if (new >= 0 && new < Grace)        return -EINVAL;
00216     Safety = new;
00217     return old;
00218 }
00219 
00220 RTAI_SYSCALL_MODE wd_policy rt_wdset_policy(wd_policy new) // How to punish misbehavers
00221 {
00222     wd_policy old = Policy;
00223 
00224     if (new < WD_NOTHING || new > WD_KILL)   return -EINVAL;
00225     if (new <= WD_STRETCH)                Grace  = GraceDiv = 1;
00226     if (new == WD_DEBUG)                  Safety = Limit = -1;
00227     Policy = new;
00228     return old;
00229 }
00230 
00231 RTAI_SYSCALL_MODE int rt_wdset_slip(int new)  // %ge of period to slip a task
00232 {
00233     int old = Slip;
00234 
00235     if (new < 0)            return -EINVAL;
00236     Slip = new;
00237     return old;
00238 }
00239 
00240 RTAI_SYSCALL_MODE int rt_wdset_stretch(int new)  // %ge to increase period by
00241 {
00242     int old = Stretch;
00243 
00244     if (new < 0)            return -EINVAL;
00245     Stretch = new;
00246     return old;
00247 }
00248 
00249 RTAI_SYSCALL_MODE int rt_wdset_limit(int new)  // Maximum number of offences
00250 {
00251     int old = Limit;
00252 
00253     Limit = new;
00254     return old;
00255 }
00256 
00257 // ----------------------------- MEMORY MANAGEMENT -----------------------------
00258 static BAD_RT_TASK *new_bad_task(void)
00259 {
00260 #ifdef MY_ALLOC
00261     int     bt;
00262 
00263     spin_lock(&alloc_lock);
00264     for (bt = 0; bt < BAD_TASK_MAX; bt++) {
00265     if (!(bad_task_pool[bt].in_use)) {
00266         bad_task_pool[bt].in_use = 1;
00267         spin_unlock(&alloc_lock);
00268         return &bad_task_pool[bt];
00269     }
00270     }
00271     spin_unlock(&alloc_lock);
00272     return NULL;
00273 #else
00274     return rt_malloc(sizeof(BAD_RT_TASK));
00275 #endif
00276 }
00277 
00278 static void free_bad_task(BAD_RT_TASK *bt)
00279 {
00280 #ifdef MY_ALLOC
00281     bt->in_use = 0;
00282 #else
00283     rt_free(bt);
00284 #endif
00285 }
00286 
00287 // -------------------------- LINKED LIST FUNCTIONS ----------------------------
00288 static void append_bad_task(BAD_RT_TASK **list, BAD_RT_TASK *new)
00289 {
00290     BAD_RT_TASK *end = *list;
00291 
00292     if (!end) {
00293     *list = new;
00294     } else {
00295     while (end->next) end = end->next;
00296     end->next = new;
00297     }
00298 }
00299 
00300 static BAD_RT_TASK *delete_bad_task(BAD_RT_TASK **list, BAD_RT_TASK *del)
00301 {
00302     BAD_RT_TASK *rtn, *bt = *list;
00303 
00304     if (bt == del) {
00305     rtn = *list = NULL;
00306     } else {
00307     while (bt->next != del) bt = bt->next;
00308     rtn = bt->next = del->next;
00309     }
00310     free_bad_task(del);
00311     return rtn;     // Next in list
00312 }
00313 
00314 static BAD_RT_TASK *find_bad_task(BAD_RT_TASK *list, RT_TASK *t)
00315 {
00316     BAD_RT_TASK *bt = list;
00317 
00318     while (bt) {
00319     if (bt->task == t) break;
00320     bt = bt->next;
00321     }
00322     return bt;
00323 }
00324 
00325 // ------------------------- WHICH CPU IS A TASK ON? ---------------------------
00326 static inline int which_cpu(RT_TASK *t)
00327 {
00328     return t->runnable_on_cpus;
00329 }
00330 
00331 // ------------------------ MP PROOF SUSPEND AND DELETE ------------------------
00332 static void smpproof_task_suspend(RT_TASK *t)
00333 {
00334     int cpuid;
00335 
00336     rt_task_suspend(t);
00337     if ((cpuid = which_cpu(t)) != rtai_cpuid()) {   // Not really suspended
00338     DBUG("Resuming dummy watchdog %d\n", cpuid);
00339     rt_task_resume(&wdog[cpuid]);           // ...until we do this!
00340     }
00341 }
00342 
00343 static void smpproof_task_delete(RT_TASK *t)
00344 {
00345     int cpuid;
00346 
00347     rt_task_delete(t);
00348     if ((cpuid = which_cpu(t)) != rtai_cpuid()) {   // Not really suspended
00349     DBUG("Resuming dummy watchdog %d\n", cpuid);
00350     rt_task_resume(&wdog[cpuid]);           // ...until we do this!
00351     }
00352 }
00353 
00354 // ----------------------------- POLICY FUNCTIONS ------------------------------
00355 static void stretch_badtask(RT_TASK *t, BAD_RT_TASK *bt, int cpuid)
00356 {
00357     // Stretch the task's period and ask scheduler to resync frame time
00358     t->period      += llimd(bt->orig_period, Stretch, 100);
00359     t->resync_frame = 1;
00360     DBUG( "...by %d%% to %uns\n", 
00361       Stretch, (int)count2nano_cpuid(t->period, cpuid));
00362 }
00363 
00364 static void start_slipping_badtask(RT_TASK *t, BAD_RT_TASK *bt, int cpuid)
00365 {
00366     // Mark task as slipping and work out how many watchdog ticks to suspend it
00367     bt->slipping  = 1;
00368     bt->countdown = llimd( llimd(count2nano_cpuid(t->period, cpuid), Slip, 100), 
00369                    1, 
00370                TickPeriod);
00371     DBUG( "Suspending task 0x%X for %d ticks (slip %d)\n", 
00372       t, bt->countdown, bt->count);
00373 
00374     // Suspend task - it will get resumed later
00375     smpproof_task_suspend(t);
00376 }
00377 
00378 static void check_slipping_badtask(BAD_RT_TASK *bt)
00379 {
00380     // Resume task if it's been suspended long enough
00381     if (--(bt->countdown) <= 0) {
00382     bt->slipping = 0;
00383     rt_task_resume(bt->task);
00384     DBUG("Finished slip %d of task 0x%X, resuming\n", bt->count, bt->task);
00385     }
00386 }
00387 
00388 // ------------------------- FUNCTION TO DECIDE POLICY -------------------------
00389 static void handle_badtask(int wd, RT_TASK *t, BAD_RT_TASK *bt, RTIME overrun)
00390 {
00391     // Start 'criminal record' for first time offenders
00392     if (!bt) {
00393     bt = new_bad_task();
00394     if (!bt) return;
00395     bt->task        = t;
00396     bt->next        = NULL;
00397     bt->slipping    = 0;
00398     bt->count       = 0;
00399     bt->countdown   = 0;
00400     bt->valid       = 1;
00401     bt->forced      = 0;
00402     bt->orig_period = t->period;
00403     append_bad_task(&bad_tl[wd], bt);
00404     }
00405 
00406     // Increment offence count and note current policy
00407     (bt->count)++;
00408     bt->policy = Policy;
00409 
00410     // Pure loopers must be suspend always
00411     if (!overrun) {
00412     bt->count = - LooperTimeLimit;
00413     bt->forced = 1;
00414         bt->policy = WD_SUSPEND;
00415     bt->orig_period = 0;
00416     WDLOG("Suspending task %p\n", t);
00417     smpproof_task_suspend(t);  
00418     return;
00419     }
00420 
00421     // In severe cases we must suspend regardless of current policy
00422     if ((overrun >= (Safety * bt->orig_period)) && (Safety >= 0)) {
00423     WDLOG("Forcing suspension of severely overrun task %p\n", t);
00424     bt->forced = 1;
00425     smpproof_task_suspend(t);  
00426     return;
00427     }
00428 
00429     // Has it pushed its luck too far?
00430     if ((bt->count >= Limit) && (Limit >= 0)) {
00431     WDLOG("Task %p reached offence limit, suspending\n", t);
00432     bt->forced = 1;
00433     smpproof_task_suspend(t);
00434     return;
00435     }
00436 
00437     // What to do depends on current policy
00438     switch (Policy) {
00439 
00440     case WD_NOTHING:    // Hope for the best
00441             break;
00442 
00443     case WD_RESYNC: // Resynchronise frame time
00444     case WD_DEBUG:  // Same thing for debugging
00445         WDLOG("Resynchronising task %p\n", t);
00446         t->resync_frame = 1;
00447         break;
00448 
00449     case WD_STRETCH:    // Stretch the task's period
00450         WDLOG("Stretching period of task %p\n", t);
00451         stretch_badtask(t, bt, wd);
00452         break;
00453 
00454     case WD_SLIP:       // Suspend but arrange to resume later
00455         WDLOG("Slipping task %p\n", t);
00456         start_slipping_badtask(t, bt, wd);
00457         break;
00458 
00459     case WD_SUSPEND:    // Suspend the task
00460         WDLOG("Suspending task %p\n", t);
00461         smpproof_task_suspend(t);  
00462         break;
00463 
00464     case WD_KILL:       // Delete the task
00465         WDLOG("Killing task %p\n", t);
00466         smpproof_task_delete(t);   
00467         break;
00468 
00469     default:        // Invalid
00470         WDLOG("Invalid policy (%d)\n", Policy);
00471         break;
00472     }
00473 }
00474 
00475 static void watch_looper(int cpuid, void *self, BAD_RT_TASK *bt)
00476 {
00477     extern RT_TASK rt_smp_linux_task[];
00478     extern RT_TASK *lxrt_prev_task[];
00479     static RT_TASK *prev_task[NR_RT_CPUS];     // task we preempted
00480     static int     prev_task_cnt[NR_RT_CPUS];  // count the same preempted
00481     RT_TASK *prev = lxrt_prev_task[cpuid];
00482     if (prev == prev_task[cpuid] && prev != &rt_smp_linux_task[cpuid] && prev != self && !prev->resync_frame && !prev->period) {
00483         if (++prev_task_cnt[cpuid] == LooperLimit) {
00484             WDLOG("Found looper task %p (list %d)\n", prev, cpuid);
00485             handle_badtask(cpuid, prev, bt, 0);
00486         }
00487     } else {
00488         prev_task[cpuid] = prev;
00489         prev_task_cnt[cpuid] = 0;
00490     }
00491 }
00492 
00493 // -------------------------- THE MAIN WATCHDOG TASK ---------------------------
00494 static void watchdog(long wd)
00495 {
00496 #ifdef WDBUG
00497     int      led    = 0;
00498     static int   output = 0x0000;
00499 #endif
00500     RT_TASK     *task, *self = rt_whoami();
00501     BAD_RT_TASK *bt;
00502     RTIME    now, overrun;
00503     int      another, dog;
00504 
00505     while (1) {
00506 #ifdef WDBUG
00507     // LED heartbeat visible on parallel port
00508     led = !led;
00509     if (led) output |=  (1 << wd); 
00510     else     output &= ~(1 << wd);
00511     outb(output, LPT_PORT);
00512 #endif
00513     // Fix any overrun of our own (step and trace debug in oneshot mode)
00514     now = rt_get_time_cpuid(wd);
00515     if (now - self->resume_time >= self->period) {
00516         self->resync_frame = 1;
00517         rt_task_wait_period();
00518         DBUG("Resynchronised watchdog %d\n", wd);
00519         continue;
00520     }
00521 
00522     // Mark all this watchdog's bad tasks as invalid ready for check
00523     for (bt = bad_tl[wd]; bt; bt->valid = 0, bt = bt->next);
00524 
00525     // Loop through all the RT tasks in this watchdog's list
00526     task = tlists[wd];
00527     while ((task = task->next)) {
00528 
00529         // Ignore ourself but note another watchdog
00530         if (task == self) continue;
00531         for (another = dog = 0; dog < num_wdogs; dog++) {
00532         if (task == &wdog[dog]) {
00533             another = 1 + dog;
00534             break;
00535         }
00536         }
00537 
00538         // Search for any criminal record and check slipping tasks
00539         if ((bt = find_bad_task(bad_tl[wd], task))) {
00540         bt->valid = 1;
00541         if (bt->slipping) {
00542             check_slipping_badtask(bt);
00543             continue;
00544         }
00545         }
00546 
00547         // Ignore non-periodic, resyncing, suspended or blocked tasks
00548         if (!task->period || task->resync_frame || task->state & 
00549             (RT_SCHED_SUSPENDED|RT_SCHED_DELAYED|RT_SCHED_SEMAPHORE|RT_SCHED_SEND|RT_SCHED_RECEIVE|RT_SCHED_RPC|RT_SCHED_RETURN)) {
00550         continue;
00551         }
00552 
00553         // Check for overrun and decide what to do (ignore other watchdogs)
00554         overrun = now - task->resume_time;
00555         if (overrun >= llimd(task->period, Grace, GraceDiv)) {
00556         if (another--) {
00557             WDLOG("WARNING: Watchdog %d is overrunning\n", another);
00558         } else {
00559             WDLOG("Found overrunning task %p (list %d)\n", task, wd);
00560             handle_badtask(wd, task, bt, overrun);
00561         }
00562         }
00563     }
00564 
00565     watch_looper(wd, self, bt);
00566 
00567     // Clean up any bad tasks still marked invalid (their RT task has gone)
00568     for (bt = bad_tl[wd]; bt;) {
00569         if (!(bt->valid)) {
00570         bt = delete_bad_task(&bad_tl[wd], bt);
00571         } else {
00572         bt = bt->next;
00573         }
00574     }
00575 
00576     // Wait for next watchdog 'tick'
00577     rt_task_wait_period();
00578     }
00579 }
00580 
00581 // -------------------------- DUMMY WATCHDOG TASK ------------------------------
00582 static void dummy(long wd)
00583 {
00584     // Go straight back to sleep - old legacy SMP proof suspend and delete
00585     while (1) {
00586     rt_task_suspend(&wdog[wd]);
00587     }
00588 }
00589 
00590 // ----------------------------- PROC INTERFACE --------------------------------
00591 #ifdef CONFIG_PROC_FS
00592 static int wdog_read_proc(char *page, char **start, off_t off, int count,
00593                           int *eof, void *data)
00594 {
00595     PROC_PRINT_VARS;
00596     RT_TASK *task;
00597     BAD_RT_TASK *bt;
00598     long     onsec, osec;
00599     long     ansec, asec;
00600     int      cpuid, tl, id = 1;
00601     char     action[10];
00602 
00603     // Heading and parameters
00604     PROC_PRINT("\nRTAI Watchdog Status\n");
00605     PROC_PRINT(  "--------------------\n");
00606     PROC_PRINT("%d Watchdog task%s running @ %dHz in %s mode\n", 
00607            num_wdogs, num_wdogs > 1 ? "s" : "",
00608            (int)imuldiv(NSECS_PER_SEC, 1, TickPeriod), 
00609            wd_OneShot ? "oneshot" : "periodic");
00610 #ifdef MY_ALLOC
00611     PROC_PRINT("Using static memory management (%d entries)\n", BAD_TASK_MAX);
00612 #else
00613     PROC_PRINT("Using dynamic memory management\n");
00614 #endif
00615     PROC_PRINT("Policy         : '%s'\n", policy_name[Policy]);
00616     PROC_PRINT("Grace periods  : %d%s\n", Grace, 
00617            (Policy <= WD_STRETCH) ? " (forced)" : "");
00618     PROC_PRINT("Grace divisor  : %d%s\n", GraceDiv, 
00619            (Policy <= WD_STRETCH) ? " (forced)" : "");
00620     PROC_PRINT("Safety limit   : ");
00621     if (Safety < 0) {
00622     PROC_PRINT("(disabled)\n");
00623     } else {
00624     PROC_PRINT("%d period%s\n", Safety, Safety > 1 ? "s" : "");
00625     }
00626     PROC_PRINT("Slip factor    : %d%%\n", Slip);
00627     PROC_PRINT("Stretch factor : %d%%\n", Stretch);
00628     PROC_PRINT("Offense limit  : ");
00629     if (Limit < 0) {
00630     PROC_PRINT("(disabled)\n");
00631     } else {
00632     PROC_PRINT("%d\n", Limit);
00633     }
00634     PROC_PRINT("Loopers limit  : %d ms\n", LooperTimeLimit);
00635 
00636     // List criminal records
00637     PROC_PRINT("\nBad tasks...\n\n");
00638     PROC_PRINT("RT Task    ID "
00639            "CPU%s "
00640            "Priority State Count "
00641            "Original period Adjusted period "
00642            "Action\n",
00643            "");
00644     PROC_PRINT("---------- -- "
00645            "---%s "
00646            "-------- ----- ----- "
00647            "--------------- --------------- "
00648            "---------\n",
00649            "");
00650     for (tl = 0; tl < num_wdogs; tl++) {
00651     task = tlists[tl];
00652     while ((task = task->next)) {
00653         if ((bt = find_bad_task(bad_tl[tl], task))) {
00654         if (bt->forced) {
00655             sprintf(action, "%s *", policy_name[WD_SUSPEND]);
00656         } else {
00657             strcpy(action, policy_name[bt->policy]);
00658         }
00659         cpuid = task->runnable_on_cpus;
00660         osec  = ulldiv( count2nano_cpuid(bt->orig_period, cpuid),
00661                     NSECS_PER_SEC, 
00662                     &onsec);
00663         asec  = ulldiv( count2nano_cpuid(task->period, cpuid),
00664                     NSECS_PER_SEC, 
00665                     &ansec);
00666         PROC_PRINT( "0x%08lx %-2d "
00667                 "%s%-2d%s "
00668                 "%-8d 0x%-3x %-5d "
00669                 "%02ds %09dns %02ds %09dns "
00670                 "%s\n",
00671                 (long)task, id, 
00672                 "",
00673                 (int)task->runnable_on_cpus,
00674                 " ",
00675                 task->priority, task->state, bt->count,
00676                 (int)osec, (int)onsec, (int)asec, (int)ansec, 
00677                 action);
00678         }
00679         id++;
00680     }
00681     }
00682     PROC_PRINT_DONE;
00683 }
00684 #endif
00685 
00686 // ------------------------ WATCHDOG ENTRIES TABLE -----------------------------
00687 
00688 #include <rtai_lxrt.h>
00689 
00690 static struct rt_fun_entry rt_watchdog_fun[]  __attribute__ ((__unused__));
00691 
00692 static struct rt_fun_entry rt_watchdog_fun[] = {
00693         [WD_SET_GRACE]    = { 0, rt_wdset_grace },
00694         [WD_SET_GRACEDIV] = { 0, rt_wdset_gracediv },
00695         [WD_SET_SAFETY]   = { 0, rt_wdset_safety },
00696         [WD_SET_POLICY]   = { 0, rt_wdset_policy },
00697         [WD_SET_SLIP]     = { 0, rt_wdset_slip },
00698         [WD_SET_STRETCH]  = { 0, rt_wdset_stretch },
00699         [WD_SET_LIMIT]    = { 0, rt_wdset_limit }
00700 };
00701 
00702 // ----------------------------- MODULE CONTROL --------------------------------
00703 int __rtai_wd_init(void)
00704 {
00705     RTIME    period;
00706     int      dog;
00707     RT_TASK *lnx0;
00708     char    *c;
00709 
00710     if(set_rt_fun_ext_index(rt_watchdog_fun, WD_INDX)) {
00711     printk("Recompile your module with a different index\n");
00712     return -EACCES;
00713     }
00714     // Some parameters have to be forced
00715     if (Policy <= WD_STRETCH) Grace  = GraceDiv = 1;
00716     if (Policy == WD_DEBUG)   Safety = Limit = -1;
00717     LooperLimit = llimd(LooperTimeLimit, 1000000, TickPeriod);
00718 
00719     // Deduce number of watchdogs needed from scheduler type
00720     num_wdogs = num_online_cpus();
00721 
00722     // Fill array of pointers to scheduler's task lists
00723     lnx0 = rt_get_base_linux_task(tlists);
00724 
00725     // Register watchdogs with scheduler
00726     for (dog = 0; dog < num_online_cpus(); dog++) {
00727     if (rt_register_watchdog(&wdog[dog], dog) < 0) {
00728         WDLOG("Failed to register watchdog %d with RTAI scheduler\n", dog);
00729         for (dog--; dog >= 0; dog--) rt_deregister_watchdog(&wdog[dog], dog);
00730         return -EBUSY;
00731     }
00732     }
00733 
00734     // Set up chosen timer mode and hard timing
00735     if (wd_OneShot) {
00736     start_rt_timer(0);
00737     } else {
00738     rt_set_periodic_mode();
00739     start_rt_timer((int)nano2count(TickPeriod));
00740     }
00741 
00742     // Set up and start watchdog tasks (on separate CPUs if MP). We run as 
00743     // many real watchdogs as there are CPUs.
00744     for (dog = 0; dog < num_online_cpus(); dog++) {
00745     rt_task_init_cpuid(&wdog[dog], 
00746                (dog < num_wdogs) ? watchdog : dummy, 
00747                 dog, 2000, RT_SCHED_HIGHEST_PRIORITY, 0, 0, dog);
00748     }
00749     for (dog = 0; dog < num_wdogs; dog++) {
00750     period = nano2count_cpuid(TickPeriod, dog);
00751     rt_task_make_periodic(&wdog[dog], 
00752                    rt_get_time_cpuid(dog) + period, 
00753                    period);
00754     }
00755 
00756     // Tidy up version number
00757     if ((c = strchr(version, ' '))) {
00758     *(strchr(c, '$')) = '\0';
00759     strcpy(ver, c + 1);
00760     } else {
00761     strcpy(ver, "? ");
00762     }
00763 
00764     // Log initial parameters
00765     WDLOG( "loaded.\n");
00766     WDLOG( "%d Watchdog task%s running @ %dHz in %s mode\n", 
00767        num_wdogs, num_wdogs > 1 ? "s" : "",
00768        imuldiv(NSECS_PER_SEC, 1, TickPeriod), 
00769        wd_OneShot ? "oneshot" : "periodic");
00770 #ifdef MY_ALLOC
00771     WDLOG( "Using static memory management (%d entries)\n", BAD_TASK_MAX);
00772 #else
00773     WDLOG( "Using dynamic memory management\n");
00774 #endif
00775     WDLOG( "Policy         : '%s'\n", policy_name[Policy]);
00776     WDLOG( "Grace periods  : %d%s\n", Grace, 
00777        (Policy <= WD_STRETCH) ? " (forced)" : "");
00778     WDLOG( "Grace divisor  : %d%s\n", GraceDiv, 
00779        (Policy <= WD_STRETCH) ? " (forced)" : "");
00780     WDLOG( "Safety limit   : ");
00781     if (Safety < 0) {
00782     rt_printk("(disabled)\n");
00783     } else {
00784     rt_printk("%d period%s\n", Safety, Safety > 1 ? "s" : " ");
00785     }
00786     WDLOG( "Slip factor    : %d%%\n", Slip);
00787     WDLOG( "Stretch factor : %d%%\n", Stretch);
00788     WDLOG( "Offense limit  : ");
00789     if (Limit < 0) {
00790     rt_printk("(disabled)\n");
00791     } else {
00792     rt_printk("%d\n", Limit);
00793     }
00794     WDLOG( "Loopers limit  : %d ms\n", LooperTimeLimit);
00795 
00796 #ifdef CONFIG_PROC_FS
00797     // Register /proc interface
00798     wd_proc = create_proc_entry("watchdog", 0, rtai_proc_root);
00799     wd_proc->read_proc = wdog_read_proc;
00800 #endif
00801     return 0;
00802 }
00803 
00804 void __rtai_wd_exit(void)
00805 {
00806     BAD_RT_TASK *bt;
00807     int      dog;
00808 
00809 #ifdef CONFIG_PROC_FS
00810     // Remove /proc interface
00811     remove_proc_entry("watchdog", rtai_proc_root);
00812 #endif
00813     // Deregister all watchdogs and shutdown the timer
00814     for (dog = 0; dog < num_online_cpus(); dog++) {
00815     rt_deregister_watchdog(&wdog[dog], dog);
00816     }
00817     stop_rt_timer();
00818     rt_busy_sleep(TickPeriod);
00819 
00820     // Cleanup and remove all watchdogs and bad task lists
00821     for (dog = 0; dog < num_online_cpus(); dog++) {
00822     rt_task_delete(&wdog[dog]);
00823     if (dog < num_wdogs) {
00824         for (bt = bad_tl[dog]; bt;) {
00825         bt = delete_bad_task(&bad_tl[dog], bt);
00826         }
00827     }
00828     }
00829 
00830     reset_rt_fun_ext_index(rt_watchdog_fun, WD_INDX);
00831 
00832     // It's all over :(
00833     WDLOG("unloaded.\n");
00834 }
00835 
00836 module_init(__rtai_wd_init);
00837 module_exit(__rtai_wd_exit);
00838 
00839 EXPORT_SYMBOL(rt_wdset_grace);
00840 EXPORT_SYMBOL(rt_wdset_gracediv);
00841 EXPORT_SYMBOL(rt_wdset_safety);
00842 EXPORT_SYMBOL(rt_wdset_policy);
00843 EXPORT_SYMBOL(rt_wdset_slip);
00844 EXPORT_SYMBOL(rt_wdset_stretch);
00845 EXPORT_SYMBOL(rt_wdset_limit);

Generated on Tue Feb 2 17:46:05 2010 for RTAI API by  doxygen 1.4.7