edac_core.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  1. /*
  2. * Defines, structures, APIs for edac_core module
  3. *
  4. * (C) 2007 Linux Networx (http://lnxi.com)
  5. * This file may be distributed under the terms of the
  6. * GNU General Public License.
  7. *
  8. * Written by Thayne Harbaugh
  9. * Based on work by Dan Hollis <goemon at anime dot net> and others.
  10. * http://www.anime.net/~goemon/linux-ecc/
  11. *
  12. * NMI handling support added by
  13. * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
  14. *
  15. * Refactored for multi-source files:
  16. * Doug Thompson <norsk5@xmission.com>
  17. *
  18. */
  19. #ifndef _EDAC_CORE_H_
  20. #define _EDAC_CORE_H_
  21. #include <linux/kernel.h>
  22. #include <linux/types.h>
  23. #include <linux/module.h>
  24. #include <linux/spinlock.h>
  25. #include <linux/smp.h>
  26. #include <linux/pci.h>
  27. #include <linux/time.h>
  28. #include <linux/nmi.h>
  29. #include <linux/rcupdate.h>
  30. #include <linux/completion.h>
  31. #include <linux/kobject.h>
  32. #include <linux/platform_device.h>
  33. #include <linux/workqueue.h>
  34. #include <linux/edac.h>
  35. #define EDAC_DEVICE_NAME_LEN 31
  36. #define EDAC_ATTRIB_VALUE_LEN 15
  37. #if PAGE_SHIFT < 20
  38. #define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
  39. #define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
  40. #else /* PAGE_SHIFT > 20 */
  41. #define PAGES_TO_MiB(pages) ((pages) << (PAGE_SHIFT - 20))
  42. #define MiB_TO_PAGES(mb) ((mb) >> (PAGE_SHIFT - 20))
  43. #endif
  44. #define edac_printk(level, prefix, fmt, arg...) \
  45. printk(level "EDAC " prefix ": " fmt, ##arg)
  46. #define edac_mc_printk(mci, level, fmt, arg...) \
  47. printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg)
  48. #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
  49. printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg)
  50. #define edac_device_printk(ctl, level, fmt, arg...) \
  51. printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg)
  52. #define edac_pci_printk(ctl, level, fmt, arg...) \
  53. printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg)
  54. /* prefixes for edac_printk() and edac_mc_printk() */
  55. #define EDAC_MC "MC"
  56. #define EDAC_PCI "PCI"
  57. #define EDAC_DEBUG "DEBUG"
  58. extern const char * const edac_mem_types[];
  59. #ifdef CONFIG_EDAC_DEBUG
  60. extern int edac_debug_level;
  61. #define edac_dbg(level, fmt, ...) \
  62. do { \
  63. if (level <= edac_debug_level) \
  64. edac_printk(KERN_DEBUG, EDAC_DEBUG, \
  65. "%s: " fmt, __func__, ##__VA_ARGS__); \
  66. } while (0)
  67. #else /* !CONFIG_EDAC_DEBUG */
  68. #define edac_dbg(level, fmt, ...) \
  69. do { \
  70. if (0) \
  71. edac_printk(KERN_DEBUG, EDAC_DEBUG, \
  72. "%s: " fmt, __func__, ##__VA_ARGS__); \
  73. } while (0)
  74. #endif /* !CONFIG_EDAC_DEBUG */
  75. #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
  76. PCI_DEVICE_ID_ ## vend ## _ ## dev
  77. #define edac_dev_name(dev) (dev)->dev_name
  78. /*
  79. * The following are the structures to provide for a generic
  80. * or abstract 'edac_device'. This set of structures and the
  81. * code that implements the APIs for the same, provide for
  82. * registering EDAC type devices which are NOT standard memory.
  83. *
  84. * CPU caches (L1 and L2)
  85. * DMA engines
  86. * Core CPU switches
  87. * Fabric switch units
  88. * PCIe interface controllers
  89. * other EDAC/ECC type devices that can be monitored for
  90. * errors, etc.
  91. *
  92. * It allows for a 2 level set of hierarchy. For example:
  93. *
  94. * cache could be composed of L1, L2 and L3 levels of cache.
  95. * Each CPU core would have its own L1 cache, while sharing
  96. * L2 and maybe L3 caches.
  97. *
  98. * View them arranged, via the sysfs presentation:
  99. * /sys/devices/system/edac/..
  100. *
  101. * mc/ <existing memory device directory>
  102. * cpu/cpu0/.. <L1 and L2 block directory>
  103. * /L1-cache/ce_count
  104. * /ue_count
  105. * /L2-cache/ce_count
  106. * /ue_count
  107. * cpu/cpu1/.. <L1 and L2 block directory>
  108. * /L1-cache/ce_count
  109. * /ue_count
  110. * /L2-cache/ce_count
  111. * /ue_count
  112. * ...
  113. *
  114. * the L1 and L2 directories would be "edac_device_block's"
  115. */
  116. struct edac_device_counter {
  117. u32 ue_count;
  118. u32 ce_count;
  119. };
  120. /* forward reference */
  121. struct edac_device_ctl_info;
  122. struct edac_device_block;
  123. /* edac_dev_sysfs_attribute structure
  124. * used for driver sysfs attributes in mem_ctl_info
  125. * for extra controls and attributes:
  126. * like high level error Injection controls
  127. */
  128. struct edac_dev_sysfs_attribute {
  129. struct attribute attr;
  130. ssize_t (*show)(struct edac_device_ctl_info *, char *);
  131. ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
  132. };
  133. /* edac_dev_sysfs_block_attribute structure
  134. *
  135. * used in leaf 'block' nodes for adding controls/attributes
  136. *
  137. * each block in each instance of the containing control structure
  138. * can have an array of the following. The show and store functions
  139. * will be filled in with the show/store function in the
  140. * low level driver.
  141. *
  142. * The 'value' field will be the actual value field used for
  143. * counting
  144. */
  145. struct edac_dev_sysfs_block_attribute {
  146. struct attribute attr;
  147. ssize_t (*show)(struct kobject *, struct attribute *, char *);
  148. ssize_t (*store)(struct kobject *, struct attribute *,
  149. const char *, size_t);
  150. struct edac_device_block *block;
  151. unsigned int value;
  152. };
  153. /* device block control structure */
  154. struct edac_device_block {
  155. struct edac_device_instance *instance; /* Up Pointer */
  156. char name[EDAC_DEVICE_NAME_LEN + 1];
  157. struct edac_device_counter counters; /* basic UE and CE counters */
  158. int nr_attribs; /* how many attributes */
  159. /* this block's attributes, could be NULL */
  160. struct edac_dev_sysfs_block_attribute *block_attributes;
  161. /* edac sysfs device control */
  162. struct kobject kobj;
  163. };
  164. /* device instance control structure */
  165. struct edac_device_instance {
  166. struct edac_device_ctl_info *ctl; /* Up pointer */
  167. char name[EDAC_DEVICE_NAME_LEN + 4];
  168. struct edac_device_counter counters; /* instance counters */
  169. u32 nr_blocks; /* how many blocks */
  170. struct edac_device_block *blocks; /* block array */
  171. /* edac sysfs device control */
  172. struct kobject kobj;
  173. };
  174. /*
  175. * Abstract edac_device control info structure
  176. *
  177. */
  178. struct edac_device_ctl_info {
  179. /* for global list of edac_device_ctl_info structs */
  180. struct list_head link;
  181. struct module *owner; /* Module owner of this control struct */
  182. int dev_idx;
  183. /* Per instance controls for this edac_device */
  184. int log_ue; /* boolean for logging UEs */
  185. int log_ce; /* boolean for logging CEs */
  186. int panic_on_ue; /* boolean for panic'ing on an UE */
  187. unsigned poll_msec; /* number of milliseconds to poll interval */
  188. unsigned long delay; /* number of jiffies for poll_msec */
  189. /* Additional top controller level attributes, but specified
  190. * by the low level driver.
  191. *
  192. * Set by the low level driver to provide attributes at the
  193. * controller level, same level as 'ue_count' and 'ce_count' above.
  194. * An array of structures, NULL terminated
  195. *
  196. * If attributes are desired, then set to array of attributes
  197. * If no attributes are desired, leave NULL
  198. */
  199. struct edac_dev_sysfs_attribute *sysfs_attributes;
  200. /* pointer to main 'edac' subsys in sysfs */
  201. struct bus_type *edac_subsys;
  202. /* the internal state of this controller instance */
  203. int op_state;
  204. /* work struct for this instance */
  205. struct delayed_work work;
  206. /* pointer to edac polling checking routine:
  207. * If NOT NULL: points to polling check routine
  208. * If NULL: Then assumes INTERRUPT operation, where
  209. * MC driver will receive events
  210. */
  211. void (*edac_check) (struct edac_device_ctl_info * edac_dev);
  212. struct device *dev; /* pointer to device structure */
  213. const char *mod_name; /* module name */
  214. const char *ctl_name; /* edac controller name */
  215. const char *dev_name; /* pci/platform/etc... name */
  216. void *pvt_info; /* pointer to 'private driver' info */
  217. unsigned long start_time; /* edac_device load start time (jiffies) */
  218. struct completion removal_complete;
  219. /* sysfs top name under 'edac' directory
  220. * and instance name:
  221. * cpu/cpu0/...
  222. * cpu/cpu1/...
  223. * cpu/cpu2/...
  224. * ...
  225. */
  226. char name[EDAC_DEVICE_NAME_LEN + 1];
  227. /* Number of instances supported on this control structure
  228. * and the array of those instances
  229. */
  230. u32 nr_instances;
  231. struct edac_device_instance *instances;
  232. /* Event counters for the this whole EDAC Device */
  233. struct edac_device_counter counters;
  234. /* edac sysfs device control for the 'name'
  235. * device this structure controls
  236. */
  237. struct kobject kobj;
  238. };
  239. /* To get from the instance's wq to the beginning of the ctl structure */
  240. #define to_edac_mem_ctl_work(w) \
  241. container_of(w, struct mem_ctl_info, work)
  242. #define to_edac_device_ctl_work(w) \
  243. container_of(w,struct edac_device_ctl_info,work)
  244. /*
  245. * The alloc() and free() functions for the 'edac_device' control info
  246. * structure. A MC driver will allocate one of these for each edac_device
  247. * it is going to control/register with the EDAC CORE.
  248. */
  249. extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
  250. unsigned sizeof_private,
  251. char *edac_device_name, unsigned nr_instances,
  252. char *edac_block_name, unsigned nr_blocks,
  253. unsigned offset_value,
  254. struct edac_dev_sysfs_block_attribute *block_attributes,
  255. unsigned nr_attribs,
  256. int device_index);
  257. /* The offset value can be:
  258. * -1 indicating no offset value
  259. * 0 for zero-based block numbers
  260. * 1 for 1-based block number
  261. * other for other-based block number
  262. */
  263. #define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1)
  264. extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
  265. #ifdef CONFIG_PCI
  266. struct edac_pci_counter {
  267. atomic_t pe_count;
  268. atomic_t npe_count;
  269. };
  270. /*
  271. * Abstract edac_pci control info structure
  272. *
  273. */
  274. struct edac_pci_ctl_info {
  275. /* for global list of edac_pci_ctl_info structs */
  276. struct list_head link;
  277. int pci_idx;
  278. struct bus_type *edac_subsys; /* pointer to subsystem */
  279. /* the internal state of this controller instance */
  280. int op_state;
  281. /* work struct for this instance */
  282. struct delayed_work work;
  283. /* pointer to edac polling checking routine:
  284. * If NOT NULL: points to polling check routine
  285. * If NULL: Then assumes INTERRUPT operation, where
  286. * MC driver will receive events
  287. */
  288. void (*edac_check) (struct edac_pci_ctl_info * edac_dev);
  289. struct device *dev; /* pointer to device structure */
  290. const char *mod_name; /* module name */
  291. const char *ctl_name; /* edac controller name */
  292. const char *dev_name; /* pci/platform/etc... name */
  293. void *pvt_info; /* pointer to 'private driver' info */
  294. unsigned long start_time; /* edac_pci load start time (jiffies) */
  295. struct completion complete;
  296. /* sysfs top name under 'edac' directory
  297. * and instance name:
  298. * cpu/cpu0/...
  299. * cpu/cpu1/...
  300. * cpu/cpu2/...
  301. * ...
  302. */
  303. char name[EDAC_DEVICE_NAME_LEN + 1];
  304. /* Event counters for the this whole EDAC Device */
  305. struct edac_pci_counter counters;
  306. /* edac sysfs device control for the 'name'
  307. * device this structure controls
  308. */
  309. struct kobject kobj;
  310. struct completion kobj_complete;
  311. };
  312. #define to_edac_pci_ctl_work(w) \
  313. container_of(w, struct edac_pci_ctl_info,work)
  314. /* write all or some bits in a byte-register*/
  315. static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
  316. u8 mask)
  317. {
  318. if (mask != 0xff) {
  319. u8 buf;
  320. pci_read_config_byte(pdev, offset, &buf);
  321. value &= mask;
  322. buf &= ~mask;
  323. value |= buf;
  324. }
  325. pci_write_config_byte(pdev, offset, value);
  326. }
  327. /* write all or some bits in a word-register*/
  328. static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
  329. u16 value, u16 mask)
  330. {
  331. if (mask != 0xffff) {
  332. u16 buf;
  333. pci_read_config_word(pdev, offset, &buf);
  334. value &= mask;
  335. buf &= ~mask;
  336. value |= buf;
  337. }
  338. pci_write_config_word(pdev, offset, value);
  339. }
  340. /*
  341. * pci_write_bits32
  342. *
  343. * edac local routine to do pci_write_config_dword, but adds
  344. * a mask parameter. If mask is all ones, ignore the mask.
  345. * Otherwise utilize the mask to isolate specified bits
  346. *
  347. * write all or some bits in a dword-register
  348. */
  349. static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
  350. u32 value, u32 mask)
  351. {
  352. if (mask != 0xffffffff) {
  353. u32 buf;
  354. pci_read_config_dword(pdev, offset, &buf);
  355. value &= mask;
  356. buf &= ~mask;
  357. value |= buf;
  358. }
  359. pci_write_config_dword(pdev, offset, value);
  360. }
  361. #endif /* CONFIG_PCI */
  362. struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
  363. unsigned n_layers,
  364. struct edac_mc_layer *layers,
  365. unsigned sz_pvt);
  366. extern int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
  367. const struct attribute_group **groups);
  368. #define edac_mc_add_mc(mci) edac_mc_add_mc_with_groups(mci, NULL)
  369. extern void edac_mc_free(struct mem_ctl_info *mci);
  370. extern struct mem_ctl_info *edac_mc_find(int idx);
  371. extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
  372. extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
  373. extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
  374. unsigned long page);
  375. void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
  376. struct mem_ctl_info *mci,
  377. struct edac_raw_error_desc *e);
  378. void edac_mc_handle_error(const enum hw_event_mc_err_type type,
  379. struct mem_ctl_info *mci,
  380. const u16 error_count,
  381. const unsigned long page_frame_number,
  382. const unsigned long offset_in_page,
  383. const unsigned long syndrome,
  384. const int top_layer,
  385. const int mid_layer,
  386. const int low_layer,
  387. const char *msg,
  388. const char *other_detail);
  389. /*
  390. * edac_device APIs
  391. */
  392. extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
  393. extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
  394. extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
  395. int inst_nr, int block_nr, const char *msg);
  396. extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
  397. int inst_nr, int block_nr, const char *msg);
  398. extern int edac_device_alloc_index(void);
  399. extern const char *edac_layer_name[];
  400. /*
  401. * edac_pci APIs
  402. */
  403. extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
  404. const char *edac_pci_name);
  405. extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);
  406. extern void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
  407. unsigned long value);
  408. extern int edac_pci_alloc_index(void);
  409. extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
  410. extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);
  411. extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl(
  412. struct device *dev,
  413. const char *mod_name);
  414. extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci);
  415. extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci);
  416. extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
  417. /*
  418. * edac misc APIs
  419. */
  420. extern char *edac_op_state_to_string(int op_state);
  421. #endif /* _EDAC_CORE_H_ */