thunderx_edac.c 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152
  1. /*
  2. * Cavium ThunderX memory controller kernel module
  3. *
  4. * This file is subject to the terms and conditions of the GNU General Public
  5. * License. See the file "COPYING" in the main directory of this archive
  6. * for more details.
  7. *
  8. * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
  9. *
  10. */
  11. #include <linux/module.h>
  12. #include <linux/pci.h>
  13. #include <linux/edac.h>
  14. #include <linux/interrupt.h>
  15. #include <linux/string.h>
  16. #include <linux/stop_machine.h>
  17. #include <linux/delay.h>
  18. #include <linux/sizes.h>
  19. #include <linux/atomic.h>
  20. #include <linux/bitfield.h>
  21. #include <linux/circ_buf.h>
  22. #include <asm/page.h>
  23. #include "edac_module.h"
  24. #define phys_to_pfn(phys) (PFN_DOWN(phys))
  25. #define THUNDERX_NODE GENMASK(45, 44)
/* Severity classes used by the error-description tables below. */
enum {
	ERR_CORRECTED	= 1,
	ERR_UNCORRECTED	= 2,
	ERR_UNKNOWN	= 3,
};
/* Up to four raw syndrome registers are captured per error event. */
#define MAX_SYNDROME_REGS 4

struct error_syndrome {
	u64 reg[MAX_SYNDROME_REGS];	/* raw register snapshots */
};

/*
 * One decodable error bit: its severity (ERR_* above), the bit mask in
 * the interrupt/status register, and the message text to report.
 * Tables of these are terminated by an all-zero entry.
 */
struct error_descr {
	int type;	/* ERR_CORRECTED / ERR_UNCORRECTED / ERR_UNKNOWN */
	u64 mask;	/* bit(s) in the status register */
	char *descr;	/* human-readable description */
};
  40. static void decode_register(char *str, size_t size,
  41. const struct error_descr *descr,
  42. const uint64_t reg)
  43. {
  44. int ret = 0;
  45. while (descr->type && descr->mask && descr->descr) {
  46. if (reg & descr->mask) {
  47. ret = snprintf(str, size, "\n\t%s, %s",
  48. descr->type == ERR_CORRECTED ?
  49. "Corrected" : "Uncorrected",
  50. descr->descr);
  51. str += ret;
  52. size -= ret;
  53. }
  54. descr++;
  55. }
  56. }
  57. static unsigned long get_bits(unsigned long data, int pos, int width)
  58. {
  59. return (data >> pos) & ((1 << width) - 1);
  60. }
  61. #define L2C_CTL 0x87E080800000
  62. #define L2C_CTL_DISIDXALIAS BIT(0)
  63. #define PCI_DEVICE_ID_THUNDER_LMC 0xa022
  64. #define LMC_FADR 0x20
  65. #define LMC_FADR_FDIMM(x) ((x >> 37) & 0x1)
  66. #define LMC_FADR_FBUNK(x) ((x >> 36) & 0x1)
  67. #define LMC_FADR_FBANK(x) ((x >> 32) & 0xf)
  68. #define LMC_FADR_FROW(x) ((x >> 14) & 0xffff)
  69. #define LMC_FADR_FCOL(x) ((x >> 0) & 0x1fff)
  70. #define LMC_NXM_FADR 0x28
  71. #define LMC_ECC_SYND 0x38
  72. #define LMC_ECC_PARITY_TEST 0x108
  73. #define LMC_INT_W1S 0x150
  74. #define LMC_INT_ENA_W1C 0x158
  75. #define LMC_INT_ENA_W1S 0x160
  76. #define LMC_CONFIG 0x188
  77. #define LMC_CONFIG_BG2 BIT(62)
  78. #define LMC_CONFIG_RANK_ENA BIT(42)
  79. #define LMC_CONFIG_PBANK_LSB(x) (((x) >> 5) & 0xF)
  80. #define LMC_CONFIG_ROW_LSB(x) (((x) >> 2) & 0x7)
  81. #define LMC_CONTROL 0x190
  82. #define LMC_CONTROL_XOR_BANK BIT(16)
  83. #define LMC_INT 0x1F0
  84. #define LMC_INT_DDR_ERR BIT(11)
  85. #define LMC_INT_DED_ERR (0xFUL << 5)
  86. #define LMC_INT_SEC_ERR (0xFUL << 1)
  87. #define LMC_INT_NXM_WR_MASK BIT(0)
  88. #define LMC_DDR_PLL_CTL 0x258
  89. #define LMC_DDR_PLL_CTL_DDR4 BIT(29)
  90. #define LMC_FADR_SCRAMBLED 0x330
  91. #define LMC_INT_UE (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \
  92. LMC_INT_NXM_WR_MASK)
  93. #define LMC_INT_CE (LMC_INT_SEC_ERR)
/* LMC interrupt status bits this driver knows how to report. */
static const struct error_descr lmc_errors[] = {
	{
		.type = ERR_CORRECTED,
		.mask = LMC_INT_SEC_ERR,
		.descr = "Single-bit ECC error",
	},
	{
		.type = ERR_UNCORRECTED,
		.mask = LMC_INT_DDR_ERR,
		.descr = "DDR chip error",
	},
	{
		.type = ERR_UNCORRECTED,
		.mask = LMC_INT_DED_ERR,
		.descr = "Double-bit ECC error",
	},
	{
		.type = ERR_UNCORRECTED,
		.mask = LMC_INT_NXM_WR_MASK,
		.descr = "Non-existent memory write",
	},
	{0, 0, NULL},	/* sentinel for decode_register() */
};
  117. #define LMC_INT_EN_DDR_ERROR_ALERT_ENA BIT(5)
  118. #define LMC_INT_EN_DLCRAM_DED_ERR BIT(4)
  119. #define LMC_INT_EN_DLCRAM_SEC_ERR BIT(3)
  120. #define LMC_INT_INTR_DED_ENA BIT(2)
  121. #define LMC_INT_INTR_SEC_ENA BIT(1)
  122. #define LMC_INT_INTR_NXM_WR_ENA BIT(0)
  123. #define LMC_INT_ENA_ALL GENMASK(5, 0)
  124. #define LMC_DDR_PLL_CTL 0x258
  125. #define LMC_DDR_PLL_CTL_DDR4 BIT(29)
  126. #define LMC_CONTROL 0x190
  127. #define LMC_CONTROL_RDIMM BIT(0)
  128. #define LMC_SCRAM_FADR 0x330
  129. #define LMC_CHAR_MASK0 0x228
  130. #define LMC_CHAR_MASK2 0x238
  131. #define RING_ENTRIES 8
/* Name, permissions and file operations for one debugfs attribute. */
struct debugfs_entry {
	const char *name;
	umode_t mode;
	const struct file_operations fops;
};
/* Snapshot of the LMC error registers, captured in hard-IRQ context. */
struct lmc_err_ctx {
	u64 reg_int;		/* LMC_INT at the time of the error */
	u64 reg_fadr;		/* LMC_FADR: failing address */
	u64 reg_nxm_fadr;	/* LMC_NXM_FADR: non-existent-memory address */
	u64 reg_scram_fadr;	/* LMC_SCRAM_FADR: scrambled failing address */
	u64 reg_ecc_synd;	/* LMC_ECC_SYND: ECC syndrome */
};
/* Per-LMC (memory controller) driver state. */
struct thunderx_lmc {
	void __iomem *regs;		/* BAR0 register window */
	struct pci_dev *pdev;
	struct msix_entry msix_ent;

	atomic_t ecc_int;		/* set by the ISR when an error fires */

	u64 mask0;			/* staged LMC_CHAR_MASK0 (ECC injection) */
	u64 mask2;			/* staged LMC_CHAR_MASK2 (ECC injection) */
	u64 parity_test;		/* staged LMC_ECC_PARITY_TEST */
	u64 node;			/* node number of this controller */

	/* Address-mapping parameters derived from LMC config in probe. */
	int xbits;
	int bank_width;
	int pbank_lsb;
	int dimm_lsb;
	int rank_lsb;
	int bank_lsb;
	int row_lsb;
	int col_hi_lsb;

	int xor_bank;			/* LMC_CONTROL_XOR_BANK set */
	int l2c_alias;			/* L2C index aliasing enabled */

	struct page *mem;		/* target page for ECC injection */

	/*
	 * Error snapshot ring: ring_head is advanced by the hard ISR,
	 * ring_tail by the threaded ISR (see CIRC_CNT usage).
	 */
	struct lmc_err_ctx err_ctx[RING_ENTRIES];
	unsigned long ring_head;
	unsigned long ring_tail;
};
  168. #define ring_pos(pos, size) ((pos) & (size - 1))
/*
 * Declare a struct debugfs_entry named debugfs_<_name> wiring the given
 * read/write handlers behind simple_open().
 */
#define DEBUGFS_STRUCT(_name, _mode, _write, _read)			    \
static struct debugfs_entry debugfs_##_name = {				    \
	.name = __stringify(_name),					    \
	.mode = VERIFY_OCTAL_PERMISSIONS(_mode),			    \
	.fops = {							    \
		.open = simple_open,					    \
		.write = _write,					    \
		.read = _read,						    \
		.llseek = generic_file_llseek,				    \
	},								    \
}
  180. #define DEBUGFS_FIELD_ATTR(_type, _field) \
  181. static ssize_t thunderx_##_type##_##_field##_read(struct file *file, \
  182. char __user *data, \
  183. size_t count, loff_t *ppos) \
  184. { \
  185. struct thunderx_##_type *pdata = file->private_data; \
  186. char buf[20]; \
  187. \
  188. snprintf(buf, count, "0x%016llx", pdata->_field); \
  189. return simple_read_from_buffer(data, count, ppos, \
  190. buf, sizeof(buf)); \
  191. } \
  192. \
  193. static ssize_t thunderx_##_type##_##_field##_write(struct file *file, \
  194. const char __user *data, \
  195. size_t count, loff_t *ppos) \
  196. { \
  197. struct thunderx_##_type *pdata = file->private_data; \
  198. int res; \
  199. \
  200. res = kstrtoull_from_user(data, count, 0, &pdata->_field); \
  201. \
  202. return res ? res : count; \
  203. } \
  204. \
  205. DEBUGFS_STRUCT(_field, 0600, \
  206. thunderx_##_type##_##_field##_write, \
  207. thunderx_##_type##_##_field##_read) \
  208. #define DEBUGFS_REG_ATTR(_type, _name, _reg) \
  209. static ssize_t thunderx_##_type##_##_name##_read(struct file *file, \
  210. char __user *data, \
  211. size_t count, loff_t *ppos) \
  212. { \
  213. struct thunderx_##_type *pdata = file->private_data; \
  214. char buf[20]; \
  215. \
  216. sprintf(buf, "0x%016llx", readq(pdata->regs + _reg)); \
  217. return simple_read_from_buffer(data, count, ppos, \
  218. buf, sizeof(buf)); \
  219. } \
  220. \
  221. static ssize_t thunderx_##_type##_##_name##_write(struct file *file, \
  222. const char __user *data, \
  223. size_t count, loff_t *ppos) \
  224. { \
  225. struct thunderx_##_type *pdata = file->private_data; \
  226. u64 val; \
  227. int res; \
  228. \
  229. res = kstrtoull_from_user(data, count, 0, &val); \
  230. \
  231. if (!res) { \
  232. writeq(val, pdata->regs + _reg); \
  233. res = count; \
  234. } \
  235. \
  236. return res; \
  237. } \
  238. \
  239. DEBUGFS_STRUCT(_name, 0600, \
  240. thunderx_##_type##_##_name##_write, \
  241. thunderx_##_type##_##_name##_read)
  242. #define LMC_DEBUGFS_ENT(_field) DEBUGFS_FIELD_ATTR(lmc, _field)
  243. /*
  244. * To get an ECC error injected, the following steps are needed:
  245. * - Setup the ECC injection by writing the appropriate parameters:
  246. * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask0
  247. * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask2
  248. * echo 0x802 > /sys/kernel/debug/<device number>/ecc_parity_test
  249. * - Do the actual injection:
  250. * echo 1 > /sys/kernel/debug/<device number>/inject_ecc
  251. */
  252. static ssize_t thunderx_lmc_inject_int_write(struct file *file,
  253. const char __user *data,
  254. size_t count, loff_t *ppos)
  255. {
  256. struct thunderx_lmc *lmc = file->private_data;
  257. u64 val;
  258. int res;
  259. res = kstrtoull_from_user(data, count, 0, &val);
  260. if (!res) {
  261. /* Trigger the interrupt */
  262. writeq(val, lmc->regs + LMC_INT_W1S);
  263. res = count;
  264. }
  265. return res;
  266. }
  267. static ssize_t thunderx_lmc_int_read(struct file *file,
  268. char __user *data,
  269. size_t count, loff_t *ppos)
  270. {
  271. struct thunderx_lmc *lmc = file->private_data;
  272. char buf[20];
  273. u64 lmc_int = readq(lmc->regs + LMC_INT);
  274. snprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
  275. return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf));
  276. }
#define TEST_PATTERN 0xa5

/*
 * Runs under stop_machine(): program the LMC ECC-corruption machinery
 * (masks and parity-test register staged via debugfs), then push the
 * test page out through L1 and L2 so the corrupted data lands in DRAM
 * and the following re-fetch raises an error interrupt.
 * Always returns 0 (stop_machine callback convention).
 */
static int inject_ecc_fn(void *arg)
{
	struct thunderx_lmc *lmc = arg;
	uintptr_t addr, phys;
	unsigned int cline_size = cache_line_size();
	const unsigned int lines = PAGE_SIZE / cline_size;
	unsigned int i, cl_idx;

	addr = (uintptr_t)page_address(lmc->mem);
	phys = (uintptr_t)page_to_phys(lmc->mem);

	/* Fold PA bits 6:4 into bits 10:8 of the parity-test register. */
	cl_idx = (phys & 0x7f) >> 4;
	lmc->parity_test &= ~(7ULL << 8);
	lmc->parity_test |= (cl_idx << 8);

	writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
	writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
	writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);

	/* Read back so the writes are posted before we touch memory. */
	readq(lmc->regs + LMC_CHAR_MASK0);
	readq(lmc->regs + LMC_CHAR_MASK2);
	readq(lmc->regs + LMC_ECC_PARITY_TEST);

	for (i = 0; i < lines; i++) {
		/*
		 * NOTE(review): this always fills the first cacheline of
		 * the page while the flush below walks line i; confirm
		 * whether addr + i * cline_size was intended here.
		 */
		memset((void *)addr, TEST_PATTERN, cline_size);
		barrier();

		/*
		 * Flush L1 cachelines to the PoC (L2).
		 * This will cause cacheline eviction to the L2.
		 */
		asm volatile("dc civac, %0\n"
			     "dsb sy\n"
			     : : "r"(addr + i * cline_size));
	}

	for (i = 0; i < lines; i++) {
		/*
		 * Flush L2 cachelines to the DRAM.
		 * This will cause cacheline eviction to the DRAM
		 * and ECC corruption according to the masks set.
		 */
		__asm__ volatile("sys #0,c11,C1,#2, %0\n"
				 : : "r"(phys + i * cline_size));
	}

	for (i = 0; i < lines; i++) {
		/*
		 * Invalidate L2 cachelines.
		 * The subsequent load will cause cacheline fetch
		 * from the DRAM and an error interrupt
		 */
		__asm__ volatile("sys #0,c11,C1,#1, %0"
				 : : "r"(phys + i * cline_size));
	}

	for (i = 0; i < lines; i++) {
		/*
		 * Invalidate L1 cachelines.
		 * The subsequent load will cause cacheline fetch
		 * from the L2 and/or DRAM
		 */
		asm volatile("dc ivac, %0\n"
			     "dsb sy\n"
			     : : "r"(addr + i * cline_size));
	}

	return 0;
}
/*
 * debugfs: perform an ECC-error injection using the parameters
 * previously staged in mask0/mask2/parity_test, then re-read the
 * poisoned page until the error interrupt is observed (ecc_int set by
 * the ISR) or the retry budget runs out.  Returns count on success,
 * -ENOMEM if scratch memory cannot be allocated.
 */
static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
					     const char __user *data,
					     size_t count, loff_t *ppos)
{
	struct thunderx_lmc *lmc = file->private_data;
	unsigned int cline_size = cache_line_size();
	u8 *tmp;
	void __iomem *addr;
	unsigned int offs, timeout = 100000;

	atomic_set(&lmc->ecc_int, 0);

	/* Target page on the controller's node; poisoned by inject_ecc_fn(). */
	lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
	if (!lmc->mem)
		return -ENOMEM;

	tmp = kmalloc(cline_size, GFP_KERNEL);
	if (!tmp) {
		__free_pages(lmc->mem, 0);
		return -ENOMEM;
	}

	/*
	 * NOTE(review): this is ordinary page memory, yet addr carries a
	 * __iomem annotation - sparse would flag the memcpy below; verify.
	 */
	addr = page_address(lmc->mem);

	while (!atomic_read(&lmc->ecc_int) && timeout--) {
		stop_machine(inject_ecc_fn, lmc, NULL);

		for (offs = 0; offs < PAGE_SIZE; offs += cline_size) {
			/*
			 * Do a load from the previously rigged location
			 * This should generate an error interrupt.
			 */
			memcpy(tmp, addr + offs, cline_size);
			asm volatile("dsb ld\n");
		}
	}

	kfree(tmp);
	__free_pages(lmc->mem, 0);

	return count;
}
  371. LMC_DEBUGFS_ENT(mask0);
  372. LMC_DEBUGFS_ENT(mask2);
  373. LMC_DEBUGFS_ENT(parity_test);
  374. DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
  375. DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
  376. DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
  377. struct debugfs_entry *lmc_dfs_ents[] = {
  378. &debugfs_mask0,
  379. &debugfs_mask2,
  380. &debugfs_parity_test,
  381. &debugfs_inject_ecc,
  382. &debugfs_inject_int,
  383. &debugfs_int_w1c,
  384. };
  385. static int thunderx_create_debugfs_nodes(struct dentry *parent,
  386. struct debugfs_entry *attrs[],
  387. void *data,
  388. size_t num)
  389. {
  390. int i;
  391. struct dentry *ent;
  392. if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
  393. return 0;
  394. if (!parent)
  395. return -ENOENT;
  396. for (i = 0; i < num; i++) {
  397. ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
  398. parent, data, &attrs[i]->fops);
  399. if (!ent)
  400. break;
  401. }
  402. return i;
  403. }
/*
 * Reconstruct a physical address from an LMC failing-address (FADR)
 * register value, undoing the controller's DIMM/rank/row/column/bank
 * mapping configured in probe.
 */
static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
{
	phys_addr_t addr = 0;
	int bank, xbits;

	addr |= lmc->node << 40;
	addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
	addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
	addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
	addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;

	bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;

	if (lmc->xor_bank)
		/* Bank bits were XOR-hashed with upper address bits. */
		bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);

	/*
	 * NOTE(review): bank already carries the bank_lsb shift from
	 * above, so it appears to be shifted twice here - verify
	 * against the LMC address-mapping documentation.
	 */
	addr |= bank << lmc->bank_lsb;

	xbits = PCI_FUNC(lmc->pdev->devfn);

	if (lmc->l2c_alias)
		/* Undo L2C index aliasing of the LMC-select bits at bit 7. */
		xbits ^= get_bits(addr, 20, lmc->xbits) ^
			 get_bits(addr, 12, lmc->xbits);

	addr |= xbits << 7;

	return addr;
}
  424. static unsigned int thunderx_get_num_lmcs(unsigned int node)
  425. {
  426. unsigned int number = 0;
  427. struct pci_dev *pdev = NULL;
  428. do {
  429. pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
  430. PCI_DEVICE_ID_THUNDER_LMC,
  431. pdev);
  432. if (pdev) {
  433. #ifdef CONFIG_NUMA
  434. if (pdev->dev.numa_node == node)
  435. number++;
  436. #else
  437. number++;
  438. #endif
  439. }
  440. } while (pdev);
  441. return number;
  442. }
#define LMC_MESSAGE_SIZE 120
#define LMC_OTHER_SIZE (50 * ARRAY_SIZE(lmc_errors))

/*
 * Hard-IRQ half of the LMC error handler: snapshot the error registers
 * into the next ring slot, ack the interrupt and wake the thread.
 */
static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
{
	struct mem_ctl_info *mci = dev_id;
	struct thunderx_lmc *lmc = mci->pvt_info;

	unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
	struct lmc_err_ctx *ctx = &lmc->err_ctx[head];

	/*
	 * Clear the character masks and reset the parity-test register
	 * before sampling - presumably stops an in-flight injection;
	 * NOTE(review): confirm the 0x2 value against the LMC spec.
	 */
	writeq(0, lmc->regs + LMC_CHAR_MASK0);
	writeq(0, lmc->regs + LMC_CHAR_MASK2);
	writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);

	ctx->reg_int = readq(lmc->regs + LMC_INT);
	ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
	ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
	ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
	ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);

	lmc->ring_head++;

	/* Let a polling ECC injector know the interrupt has fired. */
	atomic_set(&lmc->ecc_int, 1);

	/* Clear the interrupt */
	writeq(ctx->reg_int, lmc->regs + LMC_INT);

	return IRQ_WAKE_THREAD;
}
/*
 * Threaded half of the LMC error handler: drain the snapshot ring and
 * report each captured error to the EDAC core.
 */
static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
{
	struct mem_ctl_info *mci = dev_id;
	struct thunderx_lmc *lmc = mci->pvt_info;
	phys_addr_t phys_addr;

	unsigned long tail;
	struct lmc_err_ctx *ctx;

	irqreturn_t ret = IRQ_NONE;

	char *msg;
	char *other;

	msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
	other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);

	if (!msg || !other)
		goto err_free;

	while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
			ARRAY_SIZE(lmc->err_ctx))) {
		tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));

		ctx = &lmc->err_ctx[tail];

		dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
			ctx->reg_int);
		dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
			ctx->reg_fadr);
		dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
			ctx->reg_nxm_fadr);
		dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
			ctx->reg_scram_fadr);
		dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
			ctx->reg_ecc_synd);

		/* Location string is decoded from the scrambled FADR. */
		snprintf(msg, LMC_MESSAGE_SIZE,
			 "DIMM %lld rank %lld bank %lld row %lld col %lld",
			 LMC_FADR_FDIMM(ctx->reg_scram_fadr),
			 LMC_FADR_FBUNK(ctx->reg_scram_fadr),
			 LMC_FADR_FBANK(ctx->reg_scram_fadr),
			 LMC_FADR_FROW(ctx->reg_scram_fadr),
			 LMC_FADR_FCOL(ctx->reg_scram_fadr));

		decode_register(other, LMC_OTHER_SIZE, lmc_errors,
				ctx->reg_int);

		phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);

		if (ctx->reg_int & LMC_INT_UE)
			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
					     phys_to_pfn(phys_addr),
					     offset_in_page(phys_addr),
					     0, -1, -1, -1, msg, other);
		else if (ctx->reg_int & LMC_INT_CE)
			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
					     phys_to_pfn(phys_addr),
					     offset_in_page(phys_addr),
					     0, -1, -1, -1, msg, other);

		lmc->ring_tail++;
	}

	ret = IRQ_HANDLED;

err_free:
	/* kfree(NULL) is a no-op, so partial allocation is fine here. */
	kfree(msg);
	kfree(other);

	return ret;
}
/* PCI IDs served by the LMC part of this driver. */
static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
	{ 0, },
};
  525. static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
  526. {
  527. int node = dev_to_node(&pdev->dev);
  528. int ret = PCI_FUNC(pdev->devfn);
  529. ret += max(node, 0) << 3;
  530. return ret;
  531. }
/*
 * Probe one LMC: map its BAR, derive the address-mapping parameters
 * from the controller configuration, hook up the MSI-X error interrupt
 * and register an EDAC memory controller for it.
 */
static int thunderx_lmc_probe(struct pci_dev *pdev,
			      const struct pci_device_id *id)
{
	struct thunderx_lmc *lmc;
	struct edac_mc_layer layer;
	struct mem_ctl_info *mci;
	u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
	int ret;
	u64 lmc_int;
	void *l2c_ioaddr;

	/* Single layer of two slots per controller. */
	layer.type = EDAC_MC_LAYER_SLOT;
	layer.size = 2;
	layer.is_virt_csrow = false;

	ret = pcim_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
		return ret;
	}

	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
	if (ret) {
		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
		return ret;
	}

	mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
			    sizeof(struct thunderx_lmc));
	if (!mci)
		return -ENOMEM;

	mci->pdev = &pdev->dev;
	lmc = mci->pvt_info;

	pci_set_drvdata(pdev, mci);

	lmc->regs = pcim_iomap_table(pdev)[0];

	lmc_control = readq(lmc->regs + LMC_CONTROL);
	lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
	lmc_config = readq(lmc->regs + LMC_CONFIG);

	/* Registered vs unbuffered, DDR4 vs DDR3, from the hardware. */
	if (lmc_control & LMC_CONTROL_RDIMM) {
		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
					   lmc_ddr_pll_ctl) ?
				MEM_RDDR4 : MEM_RDDR3;
	} else {
		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
					   lmc_ddr_pll_ctl) ?
				MEM_DDR4 : MEM_DDR3;
	}

	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;

	mci->mod_name = "thunderx-lmc";
	mci->ctl_name = "thunderx-lmc";
	mci->dev_name = dev_name(&pdev->dev);
	mci->scrub_mode = SCRUB_NONE;

	lmc->pdev = pdev;
	lmc->msix_ent.entry = 0;

	lmc->ring_head = 0;
	lmc->ring_tail = 0;

	ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
		goto err_free;
	}

	ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
					thunderx_lmc_err_isr,
					thunderx_lmc_threaded_isr, 0,
					"[EDAC] ThunderX LMC", mci);
	if (ret) {
		dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
		goto err_free;
	}

	/* The node number is encoded in the BAR's physical address. */
	lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));

	lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
	lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
			   FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;

	/* Address-mapping LSB positions used by thunderx_faddr_to_phys(). */
	lmc->pbank_lsb = (lmc_config >> 5) & 0xf;
	lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits;
	lmc->rank_lsb = lmc->dimm_lsb;
	lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
	lmc->bank_lsb = 7 + lmc->xbits;
	lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;

	lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;

	lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;

	/* Peek at the L2C to learn whether index aliasing is enabled. */
	l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), PAGE_SIZE);
	if (!l2c_ioaddr) {
		dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
		ret = -ENOMEM;
		goto err_free;
	}

	lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);

	iounmap(l2c_ioaddr);

	ret = edac_mc_add_mc(mci);
	if (ret) {
		dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
		goto err_free;
	}

	/* Ack anything pending, then enable all LMC error interrupts. */
	lmc_int = readq(lmc->regs + LMC_INT);
	writeq(lmc_int, lmc->regs + LMC_INT);

	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);

	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
		ret = thunderx_create_debugfs_nodes(mci->debugfs,
						    lmc_dfs_ents,
						    lmc,
						    ARRAY_SIZE(lmc_dfs_ents));
		if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
				 ret, ret >= 0 ? " created" : "");
		}
	}

	return 0;

err_free:
	pci_set_drvdata(pdev, NULL);
	edac_mc_free(mci);

	return ret;
}
/* Teardown: mask all LMC interrupts, then unregister and free the MC. */
static void thunderx_lmc_remove(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci = pci_get_drvdata(pdev);
	struct thunderx_lmc *lmc = mci->pvt_info;

	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C);

	edac_mc_del_mc(&pdev->dev);
	edac_mc_free(mci);
}
MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);

/* PCI driver glue for the LMC EDAC device. */
static struct pci_driver thunderx_lmc_driver = {
	.name     = "thunderx_lmc_edac",
	.probe    = thunderx_lmc_probe,
	.remove   = thunderx_lmc_remove,
	.id_table = thunderx_lmc_pci_tbl,
};
  657. /*---------------------- OCX driver ---------------------------------*/
  658. #define PCI_DEVICE_ID_THUNDER_OCX 0xa013
  659. #define OCX_LINK_INTS 3
  660. #define OCX_INTS (OCX_LINK_INTS + 1)
  661. #define OCX_RX_LANES 24
  662. #define OCX_RX_LANE_STATS 15
  663. #define OCX_COM_INT 0x100
  664. #define OCX_COM_INT_W1S 0x108
  665. #define OCX_COM_INT_ENA_W1S 0x110
  666. #define OCX_COM_INT_ENA_W1C 0x118
  667. #define OCX_COM_IO_BADID BIT(54)
  668. #define OCX_COM_MEM_BADID BIT(53)
  669. #define OCX_COM_COPR_BADID BIT(52)
  670. #define OCX_COM_WIN_REQ_BADID BIT(51)
  671. #define OCX_COM_WIN_REQ_TOUT BIT(50)
  672. #define OCX_COM_RX_LANE GENMASK(23, 0)
  673. #define OCX_COM_INT_CE (OCX_COM_IO_BADID | \
  674. OCX_COM_MEM_BADID | \
  675. OCX_COM_COPR_BADID | \
  676. OCX_COM_WIN_REQ_BADID | \
  677. OCX_COM_WIN_REQ_TOUT)
/* OCX COM interrupt bits - all classified as corrected/informational. */
static const struct error_descr ocx_com_errors[] = {
	{
		.type = ERR_CORRECTED,
		.mask = OCX_COM_IO_BADID,
		.descr = "Invalid IO transaction node ID",
	},
	{
		.type = ERR_CORRECTED,
		.mask = OCX_COM_MEM_BADID,
		.descr = "Invalid memory transaction node ID",
	},
	{
		.type = ERR_CORRECTED,
		.mask = OCX_COM_COPR_BADID,
		.descr = "Invalid coprocessor transaction node ID",
	},
	{
		.type = ERR_CORRECTED,
		.mask = OCX_COM_WIN_REQ_BADID,
		.descr = "Invalid SLI transaction node ID",
	},
	{
		.type = ERR_CORRECTED,
		.mask = OCX_COM_WIN_REQ_TOUT,
		.descr = "Window/core request timeout",
	},
	{0, 0, NULL},	/* sentinel for decode_register() */
};
/* Per-link interrupt registers; x = link index (0..OCX_LINK_INTS-1). */
#define OCX_COM_LINKX_INT(x)		(0x120 + (x) * 8)
#define OCX_COM_LINKX_INT_W1S(x)	(0x140 + (x) * 8)
#define OCX_COM_LINKX_INT_ENA_W1S(x)	(0x160 + (x) * 8)
#define OCX_COM_LINKX_INT_ENA_W1C(x)	(0x180 + (x) * 8)

/* OCX_COM_LINK(x)_INT bit assignments. */
#define OCX_COM_LINK_BAD_WORD		BIT(13)
#define OCX_COM_LINK_ALIGN_FAIL		BIT(12)
#define OCX_COM_LINK_ALIGN_DONE		BIT(11)
#define OCX_COM_LINK_UP			BIT(10)
#define OCX_COM_LINK_STOP		BIT(9)
#define OCX_COM_LINK_BLK_ERR		BIT(8)
#define OCX_COM_LINK_REINIT		BIT(7)
#define OCX_COM_LINK_LNK_DATA		BIT(6)
#define OCX_COM_LINK_RXFIFO_DBE		BIT(5)
#define OCX_COM_LINK_RXFIFO_SBE		BIT(4)
#define OCX_COM_LINK_TXFIFO_DBE		BIT(3)
#define OCX_COM_LINK_TXFIFO_SBE		BIT(2)
#define OCX_COM_LINK_REPLAY_DBE		BIT(1)
#define OCX_COM_LINK_REPLAY_SBE		BIT(0)

/*
 * Decode table for the per-link interrupt register: correctable
 * (single-bit / transient) entries first, then uncorrectable ones.
 * NULL-terminated; consumed by decode_register().
 */
static const struct error_descr ocx_com_link_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_REPLAY_SBE,
		.descr = "Replay buffer single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_TXFIFO_SBE,
		.descr = "TX FIFO single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_RXFIFO_SBE,
		.descr = "RX FIFO single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_BLK_ERR,
		.descr = "Block code error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_ALIGN_FAIL,
		.descr = "Link alignment failure",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_BAD_WORD,
		.descr = "Bad code word",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_REPLAY_DBE,
		.descr = "Replay buffer double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_TXFIFO_DBE,
		.descr = "TX FIFO double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_RXFIFO_DBE,
		.descr = "RX FIFO double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_STOP,
		.descr = "Link stopped",
	},
	{0, 0, NULL},
};
/* Link-level uncorrectable / correctable summary masks. */
#define OCX_COM_LINK_INT_UE	(OCX_COM_LINK_REPLAY_DBE | \
				 OCX_COM_LINK_TXFIFO_DBE | \
				 OCX_COM_LINK_RXFIFO_DBE | \
				 OCX_COM_LINK_STOP)

#define OCX_COM_LINK_INT_CE	(OCX_COM_LINK_REPLAY_SBE | \
				 OCX_COM_LINK_TXFIFO_SBE | \
				 OCX_COM_LINK_RXFIFO_SBE | \
				 OCX_COM_LINK_BLK_ERR    | \
				 OCX_COM_LINK_ALIGN_FAIL | \
				 OCX_COM_LINK_BAD_WORD)

/* Per-RX-lane registers; x = lane (0..23), y = statistics index. */
#define OCX_LNE_INT(x)		(0x8018 + (x) * 0x100)
#define OCX_LNE_INT_EN(x)	(0x8020 + (x) * 0x100)
#define OCX_LNE_BAD_CNT(x)	(0x8028 + (x) * 0x100)
#define OCX_LNE_CFG(x)		(0x8000 + (x) * 0x100)
#define OCX_LNE_STAT(x, y)	(0x8040 + (x) * 0x100 + (y) * 8)

/* OCX_LNE_CFG bit assignments. */
#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS	BIT(8)
#define OCX_LNE_CFG_RX_STAT_WRAP_DIS	BIT(2)
#define OCX_LNE_CFG_RX_STAT_RDCLR	BIT(1)
#define OCX_LNE_CFG_RX_STAT_ENA		BIT(0)

/* OCX_LNE_INT bit assignments. */
#define OCX_LANE_BAD_64B67B		BIT(8)
#define OCX_LANE_DSKEW_FIFO_OVFL	BIT(5)
#define OCX_LANE_SCRM_SYNC_LOSS		BIT(4)
#define OCX_LANE_UKWN_CNTL_WORD		BIT(3)
#define OCX_LANE_CRC32_ERR		BIT(2)
#define OCX_LANE_BDRY_SYNC_LOSS		BIT(1)
#define OCX_LANE_SERDES_LOCK_LOSS	BIT(0)

/* No lane-level error is uncorrectable; all are recoverable events. */
#define OCX_COM_LANE_INT_UE	(0)
#define OCX_COM_LANE_INT_CE	(OCX_LANE_SERDES_LOCK_LOSS | \
				 OCX_LANE_BDRY_SYNC_LOSS   | \
				 OCX_LANE_CRC32_ERR        | \
				 OCX_LANE_UKWN_CNTL_WORD   | \
				 OCX_LANE_SCRM_SYNC_LOSS   | \
				 OCX_LANE_DSKEW_FIFO_OVFL  | \
				 OCX_LANE_BAD_64B67B)

/* Decode table for OCX_LNE_INT; NULL-terminated. */
static const struct error_descr ocx_lane_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_SERDES_LOCK_LOSS,
		.descr = "RX SerDes lock lost",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_BDRY_SYNC_LOSS,
		.descr = "RX word boundary lost",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_CRC32_ERR,
		.descr = "CRC32 error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_UKWN_CNTL_WORD,
		.descr = "Unknown control word",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_SCRM_SYNC_LOSS,
		.descr = "Scrambler synchronization lost",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_DSKEW_FIFO_OVFL,
		.descr = "RX deskew FIFO overflow",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_BAD_64B67B,
		.descr = "Bad 64B/67B codeword",
	},
	{0, 0, NULL},
};
/* "Enable everything" masks for the three interrupt levels. */
#define OCX_LNE_INT_ENA_ALL		(GENMASK(9, 8) | GENMASK(6, 0))
#define OCX_COM_INT_ENA_ALL		(GENMASK(54, 50) | GENMASK(23, 0))
#define OCX_COM_LINKX_INT_ENA_ALL	(GENMASK(13, 12) | \
					 GENMASK(9, 7) | GENMASK(5, 0))

/* ECC control registers for TX/RX link blocks (debugfs injection). */
#define OCX_TLKX_ECC_CTL(x)	(0x10018 + (x) * 0x2000)
#define OCX_RLKX_ECC_CTL(x)	(0x18018 + (x) * 0x2000)

/* Snapshot of common-block state captured in hard-IRQ context. */
struct ocx_com_err_ctx {
	u64 reg_com_int;
	u64 reg_lane_int[OCX_RX_LANES];
	u64 reg_lane_stat11[OCX_RX_LANES];
};

/* Snapshot of one link's interrupt state plus which link it was. */
struct ocx_link_err_ctx {
	u64 reg_com_link_int;
	int link;
};

/*
 * Per-device OCX state.  The hard ISRs fill the err_ctx rings at
 * *_ring_head; the threaded ISRs drain them at *_ring_tail
 * (single-producer/single-consumer, CIRC_CNT-style accounting).
 */
struct thunderx_ocx {
	void __iomem *regs;
	int com_link;
	struct pci_dev *pdev;
	struct edac_device_ctl_info *edac_dev;

	struct dentry *debugfs;
	struct msix_entry msix_ent[OCX_INTS];

	struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
	struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];

	unsigned long com_ring_head;
	unsigned long com_ring_tail;

	unsigned long link_ring_head;
	unsigned long link_ring_tail;
};

/* Scratch-buffer sizes used when formatting error reports. */
#define OCX_MESSAGE_SIZE	SZ_1K
#define OCX_OTHER_SIZE		(50 * ARRAY_SIZE(ocx_com_link_errors))
/* This handler is threaded */
/*
 * Hard-IRQ half for the common-block vector: snapshot OCX_COM_INT and
 * every lane's INT/STAT11 registers into the next ring slot, ack the
 * lane and common interrupts (write-1-to-clear), then wake the thread.
 */
static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
						msix_ent[msix->entry]);
	int lane;
	unsigned long head = ring_pos(ocx->com_ring_head,
				      ARRAY_SIZE(ocx->com_err_ctx));
	struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head];

	ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT);

	for (lane = 0; lane < OCX_RX_LANES; lane++) {
		ctx->reg_lane_int[lane] =
			readq(ocx->regs + OCX_LNE_INT(lane));
		ctx->reg_lane_stat11[lane] =
			readq(ocx->regs + OCX_LNE_STAT(lane, 11));

		/* Ack exactly the lane bits we captured (W1C). */
		writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane));
	}

	/* Ack the common-block bits we captured (W1C). */
	writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT);

	/* Publish the slot to the threaded handler. */
	ocx->com_ring_head++;

	return IRQ_WAKE_THREAD;
}
  902. static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
  903. {
  904. struct msix_entry *msix = irq_id;
  905. struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
  906. msix_ent[msix->entry]);
  907. irqreturn_t ret = IRQ_NONE;
  908. unsigned long tail;
  909. struct ocx_com_err_ctx *ctx;
  910. int lane;
  911. char *msg;
  912. char *other;
  913. msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
  914. other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
  915. if (!msg || !other)
  916. goto err_free;
  917. while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail,
  918. ARRAY_SIZE(ocx->com_err_ctx))) {
  919. tail = ring_pos(ocx->com_ring_tail,
  920. ARRAY_SIZE(ocx->com_err_ctx));
  921. ctx = &ocx->com_err_ctx[tail];
  922. snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx",
  923. ocx->edac_dev->ctl_name, ctx->reg_com_int);
  924. decode_register(other, OCX_OTHER_SIZE,
  925. ocx_com_errors, ctx->reg_com_int);
  926. strncat(msg, other, OCX_MESSAGE_SIZE);
  927. for (lane = 0; lane < OCX_RX_LANES; lane++)
  928. if (ctx->reg_com_int & BIT(lane)) {
  929. snprintf(other, OCX_OTHER_SIZE,
  930. "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx",
  931. lane, ctx->reg_lane_int[lane],
  932. lane, ctx->reg_lane_stat11[lane]);
  933. strncat(msg, other, OCX_MESSAGE_SIZE);
  934. decode_register(other, OCX_OTHER_SIZE,
  935. ocx_lane_errors,
  936. ctx->reg_lane_int[lane]);
  937. strncat(msg, other, OCX_MESSAGE_SIZE);
  938. }
  939. if (ctx->reg_com_int & OCX_COM_INT_CE)
  940. edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
  941. ocx->com_ring_tail++;
  942. }
  943. ret = IRQ_HANDLED;
  944. err_free:
  945. kfree(other);
  946. kfree(msg);
  947. return ret;
  948. }
/*
 * Hard-IRQ half for a per-link vector: record which link fired
 * (msix->entry maps 1:1 to the link number), snapshot and ack its
 * interrupt register (W1C), then wake the threaded handler.
 */
static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
						msix_ent[msix->entry]);
	unsigned long head = ring_pos(ocx->link_ring_head,
				      ARRAY_SIZE(ocx->link_err_ctx));
	struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head];

	ctx->link = msix->entry;
	ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link));

	writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link));

	/* Publish the slot to the threaded handler. */
	ocx->link_ring_head++;

	return IRQ_WAKE_THREAD;
}
  963. static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
  964. {
  965. struct msix_entry *msix = irq_id;
  966. struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
  967. msix_ent[msix->entry]);
  968. irqreturn_t ret = IRQ_NONE;
  969. unsigned long tail;
  970. struct ocx_link_err_ctx *ctx;
  971. char *msg;
  972. char *other;
  973. msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
  974. other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
  975. if (!msg || !other)
  976. goto err_free;
  977. while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail,
  978. ARRAY_SIZE(ocx->link_err_ctx))) {
  979. tail = ring_pos(ocx->link_ring_head,
  980. ARRAY_SIZE(ocx->link_err_ctx));
  981. ctx = &ocx->link_err_ctx[tail];
  982. snprintf(msg, OCX_MESSAGE_SIZE,
  983. "%s: OCX_COM_LINK_INT[%d]: %016llx",
  984. ocx->edac_dev->ctl_name,
  985. ctx->link, ctx->reg_com_link_int);
  986. decode_register(other, OCX_OTHER_SIZE,
  987. ocx_com_link_errors, ctx->reg_com_link_int);
  988. strncat(msg, other, OCX_MESSAGE_SIZE);
  989. if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
  990. edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
  991. else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE)
  992. edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
  993. ocx->link_ring_tail++;
  994. }
  995. ret = IRQ_HANDLED;
  996. err_free:
  997. kfree(other);
  998. kfree(msg);
  999. return ret;
  1000. }
/*
 * Debugfs nodes for error injection / inspection: ECC controls for the
 * three TX and RX link blocks, the W1S mirrors of the link and common
 * interrupt registers, and the 24 per-lane bad-block counters.
 */
#define OCX_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(ocx, _name, _reg)

OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2));

OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2));

OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0));
OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1));
OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2));

OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0));
OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1));
OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2));
OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3));
OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4));
OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5));
OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6));
OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7));
OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8));
OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9));
OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10));
OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11));
OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12));
OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13));
OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14));
OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15));
OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16));
OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17));
OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18));
OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19));
OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20));
OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21));
OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22));
OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23));

OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S);

/* Registered in bulk by thunderx_create_debugfs_nodes() at probe. */
struct debugfs_entry *ocx_dfs_ents[] = {
	&debugfs_tlk0_ecc_ctl,
	&debugfs_tlk1_ecc_ctl,
	&debugfs_tlk2_ecc_ctl,

	&debugfs_rlk0_ecc_ctl,
	&debugfs_rlk1_ecc_ctl,
	&debugfs_rlk2_ecc_ctl,

	&debugfs_com_link0_int,
	&debugfs_com_link1_int,
	&debugfs_com_link2_int,

	&debugfs_lne00_badcnt,
	&debugfs_lne01_badcnt,
	&debugfs_lne02_badcnt,
	&debugfs_lne03_badcnt,
	&debugfs_lne04_badcnt,
	&debugfs_lne05_badcnt,
	&debugfs_lne06_badcnt,
	&debugfs_lne07_badcnt,
	&debugfs_lne08_badcnt,
	&debugfs_lne09_badcnt,
	&debugfs_lne10_badcnt,
	&debugfs_lne11_badcnt,
	&debugfs_lne12_badcnt,
	&debugfs_lne13_badcnt,
	&debugfs_lne14_badcnt,
	&debugfs_lne15_badcnt,
	&debugfs_lne16_badcnt,
	&debugfs_lne17_badcnt,
	&debugfs_lne18_badcnt,
	&debugfs_lne19_badcnt,
	&debugfs_lne20_badcnt,
	&debugfs_lne21_badcnt,
	&debugfs_lne22_badcnt,
	&debugfs_lne23_badcnt,

	&debugfs_com_int,
};
/* PCI IDs bound by the OCX EDAC driver. */
static const struct pci_device_id thunderx_ocx_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) },
	{ 0, },
};
  1076. static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx)
  1077. {
  1078. int lane, stat, cfg;
  1079. for (lane = 0; lane < OCX_RX_LANES; lane++) {
  1080. cfg = readq(ocx->regs + OCX_LNE_CFG(lane));
  1081. cfg |= OCX_LNE_CFG_RX_STAT_RDCLR;
  1082. cfg &= ~OCX_LNE_CFG_RX_STAT_ENA;
  1083. writeq(cfg, ocx->regs + OCX_LNE_CFG(lane));
  1084. for (stat = 0; stat < OCX_RX_LANE_STATS; stat++)
  1085. readq(ocx->regs + OCX_LNE_STAT(lane, stat));
  1086. }
  1087. }
/*
 * Probe one OCX device: map BAR0, allocate the EDAC control structure
 * (with struct thunderx_ocx as pvt_info), wire up the three per-link
 * MSI-X vectors plus the common-block vector, register with the EDAC
 * core, optionally create debugfs nodes, then clear stale state and
 * enable all lane, link and common interrupt sources.
 */
static int thunderx_ocx_probe(struct pci_dev *pdev,
			      const struct pci_device_id *id)
{
	struct thunderx_ocx *ocx;
	struct edac_device_ctl_info *edac_dev;
	char name[32];
	int idx;
	int i;
	int ret;
	u64 reg;

	ret = pcim_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
		return ret;
	}

	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx");
	if (ret) {
		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
		return ret;
	}

	idx = edac_device_alloc_index();
	snprintf(name, sizeof(name), "OCX%d", idx);
	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx),
					      name, 1, "CCPI", 1,
					      0, NULL, 0, idx);
	if (!edac_dev) {
		dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret);
		return -ENOMEM;
	}

	ocx = edac_dev->pvt_info;
	ocx->edac_dev = edac_dev;
	/* Empty producer/consumer rings. */
	ocx->com_ring_head = 0;
	ocx->com_ring_tail = 0;
	ocx->link_ring_head = 0;
	ocx->link_ring_tail = 0;

	ocx->regs = pcim_iomap_table(pdev)[0];
	if (!ocx->regs) {
		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
		ret = -ENODEV;
		goto err_free;
	}

	ocx->pdev = pdev;

	for (i = 0; i < OCX_INTS; i++) {
		ocx->msix_ent[i].entry = i;
		ocx->msix_ent[i].vector = 0;
	}

	ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
		goto err_free;
	}

	/*
	 * Vectors 0..2 are the per-link interrupts; the last vector
	 * (entry 3 == OCX_INTS - 1) is the common-block interrupt.
	 */
	for (i = 0; i < OCX_INTS; i++) {
		ret = devm_request_threaded_irq(&pdev->dev,
						ocx->msix_ent[i].vector,
						(i == 3) ?
						 thunderx_ocx_com_isr :
						 thunderx_ocx_lnk_isr,
						(i == 3) ?
						 thunderx_ocx_com_threaded_isr :
						 thunderx_ocx_lnk_threaded_isr,
						0, "[EDAC] ThunderX OCX",
						&ocx->msix_ent[i]);
		if (ret)
			goto err_free;
	}

	edac_dev->dev = &pdev->dev;
	edac_dev->dev_name = dev_name(&pdev->dev);
	edac_dev->mod_name = "thunderx-ocx";
	edac_dev->ctl_name = "thunderx-ocx";

	ret = edac_device_add_device(edac_dev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
		goto err_free;
	}

	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
		ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);

		ret = thunderx_create_debugfs_nodes(ocx->debugfs,
						    ocx_dfs_ents,
						    ocx,
						    ARRAY_SIZE(ocx_dfs_ents));
		if (ret != ARRAY_SIZE(ocx_dfs_ents)) {
			/* Non-fatal: keep going with partial debugfs. */
			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
				 ret, ret >= 0 ? " created" : "");
		}
	}

	pci_set_drvdata(pdev, edac_dev);

	thunderx_ocx_clearstats(ocx);

	/* Enable lane interrupts and ack anything already pending (W1C). */
	for (i = 0; i < OCX_RX_LANES; i++) {
		writeq(OCX_LNE_INT_ENA_ALL,
		       ocx->regs + OCX_LNE_INT_EN(i));
		reg = readq(ocx->regs + OCX_LNE_INT(i));
		writeq(reg, ocx->regs + OCX_LNE_INT(i));
	}

	/* Same for the per-link interrupt registers. */
	for (i = 0; i < OCX_LINK_INTS; i++) {
		reg = readq(ocx->regs + OCX_COM_LINKX_INT(i));
		writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i));
		writeq(OCX_COM_LINKX_INT_ENA_ALL,
		       ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i));
	}

	/* And finally the common-block interrupt. */
	reg = readq(ocx->regs + OCX_COM_INT);
	writeq(reg, ocx->regs + OCX_COM_INT);

	writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S);

	return 0;
err_free:
	edac_device_free_ctl_info(edac_dev);

	return ret;
}
/*
 * Tear down one OCX device: disable all interrupt sources, then remove
 * debugfs and unregister/free the EDAC device.  IRQs were requested
 * with devm_*, so they are released by the driver core afterwards.
 */
static void thunderx_ocx_remove(struct pci_dev *pdev)
{
	struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
	struct thunderx_ocx *ocx = edac_dev->pvt_info;
	int i;

	writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C);

	/*
	 * NOTE(review): this loops over OCX_INTS (4) but only
	 * OCX_LINK_INTS (3) per-link ENA_W1C registers exist; the last
	 * iteration writes one stride past LINKX_INT_ENA_W1C(2) —
	 * confirm against the OCX register map.
	 */
	for (i = 0; i < OCX_INTS; i++) {
		writeq(OCX_COM_LINKX_INT_ENA_ALL,
		       ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i));
	}

	edac_debugfs_remove_recursive(ocx->debugfs);

	edac_device_del_device(&pdev->dev);
	edac_device_free_ctl_info(edac_dev);
}
MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl);

/* PCI driver glue for the OCX (CCPI interconnect) EDAC instance. */
static struct pci_driver thunderx_ocx_driver = {
	.name     = "thunderx_ocx_edac",
	.probe    = thunderx_ocx_probe,
	.remove   = thunderx_ocx_remove,
	.id_table = thunderx_ocx_pci_tbl,
};
/*---------------------- L2C driver ---------------------------------*/

#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e
#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f
#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030

/* L2C TAD interrupt registers (W1C/W1S/enable pairs). */
#define L2C_TAD_INT_W1C		0x40000
#define L2C_TAD_INT_W1S		0x40008
#define L2C_TAD_INT_ENA_W1C	0x40020
#define L2C_TAD_INT_ENA_W1S	0x40028

/* L2C_TAD_INT bit assignments. */
#define L2C_TAD_INT_L2DDBE	 BIT(1)
#define L2C_TAD_INT_SBFSBE	 BIT(2)
#define L2C_TAD_INT_SBFDBE	 BIT(3)
#define L2C_TAD_INT_FBFSBE	 BIT(4)
#define L2C_TAD_INT_FBFDBE	 BIT(5)
#define L2C_TAD_INT_TAGDBE	 BIT(9)
#define L2C_TAD_INT_RDDISLMC	 BIT(15)
#define L2C_TAD_INT_WRDISLMC	 BIT(16)
#define L2C_TAD_INT_LFBTO	 BIT(17)
#define L2C_TAD_INT_GSYNCTO	 BIT(18)
#define L2C_TAD_INT_RTGSBE	 BIT(32)
#define L2C_TAD_INT_RTGDBE	 BIT(33)
#define L2C_TAD_INT_RDDISOCI	 BIT(34)
#define L2C_TAD_INT_WRDISOCI	 BIT(35)

/* ECC-related events (select the TQD_ERR extra register in the ISR). */
#define L2C_TAD_INT_ECC		(L2C_TAD_INT_L2DDBE | \
				 L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \
				 L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE)

#define L2C_TAD_INT_CE          (L2C_TAD_INT_SBFSBE | \
				 L2C_TAD_INT_FBFSBE)

#define L2C_TAD_INT_UE          (L2C_TAD_INT_L2DDBE | \
				 L2C_TAD_INT_SBFDBE | \
				 L2C_TAD_INT_FBFDBE | \
				 L2C_TAD_INT_TAGDBE | \
				 L2C_TAD_INT_RTGDBE | \
				 L2C_TAD_INT_WRDISOCI | \
				 L2C_TAD_INT_RDDISOCI | \
				 L2C_TAD_INT_WRDISLMC | \
				 L2C_TAD_INT_RDDISLMC | \
				 L2C_TAD_INT_LFBTO    | \
				 L2C_TAD_INT_GSYNCTO)

/* Decode table for L2C_TAD_INT; NULL-terminated. */
static const struct error_descr l2_tad_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_TAD_INT_SBFSBE,
		.descr = "SBF single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_TAD_INT_FBFSBE,
		.descr = "FBF single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_L2DDBE,
		.descr = "L2D double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_SBFDBE,
		.descr = "SBF double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_FBFDBE,
		.descr = "FBF double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_TAGDBE,
		.descr = "TAG double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RTGDBE,
		.descr = "RTG double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_WRDISOCI,
		.descr = "Write to a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RDDISOCI,
		.descr = "Read from a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_WRDISLMC,
		.descr = "Write to a disabled LMC",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RDDISLMC,
		.descr = "Read from a disabled LMC",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_LFBTO,
		.descr = "LFB entry timeout",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_GSYNCTO,
		.descr = "Global sync CCPI timeout",
	},
	{0, 0, NULL},
};
/* Grouped TAD masks used by the ISR to pick the extra status register. */
#define L2C_TAD_INT_TAG		(L2C_TAD_INT_TAGDBE)

#define L2C_TAD_INT_RTG		(L2C_TAD_INT_RTGDBE)

#define L2C_TAD_INT_DISLMC	(L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC)

#define L2C_TAD_INT_DISOCI	(L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI)

#define L2C_TAD_INT_ENA_ALL	(L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \
				 L2C_TAD_INT_RTG | \
				 L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \
				 L2C_TAD_INT_LFBTO)

/* TAD extra status registers. */
#define L2C_TAD_TIMETWO		0x50000
#define L2C_TAD_TIMEOUT		0x50100
#define L2C_TAD_ERR		0x60000
#define L2C_TAD_TQD_ERR		0x60100
#define L2C_TAD_TTG_ERR		0x60200

/* L2C CBC interrupt register and bit assignments. */
#define L2C_CBC_INT_W1C		0x60000

#define L2C_CBC_INT_RSDSBE	 BIT(0)
#define L2C_CBC_INT_RSDDBE	 BIT(1)

#define L2C_CBC_INT_RSD		 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE)

#define L2C_CBC_INT_MIBSBE	 BIT(4)
#define L2C_CBC_INT_MIBDBE	 BIT(5)

#define L2C_CBC_INT_MIB		 (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE)

#define L2C_CBC_INT_IORDDISOCI	 BIT(6)
#define L2C_CBC_INT_IOWRDISOCI	 BIT(7)

#define L2C_CBC_INT_IODISOCI	 (L2C_CBC_INT_IORDDISOCI | \
				  L2C_CBC_INT_IOWRDISOCI)

#define L2C_CBC_INT_CE		 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE)
#define L2C_CBC_INT_UE		 (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE)

/* Decode table for L2C_CBC_INT; NULL-terminated. */
static const struct error_descr l2_cbc_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_CBC_INT_RSDSBE,
		.descr = "RSD single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_CBC_INT_MIBSBE,
		.descr = "MIB single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_RSDDBE,
		.descr = "RSD double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_MIBDBE,
		.descr = "MIB double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_IORDDISOCI,
		.descr = "Read from a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_IOWRDISOCI,
		.descr = "Write to a disabled CCPI",
	},
	{0, 0, NULL},
};
/* Remaining CBC registers: W1S/enable mirrors and error-detail regs. */
#define L2C_CBC_INT_W1S		0x60008
#define L2C_CBC_INT_ENA_W1C	0x60020

#define L2C_CBC_INT_ENA_ALL	 (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \
				  L2C_CBC_INT_IODISOCI)

#define L2C_CBC_INT_ENA_W1S	0x60028

#define L2C_CBC_IODISOCIERR	0x80008
#define L2C_CBC_IOCERR		0x80010
#define L2C_CBC_RSDERR		0x80018
#define L2C_CBC_MIBERR		0x80020

/* L2C MCI interrupt register and bit assignments. */
#define L2C_MCI_INT_W1C		0x0

#define L2C_MCI_INT_VBFSBE	 BIT(0)
#define L2C_MCI_INT_VBFDBE	 BIT(1)

/* Decode table for L2C_MCI_INT; NULL-terminated. */
static const struct error_descr l2_mci_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_MCI_INT_VBFSBE,
		.descr = "VBF single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_MCI_INT_VBFDBE,
		.descr = "VBF double-bit error",
	},
	{0, 0, NULL},
};
/* Remaining MCI registers: W1S/enable mirrors and error-detail reg. */
#define L2C_MCI_INT_W1S		0x8
#define L2C_MCI_INT_ENA_W1C	0x20

#define L2C_MCI_INT_ENA_ALL	 (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE)

#define L2C_MCI_INT_ENA_W1S	0x28

#define L2C_MCI_ERR		0x10000

/* Scratch-buffer sizes used when formatting L2C error reports. */
#define L2C_MESSAGE_SIZE	SZ_1K
#define L2C_OTHER_SIZE		(50 * ARRAY_SIZE(l2_tad_errors))

/* Interrupt snapshot plus the matching extra status register. */
struct l2c_err_ctx {
	char *reg_ext_name;
	u64  reg_int;
	u64  reg_ext;
};

/*
 * Per-device L2C state, shared by the TAD, CBC and MCI flavours.
 * The hard ISR fills err_ctx at ring_head; the threaded ISR drains
 * it at ring_tail (single producer / single consumer).
 */
struct thunderx_l2c {
	void __iomem *regs;
	struct pci_dev *pdev;
	struct edac_device_ctl_info *edac_dev;

	struct dentry *debugfs;

	int index;

	struct msix_entry msix_ent;

	struct l2c_err_ctx err_ctx[RING_ENTRIES];
	unsigned long ring_head;
	unsigned long ring_tail;
};
/*
 * Hard-IRQ handler for the TAD flavour: snapshot L2C_TAD_INT, pick the
 * matching extra status register by error class (first match wins),
 * ack the interrupt (W1C) and wake the shared threaded handler.
 */
static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c,
						msix_ent);

	unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx));
	struct l2c_err_ctx *ctx = &tad->err_ctx[head];

	ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C);

	if (ctx->reg_int & L2C_TAD_INT_ECC) {
		ctx->reg_ext_name = "TQD_ERR";
		ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR);
	} else if (ctx->reg_int & L2C_TAD_INT_TAG) {
		ctx->reg_ext_name = "TTG_ERR";
		ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR);
	} else if (ctx->reg_int & L2C_TAD_INT_LFBTO) {
		ctx->reg_ext_name = "TIMEOUT";
		ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT);
	} else if (ctx->reg_int & L2C_TAD_INT_DISOCI) {
		ctx->reg_ext_name = "ERR";
		ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR);
	}

	writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C);

	/* Publish the slot to the threaded handler. */
	tad->ring_head++;

	return IRQ_WAKE_THREAD;
}
/*
 * Hard-IRQ handler for the CBC flavour: snapshot L2C_CBC_INT, pick the
 * matching error-detail register (first match wins), ack (W1C) and
 * wake the shared threaded handler.
 */
static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c,
						msix_ent);

	unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx));
	struct l2c_err_ctx *ctx = &cbc->err_ctx[head];

	ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C);

	if (ctx->reg_int & L2C_CBC_INT_RSD) {
		ctx->reg_ext_name = "RSDERR";
		ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR);
	} else if (ctx->reg_int & L2C_CBC_INT_MIB) {
		ctx->reg_ext_name = "MIBERR";
		ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR);
	} else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) {
		ctx->reg_ext_name = "IODISOCIERR";
		ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR);
	}

	writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C);

	/* Publish the slot to the threaded handler. */
	cbc->ring_head++;

	return IRQ_WAKE_THREAD;
}
/*
 * Hard-IRQ handler for the MCI flavour: snapshot L2C_MCI_INT and the
 * single error-detail register, ack (W1C) and wake the shared
 * threaded handler.
 */
static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c,
						msix_ent);

	unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx));
	struct l2c_err_ctx *ctx = &mci->err_ctx[head];

	ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C);
	ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR);

	writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C);

	ctx->reg_ext_name = "ERR";

	/* Publish the slot to the threaded handler. */
	mci->ring_head++;

	return IRQ_WAKE_THREAD;
}
  1490. static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
  1491. {
  1492. struct msix_entry *msix = irq_id;
  1493. struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c,
  1494. msix_ent);
  1495. unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx));
  1496. struct l2c_err_ctx *ctx = &l2c->err_ctx[tail];
  1497. irqreturn_t ret = IRQ_NONE;
  1498. u64 mask_ue, mask_ce;
  1499. const struct error_descr *l2_errors;
  1500. char *reg_int_name;
  1501. char *msg;
  1502. char *other;
  1503. msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
  1504. other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
  1505. if (!msg || !other)
  1506. goto err_free;
  1507. switch (l2c->pdev->device) {
  1508. case PCI_DEVICE_ID_THUNDER_L2C_TAD:
  1509. reg_int_name = "L2C_TAD_INT";
  1510. mask_ue = L2C_TAD_INT_UE;
  1511. mask_ce = L2C_TAD_INT_CE;
  1512. l2_errors = l2_tad_errors;
  1513. break;
  1514. case PCI_DEVICE_ID_THUNDER_L2C_CBC:
  1515. reg_int_name = "L2C_CBC_INT";
  1516. mask_ue = L2C_CBC_INT_UE;
  1517. mask_ce = L2C_CBC_INT_CE;
  1518. l2_errors = l2_cbc_errors;
  1519. break;
  1520. case PCI_DEVICE_ID_THUNDER_L2C_MCI:
  1521. reg_int_name = "L2C_MCI_INT";
  1522. mask_ue = L2C_MCI_INT_VBFDBE;
  1523. mask_ce = L2C_MCI_INT_VBFSBE;
  1524. l2_errors = l2_mci_errors;
  1525. break;
  1526. default:
  1527. dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
  1528. l2c->pdev->device);
  1529. goto err_free;
  1530. }
  1531. while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
  1532. ARRAY_SIZE(l2c->err_ctx))) {
  1533. snprintf(msg, L2C_MESSAGE_SIZE,
  1534. "%s: %s: %016llx, %s: %016llx",
  1535. l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int,
  1536. ctx->reg_ext_name, ctx->reg_ext);
  1537. decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int);
  1538. strncat(msg, other, L2C_MESSAGE_SIZE);
  1539. if (ctx->reg_int & mask_ue)
  1540. edac_device_handle_ue(l2c->edac_dev, 0, 0, msg);
  1541. else if (ctx->reg_int & mask_ce)
  1542. edac_device_handle_ce(l2c->edac_dev, 0, 0, msg);
  1543. l2c->ring_tail++;
  1544. }
  1545. ret = IRQ_HANDLED;
  1546. err_free:
  1547. kfree(other);
  1548. kfree(msg);
  1549. return ret;
  1550. }
/* One debugfs injection node per L2C flavour (the INT W1S mirror). */
#define L2C_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(l2c, _name, _reg)

L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S);

struct debugfs_entry *l2c_tad_dfs_ents[] = {
	&debugfs_tad_int,
};

L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S);

struct debugfs_entry *l2c_cbc_dfs_ents[] = {
	&debugfs_cbc_int,
};

L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S);

struct debugfs_entry *l2c_mci_dfs_ents[] = {
	&debugfs_mci_int,
};

/* All three L2C flavours are served by the same driver. */
static const struct pci_device_id thunderx_l2c_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), },
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), },
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), },
	{ 0, },
};
/*
 * Probe one L2C error-reporting PCI function (TAD, CBC or MCI variant).
 *
 * Maps BAR0, allocates and registers an EDAC device, wires up the MSI-X
 * error interrupt (hard + threaded handler) and, only once everything is
 * in place, enables the block's error interrupts via the W1S register.
 *
 * Returns 0 on success or a negative errno.
 */
static int thunderx_l2c_probe(struct pci_dev *pdev,
			      const struct pci_device_id *id)
{
	struct thunderx_l2c *l2c;
	struct edac_device_ctl_info *edac_dev;
	struct debugfs_entry **l2c_devattr;
	size_t dfs_entries;
	irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL;
	char name[32];
	const char *fmt;
	u64 reg_en_offs, reg_en_mask;
	int idx;
	int ret;

	ret = pcim_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
		return ret;
	}

	/* BAR0 holds the block's CSRs; managed mapping, released on detach */
	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c");
	if (ret) {
		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
		return ret;
	}

	/*
	 * Pick the per-block hard-IRQ handler, debugfs attribute set, EDAC
	 * name format and the interrupt-enable (W1S) register/mask.
	 */
	switch (pdev->device) {
	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
		thunderx_l2c_isr = thunderx_l2c_tad_isr;
		l2c_devattr = l2c_tad_dfs_ents;
		dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents);
		fmt = "L2C-TAD%d";
		reg_en_offs = L2C_TAD_INT_ENA_W1S;
		reg_en_mask = L2C_TAD_INT_ENA_ALL;
		break;
	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
		thunderx_l2c_isr = thunderx_l2c_cbc_isr;
		l2c_devattr = l2c_cbc_dfs_ents;
		dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents);
		fmt = "L2C-CBC%d";
		reg_en_offs = L2C_CBC_INT_ENA_W1S;
		reg_en_mask = L2C_CBC_INT_ENA_ALL;
		break;
	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
		thunderx_l2c_isr = thunderx_l2c_mci_isr;
		l2c_devattr = l2c_mci_dfs_ents;
		dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents);
		fmt = "L2C-MCI%d";
		reg_en_offs = L2C_MCI_INT_ENA_W1S;
		reg_en_mask = L2C_MCI_INT_ENA_ALL;
		break;
	default:
		/* Unreachable: the id_table only matches the three IDs above */
		dev_err(&pdev->dev, "Unsupported PCI device: %04x\n",
			pdev->device);
		return -EINVAL;
	}

	/* Unique per-instance index, used both in the name and by EDAC core */
	idx = edac_device_alloc_index();
	snprintf(name, sizeof(name), fmt, idx);

	/* pvt_info is carved out of the ctl_info allocation below */
	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
					      name, 1, "L2C", 1, 0,
					      NULL, 0, idx);
	if (!edac_dev) {
		dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
		return -ENOMEM;
	}

	l2c = edac_dev->pvt_info;
	l2c->edac_dev = edac_dev;

	l2c->regs = pcim_iomap_table(pdev)[0];
	if (!l2c->regs) {
		dev_err(&pdev->dev, "Cannot map PCI resources\n");
		ret = -ENODEV;
		goto err_free;
	}

	l2c->pdev = pdev;

	/* Empty error-context ring shared between hard and threaded handler */
	l2c->ring_head = 0;
	l2c->ring_tail = 0;

	l2c->msix_ent.entry = 0;
	l2c->msix_ent.vector = 0;

	/*
	 * NOTE(review): pci_enable_msix_exact() is not undone on the later
	 * error paths (they only free the ctl_info) — looks like a small
	 * leak on failure; confirm whether pcim teardown covers it.
	 */
	ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
		goto err_free;
	}

	/* Managed IRQ: freed automatically when the device is unbound */
	ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector,
					thunderx_l2c_isr,
					thunderx_l2c_threaded_isr,
					0, "[EDAC] ThunderX L2C",
					&l2c->msix_ent);
	if (ret)
		goto err_free;

	edac_dev->dev = &pdev->dev;
	edac_dev->dev_name = dev_name(&pdev->dev);
	edac_dev->mod_name = "thunderx-l2c";
	edac_dev->ctl_name = "thunderx-l2c";

	ret = edac_device_add_device(edac_dev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
		goto err_free;
	}

	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
		l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);

		/* Returns the number of nodes created, or a negative errno */
		ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
						    l2c, dfs_entries);

		if (ret != dfs_entries) {
			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
				 ret, ret >= 0 ? " created" : "");
		}
	}

	pci_set_drvdata(pdev, edac_dev);

	/* Everything is registered — now enable the block's error interrupts */
	writeq(reg_en_mask, l2c->regs + reg_en_offs);

	return 0;

err_free:
	edac_device_free_ctl_info(edac_dev);

	return ret;
}
  1683. static void thunderx_l2c_remove(struct pci_dev *pdev)
  1684. {
  1685. struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
  1686. struct thunderx_l2c *l2c = edac_dev->pvt_info;
  1687. switch (pdev->device) {
  1688. case PCI_DEVICE_ID_THUNDER_L2C_TAD:
  1689. writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C);
  1690. break;
  1691. case PCI_DEVICE_ID_THUNDER_L2C_CBC:
  1692. writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C);
  1693. break;
  1694. case PCI_DEVICE_ID_THUNDER_L2C_MCI:
  1695. writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C);
  1696. break;
  1697. }
  1698. edac_debugfs_remove_recursive(l2c->debugfs);
  1699. edac_device_del_device(&pdev->dev);
  1700. edac_device_free_ctl_info(edac_dev);
  1701. }
MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl);

/* PCI driver covering all three L2C function variants (TAD/CBC/MCI) */
static struct pci_driver thunderx_l2c_driver = {
	.name = "thunderx_l2c_edac",
	.probe = thunderx_l2c_probe,
	.remove = thunderx_l2c_remove,
	.id_table = thunderx_l2c_pci_tbl,
};
  1709. static int __init thunderx_edac_init(void)
  1710. {
  1711. int rc = 0;
  1712. rc = pci_register_driver(&thunderx_lmc_driver);
  1713. if (rc)
  1714. return rc;
  1715. rc = pci_register_driver(&thunderx_ocx_driver);
  1716. if (rc)
  1717. goto err_lmc;
  1718. rc = pci_register_driver(&thunderx_l2c_driver);
  1719. if (rc)
  1720. goto err_ocx;
  1721. return rc;
  1722. err_ocx:
  1723. pci_unregister_driver(&thunderx_ocx_driver);
  1724. err_lmc:
  1725. pci_unregister_driver(&thunderx_lmc_driver);
  1726. return rc;
  1727. }
/* Unregister the sub-drivers in reverse order of their registration. */
static void __exit thunderx_edac_exit(void)
{
	pci_unregister_driver(&thunderx_l2c_driver);
	pci_unregister_driver(&thunderx_ocx_driver);
	pci_unregister_driver(&thunderx_lmc_driver);
}
/* Standard module entry/exit hooks and metadata */
module_init(thunderx_edac_init);
module_exit(thunderx_edac_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Cavium, Inc.");
MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX");