CsvIngestion.scala 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. /*
  2. * Copyright (C) 2020 Prasoon Joshi
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. */
  17. package ingestion
  18. import com.github.tototoshi.csv._
  19. import models.MilliEntity
  20. import utils.FileUtils
  21. import models._
  22. import graphql.SchemaInputObjects._
  23. import graphql.SchemaInputObjects.ValidField._
  /**
    * Entry point for converting a partner CSV file into `MilliEntity` records.
    */
  object CsvIngestion {

    /**
      * Reads a CSV file whose first row is a header and converts every data row
      * into a `MilliEntity` through the implicit `FlatRecordToEntity[MilliEntity]`
      * instance.
      *
      * @param filePath path of the CSV file to open
      * @param cw       crosswalk passed unchanged to the record converter
      * @param partner  partner passed unchanged to the record converter
      * @return one entity per row returned by `allWithHeaders()`
      */
    def ingestCsvWithFieldMap(filePath: String = "conf/partner-data/sample.csv",
                              cw: Crosswalk,
                              partner: Partner): List[MilliEntity] = {
      val csvReader = CSVReader.open(filePath)
      try {
        // Both allWithHeaders() and List.map are strict, so every row has been
        // read and converted before the reader is closed below.
        csvReader
          .allWithHeaders()
          .map(fm => FlatRecordToEntity[MilliEntity].map(fm, cw, partner))
      } finally {
        // Release the underlying file handle even when a row fails to convert.
        csvReader.close()
      }
    }
  }
  /**
    * Describes how a flat record is turned into an entity of type `E`.
    *
    * A `FlatRecord` is just a `Map[String, String]`.
    *
    * @tparam E the entity type produced by the conversion
    */
  trait FlatRecordToEntity[E <: NiosxEntity] {

    /**
      * Builds an `E` from a single flat record.
      *
      * @param r       the flat record to convert
      * @param cw      the crosswalk made available to the conversion
      * @param partner the partner made available to the conversion
      * @return the resulting entity
      */
    def map(r: FlatRecord, cw: Crosswalk, partner: Partner): E
  }
  /**
    * Companion of the `FlatRecordToEntity` trait: implicit lookup, an instance
    * constructor, crosswalk lookup helpers and the implicit instance for
    * `MilliEntity`.
    */
  object FlatRecordToEntity {

    /** Returns the implicit `FlatRecordToEntity[E]` found in scope. */
    def apply[E <: NiosxEntity](implicit c: FlatRecordToEntity[E]): FlatRecordToEntity[E] = c

    /**
      * Wraps a plain function as a `FlatRecordToEntity[E]`.
      *
      * @param f function invoked with the record, crosswalk and partner
      */
    def instance[E <: NiosxEntity](
        f: (FlatRecord, Crosswalk, Partner) => E): FlatRecordToEntity[E] =
      new FlatRecordToEntity[E] {
        def map(r: FlatRecord, cw: Crosswalk, partner: Partner): E =
          f(r, cw, partner)
      }

    /**
      * Collects the record values for every crosswalk mapper whose `field`
      * equals `field`.
      *
      * @param field the field to look up in the crosswalk
      * @param r     the flat record to read from
      * @param cw    the crosswalk to search
      * @return one value per matching mapper: the record's value under the
      *         mapper's `columnName`, or the mapper's `default` when the record
      *         has no such key; empty when no mapper matches
      */
    def getFieldValue(
        field: ValidField.Value,
        r: FlatRecord,
        cw: Crosswalk): List[String] = {
      val mappers: List[FlatRecordMapper] = cw.filter(_.field == field)
      mappers.map(m => r.getOrElse(m.columnName, m.default))
    }

    /**
      * Looks up the value for the first crosswalk mapper whose `field` equals
      * `field`.
      *
      * @param field the field to look up in the crosswalk
      * @param r     the flat record to read from
      * @param cw    the crosswalk to search
      * @return `None` when no mapper matches; otherwise the record's value under
      *         the mapper's `columnName`, or the mapper's `default` when the
      *         record has no such key
      */
    def getOptionalValue(
        field: AllowedField,
        r: FlatRecord,
        cw: Crosswalk): Option[String] = {
      //TODO: Fix this to accept default values as well.
      val mapper: Option[FlatRecordMapper] = cw.find(_.field == field)
      mapper.map(m => r.getOrElse(m.columnName, m.default))
    }

    /**
      * Value of a mandatory field: the last entry returned by `getFieldValue`.
      *
      * Throws `NoSuchElementException`, the same exception type an unguarded
      * `.last` would raise, with a message naming the field when the crosswalk
      * contains no mapper for it.
      */
    private def requiredValue(
        field: ValidField.Value,
        r: FlatRecord,
        cw: Crosswalk): String =
      getFieldValue(field, r, cw).lastOption.getOrElse(
        throw new NoSuchElementException(
          s"Crosswalk has no mapping for required field '$field'"))

    /**
      * Implicit conversion of a flat record into a `MilliEntity`.
      *
      * Mandatory fields use the last matching crosswalk mapper, optional fields
      * use the first matching mapper, and `images` / `subjects` get one element
      * per matching mapper.
      */
    implicit val me: FlatRecordToEntity[MilliEntity] =
      instance[MilliEntity] { (r, cw, partner) =>
        new MilliEntity(
          graphId = FileUtils.getNewGraphId,
          agencyCode = requiredValue(ValidField.ME_agencyCode, r, cw),
          recordId = requiredValue(ValidField.ME_recordId, r, cw),
          unitId = requiredValue(ValidField.ME_unitId, r, cw),
          title = requiredValue(ValidField.ME_title, r, cw),
          creator = requiredValue(ValidField.ME_creator, r, cw),
          dateOfCreation = requiredValue(ValidField.ME_dateOfCreation, r, cw),
          extent = requiredValue(ValidField.ME_extent, r, cw),
          level = requiredValue(ValidField.ME_level, r, cw),
          partner = partner,
          description = getOptionalValue(ME_description, r, cw),
          location = getOptionalValue(ME_location, r, cw),
          accessRestrict = getOptionalValue(ME_accessRestrict, r, cw),
          useRestrict = getOptionalValue(ME_useRestrict, r, cw),
          language = getOptionalValue(ME_language, r, cw),
          unitType = getOptionalValue(ME_unitType, r, cw),
          format = getOptionalValue(ME_format, r, cw),
          images =
            getFieldValue(ValidField.ME_image, r, cw)
              .map { src: String =>
                Image(
                  graphId = FileUtils.getNewGraphId,
                  src = src,
                  size = ImageSize.MEDIUM)
              },
          subjects =
            getFieldValue(ValidField.ME_subject, r, cw)
              .map { subject: String =>
                // The same freshly generated id is used for the node and its URI.
                val graphId = FileUtils.getNewGraphId
                Subject(
                  graphId = graphId,
                  prefLabel = subject,
                  inScheme = "https://api.milli.link/terms",
                  id = s"https://api.milli.link/authority/$graphId"
                )
              }
        )
      }
  }
  109. //TODO: Include these in the MilliEntity model
  110. // images: [Image!]