emaildocument.cpp 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. /*
  2. * This file is part of QtEmailFetcher project: Email parser for C++ Qt.
  3. *
  4. * GPLv3+ (c) acetone, 2023
  5. */
  6. #include "emaildocument.h"
  7. #include <QDebug>
  8. #include <QRegularExpression>
  9. EmailDocument::EmailDocument()
  10. {
  11. }
  /// Tracks which (possibly folded) header EmailDocument::parse() is currently collecting.
  enum class ParseState
  {
      subject, ///< Collecting the value of a "Subject:" header and its continuation lines.
      from,    ///< Collecting the value of a "From:" header and its continuation lines.
      other,   ///< Not inside a multi-line header.
  };
  17. void EmailDocument::parse(const QByteArray &data)
  18. {
  19. m_rawData = data;
  20. QTextStream stream(data);
  21. QString line = stream.readLine();
  22. QByteArray buffer;
  23. ParseState state = ParseState::other;
  24. for (; not stream.atEnd(); line = stream.readLine())
  25. {
  26. if (state == ParseState::subject)
  27. {
  28. if (not line.startsWith('\t') and not line.startsWith(' '))
  29. {
  30. m_subject = buffer.trimmed();
  31. while (m_subject.contains(" ")) m_subject.replace(" ", " ");
  32. buffer.clear();
  33. state = ParseState::other;
  34. }
  35. else
  36. {
  37. QByteArray tmp = decodeMimeString(line);
  38. if (tmp.isEmpty())
  39. {
  40. buffer += " " + line.toUtf8().trimmed();
  41. }
  42. else
  43. {
  44. buffer += " " + tmp.trimmed();
  45. }
  46. continue;
  47. }
  48. }
  49. else if (state == ParseState::from)
  50. {
  51. if (not line.startsWith('\t') and not line.startsWith(' '))
  52. {
  53. m_from.address = extractAddress(buffer);
  54. m_from.name = extractName(buffer);
  55. buffer.clear();
  56. state = ParseState::other;
  57. }
  58. else
  59. {
  60. buffer += line.trimmed().toUtf8();
  61. }
  62. }
  63. if (line.isEmpty())
  64. {
  65. parseBody();
  66. return;
  67. }
  68. if (line.startsWith("Return-Path:", Qt::CaseInsensitive))
  69. {
  70. m_returnPath = extractAddress(line.remove(0,11));
  71. }
  72. else if (line.startsWith("From:", Qt::CaseInsensitive))
  73. {
  74. buffer = line.remove(0,5).trimmed().toUtf8();
  75. state = ParseState::from;
  76. }
  77. else if (line.startsWith("To:", Qt::CaseInsensitive))
  78. {
  79. m_to.address = extractAddress(line.remove(0,3));
  80. m_to.name = extractName(line);
  81. }
  82. else if (line.startsWith("Subject:", Qt::CaseInsensitive))
  83. {
  84. buffer = decodeMimeString(line);
  85. if (buffer.isEmpty()) // not encoded or really empty
  86. {
  87. buffer = line.remove(0, 9).toUtf8().trimmed();
  88. }
  89. if (state != ParseState::subject)
  90. {
  91. state = ParseState::subject;
  92. }
  93. }
  94. else if (line.startsWith("Date:"))
  95. {
  96. m_dateTime = decodeTimeString(line.remove(0,5).trimmed());
  97. }
  98. }
  99. }
  100. QByteArray EmailDocument::decodeMimeString(const QString &mimeString)
  101. {
  102. int charset_pos = mimeString.indexOf('?');
  103. if (charset_pos < 0)
  104. {
  105. // qDebug() << __FUNCTION__ << "Missing Q codec separator for charset (first)";
  106. return QByteArray();
  107. }
  108. int method_pos = mimeString.indexOf('?', charset_pos+1);
  109. if (method_pos < 0)
  110. {
  111. // qDebug() << __FUNCTION__ << "Missing Q codec separator for charset (second)";
  112. return QByteArray();
  113. }
  114. QString charset = mimeString.mid(charset_pos + 1, method_pos - charset_pos - 1).toUpper();
  115. if (charset.isEmpty())
  116. {
  117. // qDebug() << __FUNCTION__ << "Missing Q codec charset";
  118. return QByteArray();
  119. }
  120. if (mimeString.size() <= method_pos+1)
  121. {
  122. qDebug() << __FUNCTION__ << "Input string too small";
  123. return QByteArray();
  124. }
  125. QChar encodeType = mimeString.at(method_pos+1).toUpper();
  126. bool base64 = false;
  127. if (encodeType == 'B')
  128. {
  129. base64 = true;
  130. }
  131. else if (encodeType != 'Q')
  132. {
  133. // qDebug() << __FUNCTION__ << "Unsupported encoding type:" << encodeType;
  134. return QByteArray();
  135. }
  136. int content_pos = mimeString.indexOf('?', method_pos + 1);
  137. if (content_pos < 0)
  138. {
  139. // qDebug() << __FUNCTION__ << "Missing Q codec separator for content (first)";
  140. return QByteArray();
  141. }
  142. int content_end_pos = mimeString.indexOf('?', content_pos + 1);
  143. if (content_end_pos < 0)
  144. {
  145. // qDebug() << __FUNCTION__ << "Missing Q codec separator for content (second)";
  146. return QByteArray();
  147. }
  148. QString input = mimeString.mid(content_pos + 1, content_end_pos - content_pos - 1);
  149. input.remove("\r");
  150. input.remove("\n");
  151. if (base64)
  152. {
  153. QByteArray result = QByteArray::fromBase64(input.toUtf8());
  154. QTextStream stream(result);
  155. stream.setCodec(charset.toStdString().c_str());
  156. return stream.readAll().toUtf8();
  157. }
  158. while (input.contains("==")) input.remove("==");
  159. QByteArray decodedText;
  160. for (int i = 0; i < input.length(); i++) {
  161. QChar c = input.at(i);
  162. if (c == '=' and input.length() > i+2)
  163. {
  164. QString hexCode = input.mid(i + 1, 2);
  165. decodedText.push_back(QByteArray::fromHex(hexCode.toUtf8()));
  166. i += 2;
  167. }
  168. else
  169. {
  170. decodedText.push_back(c.toLatin1());
  171. }
  172. }
  173. QTextStream stream(decodedText);
  174. stream.setCodec(charset.toStdString().c_str());
  175. return stream.readAll().toUtf8();
  176. }
  177. QString EmailDocument::extractAddress(const QString &string)
  178. {
  179. if (string.contains('<'))
  180. {
  181. static const QRegularExpression rgxBegin("^.*<");
  182. static const QRegularExpression rgxEnd(">.*$");
  183. return QString(string).remove(rgxBegin).remove(rgxEnd).trimmed();
  184. }
  185. else
  186. {
  187. static const QRegularExpression rgx("^.* ");
  188. return QString(string).remove(rgx);
  189. }
  190. }
  191. QString EmailDocument::extractName(const QString &string)
  192. {
  193. if (not string.contains('<') or not string.contains('>')) return QString();
  194. static const QRegularExpression rgx("<.*$");
  195. QString raw = QString(string).remove(rgx).trimmed();
  196. QString decoded = decodeMimeString(raw);
  197. return decoded.isEmpty() ? raw : decoded;
  198. }
  199. QDateTime EmailDocument::decodeTimeString(const QString &stringRFC822_1123)
  200. {
  201. if (stringRFC822_1123.size() < 25) return QDateTime();
  202. QString timeString (stringRFC822_1123);
  203. timeString.remove(0,4); // "ddd,
  204. int timeShortWordPos = timeString.indexOf('(');
  205. if (timeShortWordPos > 0)
  206. {
  207. timeString.remove(timeShortWordPos, timeString.size()-timeShortWordPos);
  208. }
  209. int plusPos = timeString.indexOf('+');
  210. if (plusPos < 0 or plusPos >= timeString.size()+3) return QDateTime();
  211. int receivedUtcOffset = timeString.mid(plusPos+1).trimmed().remove("0").toInt() * 60 * 60;
  212. timeString.remove(plusPos-1, timeString.size()-plusPos+1);
  213. static const std::map<const char*, const char*> MONTH_CODES {
  214. {"Jan", "01"},
  215. {"Feb", "02"},
  216. {"Mar", "03"},
  217. {"Apr", "04"},
  218. {"May", "05"},
  219. {"Jun", "06"},
  220. {"Jul", "07"},
  221. {"Aug", "08"},
  222. {"Sep", "09"},
  223. {"Oct", "10"},
  224. {"Nov", "11"},
  225. {"Dec", "12"}
  226. };
  227. for (const auto& m: MONTH_CODES)
  228. {
  229. if (timeString.contains(m.first))
  230. {
  231. timeString.replace(m.first, m.second);
  232. break;
  233. }
  234. }
  235. auto result = QDateTime::fromString(timeString.trimmed(), "d MM yyyy hh:mm:ss");
  236. result.setOffsetFromUtc(receivedUtcOffset);
  237. return result;
  238. }
  239. void EmailDocument::parseBody()
  240. {
  241. QSharedPointer<EmailDocumentEntry> entry(new EmailDocumentEntry(&m_content));
  242. entry->parse(m_rawData);
  243. m_content.push_front(entry); // main object to begin
  244. QList<int> toRemove;
  245. int counter = 0;
  246. for (QListIterator<QSharedPointer<EmailDocumentEntry>> iter(m_content); iter.hasNext(); counter++)
  247. {
  248. auto n = iter.next()->contentType();
  249. auto type = n.enumerate;
  250. if (type == EmailDocumentEntry::ContentType::Enum::multipartRelated or
  251. type == EmailDocumentEntry::ContentType::Enum::multipartMixed or
  252. type == EmailDocumentEntry::ContentType::Enum::multipartAlternative or
  253. type == EmailDocumentEntry::ContentType::Enum::multipartOther)
  254. {
  255. toRemove.push_front(counter);
  256. }
  257. }
  258. // Store only text/files, not a abstract structures
  259. for (QListIterator<int> iter(toRemove); iter.hasNext(); )
  260. {
  261. m_content.removeAt(iter.next());
  262. }
  263. }