2 Commits d85275e6d7 ... 0c9b7d46c1

Author SHA1 Message Date
  NanashiNoGombe 0c9b7d46c1 HTML2Dat: implement generator using itest.5ch.net json 5 months ago
  NanashiNoGombe 88ff2e2b91 HTML2Dat: send 5chClassic cookie to avoid redirection to itest 5 months ago
3 changed files with 162 additions and 0 deletions
  1. 10 0
      BBS2chProxyConnection.cpp
  2. 141 0
      BBS2chProxyHTML2Dat.cpp
  3. 11 0
      BBS2chProxyHTML2Dat.h

+ 10 - 0
BBS2chProxyConnection.cpp

@@ -617,6 +617,16 @@ beginHandleRequest:
 				html2dat.setRequestHeaders(requestHeaders);
 				statusCode = datProxy(html2dat, method, requestHeaders);
 			}
+#if 0
+			// this will be used when legacy read.cgi stops working completely
+			else if (threadIdentifier.host.substr(threadIdentifier.hostPrefix.size()) == ".5ch.net") {
+				isHttps = true;
+				log_printf(1, "Retrieving thread via itest json...\n");
+				BBS2chProxyHTML2Dat5chItest html2dat(threadCache, threadIdentifier, curl);
+				html2dat.setRequestHeaders(requestHeaders);
+				statusCode = datProxy(html2dat, method, requestHeaders);
+			}
+#endif
 			else {
 				log_printf(1, "Retrieving thread via read.cgi...\n");
 				BBS2chProxyHTML2Dat5ch html2dat(threadCache, threadIdentifier, force_5chnet_https || isHttps, curl);

+ 141 - 0
BBS2chProxyHTML2Dat.cpp

@@ -109,6 +109,17 @@ BBS2chProxyHTML2DatTalkHTML::BBS2chProxyHTML2DatTalkHTML(BBS2chProxyThreadCache
 	_url += '/';
 }
 
+// Builds the itest.5ch.net JSON API URL for the thread described by identifier
+// (subdomain = hostPrefix, board = board, dat = key).
+// _cachedJson is explicitly set to NULL: generateDatFrom() tests it to decide
+// whether the JSON still has to be fetched, so it must not start out indeterminate.
+BBS2chProxyHTML2Dat5chItest::BBS2chProxyHTML2Dat5chItest(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl)
+	: BBS2chProxyHTML2DatTalk(cache, identifier, curl), _cachedJson(NULL)
+{
+	_url = "https://itest.5ch.net/public/newapi/client.php?subdomain=";
+	_url += identifier.hostPrefix;
+	_url += "&board=";
+	_url += identifier.board;
+	_url += "&dat=";
+	_url += identifier.key;
+}
+
 std::vector<char> IBBS2chProxyHTML2Dat::getHtmlFromURL(const std::string &url, long *outStatusCode)
 {
 	CURLcode res;
@@ -171,6 +182,7 @@ std::string BBS2chProxyHTML2Dat5ch::generateDatFrom(int startFrom, time_t *lastM
 	} else {
 		tmpURL += "1-";
 	}
+	curl_easy_setopt(_curl, CURLOPT_COOKIE, "5chClassic=on");
 	std::vector<char> html = getHtmlFromURL(tmpURL, outStatusCode);
 	return html2dat(html, startFrom, lastModifiedOut, useCache);
 }
@@ -211,6 +223,18 @@ std::string BBS2chProxyHTML2DatTalkHTML::generateDatFrom(int startFrom, time_t *
 	return json2dat(threadData, startFrom, lastModifiedOut, useCache);
 }
 
+// Produces dat text for responses numbered startFrom and later.
+// The JSON document is downloaded from _url and parsed only while _cachedJson
+// is still unset; once parsed, the same tree is reused by subsequent calls.
+// Returns "" when the download yields no data or the payload cannot be parsed.
+std::string BBS2chProxyHTML2Dat5chItest::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
+{
+	if (!_cachedJson) {
+		std::vector<char> response = getHtmlFromURL(_url, outStatusCode);
+		if (response.empty()) {
+			return "";
+		}
+		// json_parse_string() expects a NUL-terminated C string
+		response.push_back('\0');
+		_cachedJson = json_parse_string(&response[0]);
+		if (!_cachedJson) {
+			return "";
+		}
+	}
+	return json2dat(_cachedJson, startFrom, lastModifiedOut, useCache);
+}
+
 std::string BBS2chProxyHTML2Dat5ch::html2dat_old(std::vector<char> &html, int startResNum, time_t *lastModified, bool useCache)
 {
 	char *ptr = &html.front();
@@ -1139,3 +1163,120 @@ std::string BBS2chProxyHTML2DatTalk::json2dat(JSON_Value *json, int startFrom, t
 	if (lastModifiedOut) *lastModifiedOut = lastModified;
 	return out;
 }
+
+// Converts a parsed itest.5ch.net JSON response into dat-formatted lines.
+//   json            parsed response; must be an object holding "thread" and "comments" arrays
+//   startFrom       first response number to emit (values below 1 are treated as 1)
+//   lastModifiedOut if non-NULL, receives element 0 of the "thread" array
+//   useCache        when true, the generated line numbered startFrom must equal the line
+//                   stored in the thread cache; otherwise "" is returned
+// Each comment array is read as [number, name, mail, date, id, be, body].
+// Returns the dat text, or "" when the JSON is unusable or the cache check fails.
+std::string BBS2chProxyHTML2Dat5chItest::json2dat(JSON_Value *json, int startFrom, time_t *lastModifiedOut, bool useCache)
+{
+	std::string out;
+	if (!json || json_type(json) != JSONObject) {
+		return "";
+	}
+	JSON_Object *root = json_object(json);
+	JSON_Array *threadMeta = json_object_get_array(root, "thread");
+	time_t lastModified = json_array_get_number(threadMeta, 0);
+	// may be NULL when the "thread" array is missing or short; checked before use below
+	const char *boardAndKey = json_array_get_string(threadMeta, 3);
+	const char *title = json_array_get_string(threadMeta, 5);
+	JSON_Array *comments = json_object_get_array(root, "comments");
+	if (!title || !*title || !comments) {
+		return "";
+	}
+	if (startFrom < 1) startFrom = 1;
+	int prevNumber = startFrom - 1;
+	size_t cachedSize = 0;
+	std::string lastLine;
+	for (size_t i=0, length=json_array_get_count(comments); i<length; i++) {
+		JSON_Array *comment = json_array_get_array(comments, i);
+		if (!comment) continue;
+		int number = json_array_get_number(comment, 0);
+		if (number < startFrom) continue;
+		const char *name = json_array_get_string(comment, 1);
+		const char *mail = json_array_get_string(comment, 2);
+		const char *date = json_array_get_string(comment, 3);
+		const char *id = json_array_get_string(comment, 4);
+		const char *be = json_array_get_string(comment, 5);
+		const char *body = json_array_get_string(comment, 6);
+		// pad missing response numbers so that line N of the dat is always response N
+		for (int j=prevNumber+1; j<number; j++) {
+			out += "broken<><>broken<> broken <>\n";
+		}
+		std::stringstream line;
+		if (name) line << name;
+		else line << "削除";
+		line << "<>";
+		if (mail) line << mail;
+		else line << "削除";
+		line << "<>";
+		if (date) {
+			line << date;
+			if (id && *id) {
+				line << " ID:" << id;
+			}
+			if (be && *be) {
+				line << " BE:" << be;
+			}
+		}
+		else line << "削除";
+		line << "<>";
+		if (body) {
+			// wrap every "&gt;&gt;<number>" reference in a read.cgi anchor
+			const char *ptr = strstr(body, "&gt;&gt;");
+			const char *start = body;
+			while (ptr) {
+				const char *tmp = ptr;
+				char *numEnd = NULL;
+				unsigned int num = strtoul(ptr+8, &numEnd, 10);
+				ptr = numEnd;
+				// streaming a NULL char pointer is undefined, so only build the
+				// anchor when the board/key path is available; otherwise the
+				// reference text is emitted unchanged
+				if (num > 0 && boardAndKey) {
+					if (tmp != start) line << std::string(start, tmp-start);
+					line << "<a href=\"../test/read.cgi/" << boardAndKey << "/" << num << "\" rel=\"noopener noreferrer\" target=\"_blank\">";
+					line << std::string(tmp, ptr-tmp);
+					line << "</a>";
+				}
+				else line << std::string(start, ptr-start);
+				start = ptr;
+				ptr = strstr(start, "&gt;&gt;");
+			}
+			line << start;
+		}
+		else line << "削除";
+		line << "<>";
+		if (number == 1) {
+			line << title;
+		}
+		line << "\n";
+		prevNumber = number;
+		// materialize the stream once instead of copying it for both arguments
+		const std::string lineUTF8 = line.str();
+		char *lineSJIS = convertUTF8ToShiftJISWithNCR(lineUTF8.c_str(), lineUTF8.size());
+		if (lineSJIS) {
+			lastLine = lineSJIS;
+			out += lastLine;
+			free(lineSJIS);
+		} else {
+			lastLine = "broken<><>broken<> broken <>\n";
+			out += lastLine;
+		}
+		if (useCache && startFrom == number) {
+			// the first emitted line must be identical to the cached last line,
+			// otherwise the caller cannot safely append to what it already has
+			PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
+			bool hit = false;
+			if (info) {
+				log_printf(5, "cache hit");
+				if (info->cachedData.size() == lastLine.size()) {
+					log_printf(5, "... size match");
+					if (info->cachedData == lastLine) {
+						log_printf(5, "... content match");
+						hit = true;
+						cachedSize = info->cachedSize - lastLine.size();
+					}
+				}
+				log_printf(5, "\n");
+			}
+			if (!hit) {
+				return "";
+			}
+		}
+	}
+	if (!lastLine.empty()) {
+		PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
+		info->lastResNum = prevNumber;
+		info->cachedSize = out.size() + cachedSize;
+		info->cachedData = lastLine;
+		_threadCache->set(_threadKey, info);
+		// size() is size_t; cast so the argument matches the %ld conversion
+		log_printf(5, "cached thread %s (%ld bytes)\n", _threadKey.c_str(), static_cast<long>(lastLine.size()));
+	}
+	if (lastModifiedOut) *lastModifiedOut = lastModified;
+	return out;
+}

+ 11 - 0
BBS2chProxyHTML2Dat.h

@@ -61,3 +61,14 @@ public:
 	virtual ~BBS2chProxyHTML2DatTalkHTML() {};
 	virtual std::string generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode);
 };
+
+// Dat generator that retrieves a thread through the itest.5ch.net JSON API.
+class BBS2chProxyHTML2Dat5chItest : public BBS2chProxyHTML2DatTalk {
+protected:
+	// Parsed JSON response; generateDatFrom() fills it on first use and reuses it afterwards.
+	// NOTE(review): the destructor below is empty, so this value is not released here -- confirm ownership.
+	JSON_Value *_cachedJson;
+public:
+	BBS2chProxyHTML2Dat5chItest(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl);
+	virtual ~BBS2chProxyHTML2Dat5chItest() {};
+	// Fetches the JSON if it is not cached yet and returns dat text starting at startFrom.
+	virtual std::string generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode);
+protected:
+	// Converts the parsed JSON document into dat lines.
+	std::string json2dat(JSON_Value *json, int startFrom, time_t *lastModifiedOut, bool useCache);
+};